diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml new file mode 100644 index 0000000..c35f0ab --- /dev/null +++ b/.gitea/workflows/ci.yml @@ -0,0 +1,94 @@ +# ObsiGate CI/CD Pipeline +# Runs on every push and pull request to main + +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + # ── Lint ────────────────────────────────────────────────────────── + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install dependencies + run: | + pip install ruff mypy + pip install -r backend/requirements.txt + + - name: Ruff (linter) + run: ruff check backend/ + + - name: Mypy (type checker) + run: mypy backend/ --ignore-missing-imports + + # ── Tests ───────────────────────────────────────────────────────── + test: + needs: lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install dependencies + run: | + pip install pytest pytest-cov pytest-asyncio httpx + pip install -r backend/requirements.txt + + - name: Run tests + run: pytest tests/ --cov=backend --cov-report=xml --cov-report=term -q + + - name: Upload coverage + uses: actions/upload-artifact@v4 + with: + name: coverage-report + path: coverage.xml + + # ── Security scan ───────────────────────────────────────────────── + security: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install dependencies + run: | + pip install bandit pip-audit + pip install -r backend/requirements.txt + + - name: Bandit (SAST) + run: bandit -r backend/ -c pyproject.toml 2>/dev/null || bandit -r backend/ --skip B101 + + - name: Pip-audit (dependency vulnerabilities) + run: pip-audit + + # ── Docker build ────────────────────────────────────────────────── + build: + needs: test + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Build Docker image + run: docker build -t obsigate:ci . + + - name: Verify image + run: docker images obsigate:ci diff --git a/backend/requirements.txt b/backend/requirements.txt index c51f4b5..fcc8362 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -8,4 +8,5 @@ aiohttp>=3.9.0 watchdog>=4.0.0 argon2-cffi>=23.1.0 python-jose>=3.3.0 +sortedcontainers>=2.4.0 weasyprint>=60.0 diff --git a/backend/search.py b/backend/search.py index ff76c07..16b9d3a 100644 --- a/backend/search.py +++ b/backend/search.py @@ -1,4 +1,4 @@ -import bisect +from sortedcontainers import SortedList import logging import math import re @@ -325,7 +325,7 @@ class InvertedIndex: self.doc_vault: Dict[str, str] = {} self.vault_docs: Dict[str, set] = defaultdict(set) self.tag_docs: Dict[str, set] = defaultdict(set) - self._sorted_tokens: List[str] = [] + self._sorted_tokens: "SortedList" = SortedList() self._ready: bool = False # True after initial build def rebuild(self) -> None: @@ -394,7 +394,7 @@ class InvertedIndex: if tag not in self.tag_prefix_index[prefix]: self.tag_prefix_index[prefix].append(tag) - self._sorted_tokens = sorted(self.word_index.keys()) + self._sorted_tokens = SortedList(self.word_index.keys()) self._ready = True logger.info( "Inverted index built: %d documents, %d unique tokens, %d tags", @@ -448,7 +448,7 @@ class InvertedIndex: tf[token] += 1 for token, freq in tf.items(): if not self.word_index.get(token): - bisect.insort(self._sorted_tokens, token) + self._sorted_tokens.add(token) self.word_index[token][doc_key] = freq def remove_document(self, vault_name: str, path: str): @@ -510,9 +510,7 @@ class InvertedIndex: if not wi: del self.word_index[token] if not skip_sorted_cleanup: - idx = bisect.bisect_left(self._sorted_tokens, token) - if idx < len(self._sorted_tokens) and self._sorted_tokens[idx] == token: - self._sorted_tokens.pop(idx) + self._sorted_tokens.discard(token) def idf(self, term: str) -> float: """Inverse Document Frequency for a term. @@ -563,7 +561,7 @@ class InvertedIndex: """ if not prefix or not self._sorted_tokens: return [] - lo = bisect.bisect_left(self._sorted_tokens, prefix) + lo = self._sorted_tokens.bisect_left(prefix) results: List[str] = [] for i in range(lo, len(self._sorted_tokens)): if self._sorted_tokens[i].startswith(prefix):