feat: CI/CD pipeline + sortedcontainers for O(log n) index ops
CI/CD (.gitea/workflows/ci.yml):
- Lint: ruff + mypy on every push/PR to main
- Test: pytest with coverage report (175 tests)
- Security: bandit SAST + pip-audit dependency scan
- Build: Docker image verification

sortedcontainers (backend/search.py):
- Replace bisect with SortedList for _sorted_tokens
- O(log n) add() / discard() instead of O(n) insort/pop
- SortedList.bisect_left() for prefix search
- Add sortedcontainers>=2.4.0 to requirements.txt
This commit is contained in:
parent
8d1b766947
commit
7b2da1ff6a
94
.gitea/workflows/ci.yml
Normal file
94
.gitea/workflows/ci.yml
Normal file
@@ -0,0 +1,94 @@
|
||||
# ObsiGate CI/CD Pipeline
|
||||
# Runs on every push and pull request to main
|
||||
|
||||
name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
branches: [main]
|
||||
|
||||
jobs:
|
||||
# ── Lint ──────────────────────────────────────────────────────────
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install ruff mypy
|
||||
pip install -r backend/requirements.txt
|
||||
|
||||
- name: Ruff (linter)
|
||||
run: ruff check backend/
|
||||
|
||||
- name: Mypy (type checker)
|
||||
run: mypy backend/ --ignore-missing-imports
|
||||
|
||||
# ── Tests ─────────────────────────────────────────────────────────
|
||||
test:
|
||||
needs: lint
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install pytest pytest-cov pytest-asyncio httpx
|
||||
pip install -r backend/requirements.txt
|
||||
|
||||
- name: Run tests
|
||||
run: pytest tests/ --cov=backend --cov-report=xml --cov-report=term -q
|
||||
|
||||
- name: Upload coverage
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: coverage-report
|
||||
path: coverage.xml
|
||||
|
||||
# ── Security scan ─────────────────────────────────────────────────
|
||||
security:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install bandit pip-audit
|
||||
pip install -r backend/requirements.txt
|
||||
|
||||
- name: Bandit (SAST)
|
||||
run: bandit -r backend/ -c pyproject.toml 2>/dev/null || bandit -r backend/ --skip B101
|
||||
|
||||
- name: Pip-audit (dependency vulnerabilities)
|
||||
run: pip-audit
|
||||
|
||||
# ── Docker build ──────────────────────────────────────────────────
|
||||
build:
|
||||
needs: test
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Build Docker image
|
||||
run: docker build -t obsigate:ci .
|
||||
|
||||
- name: Verify image
|
||||
run: docker images obsigate:ci
|
||||
@@ -8,4 +8,5 @@ aiohttp>=3.9.0
|
||||
watchdog>=4.0.0
|
||||
argon2-cffi>=23.1.0
|
||||
python-jose>=3.3.0
|
||||
sortedcontainers>=2.4.0
|
||||
weasyprint>=60.0
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import bisect
|
||||
from sortedcontainers import SortedList
|
||||
import logging
|
||||
import math
|
||||
import re
|
||||
@@ -325,7 +325,7 @@ class InvertedIndex:
|
||||
self.doc_vault: Dict[str, str] = {}
|
||||
self.vault_docs: Dict[str, set] = defaultdict(set)
|
||||
self.tag_docs: Dict[str, set] = defaultdict(set)
|
||||
self._sorted_tokens: List[str] = []
|
||||
self._sorted_tokens: "SortedList" = SortedList()
|
||||
self._ready: bool = False # True after initial build
|
||||
|
||||
def rebuild(self) -> None:
|
||||
@@ -394,7 +394,7 @@ class InvertedIndex:
|
||||
if tag not in self.tag_prefix_index[prefix]:
|
||||
self.tag_prefix_index[prefix].append(tag)
|
||||
|
||||
self._sorted_tokens = sorted(self.word_index.keys())
|
||||
self._sorted_tokens = SortedList(self.word_index.keys())
|
||||
self._ready = True
|
||||
logger.info(
|
||||
"Inverted index built: %d documents, %d unique tokens, %d tags",
|
||||
@@ -448,7 +448,7 @@ class InvertedIndex:
|
||||
tf[token] += 1
|
||||
for token, freq in tf.items():
|
||||
if not self.word_index.get(token):
|
||||
bisect.insort(self._sorted_tokens, token)
|
||||
self._sorted_tokens.add(token)
|
||||
self.word_index[token][doc_key] = freq
|
||||
|
||||
def remove_document(self, vault_name: str, path: str):
|
||||
@@ -510,9 +510,7 @@ class InvertedIndex:
|
||||
if not wi:
|
||||
del self.word_index[token]
|
||||
if not skip_sorted_cleanup:
|
||||
idx = bisect.bisect_left(self._sorted_tokens, token)
|
||||
if idx < len(self._sorted_tokens) and self._sorted_tokens[idx] == token:
|
||||
self._sorted_tokens.pop(idx)
|
||||
self._sorted_tokens.discard(token)
|
||||
|
||||
def idf(self, term: str) -> float:
|
||||
"""Inverse Document Frequency for a term.
|
||||
@@ -563,7 +561,7 @@ class InvertedIndex:
|
||||
"""
|
||||
if not prefix or not self._sorted_tokens:
|
||||
return []
|
||||
lo = bisect.bisect_left(self._sorted_tokens, prefix)
|
||||
lo = self._sorted_tokens.bisect_left(prefix)
|
||||
results: List[str] = []
|
||||
for i in range(lo, len(self._sorted_tokens)):
|
||||
if self._sorted_tokens[i].startswith(prefix):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user