ObsiGate/backend/secret_redactor.py
Bruno Charest 1a14927f36
All checks were successful
CI / lint (push) Successful in 11s
CI / security (push) Successful in 7s
CI / test (push) Successful in 13s
CI / build (push) Successful in 1s
fix: resolve all 28 mypy type errors + re-enable coverage in CI
2026-05-28 12:57:30 -04:00

88 lines
3.1 KiB
Python

"""
Secret redactor: masks sensitive patterns in rendered text.
Scans for common secret patterns and replaces them with [MASQUÉ]
before content is served to the frontend. Prevents accidental
exposure of API keys, tokens, and passwords in previews.
Patterns detected:
- Generic API keys (long alphanumeric strings with key/secret/token prefix)
- JWT tokens (eyJ... base64url)
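- AWS access key IDs (AKIA...) and prefixed API keys (sk-..., pk-..., rk-...)
- GitHub tokens (ghp_, gho_, ghu_, ghs_, ghr_)
- Long hexadecimal strings (40+ hex digits)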
- Private key blocks (-----BEGIN ... PRIVATE KEY-----)
- Connection strings with passwords
"""
import re
import logging
logger = logging.getLogger("obsigate.redactor")

# --- Patterns ---
# Each entry is a (compiled regex, replacement) pair; the replacement is either
# a literal placeholder string or a callable that receives the match object.
# Order matters: entries are applied one after another, more specific first.
_PATTERNS = [
    # Private key blocks (PEM armor). DOTALL lets ".*?" span the key body lines.
    (
        re.compile(
            r'-----BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----'
            r'.*?'
            r'-----END (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----',
            re.DOTALL,
        ),
        '[CLÉ PRIVÉE MASQUÉE]',
    ),
    # JWT tokens (three base64url segments, the first one starting with eyJ)
    (
        re.compile(r'eyJ[a-zA-Z0-9_-]{20,}\.[a-zA-Z0-9_-]{20,}\.[a-zA-Z0-9_-]{20,}'),
        '[JWT MASQUÉ]',
    ),
    # Connection strings with passwords: scheme://user:password@
    # - optional "+driver" suffix covers mongodb+srv://, postgresql+psycopg2://
    # - the user part may be empty (redis://:password@host) and must not
    #   contain whitespace, "/" or "@", so a match can never run across lines
    #   or past the host into unrelated text.
    (
        re.compile(
            r'(?:mongodb|mysql|postgres(?:ql)?|rediss?|sqlite)(?:\+[\w.-]+)?'
            r'://[^:@/\s]*:[^@\s]+@'
        ),
        '[CONNECTION_STRING MASQUÉE]',
    ),
    # Generic assignments: key=... / token: ... / "secret": "..."
    # "label" captures the key name, an optional closing quote (JSON-style
    # quoted keys) and any whitespace before the separator; it is kept in the
    # output while the separator and the value (20+ chars) become "=[MASQUÉ]".
    (
        re.compile(
            r'(?P<label>(?:api[_-]?key|apikey|secret|token|password|passwd|auth[_-]?token)[\'"]?\s*)'
            r'[:=]\s*[\'"]?[^\s\'"]{20,}[\'"]?',
            re.IGNORECASE,
        ),
        lambda m: f'{m.group("label")}=[MASQUÉ]',
    ),
    # Prefixed API keys (sk-..., pk-..., rk-...). The lookbehind stops the
    # prefix from matching inside ordinary words such as "risk-..."; "-" and
    # "_" are allowed in the body so keys like sk-proj-... are fully masked.
    (
        re.compile(r'(?<![a-zA-Z0-9])(?:sk|pk|rk)-[a-zA-Z0-9_-]{20,}'),
        '[CLÉ API MASQUÉE]',
    ),
    # AWS access keys
    (re.compile(r'AKIA[0-9A-Z]{16}'), '[AWS_KEY MASQUÉ]'),
    # GitHub tokens (ghp_, gho_, ghu_, ghs_, ghr_)
    (re.compile(r'gh[pousr]_[a-zA-Z0-9]{36,}'), '[GITHUB_TOKEN MASQUÉ]'),
    # Generic long random-looking strings (40+ hex chars). No upper bound:
    # combined with \b, a capped length would let longer hex secrets through.
    (re.compile(r'\b[a-fA-F0-9]{40,}\b'), '[HEX_KEY MASQUÉ]'),
]
def redact(text: str) -> tuple:
    """Mask every known secret pattern found in ``text``.

    The entries of ``_PATTERNS`` are applied one after another, each on the
    output of the previous one, and the substitutions are tallied.

    Args:
        text: The raw text content to scan.

    Returns:
        (redacted_text, redaction_count) tuple.
    """
    redacted = text
    total = 0
    for regex, substitute in _PATTERNS:
        # Two separate calls on purpose: one branch passes the callable
        # through untouched, the other coerces the placeholder to ``str``.
        if not callable(substitute):
            redacted, hits = regex.subn(str(substitute), redacted)
        else:
            redacted, hits = regex.subn(substitute, redacted)
        total += hits
    if total:
        logger.info(f"Redacted {total} secret(s) from content")
    return redacted, total
def redact_file_content(content: str, file_path: str = "") -> str:
    """Return ``content`` with secrets masked, for preview rendering.

    Delegates to ``redact`` and, when at least one substitution was made,
    emits a warning naming the file (or ``<unknown>`` if no path was given).

    Args:
        content: Raw file content.
        file_path: Optional file path for logging context.

    Returns:
        Redacted content string.
    """
    cleaned, hits = redact(content)
    if hits <= 0:
        # Nothing was masked: no warning to emit.
        return cleaned
    logger.warning(f"Redacted {hits} potential secret(s) from {file_path or '<unknown>'}")
    return cleaned