""" Secret redactor: masks sensitive patterns in rendered text. Scans for common secret patterns and replaces them with [MASQUÉ] before content is served to the frontend. Prevents accidental exposure of API keys, tokens, and passwords in previews. Patterns detected: - Generic API keys (long alphanumeric strings with key/secret/token prefix) - JWT tokens (eyJ... base64url) - AWS-style keys (AKIA..., sk-..., etc.) - Private key blocks (-----BEGIN ... PRIVATE KEY-----) - Connection strings with passwords """ import re import logging logger = logging.getLogger("obsigate.redactor") # --- Patterns --- # Order matters: more specific patterns first _PATTERNS = [ # Private key blocks (re.compile(r'-----BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----.*?-----END (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----', re.DOTALL), '[CLÉ PRIVÉE MASQUÉE]'), # JWT tokens (base64url encoded, starts with eyJ) (re.compile(r'eyJ[a-zA-Z0-9_-]{20,}\.[a-zA-Z0-9_-]{20,}\.[a-zA-Z0-9_-]{20,}'), '[JWT MASQUÉ]'), # Connection strings with passwords (re.compile(r'(?:mongodb|mysql|postgres(?:ql)?|redis|sqlite)://[^:]+:[^@\s]+@'), '[CONNECTION_STRING MASQUÉE]'), # Generic API key patterns: key=... or token=... or secret=... (re.compile(r'(?:api[_-]?key|apikey|secret|token|password|passwd|auth[_-]?token)\s*[:=]\s*[\'"]?([^\s\'"]{20,})[\'"]?', re.IGNORECASE), lambda m: f'{m.group(0).split("=")[0].split(":")[0]}=[MASQUÉ]' if "=" in m.group(0) or ":" in m.group(0) else '[MASQUÉ]'), # Generic long hex/base64 strings that look like secrets (40+ chars) (re.compile(r'(?:sk|pk|rk)-[a-zA-Z0-9]{20,}'), '[CLÉ API MASQUÉE]'), # AWS access keys (re.compile(r'AKIA[0-9A-Z]{16}'), '[AWS_KEY MASQUÉ]'), # GitHub tokens (ghp_, gho_, ghu_, ghs_, ghr_) (re.compile(r'gh[pousr]_[a-zA-Z0-9]{36,}'), '[GITHUB_TOKEN MASQUÉ]'), # Generic long random-looking strings (40+ hex chars) (re.compile(r'\b[a-fA-F0-9]{40,64}\b'), '[HEX_KEY MASQUÉ]'), ] def redact(text: str) -> tuple: """Redact sensitive patterns from text. Args: text: The raw text content to scan. Returns: (redacted_text, redaction_count) tuple. """ count = 0 result = text for pattern, replacement in _PATTERNS: if callable(replacement): new_result, n = pattern.subn(replacement, result) else: new_result, n = pattern.subn(str(replacement), result) count += n result = new_result if count > 0: logger.info(f"Redacted {count} secret(s) from content") return result, count def redact_file_content(content: str, file_path: str = "") -> str: """Redact a file's content for preview rendering. Args: content: Raw file content. file_path: Optional file path for logging context. Returns: Redacted content string. """ redacted, count = redact(content) if count > 0: logger.warning(f"Redacted {count} potential secret(s) from {file_path or ''}") return redacted