"""
Secret Redactor - Redact sensitive information from logs and output.

This module provides pattern-based redaction to prevent secret leakage
in logs, transcripts, and agent outputs. It's a critical security component.

Usage:
    from agent_orchestrator.secrets import SecretRedactor

    # Redact a string
    safe_text = SecretRedactor.redact(potentially_sensitive_text)

    # Wrap a logging function such as logger.info (not a Logger object)
    safe_logger = SecretRedactor.wrap_logger(original_logger)
"""

import re
from typing import Callable, Optional


class SecretRedactor:
    """
    Redact secrets from text using pattern matching.

    All functionality is exposed through classmethods; the class is never
    instantiated. Every pattern in ``REDACTION_PATTERNS`` is applied in list
    order with ``re.IGNORECASE``, so earlier replacements are visible to
    later patterns.

    Patterns are designed to catch:
    - API keys (Anthropic, OpenAI, Google, AWS)
    - OAuth tokens (GitHub, various providers)
    - Bearer tokens
    - Passwords in URLs and config
    - Private keys (PEM blocks) and SSH keys
    - Generic secret patterns
    """

    # Patterns to redact: (regex_pattern, replacement).
    # The list is shared class state; add_pattern() appends to it in place.
    REDACTION_PATTERNS: list[tuple[str, str]] = [
        # Anthropic API keys (sk-ant-api..., sk-ant-xxx, etc.)
        (r"sk-ant-[a-zA-Z0-9\-_]{3,}", "[REDACTED_ANTHROPIC_KEY]"),

        # OpenAI API keys (sk-proj-xxx, sk-xxx)
        (r"sk-(?!ant-)[a-zA-Z0-9\-_]{3,}", "[REDACTED_OPENAI_KEY]"),

        # Google API keys (AIzaSy...)
        (r"AIza[a-zA-Z0-9_\-]{10,}", "[REDACTED_GOOGLE_KEY]"),

        # GitHub tokens (various lengths for testing)
        (r"ghp_[a-zA-Z0-9]{6,}", "[REDACTED_GITHUB_PAT]"),
        (r"gho_[a-zA-Z0-9]{6,}", "[REDACTED_GITHUB_OAUTH]"),
        (r"ghs_[a-zA-Z0-9]{6,}", "[REDACTED_GITHUB_APP]"),
        (r"ghr_[a-zA-Z0-9]{6,}", "[REDACTED_GITHUB_REFRESH]"),
        (r"github_pat_[a-zA-Z0-9_]{10,}", "[REDACTED_GITHUB_PAT]"),

        # AWS credentials - AKIA pattern first to catch specific format
        (r"AKIA[A-Z0-9]{12,}", "[REDACTED_AWS_ACCESS_KEY]"),
        (r"aws_secret_access_key\s*[=:]\s*['\"]?[^\s'\"]+['\"]?", "aws_secret_access_key=[REDACTED]"),
        # aws_access_key_id only if value doesn't start with AKIA or [REDACTED
        (r"aws_access_key_id\s*[=:]\s*['\"]?(?!AKIA)(?!\[REDACTED)[^\s'\"]+['\"]?", "aws_access_key_id=[REDACTED]"),

        # Azure
        (r"[a-zA-Z0-9_\-]{44}==", "[REDACTED_AZURE_KEY]"),

        # Bearer tokens
        (r"Bearer\s+[a-zA-Z0-9._\-]+", "Bearer [REDACTED]"),
        (r"Authorization:\s*Bearer\s+[a-zA-Z0-9._\-]+", "Authorization: Bearer [REDACTED]"),

        # Basic auth in URLs. User and password are limited to non-whitespace
        # so the match stays inside one URL token; an unbounded [^:]+ / [^@]+
        # would swallow everything between a plain URL and a later ':' ... '@'
        # (e.g. a timestamp followed by an e-mail address on another line).
        (r"://[^\s:]+:[^\s@]+@", "://[user]:[REDACTED]@"),

        # Generic API key patterns
        (r"api[_-]?key\s*[=:]\s*['\"]?[a-zA-Z0-9_\-]{16,}['\"]?", "api_key=[REDACTED]"),
        (r"apikey\s*[=:]\s*['\"]?[a-zA-Z0-9_\-]{16,}['\"]?", "apikey=[REDACTED]"),

        # Password patterns
        (r"password\s*[=:]\s*['\"]?[^\s'\",]+['\"]?", "password=[REDACTED]"),
        (r"passwd\s*[=:]\s*['\"]?[^\s'\",]+['\"]?", "passwd=[REDACTED]"),
        (r"pwd\s*[=:]\s*['\"]?[^\s'\",]+['\"]?", "pwd=[REDACTED]"),

        # Secret patterns
        (r"secret\s*[=:]\s*['\"]?[^\s'\",]+['\"]?", "secret=[REDACTED]"),
        (r"secret_key\s*[=:]\s*['\"]?[^\s'\",]+['\"]?", "secret_key=[REDACTED]"),

        # Token patterns
        (r"token\s*[=:]\s*['\"]?[a-zA-Z0-9_\-]{16,}['\"]?", "token=[REDACTED]"),
        (r"access_token\s*[=:]\s*['\"]?[a-zA-Z0-9_\-]{16,}['\"]?", "access_token=[REDACTED]"),
        (r"refresh_token\s*[=:]\s*['\"]?[a-zA-Z0-9_\-]{16,}['\"]?", "refresh_token=[REDACTED]"),

        # Private keys (PEM format markers). Covers the unlabelled PKCS#8
        # header as well as the RSA / EC / DSA / OPENSSH labelled variants.
        (r"-----BEGIN\s+(?:(?:RSA|EC|DSA|OPENSSH)\s+)?PRIVATE\s+KEY-----[\s\S]*?"
         r"-----END\s+(?:(?:RSA|EC|DSA|OPENSSH)\s+)?PRIVATE\s+KEY-----",
         "[REDACTED_PRIVATE_KEY]"),
        (r"-----BEGIN\s+ENCRYPTED\s+PRIVATE\s+KEY-----[\s\S]*?-----END\s+ENCRYPTED\s+PRIVATE\s+KEY-----",
         "[REDACTED_ENCRYPTED_KEY]"),

        # SSH keys
        (r"ssh-rsa\s+[A-Za-z0-9+/=]+", "[REDACTED_SSH_KEY]"),
        (r"ssh-ed25519\s+[A-Za-z0-9+/=]+", "[REDACTED_SSH_KEY]"),

        # Database connection strings (whitespace-bounded for the same
        # reason as the basic-auth URL pattern above)
        (r"mongodb(\+srv)?://[^\s:]+:[^\s@]+@", "mongodb://[user]:[REDACTED]@"),
        (r"postgres://[^\s:]+:[^\s@]+@", "postgres://[user]:[REDACTED]@"),
        (r"mysql://[^\s:]+:[^\s@]+@", "mysql://[user]:[REDACTED]@"),
        (r"redis://:[^\s@]+@", "redis://:[REDACTED]@"),

        # Environment variable patterns with secrets
        (r"export\s+\w*(?:KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL)\w*\s*=\s*['\"]?[^\s'\";]+['\"]?",
         "export [VAR]=[REDACTED]"),
    ]

    @classmethod
    def redact(cls, text: str) -> str:
        """
        Redact all sensitive patterns from text.

        Args:
            text: The text to redact

        Returns:
            Text with all sensitive patterns replaced. Falsy input (empty
            string, None) is returned unchanged.
        """
        if not text:
            return text

        result = text
        # Applied sequentially: each pattern sees the output of the previous one.
        for pattern, replacement in cls.REDACTION_PATTERNS:
            result = re.sub(pattern, replacement, result, flags=re.IGNORECASE)

        return result

    @classmethod
    def wrap_logger(cls, logger: Callable[[str], None]) -> Callable[[str], None]:
        """
        Wrap a logger function to automatically redact secrets.

        Both the message and any string positional arguments (the values
        used for %-style interpolation by the logging module) are redacted.
        Non-string positional arguments and all keyword arguments are
        forwarded untouched.

        Args:
            logger: A logging function that takes a string

        Returns:
            A wrapped function that redacts before logging

        Usage:
            import logging
            safe_info = SecretRedactor.wrap_logger(logging.info)
            safe_info("Key is sk-ant-abc123...")  # Redacted automatically
            safe_info("Key is %s", api_key)       # Argument redacted too
        """

        def wrapped(msg: str, *args, **kwargs) -> None:
            # Callers commonly pass objects such as exceptions as the message;
            # stringify them so re.sub does not raise TypeError mid-log call.
            if msg and not isinstance(msg, str):
                msg = str(msg)
            # Secrets passed as lazy format arguments would otherwise bypass
            # redaction entirely.
            safe_args = tuple(
                cls.redact(arg) if isinstance(arg, str) else arg for arg in args
            )
            return logger(cls.redact(msg), *safe_args, **kwargs)

        return wrapped

    @classmethod
    def add_pattern(cls, pattern: str, replacement: str) -> None:
        """
        Add a custom redaction pattern.

        The pattern is appended to the shared ``REDACTION_PATTERNS`` list and
        therefore affects every subsequent call on this class.

        Args:
            pattern: Regex pattern to match
            replacement: Replacement string

        Raises:
            re.error: If ``pattern`` is not a valid regular expression. The
                pattern is rejected up front because an invalid entry would
                make every later redact()/is_sensitive() call raise.
        """
        re.compile(pattern, re.IGNORECASE)
        cls.REDACTION_PATTERNS.append((pattern, replacement))

    @classmethod
    def is_sensitive(cls, text: str) -> bool:
        """
        Check if text contains any sensitive patterns.

        Args:
            text: Text to check

        Returns:
            True if any sensitive patterns are found
        """
        if not text:
            return False

        return any(
            re.search(pattern, text, flags=re.IGNORECASE)
            for pattern, _ in cls.REDACTION_PATTERNS
        )

    @classmethod
    def find_sensitive_patterns(cls, text: str) -> list[str]:
        """
        Find which sensitive patterns are present in text.

        Args:
            text: Text to check

        Returns:
            List of pattern descriptions that matched, in pattern order.
            Known descriptions appear at most once; a pattern without a
            known description is reported as ``"Pattern: <first 30 chars>..."``.
        """
        if not text:
            return []

        found: list[str] = []
        # Looked up by substring against the regex source, first hit wins,
        # so more specific keys must come before more general ones.
        pattern_descriptions = {
            "sk-ant-": "Anthropic API key",
            "sk-": "OpenAI API key",
            "AIza": "Google API key",
            "ghp_": "GitHub PAT",
            "AKIA": "AWS access key",
            "Bearer": "Bearer token",
            "password": "Password",
            "secret": "Secret",
            "token": "Token",
            "PRIVATE KEY": "Private key",
        }

        for pattern, _ in cls.REDACTION_PATTERNS:
            if not re.search(pattern, text, flags=re.IGNORECASE):
                continue

            # The regex source spells a literal space as \s+, so collapse it
            # back to a space; otherwise "PRIVATE KEY" can never be found.
            haystack = pattern.replace(r"\s+", " ").lower()
            for key, desc in pattern_descriptions.items():
                if key.lower() in haystack:
                    if desc not in found:
                        found.append(desc)
                    break
            else:
                found.append(f"Pattern: {pattern[:30]}...")

        return found


def redact_dict(data: dict, keys_to_redact: Optional[list[str]] = None) -> dict:
    """
    Redact sensitive values from a dictionary.

    A value is replaced wholesale with ``"[REDACTED]"`` when its key (compared
    case-insensitively) equals a sensitive name, ends with ``_<name>`` or
    starts with ``<name>_``. All other values are processed by type: strings
    go through ``SecretRedactor.redact``, dicts are redacted recursively,
    lists are processed element by element (including nested lists), and
    anything else is returned unchanged. The input is not modified.

    Args:
        data: Dictionary to redact
        keys_to_redact: List of keys whose values should be fully redacted
                       (in addition to pattern-based redaction). When omitted
                       or empty, a built-in list of common secret key names
                       is used.

    Returns:
        New dictionary with redacted values
    """
    keys_to_redact = keys_to_redact or [
        "password", "secret", "api_key", "apikey",
        "access_token", "refresh_token", "private_key",
        "aws_secret_access_key", "credentials",
    ]
    # Keys are lowercased before comparison, so the names must be too;
    # otherwise a caller-supplied "API_KEY" could never match.
    sensitive_names = [name.lower() for name in keys_to_redact]

    def is_sensitive_key(key) -> bool:
        # Non-string keys (ints, tuples, ...) cannot name a secret field.
        if not isinstance(key, str):
            return False
        lowered = key.lower()
        # Exact or underscore-delimited match only, so "api_key" matches
        # "my_api_key" but "token" would not match "tokens".
        return any(
            lowered == name
            or lowered.endswith(f"_{name}")
            or lowered.startswith(f"{name}_")
            for name in sensitive_names
        )

    def scrub(value):
        if isinstance(value, str):
            return SecretRedactor.redact(value)
        if isinstance(value, dict):
            return redact_dict(value, keys_to_redact)
        if isinstance(value, list):
            # Recurse so lists nested inside lists are covered as well.
            return [scrub(item) for item in value]
        return value

    return {
        key: "[REDACTED]" if is_sensitive_key(key) else scrub(value)
        for key, value in data.items()
    }
