"""
Task Domain Analyzer

Provides semantic understanding of task domains to replace keyword-only matching.
Analyzes user requests to determine the appropriate task domain(s) with confidence scores.
"""

from enum import Enum
from dataclasses import dataclass, field
from typing import List, Dict, Tuple, Optional
import re


class TaskDomain(Enum):
    """Closed set of task categories used for intelligent agent routing.

    Every member's value is the lowercase snake_case spelling of its name.
    """

    # Stories, poetry, scripts, narratives
    CREATIVE_WRITING = "creative_writing"
    # Software development, debugging, coding
    TECHNICAL_CODING = "technical_coding"
    # Research, analytics, data processing
    DATA_ANALYSIS = "data_analysis"
    # UI/UX, graphics, layouts, styling
    DESIGN_VISUAL = "design_visual"
    # Technical writing, manuals, docs
    DOCUMENTATION = "documentation"
    # Investigation, fact-finding, learning
    RESEARCH = "research"
    # Strategy, architecture, roadmaps
    PLANNING = "planning"
    # Code review, editing, feedback
    REVIEW_CRITIQUE = "review_critique"
    # Scripts, workflows, DevOps, CI/CD
    AUTOMATION = "automation"
    # Emails, presentations, messaging
    COMMUNICATION = "communication"
    # Catch-all for unclassified tasks
    GENERAL = "general"


@dataclass
class DomainMatch:
    """A single candidate domain together with its confidence score.

    Attributes:
        domain: The domain this match refers to.
        confidence: Score for the domain, nominally between 0.0 and 1.0.
            The range is not validated by this class.
        matched_patterns: Regex pattern strings that contributed to the
            match. Defaults to a fresh empty list per instance.
    """
    domain: TaskDomain
    confidence: float  # 0.0 to 1.0
    matched_patterns: List[str] = field(default_factory=list)


@dataclass
class DomainAnalysis:
    """Outcome of classifying one task description.

    Attributes:
        primary_domain: The domain selected as the best fit.
        primary_confidence: Confidence score attached to ``primary_domain``.
        all_domains: Every candidate domain considered, each with its score.
        task_text: The task description that was analyzed.
        complexity: One of "simple", "medium", "complex".
        suggested_team_size: Recommended number of agents for the task.
    """
    primary_domain: TaskDomain
    primary_confidence: float
    all_domains: List[DomainMatch]
    task_text: str
    complexity: str  # "simple", "medium", "complex"
    suggested_team_size: int

    @property
    def is_creative(self) -> bool:
        """True when the primary domain is creative writing."""
        creative = self.primary_domain == TaskDomain.CREATIVE_WRITING
        return creative

    @property
    def is_technical(self) -> bool:
        """True when the primary domain is technical/coding."""
        technical = self.primary_domain == TaskDomain.TECHNICAL_CODING
        return technical

    @property
    def requires_multiple_domains(self) -> bool:
        """True when more than one domain scores at least 0.5."""
        strong_count = sum(
            1 for match in self.all_domains if match.confidence >= 0.5
        )
        return strong_count > 1


# Domain detection patterns - intent phrases, not just keywords.
# Each entry is (regex, weight); the weight is that pattern's contribution to
# the domain's confidence score. Patterns are written in lowercase and are
# searched against the lowercased task text.
DOMAIN_PATTERNS: Dict[TaskDomain, List[Tuple[str, float]]] = {
    TaskDomain.CREATIVE_WRITING: [
        # Strong indicators
        (r'\b(write|create|compose)\s+(a\s+)?(short\s+)?stor(y|ies)\b', 1.0),
        (r'\b(write|create|compose)\s+(a\s+)?(poem|poetry|verse|sonnet)\b', 1.0),
        (r'\b(write|create|compose)\s+(a\s+)?(novel|novella|fiction)\b', 1.0),
        (r'\b(write|create)\s+(a\s+)?(script|screenplay|dialogue)\b', 1.0),
        (r'\b(creative\s+writing|fiction\s+writing)\b', 1.0),
        (r'\b(creative\s+stor(y|ies))\b', 1.0),
        (r'\bwrite\s+(me\s+)?(a\s+)?creative\b', 0.9),
        (r'\b(narrative|storytelling|plot)\b', 0.8),
        # Fiction indicators
        (r'\bcompose\s+.*(fiction|story|narrative)\b', 0.9),
        (r'\b(short\s+)?fiction\s+(piece|story)\b', 0.9),
        # Story by itself when in creative context
        (r'\bstory\s+about\b', 0.8),
        (r'\b(comprehensive|complete|full)\s+(short\s+)?story\b', 0.9),
        (r'\bshort\s+story\s+with\b', 0.9),
        # Medium indicators
        (r'\b(write|draft)\s+(a\s+)?(blog\s+post|article|essay)\b', 0.7),
        (r'\b(character|protagonist|antagonist)\s+(development|arc)\b', 0.9),
        (r'\b(write|create)\s+(content|copy)\b', 0.6),
        (r'\bflash\s+fiction\b', 1.0),
        (r'\b(memoir|autobiography|biography)\b', 0.8),
        # Weak but relevant
        (r'\b(imagine|envision|dream\s+up)\b', 0.4),
        (r'\b(once\s+upon\s+a\s+time|happily\s+ever\s+after)\b', 0.9),
        # Help me write patterns
        (r'\bhelp\s+(me\s+)?write\s+(a\s+)?(story|creative|fiction)\b', 0.9),
    ],

    TaskDomain.TECHNICAL_CODING: [
        # Strong indicators
        (r'\b(implement|code|program|develop)\s+(a\s+)?(function|class|module|api)\b', 1.0),
        (r'\b(fix|debug|resolve)\s+(the\s+)?(bug|error|issue|crash)\b', 1.0),
        (r'\b(refactor|optimize|improve)\s+(the\s+)?(code|function|class)\b', 0.9),
        (r'\b(add|create|implement)\s+(a\s+)?(feature|functionality|endpoint)\b', 0.9),
        (r'\b(build|develop|create)\s+(a\s+)?(app|application|service|backend|frontend)\b', 0.9),
        (r'\b(write|add)\s+(unit\s+)?tests?\b', 0.8),
        (r'\bpull\s+request\b', 0.8),
        # API/Backend specific - higher weight to distinguish from frontend
        (r'\b(rest|restful)\s+api\b', 0.9),
        (r'\bapi\s+endpoint\b', 0.9),
        (r'\bimplement\s+.*\s*api\b', 0.9),
        # Language/framework specific
        # "c++" ends in a non-word character, so a trailing \b only holds when
        # a word character follows (e.g. "c++11") and plain "c++" would never
        # match. It therefore gets its own alternative with no trailing \b.
        (r'\b((python|javascript|typescript|java|rust|go|ruby|php|swift)\b|c\+\+)', 0.7),
        (r'\b(react|vue|angular|django|flask|fastapi|express|spring)\b', 0.8),
        (r'\b(npm|pip|cargo|maven|gradle)\s+(install|update|build)\b', 0.7),
        # Code patterns
        (r'\b(function|method|class|interface|enum|struct)\b', 0.5),
        (r'\b(async|await|promise|callback)\b', 0.6),
        (r'\b(api|rest|graphql|grpc)\b', 0.6),
        (r'\b(database|sql|query|schema)\b', 0.5),
    ],

    TaskDomain.DATA_ANALYSIS: [
        (r'\b(analyze|analyse)\s+(the\s+)?(data|dataset|results|metrics)\b', 1.0),
        (r'\b(create|generate|build)\s+(a\s+)?(report|dashboard|visualization)\b', 0.8),
        (r'\b(statistical|statistics|correlation|regression)\b', 0.9),
        (r'\b(machine\s+learning|ml|deep\s+learning|ai\s+model)\b', 0.8),
        (r'\b(pandas|numpy|scipy|matplotlib|seaborn|jupyter)\b', 0.8),
        (r'\b(csv|excel|json|parquet)\s+(file|data)\b', 0.6),
        (r'\b(trend|pattern|insight|anomaly)\s+(detection|analysis)\b', 0.8),
        (r'\b(etl|data\s+pipeline|data\s+processing)\b', 0.8),
    ],

    TaskDomain.DESIGN_VISUAL: [
        (r'\b(design|create)\s+(a\s+)?(ui|ux|interface|layout)\b', 1.0),
        (r'\b(improve|enhance)\s+(the\s+)?(user\s+experience|usability)\b', 0.9),
        (r'\b(wireframe|mockup|prototype)\b', 0.9),
        (r'\b(css|styling|responsive\s+design|flexbox|grid)\b', 0.7),
        (r'\b(color\s+scheme|typography|font|visual\s+design)\b', 0.8),
        (r'\b(figma|sketch|adobe\s+xd|invision)\b', 0.9),
        (r'\b(component\s+library|design\s+system)\b', 0.8),
        (r'\b(accessibility|a11y|wcag)\b', 0.7),
    ],

    TaskDomain.DOCUMENTATION: [
        (r'\b(write|create|update)\s+(the\s+)?(documentation|docs|readme)\b', 1.0),
        (r'\b(document|explain)\s+(the\s+)?(api|code|function|process)\b', 0.8),
        (r'\b(technical\s+writing|user\s+guide|manual)\b', 0.9),
        (r'\b(api\s+documentation|swagger|openapi)\b', 0.8),
        (r'\b(jsdoc|docstring|pydoc|rustdoc)\b', 0.8),
        (r'\b(changelog|release\s+notes)\b', 0.7),
        (r'\b(tutorial|how-to|getting\s+started)\b', 0.7),
    ],

    TaskDomain.RESEARCH: [
        (r'\b(research|investigate|explore)\s+(how|what|why|the)\b', 0.9),
        (r'\bresearch\s+\w+\s+(trends?|patterns?|data)\b', 0.9),
        (r'\b(find\s+out|look\s+into|learn\s+about)\b', 0.7),
        (r'\b(compare|evaluate|assess)\s+(different|various|multiple)\b', 0.7),
        (r'\b(best\s+practices|industry\s+standard)\b', 0.6),
        (r'\b(literature\s+review|state\s+of\s+the\s+art)\b', 0.9),
        (r'\b(competitor\s+analysis|market\s+research)\b', 0.8),
        (r'\b(feasibility\s+study|proof\s+of\s+concept)\b', 0.8),
        (r'\bresearch\b', 0.6),  # "Research" as a verb by itself
        (r'\b(market|industry|trend)\s+research\b', 0.9),
    ],

    TaskDomain.PLANNING: [
        (r'\b(plan|design|architect)\s+(the\s+)?(system|architecture|structure)\b', 1.0),
        (r'\b(create|develop)\s+(a\s+)?(roadmap|strategy|plan)\b', 0.9),
        (r'\b(project\s+plan|sprint\s+planning|backlog)\b', 0.8),
        (r'\b(requirements|specification|spec)\b', 0.7),
        (r'\b(estimate|timeline|milestone)\b', 0.6),
        (r'\b(break\s+down|decompose)\s+(the\s+)?(task|project|work)\b', 0.8),
        (r'\b(high-level|overview|architecture\s+diagram)\b', 0.7),
    ],

    TaskDomain.REVIEW_CRITIQUE: [
        (r'\b(review|critique|evaluate)\s+(the\s+)?(code|pr|pull\s+request)\b', 1.0),
        (r'\b(code\s+review|peer\s+review)\b', 1.0),
        (r'\b(proofread|edit|revise)\s+(the\s+)?(text|document|content)\b', 0.9),
        (r'\b(feedback|suggestions|improvements)\s+(on|for)\b', 0.7),
        (r'\b(check|verify|validate)\s+(the\s+)?(implementation|solution)\b', 0.7),
        (r'\b(quality\s+assurance|qa)\b', 0.8),
        (r'\b(lint|format|style\s+check)\b', 0.6),
    ],

    TaskDomain.AUTOMATION: [
        (r'\b(automate|script)\s+(the\s+)?(process|workflow|task)\b', 1.0),
        (r'\b(ci/cd|continuous\s+integration|continuous\s+deployment)\b', 1.0),
        (r'\b(github\s+actions|gitlab\s+ci|jenkins|circleci)\b', 0.9),
        (r'\b(docker|kubernetes|k8s|container)\b', 0.8),
        (r'\b(terraform|ansible|puppet|chef)\b', 0.9),
        (r'\b(deploy|deployment|release\s+pipeline)\b', 0.7),
        (r'\b(cron|scheduled\s+job|batch\s+process)\b', 0.8),
        (r'\b(devops|infrastructure\s+as\s+code)\b', 0.9),
    ],

    TaskDomain.COMMUNICATION: [
        (r'\b(write|draft|compose)\s+(an?\s+)?(email|message|letter)\b', 1.0),
        (r'\b(create|prepare)\s+(a\s+)?(presentation|slides|deck)\b', 0.9),
        (r'\b(announcement|notification|update)\s+(to|for)\s+(team|users)\b', 0.8),
        (r'\b(stakeholder\s+communication|status\s+update)\b', 0.8),
        (r'\b(meeting\s+notes|minutes|summary)\b', 0.7),
        (r'\b(proposal|pitch|business\s+case)\b', 0.7),
    ],
}

# Negative patterns - reduce confidence when these appear.
# Each entry is (regex, penalty) where the penalty is a negative number.
# DomainAnalyzer.analyze adds the penalty of every matching pattern to the
# confidence of a domain that already has a positive match, never letting the
# result drop below 0.0. Domains without an entry here receive no penalties.
NEGATIVE_PATTERNS: Dict[TaskDomain, List[Tuple[str, float]]] = {
    # Engineering vocabulary counts against a creative-writing classification.
    TaskDomain.CREATIVE_WRITING: [
        (r'\b(test|testing|unit\s+test|integration\s+test)\b', -0.5),
        (r'\b(bug|error|fix|debug)\b', -0.4),
        (r'\b(api|endpoint|database)\b', -0.3),
    ],
    # Literary vocabulary counts against a technical-coding classification.
    TaskDomain.TECHNICAL_CODING: [
        (r'\b(story|poem|novel|fiction)\b', -0.5),
        (r'\b(creative|artistic|narrative)\b', -0.4),
    ],
}


class DomainAnalyzer:
    """
    Analyzes task descriptions to determine appropriate domain(s).

    Each domain owns a list of ``(regex, weight)`` pairs. The weights of the
    patterns that match are combined into a confidence score, per-domain
    negative patterns subtract from it, and the highest-scoring domain is
    reported as the primary one.
    """

    # Keywords hinting at a large task. Matching is anchored at a word start so
    # that unrelated words do not trigger a hit ("small" or "install" must not
    # count as "all", nor "carefully" as "full"). "all" and "full" must be
    # whole words; the remaining keywords match as word prefixes so that
    # inflections such as "systems" or "refactoring" still count.
    _COMPLEXITY_KEYWORD_RE = re.compile(
        r'\b(?:'
        r'(?:all|full)\b'
        r'|comprehensive|complete|entire|integrate|migration|refactor'
        r'|architecture|system|platform|framework'
        r')'
    )

    def __init__(self):
        """Bind the module-level positive and negative pattern tables."""
        self.patterns = DOMAIN_PATTERNS
        self.negative_patterns = NEGATIVE_PATTERNS

    def analyze(self, task_text: str) -> "DomainAnalysis":
        """
        Analyze a task description and return domain classification.

        Args:
            task_text: The user's task description

        Returns:
            DomainAnalysis with primary domain, confidence, and metadata.
            ``all_domains`` is ordered by descending confidence. When no
            domain reaches a confidence of 0.3, the primary domain is
            ``TaskDomain.GENERAL`` with confidence 0.5, and that entry is
            placed at the front of ``all_domains``.
        """
        task_lower = task_text.lower()
        domain_scores: Dict[TaskDomain, DomainMatch] = {}

        # Calculate positive matches for each domain
        for domain, patterns in self.patterns.items():
            matches = []
            total_score = 0.0

            for pattern, weight in patterns:
                if re.search(pattern, task_lower, re.IGNORECASE):
                    matches.append(pattern)
                    total_score += weight

            if not matches:
                continue

            # A single match scores exactly its own weight (the divisor is
            # floored at 1). With several matches the score is the average
            # matched weight divided by 0.8, capped at 1.0.
            # NOTE: because this is an average, a weak additional match can
            # yield a lower score than a single strong match on its own.
            confidence = min(1.0, total_score / max(1, len(matches) * 0.8))
            domain_scores[domain] = DomainMatch(
                domain=domain,
                confidence=confidence,
                matched_patterns=matches
            )

        # Apply negative patterns to reduce false positives. Penalties are
        # negative numbers, accumulate per matching pattern, and only affect
        # domains that already have a positive match.
        for domain, neg_patterns in self.negative_patterns.items():
            if domain not in domain_scores:
                continue
            scored = domain_scores[domain]
            for pattern, penalty in neg_patterns:
                if re.search(pattern, task_lower, re.IGNORECASE):
                    scored.confidence = max(0.0, scored.confidence + penalty)

        # Sort domains by confidence, strongest first
        sorted_domains = sorted(
            domain_scores.values(),
            key=lambda x: x.confidence,
            reverse=True
        )

        # Determine primary domain
        if sorted_domains and sorted_domains[0].confidence >= 0.3:
            primary = sorted_domains[0]
        else:
            # Fall back to GENERAL if no strong match
            primary = DomainMatch(
                domain=TaskDomain.GENERAL,
                confidence=0.5,
                matched_patterns=[]
            )
            sorted_domains = [primary] + sorted_domains

        # Assess complexity
        complexity = self._assess_complexity(task_text, sorted_domains)
        team_size = self._suggest_team_size(complexity, sorted_domains)

        return DomainAnalysis(
            primary_domain=primary.domain,
            primary_confidence=primary.confidence,
            all_domains=sorted_domains,
            task_text=task_text,
            complexity=complexity,
            suggested_team_size=team_size
        )

    def _assess_complexity(
        self,
        task_text: str,
        domains: "List[DomainMatch]"
    ) -> str:
        """
        Assess task complexity based on text and domain analysis.

        Args:
            task_text: The user's task description
            domains: Candidate domains with their confidence scores

        Returns:
            "complex" when more than one domain scores at least 0.5, the text
            exceeds 50 whitespace-separated words, or it contains a complexity
            keyword; "medium" when it exceeds 20 words; otherwise "simple".
        """
        word_count = len(task_text.split())
        multi_domain = sum(1 for d in domains if d.confidence >= 0.5) > 1

        # Keywords are matched as words, not as arbitrary substrings.
        has_complex_words = (
            self._COMPLEXITY_KEYWORD_RE.search(task_text.lower()) is not None
        )

        if multi_domain or word_count > 50 or has_complex_words:
            return "complex"
        elif word_count > 20:
            return "medium"
        else:
            return "simple"

    def _suggest_team_size(
        self,
        complexity: str,
        domains: "List[DomainMatch]"
    ) -> int:
        """
        Suggest number of agents based on complexity.

        Args:
            complexity: One of "simple", "medium", "complex"
            domains: Candidate domains with their confidence scores

        Returns:
            A base size of 1, 2 or 3 for the complexity level, plus one agent
            for each domain beyond the first that scores at least 0.6, capped
            at 5.

        Raises:
            KeyError: If ``complexity`` is not one of the three known levels.
        """
        base_size = {"simple": 1, "medium": 2, "complex": 3}[complexity]

        # Add agents for additional high-confidence domains
        extra_domains = len([d for d in domains if d.confidence >= 0.6]) - 1

        return min(5, base_size + max(0, extra_domains))


# Module-level convenience functions
_analyzer: Optional[DomainAnalyzer] = None  # shared instance, built on demand


def get_analyzer() -> DomainAnalyzer:
    """Return the module-wide DomainAnalyzer, constructing it on first use."""
    global _analyzer
    if _analyzer is not None:
        return _analyzer
    _analyzer = DomainAnalyzer()
    return _analyzer


def analyze_task(task_text: str) -> DomainAnalysis:
    """
    Classify a task description using the shared analyzer.

    This is the main entry point for task analysis.

    Args:
        task_text: The user's task description

    Returns:
        DomainAnalysis with domain classification and metadata

    Example:
        >>> analysis = analyze_task("Write a short story about a robot")
        >>> analysis.primary_domain
        <TaskDomain.CREATIVE_WRITING: 'creative_writing'>
        >>> analysis.primary_confidence
        1.0
    """
    analyzer = get_analyzer()
    return analyzer.analyze(task_text)


def is_creative_task(task_text: str) -> bool:
    """Return True when creative writing is the task's primary domain."""
    analysis = analyze_task(task_text)
    return analysis.is_creative


def is_technical_task(task_text: str) -> bool:
    """Return True when technical/coding is the task's primary domain."""
    analysis = analyze_task(task_text)
    return analysis.is_technical
