#!/usr/bin/env python3
"""
Book Workflow Input Parsers

Parse various input formats for book research workflow:
- YAML book outline with chapters and subjects
- JSON book outline
- Simple text file with subjects (one per line)
- Command-line subject list

Each parser creates a BookProject with appropriate ChapterProject entries.
"""

import re
import logging
from pathlib import Path
from typing import List, Dict, Any, Optional

from book_workflow_models import (
    BookProject, ChapterProject, SubjectResearch,
    generate_project_id
)

# Module-level logger named after this module; no handlers are configured here.
logger = logging.getLogger(__name__)

# PyYAML is treated as optional: record whether the import succeeded in
# HAS_YAML so parse_yaml_outline() can raise a descriptive ImportError on use
# instead of this module failing at import time.
try:
    import yaml
    HAS_YAML = True
except ImportError:
    HAS_YAML = False
    logger.warning("PyYAML not installed. YAML parsing unavailable.")


# =============================================================================
# YAML PARSER
# =============================================================================

def parse_yaml_outline(path: Path) -> BookProject:
    """
    Parse a YAML book outline file.

    Expected format:
    ```yaml
    title: "Book Title"
    author: "Author Name"
    description: "Book description"

    chapters:
      - title: "Chapter 1 Title"
        subjects:
          - "Subject 1"
          - "Subject 2"

      - title: "Chapter 2 Title"
        subjects:
          - "Subject 3"
    ```

    A chapter entry may also be a plain string, in which case the string is
    used as both the chapter title and its only subject. A chapter's
    ``subjects`` may be a single string instead of a list. Subject entries
    are converted to stripped strings; blank/null entries are dropped.

    Malformed chapter entries and chapters without usable subjects are
    skipped with a warning. Chapter numbers reflect the entry's position in
    the file, so skipped entries leave gaps in the numbering.

    Args:
        path: Path to YAML file

    Returns:
        BookProject with chapters populated

    Raises:
        ImportError: If PyYAML not installed
        FileNotFoundError: If file doesn't exist
        ValueError: If YAML structure is invalid or yields no valid chapters
    """
    if not HAS_YAML:
        raise ImportError("PyYAML is required for YAML parsing. Install with: pip install pyyaml")

    if not path.exists():
        raise FileNotFoundError(f"YAML file not found: {path}")

    with open(path, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    if not isinstance(data, dict):
        raise ValueError(f"Invalid YAML structure: expected dict, got {type(data)}")

    # Use `or` rather than a .get() default: a key that is present but empty
    # (e.g. "title:") loads as None and must fall back as well.
    title = data.get('title') or path.stem
    author = data.get('author') or ''
    description = data.get('description') or ''

    chapters_data = data.get('chapters')
    if not chapters_data:
        raise ValueError("No chapters found in YAML file")
    if not isinstance(chapters_data, list):
        # Iterating a string or mapping would yield characters / keys and
        # silently build nonsense chapters, so reject it explicitly.
        raise ValueError(
            f"Invalid YAML structure: 'chapters' must be a list, got {type(chapters_data)}"
        )

    # Validate every chapter before a project ID is generated.
    chapter_specs = []
    for i, chapter_data in enumerate(chapters_data, 1):
        if isinstance(chapter_data, dict):
            chapter_title = chapter_data.get('title') or f"Chapter {i}"
            raw_subjects = chapter_data.get('subjects') or []
        elif isinstance(chapter_data, str):
            # Simple string format: chapter title is the subject
            chapter_title = chapter_data
            raw_subjects = [chapter_data]
        else:
            logger.warning(f"Skipping invalid chapter entry: {chapter_data}")
            continue

        if isinstance(raw_subjects, str):
            # "subjects: Some topic" means one subject, not a list of characters.
            raw_subjects = [raw_subjects]
        elif not isinstance(raw_subjects, list):
            logger.warning(f"Chapter '{chapter_title}' has invalid 'subjects' value, skipping")
            continue

        subjects = []
        for entry in raw_subjects:
            if entry is None:
                continue
            if isinstance(entry, (dict, list, tuple, set)):
                logger.warning(f"Skipping non-text subject in chapter '{chapter_title}': {entry!r}")
                continue
            # Scalars such as numbers (e.g. a bare 1984) are kept as text.
            text = str(entry).strip()
            if text:
                subjects.append(text)

        if not subjects:
            logger.warning(f"Chapter '{chapter_title}' has no subjects, skipping")
            continue

        chapter_specs.append((i, chapter_title, subjects))

    if not chapter_specs:
        raise ValueError("No valid chapters found in YAML file")

    # Create project
    project_id = generate_project_id('BOOK')
    project = BookProject(
        project_id=project_id,
        title=title,
        author=author,
        description=description,
    )

    # Create chapter projects
    for number, chapter_title, subjects in chapter_specs:
        chapter = ChapterProject(
            chapter_id=f"{project_id}_CH{number:02d}",
            chapter_number=number,
            title=chapter_title,
            subjects=subjects,
            book_project_id=project_id,
        )
        project.chapters.append(chapter)

    logger.info(f"Parsed YAML outline: {len(project.chapters)} chapters, {project.total_subjects} subjects")
    return project


# =============================================================================
# JSON PARSER
# =============================================================================

def parse_json_outline(path: Path) -> BookProject:
    """
    Parse a JSON book outline file.

    Expected format:
    ```json
    {
      "title": "Book Title",
      "author": "Author Name",
      "description": "Description",
      "chapters": [
        {
          "title": "Chapter 1",
          "subjects": ["Subject 1", "Subject 2"]
        }
      ]
    }
    ```

    A chapter entry may also be a plain string, in which case the string is
    used as both the chapter title and its only subject. A chapter's
    ``subjects`` may be a single string instead of a list. Subject entries
    are converted to stripped strings; blank/null entries are dropped.

    Malformed chapter entries and chapters without usable subjects are
    skipped with a warning. Chapter numbers reflect the entry's position in
    the file, so skipped entries leave gaps in the numbering.

    Args:
        path: Path to JSON file

    Returns:
        BookProject with chapters populated

    Raises:
        FileNotFoundError: If file doesn't exist
        ValueError: If JSON is malformed, the structure is invalid, or no
            valid chapters are found
    """
    import json

    if not path.exists():
        raise FileNotFoundError(f"JSON file not found: {path}")

    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    if not isinstance(data, dict):
        raise ValueError(f"Invalid JSON structure: expected dict, got {type(data)}")

    # Use `or` rather than a .get() default so that explicit nulls
    # ("title": null) fall back the same way as missing keys.
    title = data.get('title') or path.stem
    author = data.get('author') or ''
    description = data.get('description') or ''

    chapters_data = data.get('chapters')
    if not chapters_data:
        raise ValueError("No chapters found in JSON file")
    if not isinstance(chapters_data, list):
        # Iterating a string or object would yield characters / keys and
        # silently build nonsense chapters, so reject it explicitly.
        raise ValueError(
            f"Invalid JSON structure: 'chapters' must be a list, got {type(chapters_data)}"
        )

    # Validate every chapter before a project ID is generated.
    chapter_specs = []
    for i, chapter_data in enumerate(chapters_data, 1):
        if isinstance(chapter_data, dict):
            chapter_title = chapter_data.get('title') or f"Chapter {i}"
            raw_subjects = chapter_data.get('subjects') or []
        elif isinstance(chapter_data, str):
            # Simple string format: chapter title is the subject
            chapter_title = chapter_data
            raw_subjects = [chapter_data]
        else:
            logger.warning(f"Skipping invalid chapter entry: {chapter_data}")
            continue

        if isinstance(raw_subjects, str):
            # "subjects": "Some topic" means one subject, not a list of characters.
            raw_subjects = [raw_subjects]
        elif not isinstance(raw_subjects, list):
            logger.warning(f"Chapter '{chapter_title}' has invalid 'subjects' value, skipping")
            continue

        subjects = []
        for entry in raw_subjects:
            if entry is None:
                continue
            if isinstance(entry, (dict, list, tuple, set)):
                logger.warning(f"Skipping non-text subject in chapter '{chapter_title}': {entry!r}")
                continue
            # Scalars such as numbers are kept as text.
            text = str(entry).strip()
            if text:
                subjects.append(text)

        if not subjects:
            logger.warning(f"Chapter '{chapter_title}' has no subjects, skipping")
            continue

        chapter_specs.append((i, chapter_title, subjects))

    if not chapter_specs:
        raise ValueError("No valid chapters found in JSON file")

    # Create project
    project_id = generate_project_id('BOOK')
    project = BookProject(
        project_id=project_id,
        title=title,
        author=author,
        description=description,
    )

    # Create chapter projects
    for number, chapter_title, subjects in chapter_specs:
        chapter = ChapterProject(
            chapter_id=f"{project_id}_CH{number:02d}",
            chapter_number=number,
            title=chapter_title,
            subjects=subjects,
            book_project_id=project_id,
        )
        project.chapters.append(chapter)

    logger.info(f"Parsed JSON outline: {len(project.chapters)} chapters, {project.total_subjects} subjects")
    return project


# =============================================================================
# TEXT FILE PARSER
# =============================================================================

def parse_subjects_file(path: Path, title: str = "Research Project") -> BookProject:
    """
    Parse a simple text file with subjects (one per line).

    Format:
    ```
    # Comments start with #
    Subject 1
    Subject 2
    Subject 3

    ## Chapter: Custom Chapter Title
    Subject 4
    Subject 5
    ```

    Special syntax:
    - Lines starting with # are comments (ignored)
    - Lines starting with ## Chapter: create new chapters
    - Empty lines are ignored
    - All other lines are subjects

    Subjects that appear before the first chapter marker are grouped under
    a chapter titled "Research Topics". Chapters without any subjects are
    dropped.

    Args:
        path: Path to text file
        title: Project title

    Returns:
        BookProject with subjects as chapters or grouped by ## Chapter markers

    Raises:
        FileNotFoundError: If file doesn't exist
        ValueError: If the file contains no subject lines
    """
    chapter_marker = '## Chapter:'

    if not path.exists():
        raise FileNotFoundError(f"Subjects file not found: {path}")

    # 'utf-8-sig' reads plain UTF-8 as well, but also drops a leading BOM
    # (common in files saved on Windows); otherwise a first-line comment or
    # chapter marker would be mistaken for a subject.
    with open(path, 'r', encoding='utf-8-sig') as f:
        lines = f.readlines()

    # Parse lines
    current_chapter_title = "Research Topics"
    current_subjects = []
    chapters = []

    for line in lines:
        line = line.strip()

        # Skip empty lines
        if not line:
            continue

        # Chapter markers are checked before comments because they also
        # start with '#'.
        if line.startswith(chapter_marker):
            # Save previous chapter if has subjects
            if current_subjects:
                chapters.append({
                    'title': current_chapter_title,
                    'subjects': current_subjects,
                })
                current_subjects = []

            # Strip only the leading marker; a later occurrence of the
            # marker text inside the title is part of the title.
            current_chapter_title = line[len(chapter_marker):].strip()
            if not current_chapter_title:
                current_chapter_title = f"Chapter {len(chapters) + 1}"
            continue

        # Any other '#' line is a comment
        if line.startswith('#'):
            continue

        # Regular subject line
        current_subjects.append(line)

    # Don't forget last chapter
    if current_subjects:
        chapters.append({
            'title': current_chapter_title,
            'subjects': current_subjects,
        })

    if not chapters:
        raise ValueError("No subjects found in file")

    # Create project
    project_id = generate_project_id('BOOK')
    project = BookProject(
        project_id=project_id,
        title=title,
    )

    # Create chapter projects
    for i, chapter_data in enumerate(chapters, 1):
        chapter = ChapterProject(
            chapter_id=f"{project_id}_CH{i:02d}",
            chapter_number=i,
            title=chapter_data['title'],
            subjects=chapter_data['subjects'],
            book_project_id=project_id,
        )
        project.chapters.append(chapter)

    logger.info(f"Parsed subjects file: {len(project.chapters)} chapters, {project.total_subjects} subjects")
    return project


# =============================================================================
# COMMAND-LINE PARSER
# =============================================================================

def parse_subjects_list(
    subjects: List[str],
    title: str = "Research Project",
    chapter_title: str = "Research Topics"
) -> BookProject:
    """
    Build a single-chapter BookProject from subjects given on the command line.

    Each subject is stripped of surrounding whitespace; entries that are
    empty after stripping are discarded.

    Args:
        subjects: List of subject strings
        title: Project title
        chapter_title: Title for the single chapter

    Returns:
        BookProject with one chapter containing all subjects

    Raises:
        ValueError: If the list is empty or holds only blank entries
    """
    if not subjects:
        raise ValueError("No subjects provided")

    # Strip every entry and keep only the non-blank ones
    cleaned = []
    for raw in subjects:
        stripped = raw.strip()
        if stripped:
            cleaned.append(stripped)

    if not cleaned:
        raise ValueError("No valid subjects after filtering")

    # One project holding one chapter that carries every subject
    book_id = generate_project_id('BOOK')
    book = BookProject(project_id=book_id, title=title)
    book.chapters.append(
        ChapterProject(
            chapter_id=f"{book_id}_CH01",
            chapter_number=1,
            title=chapter_title,
            subjects=cleaned,
            book_project_id=book_id,
        )
    )

    logger.info(f"Created project from subject list: {len(cleaned)} subjects")
    return book


# =============================================================================
# AUTO-DETECT PARSER
# =============================================================================

def parse_outline_file(path: Path, title: str = None) -> BookProject:
    """
    Pick a parser for an outline file and run it.

    The file extension decides the format: .yaml/.yml -> YAML, .json -> JSON,
    .txt/.text/no extension -> plain subjects file. For any other extension
    the first 100 characters of the file are inspected: a leading '{' means
    JSON, a 'title:' or 'chapters:' key means YAML, anything else is treated
    as a plain subjects file.

    Args:
        path: Path to outline file
        title: Override title (optional)

    Returns:
        BookProject
    """
    path = Path(path)
    ext = path.suffix.lower()

    # Step 1: decide which format we are dealing with
    if ext in ('.yaml', '.yml'):
        fmt = 'yaml'
    elif ext == '.json':
        fmt = 'json'
    elif ext in ('.txt', '.text', ''):
        fmt = 'text'
    else:
        # Unknown extension: sniff the beginning of the file
        with open(path, 'r', encoding='utf-8') as handle:
            head = handle.read(100)
        lowered = head.lower()

        if head.strip().startswith('{'):
            fmt = 'json'
        elif ':' in head and ('title:' in lowered or 'chapters:' in lowered):
            fmt = 'yaml'
        else:
            fmt = 'text'

    # Step 2: hand over to the matching parser
    if fmt == 'yaml':
        result = parse_yaml_outline(path)
    elif fmt == 'json':
        result = parse_json_outline(path)
    else:
        result = parse_subjects_file(path, title or path.stem)

    # An explicit title always wins over whatever the file declared
    if title:
        result.title = title

    return result


# =============================================================================
# VALIDATION
# =============================================================================

def validate_project(project: BookProject) -> List[str]:
    """
    Validate a BookProject for common issues.

    Checks performed:
    - project and chapter titles are non-empty
    - every chapter has at least one subject
    - subjects are strings (anything else is reported, not raised on)
    - no case-insensitive duplicate subjects within a chapter
    - no subjects shorter than 3 characters (likely parsing errors)

    Per-chapter title/subject/duplicate warnings for all chapters come
    first, followed by the short-subject warnings for all chapters.

    Args:
        project: BookProject to validate

    Returns:
        List of warning messages (empty if valid)
    """
    warnings = []

    if not project.title:
        warnings.append("Project has no title")

    if not project.chapters:
        warnings.append("Project has no chapters")
        return warnings

    for chapter in project.chapters:
        if not chapter.title:
            warnings.append(f"Chapter {chapter.chapter_number} has no title")

        if not chapter.subjects:
            warnings.append(f"Chapter '{chapter.title}' has no subjects")

        # Check for duplicate subjects within chapter
        seen = set()
        # `or []` keeps a chapter whose subjects is None from raising.
        for subject in chapter.subjects or []:
            if not isinstance(subject, str):
                # Outline files can yield numbers, nulls or nested values;
                # a validator should report these rather than crash.
                warnings.append(f"Non-string subject in chapter '{chapter.title}': {subject!r}")
                continue
            key = subject.lower()
            if key in seen:
                warnings.append(f"Duplicate subject in chapter '{chapter.title}': {subject}")
            seen.add(key)

    # Check for very short subjects (likely parsing errors)
    for chapter in project.chapters:
        for subject in chapter.subjects or []:
            # Non-string subjects were already reported above.
            if isinstance(subject, str) and len(subject) < 3:
                warnings.append(f"Very short subject '{subject}' in chapter '{chapter.title}'")

    return warnings


# =============================================================================
# EXPORTS
# =============================================================================

# Names exported by a star-import of this module: the four format-specific
# parsers, the auto-detecting entry point, and the validator.
__all__ = [
    'parse_yaml_outline',
    'parse_json_outline',
    'parse_subjects_file',
    'parse_subjects_list',
    'parse_outline_file',
    'validate_project',
]
