#!/usr/bin/env python3
"""
Book Workflow Output Generators

Generate output files for the research workflow:
- gaps.md - User-editable gap review file
- research_summary.md - Compiled research by chapter
- sources.json - All sources with metadata
- Chapter drafts

All outputs are designed for both human readability and
programmatic parsing by Claude Code.
"""

import json
import logging
import re
from pathlib import Path
from datetime import datetime
from typing import List, Dict, Any, Optional

from book_workflow_models import BookProject, ChapterProject

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)


# =============================================================================
# GAPS.MD GENERATOR
# =============================================================================

def generate_gaps_markdown(project: BookProject) -> str:
    """
    Generate gaps.md for user review.

    The file uses checkboxes that users can edit:
    - [ ] = pending review
    - [x] = approved for search
    - [-] = skip this gap

    Layout: a header with project metadata and usage instructions, one
    section per chapter (ascending chapter number) listing its gaps, a
    summary with counts, and - when the optional ``web_search`` module is
    importable - a Tavily credit estimate. Gap numbers run continuously
    across chapters; a gap that references several chapters is listed
    (and numbered) once under each of them.

    Args:
        project: BookProject with collected gaps. Reads ``title``,
            ``project_id``, ``all_gaps`` (a list of dicts) and calls
            ``get_chapter(chapter_number)``.

    Returns:
        Markdown string for gaps.md
    """
    lines = [
        "# Research Gaps Analysis",
        "",
        f"**Project:** {project.title}",
        f"**Project ID:** {project.project_id}",
        f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"**Total Gaps:** {len(project.all_gaps)}",
        "",
        "---",
        "",
        "## How to Use This File",
        "",
        "1. Review each gap below",
        "2. Mark gaps to research with Tavily: Change `[ ]` to `[x]`",
        "3. Mark gaps to skip: Change `[ ]` to `[-]`",
        "4. Re-run the workflow:",
        "",
        "```bash",
        f"python research_workflow.py --resume {project.project_id} --phase 3",
        "```",
        "",
        "Or use automatic filling (searches all unmarked gaps within budget):",
        "",
        "```bash",
        f"python research_workflow.py --resume {project.project_id} --phase 3 --auto-fill-gaps --tavily-budget 100",
        "```",
        "",
        "---",
        "",
    ]

    gaps_by_chapter = _group_gaps_by_chapter(project.all_gaps)

    gap_number = 1
    for chapter_num in sorted(gaps_by_chapter):
        # Fall back to a generic title when the project has no such chapter
        # (e.g. the catch-all chapter 0 used for gaps without a chapter).
        chapter = project.get_chapter(chapter_num)
        chapter_title = chapter.title if chapter else f"Chapter {chapter_num}"

        lines.extend([
            f"## Chapter {chapter_num}: {chapter_title}",
            "",
        ])

        for gap in gaps_by_chapter[chapter_num]:
            lines.extend(_format_gap_entry(gap_number, gap))
            gap_number += 1

        lines.append("---")
        lines.append("")

    # Summary stats are computed over the raw gap list, not the per-chapter
    # listing, so a gap shared by several chapters is counted once.
    high_priority = sum(1 for g in project.all_gaps if g.get('priority') == 'high')
    unfilled = sum(1 for g in project.all_gaps if not g.get('searched', False))

    lines.extend([
        "## Summary",
        "",
        f"- **Total gaps:** {len(project.all_gaps)}",
        f"- **High priority:** {high_priority}",
        f"- **Unfilled:** {unfilled}",
        f"- **Already searched:** {len(project.all_gaps) - unfilled}",
        "",
    ])

    # Credit estimate is best-effort: web_search is an optional module and
    # the section is simply left out when it cannot be imported.
    try:
        from web_search import estimate_gap_search_cost
        cost = estimate_gap_search_cost(unfilled)
        lines.extend([
            "### Tavily Credit Estimate",
            "",
            f"- **Estimated credits for all unfilled gaps:** {cost['estimated_credits']}",
            f"- **Current credits used:** {cost['current_used']}",
            f"- **Credit limit:** {cost['current_limit']}",
            "",
        ])
    except ImportError:
        pass

    return "\n".join(lines)


def _group_gaps_by_chapter(gaps: List[Dict]) -> Dict[int, List[Dict]]:
    """Bucket gaps under every chapter number listed in their 'chapters' key.

    A gap whose 'chapters' value is missing, None or empty is filed under
    chapter 0 so that it still appears in the review file instead of being
    dropped. Within one chapter, equal gap dicts are listed only once.
    """
    grouped: Dict[int, List[Dict]] = {}
    for gap in gaps:
        for chapter_num in gap.get('chapters') or [0]:
            bucket = grouped.setdefault(chapter_num, [])
            if gap not in bucket:
                bucket.append(gap)
    return grouped


def _format_gap_entry(gap_number: int, gap: Dict) -> List[str]:
    """Render one gap as markdown lines (heading, checkbox, query, extras).

    Missing or None fields fall back to the documented defaults. The
    optional "Source subject" / "Mentioned in" bullets are only emitted
    when they carry information, so no empty placeholder lines are produced.
    """
    description = gap.get('description') or 'No description'
    query = gap.get('suggested_query') or ''
    priority = gap.get('priority') or 'medium'
    occurrence = gap.get('occurrence_count') or 1
    source_subject = gap.get('source_subject') or ''

    # NOTE(review): searched gaps are written as "[x]", the same marker
    # parse_gaps_markdown treats as "approved", so they are returned again
    # when the file is re-parsed - confirm the caller filters them out.
    if gap.get('searched', False):
        checkbox = "[x]"
        status_note = " *(already searched)*"
    else:
        checkbox = "[ ]"
        status_note = ""

    # Only 'high' gets the red marker; every other priority shares yellow.
    priority_emoji = "🔴" if priority == 'high' else "🟡"
    ellipsis = '...' if len(description) > 80 else ''

    entry = [
        f"### Gap {gap_number}: {description[:80]}{ellipsis}{status_note}",
        "",
        f"- {checkbox} **Research this gap** {priority_emoji} {priority.upper()}",
        f"- **Suggested query:** `{query}`",
    ]
    if source_subject:
        entry.append(f"- **Source subject:** {source_subject}")
    if occurrence > 1:
        entry.append(f"- **Mentioned in:** {occurrence} subject(s)")
    entry.append("")
    return entry


def parse_gaps_markdown(content: str) -> List[str]:
    """
    Parse gaps.md to find approved gaps (marked with [x]).

    A gap counts as approved when a line contains both ``[x]`` (either
    case) and the text "Research this gap". Its query is the first
    backtick-quoted span on the "Suggested query:" line found within the
    next four lines. The look-ahead stops early at the next heading,
    horizontal rule or checkbox line, so an approved gap whose query line
    was removed never picks up the query of the following gap.

    Args:
        content: Content of gaps.md file

    Returns:
        List of approved gap queries, in file order. Approved gaps without
        a usable (non-empty, backtick-quoted) query are skipped.
    """
    approved_queries: List[str] = []
    lines = content.split('\n')

    for i, line in enumerate(lines):
        lowered = line.lower()
        if '[x]' not in lowered or 'research this gap' not in lowered:
            continue

        # Look for this gap's query in the few lines that follow.
        for candidate in lines[i + 1:i + 5]:
            candidate_lower = candidate.lower()

            if 'suggested query:' in candidate_lower:
                # Extract query from backticks
                match = re.search(r'`([^`]+)`', candidate)
                if match:
                    approved_queries.append(match.group(1))
                break

            # Anything below one of these belongs to a different entry.
            stripped = candidate.strip()
            if (
                stripped.startswith('#')
                or stripped == '---'
                or 'research this gap' in candidate_lower
            ):
                break

    return approved_queries


# =============================================================================
# RESEARCH SUMMARY GENERATOR
# =============================================================================

def generate_research_summary(project: BookProject) -> str:
    """
    Generate research_summary.md with compiled research by chapter.

    The document starts with project metadata and overall counts, then has
    one section per chapter containing the chapter summary (if any), one
    entry per researched subject (synthesis truncated to 2000 characters)
    and up to ten of the chapter's web sources.

    Args:
        project: BookProject with research completed

    Returns:
        Markdown string for research_summary.md
    """
    lines = [
        "# Research Summary",
        "",
        f"**Project:** {project.title}",
    ]
    # The author line is optional; leave it out entirely when unset rather
    # than emitting an empty placeholder line.
    if project.author:
        lines.append(f"**Author:** {project.author}")

    # NOTE(review): the source counts below are read as-is;
    # generate_sources_json calls project.collect_all_sources() before
    # reading the same attributes - confirm whether that refresh is needed
    # here too.
    lines.extend([
        f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        "",
        "---",
        "",
        "## Overview",
        "",
        f"- **Chapters:** {len(project.chapters)}",
        f"- **Total subjects researched:** {project.completed_subjects}",
        f"- **Library sources:** {len(project.all_library_sources)}",
        f"- **Web sources:** {len(project.all_web_sources)}",
        f"- **Research gaps filled:** {sum(1 for g in project.all_gaps if g.get('searched', False))}",
        "",
        "---",
        "",
    ])

    for chapter in project.chapters:
        lines.extend([
            f"## Chapter {chapter.chapter_number}: {chapter.title}",
            "",
        ])

        # Chapter summary if available
        if chapter.research_summary:
            lines.extend([
                "### Summary",
                "",
                chapter.research_summary,
                "",
            ])

        # Subject-by-subject research
        lines.append("### Research by Subject")
        lines.append("")

        for sr in chapter.subject_research:
            # Anything other than 'completed' is shown as still pending.
            status_emoji = "✅" if sr.status == 'completed' else "⏳"
            lines.extend([
                f"#### {status_emoji} {sr.subject}",
                "",
            ])

            if sr.synthesis:
                # Keep the summary readable: cap each synthesis at 2000 chars.
                synthesis = sr.synthesis
                if len(synthesis) > 2000:
                    synthesis = synthesis[:2000] + "\n\n*[Truncated for summary]*"
                lines.extend([
                    synthesis,
                    "",
                ])

            lines.extend([
                f"- **Chunks found:** {sr.chunks_found}",
                f"- **Documents:** {sr.documents_found}",
                f"- **Gaps identified:** {len(sr.gaps)}",
                "",
            ])

        # Web sources for this chapter (first ten, then a "... and N more" note)
        if chapter.web_sources:
            lines.extend([
                "### Web Sources",
                "",
            ])
            for ws in chapter.web_sources[:10]:
                lines.extend([
                    f"- [{ws.get('title', 'Untitled')}]({ws.get('url', '#')})",
                    f"  - Query: {ws.get('query', 'N/A')}",
                    "",
                ])
            if len(chapter.web_sources) > 10:
                lines.append(f"*... and {len(chapter.web_sources) - 10} more*")
                lines.append("")

        lines.extend([
            "---",
            "",
        ])

    return "\n".join(lines)


# =============================================================================
# SOURCES.JSON GENERATOR
# =============================================================================

def generate_sources_json(project: BookProject) -> str:
    """
    Serialize every collected source, plus per-chapter counts, as JSON.

    The payload holds the project id and title, a generation timestamp,
    the full library and web source lists with their combined total, and
    a ``by_chapter`` mapping from chapter number to that chapter's title
    and source counts.

    Args:
        project: BookProject with sources collected

    Returns:
        JSON string (2-space indent, non-ASCII characters kept verbatim)
    """
    # Presumably (re)populates all_library_sources / all_web_sources -
    # confirm against book_workflow_models.
    project.collect_all_sources()

    library = project.all_library_sources
    web = project.all_web_sources

    # Counts only; the full source entries live in the top-level lists.
    chapter_breakdown = {
        ch.chapter_number: {
            'title': ch.title,
            'library_sources': len(ch.library_sources),
            'web_sources': len(ch.web_sources),
        }
        for ch in project.chapters
    }

    payload = {
        'project_id': project.project_id,
        'title': project.title,
        'generated_at': datetime.now().isoformat(),
        'total_sources': len(library) + len(web),
        'library_sources': library,
        'web_sources': web,
        'by_chapter': chapter_breakdown,
    }

    return json.dumps(payload, ensure_ascii=False, indent=2)


# =============================================================================
# CHAPTER DRAFT OUTPUT
# =============================================================================

def generate_chapter_drafts(project: BookProject, output_dir: Path) -> List[Path]:
    """
    Write chapter drafts to files.

    Each chapter with non-empty ``draft_content`` is written to
    ``<output_dir>/drafts/NN_<sanitized title>.md`` as a YAML front-matter
    block (chapter number, title, project title, timestamp, status)
    followed by the draft text. Chapters without a draft are skipped.

    Args:
        project: BookProject with drafts generated
        output_dir: Directory to write drafts (a ``drafts`` subdirectory
            is created inside it if needed)

    Returns:
        List of paths to written draft files
    """
    drafts_dir = Path(output_dir) / 'drafts'
    drafts_dir.mkdir(parents=True, exist_ok=True)

    written_files: List[Path] = []

    for chapter in project.chapters:
        if not chapter.draft_content:
            continue

        # Safe filename: keep word characters, whitespace and hyphens, cap
        # the length, then turn every whitespace character (not only plain
        # spaces) into '_' so tabs/newlines never reach the file name.
        safe_title = re.sub(r'[^\w\s-]', '', chapter.title)[:50]
        safe_title = re.sub(r'\s', '_', safe_title)
        filepath = drafts_dir / f"{chapter.chapter_number:02d}_{safe_title}.md"

        # Front matter. Titles go through json.dumps so embedded quotes and
        # backslashes are escaped; a JSON string is a valid YAML
        # double-quoted scalar, and plain titles render exactly as before.
        content = [
            "---",
            f"chapter: {chapter.chapter_number}",
            f"title: {json.dumps(chapter.title, ensure_ascii=False)}",
            f"project: {json.dumps(project.title, ensure_ascii=False)}",
            f"generated: \"{datetime.now().isoformat()}\"",
            "status: draft",
            "---",
            "",
            chapter.draft_content,
        ]

        with open(filepath, 'w', encoding='utf-8') as f:
            f.write("\n".join(content))

        written_files.append(filepath)
        logger.info("Wrote draft: %s", filepath)

    return written_files


# =============================================================================
# STATUS OUTPUT (JSON for Claude Code)
# =============================================================================

def generate_status_json(project: BookProject) -> str:
    """
    Build the machine-readable status report consumed by Claude Code.

    Starts from ``project.get_status_summary()`` and, for phases 1-5, adds
    two keys: ``next_action`` (a human-readable hint) and ``command`` (the
    CLI call to run next, or None once phase 5 has completed). Any other
    phase value leaves the summary untouched.

    Args:
        project: BookProject

    Returns:
        JSON string with status information
    """
    report = project.get_status_summary()
    phase = project.current_phase

    # (next_action, command) for the current phase; stays None when the
    # phase is outside 1-5.
    suggestion = None

    if phase == 1:
        if project.phase_status.get(1) == 'in_progress':
            suggestion = (
                "Continue Phase 1 (Initial Research)",
                f"python research_workflow.py --resume {project.project_id}",
            )
        else:
            suggestion = (
                "Start Phase 1 (Initial Research)",
                f"python research_workflow.py --resume {project.project_id} --phase 1",
            )
    elif phase == 2:
        suggestion = (
            "Review gaps.md and run Phase 3",
            f"python research_workflow.py --resume {project.project_id} --phase 3",
        )
    elif phase == 3:
        # Stay in phase 3 until every gap is filled, then move on.
        remaining = report['gaps']['unfilled']
        if remaining > 0:
            suggestion = (
                f"Fill {remaining} remaining gaps",
                f"python research_workflow.py --resume {project.project_id} --phase 3 --auto-fill-gaps",
            )
        else:
            suggestion = (
                "Run Phase 4 (Synthesis)",
                f"python research_workflow.py --resume {project.project_id} --phase 4",
            )
    elif phase == 4:
        suggestion = (
            "Run Phase 5 (Draft Generation) or export",
            f"python research_workflow.py --resume {project.project_id} --phase 5 --generate-drafts",
        )
    elif phase == 5:
        if project.completed_at:
            suggestion = ("Project complete! Review drafts in drafts/ folder", None)
        else:
            suggestion = (
                "Generate chapter drafts",
                f"python research_workflow.py --resume {project.project_id} --phase 5 --generate-drafts",
            )

    if suggestion is not None:
        report['next_action'], report['command'] = suggestion

    return json.dumps(report, indent=2)


# =============================================================================
# WRITE ALL OUTPUTS
# =============================================================================

def write_all_outputs(project: BookProject, output_dir: Optional[Path] = None) -> Dict[str, Path]:
    """
    Write all output files for a project.

    Produces, inside ``output_dir``:
    - gaps.md, research_summary.md and sources.json
    - chapters/NN_<title>/research.json for every chapter, plus summary.md
      when the chapter has a research summary
    - drafts/ (via generate_chapter_drafts) when any chapter has a draft

    Chapter directory names are sanitized the same way as draft file
    names (only word characters, hyphens and underscores survive), so a
    title containing a path separator or other punctuation cannot break
    directory creation.

    Args:
        project: BookProject
        output_dir: Output directory (defaults to project dir); a plain
            string path is accepted as well

    Returns:
        Dict mapping output type to file path. Keys: 'gaps', 'summary',
        'sources', 'chapters_dir' and - only when drafts were written -
        'drafts', whose value is the list of draft paths.
    """
    output_dir = Path(project.project_dir) if output_dir is None else Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    outputs = {}

    # Top-level documents: (key in the result, file name, generator).
    top_level_documents = (
        ('gaps', 'gaps.md', generate_gaps_markdown),
        ('summary', 'research_summary.md', generate_research_summary),
        ('sources', 'sources.json', generate_sources_json),
    )
    for key, filename, generate in top_level_documents:
        path = output_dir / filename
        with open(path, 'w', encoding='utf-8') as f:
            f.write(generate(project))
        outputs[key] = path
        logger.info("Wrote %s", filename)

    # Chapter outputs
    chapters_dir = output_dir / 'chapters'
    chapters_dir.mkdir(exist_ok=True)

    for chapter in project.chapters:
        # Same sanitization as the draft file names; for titles made of
        # letters, digits and single spaces the name is unchanged.
        safe_title = re.sub(r'[^\w\s-]', '', chapter.title)[:30]
        safe_title = re.sub(r'\s', '_', safe_title)
        chapter_dir = chapters_dir / f"{chapter.chapter_number:02d}_{safe_title}"
        chapter_dir.mkdir(exist_ok=True)

        # Chapter research.json
        research_path = chapter_dir / 'research.json'
        with open(research_path, 'w', encoding='utf-8') as f:
            json.dump(chapter.to_dict(), f, indent=2, ensure_ascii=False)

        # Chapter summary.md (own variable: must not be confused with the
        # project-level summary path recorded in `outputs`)
        if chapter.research_summary:
            chapter_summary_path = chapter_dir / 'summary.md'
            with open(chapter_summary_path, 'w', encoding='utf-8') as f:
                f.write(f"# {chapter.title}\n\n{chapter.research_summary}")

    outputs['chapters_dir'] = chapters_dir

    # Drafts (if generated)
    if any(chapter.draft_content for chapter in project.chapters):
        outputs['drafts'] = generate_chapter_drafts(project, output_dir)

    return outputs


# =============================================================================
# EXPORTS
# =============================================================================

# Public names exported by a star-import of this module.
__all__ = [
    'generate_gaps_markdown',
    'parse_gaps_markdown',
    'generate_research_summary',
    'generate_sources_json',
    'generate_chapter_drafts',
    'generate_status_json',
    'write_all_outputs',
]
