#!/usr/bin/env python3
"""
Book Curator - AI-Powered Content Curation and Quality Assessment

This tool moves beyond technical quality assessment ("is it readable?") to
content curation ("is it valuable?"). An AI assumes a configurable Persona
to evaluate books against your library's standards.

The Curation Process:
1. Extract a "taste test" sample (intro, first chapter, middle passage, bibliography)
2. Apply a curator persona (defined in YAML config)
3. Generate structured review with scores and verdict
4. Optionally auto-reject/approve based on thresholds

Usage:
    # Curate a single document
    python book_curator.py DOC_001

    # Curate with specific persona
    python book_curator.py DOC_001 --persona esoteric_scholar

    # Curate all uncurated documents
    python book_curator.py --batch

    # Auto-reject below threshold
    python book_curator.py --batch --auto-reject 4.0

    # List available personas
    python book_curator.py --list-personas

    # Create a new persona interactively
    python book_curator.py --create-persona
"""

import os
import sys
import json
import argparse
import logging
import random
import re
from datetime import datetime
from typing import Dict, List, Any, Optional, Tuple
from pathlib import Path
from dataclasses import dataclass, asdict, field

# Put the directory one level above this file's directory at the front of
# sys.path (presumably so the `pipeline` imports below resolve) and make it the
# process working directory.
# NOTE: both statements execute at import time, so merely importing this
# module changes the importing process's sys.path and current directory.
sys.path.insert(0, str(Path(__file__).parent.parent))
os.chdir(str(Path(__file__).parent.parent))

from pipeline.db_utils import get_db_connection, execute_query
from pipeline.config import (
    OPENAI_API_KEY, OPENAI_ENABLED, BASE_DIR,
    INTELLIGENCE_MODE, LOCAL_LLM_ENDPOINT, LOCAL_LLM_MODEL,
    CLOUD_MODELS
)

# Try to import OpenAI
try:
    from openai import OpenAI
    HAS_OPENAI = True
except ImportError:
    HAS_OPENAI = False

# Try to import YAML
try:
    import yaml
    HAS_YAML = True
except ImportError:
    HAS_YAML = False

logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
logger = logging.getLogger(__name__)


# =============================================================================
# CONFIGURATION
# =============================================================================

# YAML file holding persona definitions; read by load_personas() and written
# by save_personas().
CURATOR_CONFIG_PATH = BASE_DIR / 'config' / 'curator_personas.yaml'
# Per-section character budgets used by extract_curation_sample() when no
# explicit config is passed.
DEFAULT_SAMPLE_CONFIG = {
    'intro_chars': 3000,        # First N chars for introduction
    'first_chapter_chars': 4000, # First chapter sample
    'middle_sample_chars': 2000, # Random middle passage
    'bibliography_chars': 1500,  # Bibliography/references section
    'max_total_tokens': 4000,    # Max tokens for LLM context (NOTE(review): no code visible in this section reads this key)
}

# Default personas, returned by load_personas() when no usable config file
# is available.
#
# Each entry maps a persona key (the value given to --persona) to a dict with:
#   name, description - display text printed by list_personas()
#   system_prompt     - sent as the system message by curate_document()
#   criteria          - {criterion_name: description}; rendered into the user
#                       prompt by build_curation_prompt()
#   reject_phrases,
#   prefer_phrases    - NOTE(review): not read by any code visible in this
#                       section; confirm whether anything consumes them
DEFAULT_PERSONAS = {
    'general_scholar': {
        'name': 'General Scholar',
        'description': 'A balanced academic reviewer evaluating scholarly merit',
        'system_prompt': '''You are an experienced academic reviewer evaluating texts for a research library.
You value clarity, depth, proper citations, and original contribution to knowledge.
You appreciate both accessible introductions and rigorous academic works.
Evaluate texts fairly based on their intended audience and purpose.''',
        'criteria': {
            'depth': 'Intellectual depth and thoroughness of treatment',
            'originality': 'Original insights vs derivative rehashing',
            'rigor': 'Academic rigor, citations, and methodology',
            'clarity': 'Clear writing and logical organization',
            'relevance': 'Relevance to the research collection'
        },
        'reject_phrases': [],
        'prefer_phrases': []
    },
    'esoteric_scholar': {
        'name': 'Esoteric Scholar',
        'description': 'Rigorous scholar of Western Esotericism and Anthroposophy',
        'system_prompt': '''You are a rigorous scholar of Western Esotericism with deep knowledge of
Anthroposophy, Theosophy, Hermeticism, and related traditions. You value:
- Primary sources and original texts over secondary summaries
- Deep philosophical inquiry and spiritual science methodology
- Historical accuracy and proper lineage attribution
- Scholarly apparatus (citations, bibliography, footnotes)

You are skeptical of:
- "New Age" popularizations that lack depth
- Superficial summaries that miss nuance
- Modern self-help derivations of traditional teachings
- Works that misrepresent or oversimplify esoteric concepts

Rate texts based on their value to serious researchers of the Western Mystery Tradition.''',
        'criteria': {
            'depth': 'Depth of esoteric/philosophical content',
            'authenticity': 'Faithfulness to traditional sources and lineages',
            'rigor': 'Scholarly apparatus and citations',
            'originality': 'Original insight vs derivative compilation',
            'relevance': 'Relevance to Western Esoteric research'
        },
        'reject_phrases': ['new age fluff', 'self-help', 'superficial', 'pop spirituality'],
        'prefer_phrases': ['primary source', 'steiner', 'anthroposophy', 'initiation', 'mystery school']
    },
    'practical_librarian': {
        'name': 'Practical Librarian',
        'description': 'Focuses on utility and accessibility for researchers',
        'system_prompt': '''You are a practical research librarian evaluating texts for usefulness.
You care about:
- Will researchers actually use this text?
- Is it well-organized and searchable?
- Does it fill a gap in the collection?
- Is it the best available source on this topic?

You are less concerned with:
- Whether you personally agree with the content
- Stylistic preferences
- The author's credentials (focus on the text itself)

Evaluate texts based on their practical utility to researchers.''',
        'criteria': {
            'utility': 'Practical usefulness for research',
            'organization': 'Structure, index, searchability',
            'uniqueness': 'Does it fill a gap in the collection?',
            'accessibility': 'Readable and well-presented',
            'completeness': 'Comprehensive treatment of topic'
        },
        'reject_phrases': [],
        'prefer_phrases': []
    },
    'strict_academic': {
        'name': 'Strict Academic',
        'description': 'High standards for peer-review quality work',
        'system_prompt': '''You are a strict academic gatekeeper with high standards.
You only approve texts that meet peer-review quality standards:
- Proper academic methodology
- Extensive citations and bibliography
- Clear thesis and argumentation
- Original contribution to the field
- Published by reputable academic press (preferred)

You reject:
- Popular science without rigor
- Self-published works without scholarly apparatus
- Polemical or ideological works masquerading as scholarship
- Compilations without original analysis

Be rigorous but fair. A well-executed introductory text can be valuable.''',
        'criteria': {
            'methodology': 'Sound academic methodology',
            'citations': 'Quality and quantity of citations',
            'argumentation': 'Clear thesis and logical argument',
            'contribution': 'Original contribution to field',
            'credibility': 'Author credentials and publisher reputation'
        },
        'reject_phrases': ['self-published', 'no citations', 'blog post', 'opinion piece'],
        'prefer_phrases': ['peer-reviewed', 'university press', 'methodology', 'thesis']
    }
}


# =============================================================================
# DATA CLASSES
# =============================================================================

@dataclass
class CurationSample:
    """Text excerpts pulled from one document for the curator to review.

    Instances are produced by extract_curation_sample() and rendered into the
    LLM prompt by build_curation_prompt().
    """
    document_id: str  # ID of the sampled document
    title: str  # Title read from the documents table
    introduction: str  # Opening characters of the concatenated chunk text
    first_chapter: str  # Text gathered from early chunks as a first-chapter sample
    middle_passage: str  # Text gathered from around the middle of the document
    bibliography: str  # End-matter text: a detected bibliography, else the final chunk
    total_chars: int  # Combined length of the four excerpts above
    total_chunks: int  # Number of chunks the document has
    extraction_notes: List[str] = field(default_factory=list)  # Notes on how each excerpt was chosen


@dataclass
class CriterionScore:
    """One criterion's score and justification within a curation review."""
    criterion: str  # Criterion name as reported in the LLM reply
    score: int  # 0-10
    reasoning: str  # The LLM's explanation for the score


@dataclass
class CurationReview:
    """Complete curation review for a document.

    Built by curate_document() from the LLM's JSON reply and written to the
    database by _save_curation_review().
    """
    document_id: str
    title: str
    persona_used: str  # Key of the persona the review was generated with

    # Scores
    criteria_scores: List[CriterionScore]
    overall_score: float  # Average of criteria

    # Qualitative assessment
    tone_assessment: str
    strengths: List[str]
    weaknesses: List[str]

    # Verdict
    verdict: str  # ACCEPT, REJECT, REVIEW
    verdict_reasoning: str

    # Metadata
    reviewed_at: str = field(default_factory=lambda: datetime.now().isoformat())  # ISO string of naive datetime.now() at construction
    sample_size_chars: int = 0  # Total characters of sampled text given to the LLM
    model_used: str = ""  # Name of the LLM model that produced the review


# =============================================================================
# PERSONA MANAGEMENT
# =============================================================================

def load_personas() -> Dict[str, Dict]:
    """Load curator personas from the YAML config file, falling back to defaults.

    The file at CURATOR_CONFIG_PATH is used only when PyYAML is importable,
    the file exists, it parses, and its top-level ``personas`` entry is a
    mapping. Read/parse errors and a malformed ``personas`` entry are logged
    as warnings and the defaults are used instead.

    Returns:
        Mapping of persona key to persona definition. When the defaults are
        used, the outer dict is a fresh copy, so callers may add or remove
        keys without altering DEFAULT_PERSONAS.
    """
    if HAS_YAML and CURATOR_CONFIG_PATH.exists():
        try:
            with open(CURATOR_CONFIG_PATH, 'r', encoding='utf-8') as f:
                config = yaml.safe_load(f) or {}
        except Exception as e:
            logger.warning(f"Error loading persona config: {e}. Using defaults.")
        else:
            if isinstance(config, dict):
                if 'personas' not in config:
                    return dict(DEFAULT_PERSONAS)
                personas = config['personas']
                # A bare "personas:" line parses as None; returning that would
                # crash every caller that does "key in personas".
                if isinstance(personas, dict):
                    return personas
            logger.warning(
                f"Persona config {CURATOR_CONFIG_PATH} has no 'personas' mapping. Using defaults."
            )

    # Copy so that callers adding personas do not mutate the module default.
    return dict(DEFAULT_PERSONAS)


def save_personas(personas: Dict[str, Dict]) -> None:
    """Write the persona mapping to the YAML file at CURATOR_CONFIG_PATH.

    The parent directory is created when missing. Without PyYAML nothing is
    written and an error is logged instead.

    Args:
        personas: Mapping of persona key to persona definition.
    """
    if HAS_YAML:
        CURATOR_CONFIG_PATH.parent.mkdir(parents=True, exist_ok=True)

        # Wrap the personas in a small versioned envelope.
        document = {
            'version': '1.0',
            'description': 'Curator persona configurations for book_curator.py',
            'personas': personas
        }

        with open(CURATOR_CONFIG_PATH, 'w') as handle:
            # sort_keys=False keeps the insertion order of keys in the file.
            yaml.dump(document, handle, default_flow_style=False, sort_keys=False)

        logger.info(f"Saved personas to {CURATOR_CONFIG_PATH}")
    else:
        logger.error("PyYAML not installed. Cannot save personas.")


def list_personas() -> None:
    """Print each available persona's key, name, description and criteria names."""
    available = load_personas()
    rule = "=" * 70

    print("\n" + rule)
    print("AVAILABLE CURATOR PERSONAS")
    print(rule)

    for persona_key in available:
        details = available[persona_key]
        print(f"\n{persona_key}")
        # Fall back to the key itself when no display name is configured.
        print(f"  Name: {details.get('name', persona_key)}")
        print(f"  Description: {details.get('description', 'No description')}")
        criteria_names = details.get('criteria', {}).keys()
        print(f"  Criteria: {', '.join(criteria_names)}")


def create_persona_interactive() -> None:
    """Prompt on stdin for a new persona and save it to the persona config file.

    Asks for a key, display name, description, a multi-line system prompt
    (terminated by an empty line) and any number of ``name: description``
    criteria (terminated by an empty line). If no criteria are entered, a
    generic depth/quality/relevance set is used. The new persona is merged
    into the currently loaded personas and written with save_personas().

    Nothing is created when PyYAML is unavailable (saving would be impossible)
    or when the persona key is empty.
    """
    if not HAS_YAML:
        # save_personas() would only log and return, so bail out before asking
        # the user to type everything in and then claiming success.
        logger.error("PyYAML not installed. Cannot save personas.")
        return

    print("\n" + "=" * 70)
    print("CREATE NEW CURATOR PERSONA")
    print("=" * 70)

    key = input("\nPersona key (lowercase, underscores): ").strip().lower().replace(' ', '_')
    if not key:
        print("Persona key cannot be empty. No persona created.")
        return
    name = input("Display name: ").strip()
    description = input("Brief description: ").strip()

    print("\nEnter the system prompt (the persona's identity and values).")
    print("This tells the AI who it is and what it values. End with empty line.")
    lines = []
    while True:
        line = input()
        if line == '':
            break
        lines.append(line)
    system_prompt = '\n'.join(lines)

    print("\nEnter evaluation criteria (format: 'criterion_name: description')")
    print("Enter empty line when done. Example: 'depth: Intellectual depth of content'")
    criteria = {}
    while True:
        line = input("Criterion: ").strip()
        if not line:
            break
        crit_key, separator, crit_desc = line.partition(':')
        crit_key = crit_key.strip().lower()
        if not separator or not crit_key:
            # Tell the user why the line was dropped instead of ignoring it silently.
            print("  Ignored: expected 'criterion_name: description'")
            continue
        criteria[crit_key] = crit_desc.strip()

    if not criteria:
        criteria = {
            'depth': 'Depth and thoroughness',
            'quality': 'Overall quality',
            'relevance': 'Relevance to collection'
        }

    persona = {
        'name': name,
        'description': description,
        'system_prompt': system_prompt,
        'criteria': criteria,
        'reject_phrases': [],
        'prefer_phrases': []
    }

    # Work on a copy so a defaults-backed mapping is never mutated in place.
    personas = dict(load_personas())
    personas[key] = persona
    save_personas(personas)

    print(f"\nPersona '{key}' created successfully!")
    print(f"Use it with: python book_curator.py DOC_ID --persona {key}")


# =============================================================================
# SAMPLE EXTRACTION
# =============================================================================

def _collect_chunk_text(chunks: List[Tuple], start: int, stop: int, min_chars: int) -> str:
    """Join chunk texts from index start up to (not including) stop, stopping once min_chars are gathered."""
    pieces = []
    gathered = 0
    for idx in range(start, stop):
        # A NULL chunk_text comes back as None; treat it as empty text.
        piece = (chunks[idx][2] or '') + " "
        pieces.append(piece)
        gathered += len(piece)
        if gathered >= min_chars:
            break
    return ''.join(pieces)


def extract_curation_sample(
    document_id: str,
    config: Optional[Dict] = None
) -> Optional[CurationSample]:
    """
    Extract a representative sample from a document for curation.

    Samples:
    1. Introduction/first pages (what is this book promising?)
    2. First chapter content (writing style and depth)
    3. Random middle passage (consistency and substance)
    4. Bibliography/references (academic rigor check)

    Args:
        document_id: ID of the document whose chunks are sampled.
        config: Optional overrides for the character budgets in
            DEFAULT_SAMPLE_CONFIG. Keys that are omitted keep their defaults.

    Returns:
        A CurationSample, or None (after logging an error) when the document
        does not exist or has no chunks.
    """
    # Overlay caller settings on the defaults so a partial config does not
    # raise KeyError further down.
    config = {**DEFAULT_SAMPLE_CONFIG, **(config or {})}
    notes = []

    with get_db_connection() as conn:
        with conn.cursor() as cur:
            # Get document info
            cur.execute("""
                SELECT title, word_count
                FROM documents
                WHERE document_id = %s
            """, (document_id,))
            row = cur.fetchone()
            if not row:
                logger.error(f"Document {document_id} not found")
                return None

            title = row[0]

            # Get all chunks ordered by sequence
            cur.execute("""
                SELECT chunk_id, chunk_sequence, chunk_text
                FROM chunks
                WHERE document_id = %s
                ORDER BY chunk_sequence
            """, (document_id,))
            chunks = cur.fetchall()

            if not chunks:
                logger.error(f"No chunks found for {document_id}")
                return None

    total_chunks = len(chunks)
    all_text = ' '.join(c[2] or '' for c in chunks)

    # 1. Introduction (first N chars)
    intro_chars = config['intro_chars']
    introduction = all_text[:intro_chars]
    if len(all_text) > intro_chars:
        introduction += "..."
        notes.append(f"Introduction truncated at {intro_chars} chars")

    # 2. First chapter (up to four chunks starting at the third chunk, or at
    #    the last chunk when the document has fewer than three)
    first_chapter_chars = config['first_chapter_chars']
    chapter_start = min(2, total_chunks - 1)
    chapter_end = min(chapter_start + 4, total_chunks)
    chapter_text = _collect_chunk_text(chunks, chapter_start, chapter_end, first_chapter_chars)
    first_chapter = chapter_text[:first_chapter_chars]
    # Only flag truncation when text was actually cut off, not when the
    # gathered text happens to be exactly the limit.
    if len(chapter_text) > first_chapter_chars:
        first_chapter += "..."
        notes.append(f"First chapter sample truncated at {first_chapter_chars} chars")

    # 3. Random middle passage
    middle_sample_chars = config['middle_sample_chars']
    if total_chunks > 10:
        # Pick a random chunk from the middle 50% (randint is inclusive at
        # both ends; the upper bound is still a valid index).
        middle_start = total_chunks // 4
        middle_end = (total_chunks * 3) // 4
        random_idx = random.randint(middle_start, middle_end)

        # Get a few chunks around that point
        middle_passage = _collect_chunk_text(
            chunks, random_idx, min(random_idx + 3, total_chunks), middle_sample_chars
        )[:middle_sample_chars]
        notes.append(f"Middle passage from chunk {random_idx}/{total_chunks}")
    else:
        # Document too short, use what we have
        middle_passage = (chunks[total_chunks // 2][2] or '')[:middle_sample_chars]
        notes.append("Document short; middle sample may overlap with intro")

    # 4. Bibliography (look for it in the last 20% of chunks)
    bibliography_chars = config['bibliography_chars']
    bib_search_start = int(total_chunks * 0.8)
    # NOTE(review): these are plain substring matches, so e.g. "notes" and
    # "sources" also hit ordinary prose ("footnotes", "resources").
    bib_regex = re.compile('|'.join([
        r'bibliograph', r'references', r'works cited', r'sources',
        r'further reading', r'notes', r'endnotes'
    ]))

    for i in range(bib_search_start, total_chunks):
        if bib_regex.search((chunks[i][2] or '').lower()):
            # Found bibliography section: take this chunk and up to two more
            bibliography = _collect_chunk_text(
                chunks, i, min(i + 3, total_chunks), bibliography_chars
            )
            notes.append(f"Bibliography found at chunk {i}")
            break
    else:
        # No clear bibliography, use last chunk
        bibliography = chunks[-1][2] or ''
        notes.append("No clear bibliography section found; using final content")

    bibliography = bibliography[:bibliography_chars]

    total_sample_chars = len(introduction) + len(first_chapter) + len(middle_passage) + len(bibliography)

    return CurationSample(
        document_id=document_id,
        title=title,
        introduction=introduction,
        first_chapter=first_chapter,
        middle_passage=middle_passage,
        bibliography=bibliography,
        total_chars=total_sample_chars,
        total_chunks=total_chunks,
        extraction_notes=notes
    )


# =============================================================================
# AI CURATION
# =============================================================================

def build_curation_prompt(sample: CurationSample, persona: Dict) -> str:
    """Render the user-message prompt asking the LLM for a JSON review.

    Args:
        sample: Excerpts and identifying fields of the document under review.
        persona: Persona definition; only its 'criteria' mapping is used here.

    Returns:
        The prompt text: document info, one bullet per criterion, the four
        excerpts, and the JSON schema the reply must follow.
    """
    bullet_lines = []
    for criterion_name, criterion_desc in persona.get('criteria', {}).items():
        bullet_lines.append(f"- {criterion_name}: {criterion_desc}")
    criteria_block = '\n'.join(bullet_lines)

    # Doubled braces below are literal braces in the JSON example.
    return f"""You are evaluating a text for inclusion in a curated research library.

## Document Information
- Title: {sample.title}
- Document ID: {sample.document_id}

## Evaluation Criteria
{criteria_block}

## Text Samples

### INTRODUCTION (First pages - What is this book promising?)
{sample.introduction}

### FIRST CHAPTER (Writing style and depth)
{sample.first_chapter}

### MIDDLE PASSAGE (Consistency and substance)
{sample.middle_passage}

### BIBLIOGRAPHY/END MATTER (Academic rigor)
{sample.bibliography}

---

Based on these samples, provide a structured evaluation. Respond ONLY with valid JSON:

{{
    "criteria_scores": [
        {{"criterion": "criterion_name", "score": 0-10, "reasoning": "brief explanation"}}
    ],
    "tone_assessment": "Description of the writing tone and style",
    "strengths": ["strength 1", "strength 2"],
    "weaknesses": ["weakness 1", "weakness 2"],
    "verdict": "ACCEPT or REJECT or REVIEW",
    "verdict_reasoning": "2-3 sentence explanation of the verdict"
}}

Score guide: 0-2 = Poor, 3-4 = Below Average, 5-6 = Average, 7-8 = Good, 9-10 = Excellent

Be rigorous but fair. Consider the text's intended purpose and audience."""


_VALID_VERDICTS = ('ACCEPT', 'REJECT', 'REVIEW')


def _parse_llm_review_json(response_text: str) -> Dict[str, Any]:
    """Decode the LLM reply into a dict, tolerating code fences and surrounding prose.

    Raises:
        ValueError: If no JSON object can be decoded from the reply
            (json.JSONDecodeError is a ValueError subclass).
    """
    text = response_text.strip()
    if text.startswith('```'):
        # Accept a bare fence as well as one tagged with a language (```json).
        text = re.sub(r'^```[A-Za-z0-9_-]*\s*', '', text)
        text = re.sub(r'\s*```$', '', text)

    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        # The model may have wrapped the JSON in prose; retry on the
        # outermost {...} span before giving up.
        start, end = text.find('{'), text.rfind('}')
        if start == -1 or end <= start:
            raise
        data = json.loads(text[start:end + 1])

    if not isinstance(data, dict):
        raise ValueError(f"expected a JSON object, got {type(data).__name__}")
    return data


def _build_criterion_scores(raw_scores: Any) -> List[CriterionScore]:
    """Turn the reply's raw score entries into CriterionScore objects, skipping unusable ones."""
    scores = []
    if not isinstance(raw_scores, list):
        return scores
    for entry in raw_scores:
        if not isinstance(entry, dict):
            continue
        try:
            # Models sometimes return "7" or 7.5; normalise to an int.
            value = int(round(float(entry['score'])))
        except (KeyError, TypeError, ValueError, OverflowError):
            logger.warning(f"Skipping criterion with unusable score: {entry!r}")
            continue
        scores.append(CriterionScore(
            criterion=str(entry.get('criterion', 'unknown')),
            score=max(0, min(10, value)),  # keep within the documented 0-10 range
            reasoning=str(entry.get('reasoning') or '')
        ))
    return scores


def curate_document(
    document_id: str,
    persona_key: str = 'general_scholar',
    save_to_db: bool = True,
    auto_quarantine: bool = False
) -> Optional[CurationReview]:
    """
    Curate a document using the specified persona.

    Extracts a text sample, sends it with the persona's system prompt to the
    configured LLM (cloud first, then local), and converts the JSON reply
    into a CurationReview. The overall score is the mean of the criterion
    scores (0.0 when the reply contains none).

    Args:
        document_id: Document to curate
        persona_key: Persona configuration to use
        save_to_db: Whether to save the review to database
        auto_quarantine: If True, automatically quarantine REJECT verdicts

    Returns:
        CurationReview object or None on failure (unknown persona, missing
        document, no LLM available, failed request, or unparseable reply;
        each case is logged as an error).
    """
    # Load persona
    personas = load_personas()
    if persona_key not in personas:
        logger.error(f"Persona '{persona_key}' not found. Use --list-personas to see available.")
        return None

    persona = personas[persona_key]

    # Extract sample
    logger.info(f"Extracting curation sample from {document_id}...")
    sample = extract_curation_sample(document_id)
    if not sample:
        return None

    logger.info(f"Sample extracted: {sample.total_chars} chars from {sample.total_chunks} chunks")

    # Initialize LLM client. Both branches need the openai package: the local
    # endpoint is reached through the same client class.
    if INTELLIGENCE_MODE in ('cloud', 'auto') and OPENAI_ENABLED and HAS_OPENAI:
        client = OpenAI(api_key=OPENAI_API_KEY)
        model = CLOUD_MODELS.get('chat', 'gpt-4o-mini')
        logger.info(f"Using cloud LLM: {model}")
    elif INTELLIGENCE_MODE in ('local', 'auto') and LOCAL_LLM_ENDPOINT and HAS_OPENAI:
        client = OpenAI(
            base_url=LOCAL_LLM_ENDPOINT,
            api_key="not-needed"
        )
        model = LOCAL_LLM_MODEL
        logger.info(f"Using local LLM: {model}")
    else:
        if not HAS_OPENAI:
            logger.error("The 'openai' package is not installed; it is required for cloud and local LLM access.")
        logger.error("No LLM available. Curation requires cloud or local intelligence mode.")
        return None

    # Build prompt
    system_prompt = persona.get('system_prompt', '')
    user_prompt = build_curation_prompt(sample, persona)

    # Call LLM
    logger.info("Requesting curation review from LLM...")
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.3,  # More deterministic for evaluation
            max_tokens=2000
        )
        # content can be None (e.g. an empty completion); treat it as empty text
        response_text = (response.choices[0].message.content or '').strip()
    except Exception as e:
        logger.error(f"LLM request failed: {e}")
        return None

    # Parse JSON response (kept separate from the request so a bad reply is
    # not reported as a failed request)
    try:
        review_data = _parse_llm_review_json(response_text)
    except ValueError as e:
        logger.error(f"Failed to parse LLM response as JSON: {e}")
        logger.debug(f"Response was: {response_text[:500]}")
        return None

    # Build CurationReview
    criteria_scores = _build_criterion_scores(review_data.get('criteria_scores', []))

    # Calculate overall score
    if criteria_scores:
        overall_score = sum(cs.score for cs in criteria_scores) / len(criteria_scores)
    else:
        overall_score = 0.0

    # Downstream code compares the verdict against exact upper-case strings,
    # so normalise it and route anything unexpected to manual review.
    verdict = str(review_data.get('verdict') or 'REVIEW').strip().upper()
    if verdict not in _VALID_VERDICTS:
        verdict = 'REVIEW'

    strengths = review_data.get('strengths')
    weaknesses = review_data.get('weaknesses')

    review = CurationReview(
        document_id=document_id,
        title=sample.title,
        persona_used=persona_key,
        criteria_scores=criteria_scores,
        overall_score=round(overall_score, 2),
        tone_assessment=review_data.get('tone_assessment') or '',
        strengths=strengths if isinstance(strengths, list) else [],
        weaknesses=weaknesses if isinstance(weaknesses, list) else [],
        verdict=verdict,
        verdict_reasoning=review_data.get('verdict_reasoning') or '',
        sample_size_chars=sample.total_chars,
        model_used=model
    )

    # Save to database
    if save_to_db:
        _save_curation_review(review, auto_quarantine=auto_quarantine)

    return review


def _save_curation_review(review: CurationReview, auto_quarantine: bool = False) -> None:
    """
    Persist a curation review onto its row in the documents table.

    Adds the curation columns when `curation_score` is missing, appends a
    "[CURATION REVIEW]" JSON summary to the document's notes, and updates the
    score, curation status and needs_review flag. ACCEPT maps to 'approved';
    REJECT maps to 'quarantined' only with auto_quarantine, otherwise every
    non-ACCEPT verdict stays 'pending'.

    Args:
        review: The curation review to save
        auto_quarantine: If True, automatically quarantine REJECT verdicts
    """
    quarantine_now = auto_quarantine and review.verdict == 'REJECT'

    # Determine curation status based on verdict
    if review.verdict == 'ACCEPT':
        status = 'approved'
    elif quarantine_now:
        status = 'quarantined'
    else:
        status = 'pending'

    flag_for_review = review.verdict in ('REJECT', 'REVIEW')

    # Store curation details in notes
    summary = {
        'curation': {
            'persona': review.persona_used,
            'overall_score': review.overall_score,
            'verdict': review.verdict,
            'verdict_reasoning': review.verdict_reasoning,
            'reviewed_at': review.reviewed_at,
            'criteria': {item.criterion: item.score for item in review.criteria_scores}
        }
    }
    note_suffix = f"\n\n[CURATION REVIEW]\n{json.dumps(summary)}"

    with get_db_connection() as conn:
        with conn.cursor() as cur:
            # Ensure curation columns exist
            cur.execute("""
                SELECT column_name FROM information_schema.columns
                WHERE table_name = 'documents' AND column_name = 'curation_score'
            """)
            has_curation_columns = cur.fetchone()
            if not has_curation_columns:
                # Add curation columns if they don't exist
                cur.execute("""
                    ALTER TABLE documents
                    ADD COLUMN IF NOT EXISTS curation_status VARCHAR(20) DEFAULT 'pending',
                    ADD COLUMN IF NOT EXISTS curation_score NUMERIC(4,2),
                    ADD COLUMN IF NOT EXISTS quarantined_at TIMESTAMP,
                    ADD COLUMN IF NOT EXISTS quarantine_reason TEXT
                """)

            # Update document with curation data
            update_params = (
                note_suffix,
                review.overall_score,
                status,
                flag_for_review,
                review.document_id
            )
            cur.execute("""
                UPDATE documents
                SET notes = COALESCE(notes, '') || %s,
                    curation_score = %s,
                    curation_status = %s,
                    needs_review = %s,
                    updated_at = NOW()
                WHERE document_id = %s
            """, update_params)

            # Rejected documents are additionally stamped with a quarantine
            # time and reason when auto-quarantine is on.
            if quarantine_now:
                quarantine_params = (
                    f"[curator] {review.verdict_reasoning}",
                    review.document_id
                )
                cur.execute("""
                    UPDATE documents
                    SET quarantined_at = NOW(),
                        quarantine_reason = %s
                    WHERE document_id = %s
                """, quarantine_params)
                logger.info(f"Auto-quarantined document: {review.document_id}")

            conn.commit()
            logger.info(f"Saved curation review to database for {review.document_id}")


# =============================================================================
# BATCH CURATION
# =============================================================================

def get_uncurated_documents(limit: int = 50) -> List[Tuple[str, str]]:
    """Return (document_id, title) rows for documents with no curation review yet.

    A document counts as uncurated when its notes are NULL or do not contain
    the "[CURATION REVIEW]" marker. Rows are ordered by created_at, newest
    first, and capped at `limit`.
    """
    with get_db_connection() as conn:
        with conn.cursor() as cur:
            # '%%' is a literal '%' because the query is executed with parameters.
            uncurated_sql = """
                SELECT document_id, title
                FROM documents
                WHERE notes NOT LIKE '%%[CURATION REVIEW]%%'
                   OR notes IS NULL
                ORDER BY created_at DESC
                LIMIT %s
            """
            cur.execute(uncurated_sql, (limit,))
            rows = cur.fetchall()
            return rows


def batch_curate(
    persona_key: str = 'general_scholar',
    limit: int = 10,
    auto_reject_threshold: float = None,
    auto_quarantine: bool = False,
    dry_run: bool = False
) -> Dict[str, Any]:
    """
    Curate multiple uncurated documents.

    Each document is reviewed with curate_document(); the optional score
    threshold is applied to the result, and the final review is then written
    to the database exactly once (skipped entirely in dry-run mode). A failure
    on one document is logged and counted without stopping the batch.

    Args:
        persona_key: Persona to use for all reviews
        limit: Maximum documents to process
        auto_reject_threshold: Auto-reject documents below this score
        auto_quarantine: Automatically quarantine rejected documents
        dry_run: Preview without saving

    Returns:
        Summary of batch curation results: counters plus a 'reviews' list with
        one entry per reviewed document. In dry-run mode the 'quarantined'
        figures describe what would have been quarantined.
    """
    results = {
        'total_processed': 0,
        'accepted': 0,
        'rejected': 0,
        'quarantined': 0,
        'review_needed': 0,
        'errors': 0,
        'auto_rejected': 0,
        'reviews': [],
        'dry_run': dry_run
    }

    # Get uncurated documents
    documents = get_uncurated_documents(limit)
    if not documents:
        logger.info("No uncurated documents found.")
        return results

    logger.info(f"Found {len(documents)} uncurated documents. Processing...")

    for doc_id, title in documents:
        # title may be NULL in the database; fall back to the ID for the log line
        logger.info(f"\nCurating: {str(title or doc_id)[:50]}...")

        try:
            # Persistence is deferred to this function so that an auto-reject
            # override is stored with a single write instead of a second,
            # duplicate "[CURATION REVIEW]" note.
            review = curate_document(
                doc_id,
                persona_key=persona_key,
                save_to_db=False,
                auto_quarantine=auto_quarantine
            )

            if not review:
                results['errors'] += 1
                continue

            results['total_processed'] += 1

            # Apply auto-reject threshold
            if auto_reject_threshold is not None and review.overall_score < auto_reject_threshold:
                review.verdict = 'REJECT'
                review.verdict_reasoning = f"Auto-rejected: score {review.overall_score} below threshold {auto_reject_threshold}"
                results['auto_rejected'] += 1

            if not dry_run:
                _save_curation_review(review, auto_quarantine=auto_quarantine)

            # Count verdicts
            if review.verdict == 'ACCEPT':
                results['accepted'] += 1
            elif review.verdict == 'REJECT':
                results['rejected'] += 1
                if auto_quarantine:
                    results['quarantined'] += 1
            else:
                results['review_needed'] += 1

            results['reviews'].append({
                'document_id': doc_id,
                'title': title,
                'score': review.overall_score,
                'verdict': review.verdict,
                'quarantined': auto_quarantine and review.verdict == 'REJECT'
            })

        except Exception as e:
            logger.error(f"Error curating {doc_id}: {e}")
            results['errors'] += 1

    return results


# =============================================================================
# OUTPUT FORMATTERS
# =============================================================================

def print_review(review: CurationReview, format: str = 'text') -> None:
    """Print a curation review to stdout.

    Args:
        review: The review to display.
        format: 'json' prints the review as indented JSON; any other value
            prints the human-readable text report.
    """
    if format == 'json':
        # asdict() already converts the nested CriterionScore dataclasses.
        print(json.dumps(asdict(review), indent=2))
        return

    print("\n" + "=" * 70)
    print("CURATION REVIEW")
    print("=" * 70)
    print(f"Document: {review.document_id}")
    print(f"Title: {review.title}")
    print(f"Persona: {review.persona_used}")
    print(f"Reviewed: {review.reviewed_at}")
    print(f"Sample Size: {review.sample_size_chars:,} chars")
    print()

    # Criteria scores table
    print("CRITERIA SCORES")
    print("-" * 70)
    print(f"{'Criterion':<15} {'Score':<8} {'Reasoning'}")
    print("-" * 70)
    for cs in review.criteria_scores:
        # The bar needs a whole number of cells between 0 and 10; scores may
        # arrive as floats or out of range, which would break "█" * score.
        try:
            filled = int(round(float(cs.score)))
        except (TypeError, ValueError, OverflowError):
            filled = 0
        filled = max(0, min(10, filled))
        score_bar = "█" * filled + "░" * (10 - filled)
        print(f"{cs.criterion:<15} {cs.score}/10 {score_bar}")

        # Only add an ellipsis when the reasoning was actually cut short.
        reasoning = cs.reasoning or ''
        if len(reasoning) > 50:
            reasoning = reasoning[:50] + "..."
        print(f"{'':>15} {reasoning}")
    print("-" * 70)
    print(f"{'OVERALL':<15} {review.overall_score:.1f}/10")
    print()

    # Tone
    print(f"TONE: {review.tone_assessment}")
    print()

    # Strengths/Weaknesses
    print("STRENGTHS:")
    for s in review.strengths:
        print(f"  + {s}")
    print()
    print("WEAKNESSES:")
    for w in review.weaknesses:
        print(f"  - {w}")
    print()

    # Verdict
    verdict_emoji = {
        'ACCEPT': '✅',
        'REJECT': '❌',
        'REVIEW': '⚠️'
    }.get(review.verdict, '❓')

    print("=" * 70)
    print(f"VERDICT: {verdict_emoji} {review.verdict}")
    print(f"Reasoning: {review.verdict_reasoning}")
    print("=" * 70)


def print_batch_results(results: Dict[str, Any], format: str = 'text') -> None:
    """Write a batch curation summary to stdout.

    With ``format='json'`` the ``results`` mapping is dumped as indented
    JSON. Otherwise a text report is produced: the aggregate counters first,
    then one table row per entry in ``results['reviews']``, and finally a
    hint about ``quarantine_manager.py`` when anything was quarantined.
    """
    if format == 'json':
        print(json.dumps(results, indent=2))
        return

    heavy_rule = "=" * 70
    light_rule = "-" * 70

    print("\n" + heavy_rule)
    print("BATCH CURATION RESULTS")
    print(heavy_rule)
    print(f"Total Processed: {results['total_processed']}")
    print(f"Accepted: {results['accepted']}")
    print(f"Rejected: {results['rejected']} (Auto-rejected: {results['auto_rejected']})")

    # 'quarantined' is read with a default, so a missing key counts as zero.
    any_quarantined = results.get('quarantined', 0) > 0
    if any_quarantined:
        print(f"Quarantined: {results['quarantined']}")
    print(f"Needs Review: {results['review_needed']}")
    print(f"Errors: {results['errors']}")
    print()

    entries = results['reviews']
    if entries:
        print("INDIVIDUAL RESULTS:")
        print(light_rule)
        # The extra column only appears when at least one row is flagged.
        status_column = " Status" if any(e.get('quarantined') for e in entries) else ""
        print(f"{'Document ID':<40} {'Score':<8} {'Verdict':<10}" + status_column)
        print(light_rule)
        for entry in entries:
            flag = " [QUARANTINED]" if entry.get('quarantined') else ""
            print(
                f"{entry['document_id'][:38]:<40} "
                f"{entry['score']:<8.1f} "
                f"{entry['verdict']:<10}" + flag
            )

    if any_quarantined:
        print("\nQuarantined documents can be reviewed with:")
        print("  python quarantine_manager.py --list")


# =============================================================================
# CLI INTERFACE
# =============================================================================

def _exit_with_error(message: str, output_format: str) -> None:
    """Report *message* in the requested output format and exit with status 1."""
    if output_format == 'json':
        # Keep stdout machine-readable when the caller asked for JSON.
        print(json.dumps({'error': message}))
    else:
        print(message)
    sys.exit(1)


def _preview_text(text: Optional[str], limit: int = 500) -> str:
    """Return at most *limit* characters of *text*, adding "..." only if cut."""
    text = text or ""
    if len(text) <= limit:
        return text
    return text[:limit] + "..."


def main(argv: Optional[List[str]] = None) -> None:
    """Command-line entry point for the book curator.

    Parses the arguments and runs exactly one operation, checked in this
    order: ``--list-personas``, ``--create-persona``, ``--show-sample``,
    ``--batch``, single-document curation. With no operation the help text
    is printed.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, so calling ``main()`` behaves as before.

    Exits with status 1 on user-facing failures (missing document id, failed
    sample extraction, failed curation), on Ctrl-C, and on any unexpected
    exception. With ``--format json`` failures are printed as a JSON object
    with an ``error`` key.
    """
    parser = argparse.ArgumentParser(
        description="Book Curator - AI-powered content curation and quality assessment",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python book_curator.py DOC_001
  python book_curator.py DOC_001 --persona esoteric_scholar
  python book_curator.py --batch --limit 10
  python book_curator.py --batch --auto-reject 4.0 --auto-quarantine
  python book_curator.py --list-personas
  python book_curator.py --create-persona

Quarantine Workflow:
  1. Curate with auto-quarantine: --batch --auto-reject 4.0 --auto-quarantine
  2. Review quarantined: python quarantine_manager.py --list
  3. Restore good ones: python quarantine_manager.py --restore DOC_ID
  4. Purge bad ones: python quarantine_manager.py --purge DOC_ID --confirm
        """
    )

    # Positional argument
    parser.add_argument('document_id', nargs='?',
                       help='Document ID to curate')

    # Operations
    ops = parser.add_argument_group('Operations')
    ops.add_argument('--batch', action='store_true',
                    help='Curate all uncurated documents')
    ops.add_argument('--list-personas', action='store_true',
                    help='List available curator personas')
    ops.add_argument('--create-persona', action='store_true',
                    help='Create a new persona interactively')
    ops.add_argument('--show-sample', action='store_true',
                    help='Show extracted sample without curating')

    # Options
    opts = parser.add_argument_group('Options')
    opts.add_argument('--persona', '-p', default='general_scholar',
                     help='Curator persona to use (default: general_scholar)')
    opts.add_argument('--limit', type=int, default=10,
                     help='Max documents for batch processing (default: 10)')
    opts.add_argument('--auto-reject', type=float, metavar='THRESHOLD',
                     help='Auto-reject documents with score below threshold')
    opts.add_argument('--auto-quarantine', action='store_true',
                     help='Automatically quarantine rejected documents (use with --auto-reject)')
    opts.add_argument('--dry-run', action='store_true',
                     help='Preview without saving to database')
    opts.add_argument('--format', '-f', choices=['text', 'json'], default='text',
                     help='Output format (default: text)')

    args = parser.parse_args(argv)

    # sys.exit() raises SystemExit, which is not an Exception subclass, so
    # the exits below pass straight through the handlers at the bottom.
    try:
        # List personas
        if args.list_personas:
            list_personas()
            return

        # Create persona
        if args.create_persona:
            create_persona_interactive()
            return

        # Show sample only
        if args.show_sample:
            if not args.document_id:
                _exit_with_error("Error: document_id required with --show-sample",
                                 args.format)
            sample = extract_curation_sample(args.document_id)
            if not sample:
                # Previously this path returned silently with status 0.
                _exit_with_error(
                    f"Could not extract a sample for {args.document_id}. "
                    "Check logs for details.",
                    args.format
                )
            if args.format == 'json':
                print(json.dumps(asdict(sample), indent=2))
            else:
                print(f"\nSample from: {sample.title}")
                print(f"Total chars: {sample.total_chars}")
                print(f"Notes: {sample.extraction_notes}")
                sections = (
                    ("INTRODUCTION", sample.introduction),
                    ("FIRST CHAPTER", sample.first_chapter),
                    ("MIDDLE PASSAGE", sample.middle_passage),
                    ("BIBLIOGRAPHY", sample.bibliography),
                )
                for heading, text in sections:
                    print(f"\n--- {heading} ---")
                    print(_preview_text(text))
            return

        # Batch curation
        if args.batch:
            results = batch_curate(
                persona_key=args.persona,
                limit=args.limit,
                auto_reject_threshold=args.auto_reject,
                auto_quarantine=args.auto_quarantine,
                dry_run=args.dry_run
            )
            print_batch_results(results, args.format)
            return

        # Single document curation
        if args.document_id:
            review = curate_document(
                args.document_id,
                persona_key=args.persona,
                save_to_db=not args.dry_run,
                auto_quarantine=args.auto_quarantine
            )
            if not review:
                _exit_with_error("Curation failed. Check logs for details.",
                                 args.format)
            print_review(review, args.format)
            return

        # No operation specified
        parser.print_help()

    except KeyboardInterrupt:
        print("\nCancelled.")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: log, surface the error to JSON consumers, and
        # signal failure through the exit status.
        logger.error(f"Error: {e}")
        if args.format == 'json':
            print(json.dumps({'error': str(e)}))
        sys.exit(1)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
