#!/usr/bin/env python3
"""
Search Export Tool - Export search results to various formats.

Usage:
    python search_export.py "search query" --output results.csv
    python search_export.py "search query" --format json --output results.json
    python search_export.py "search query" --format markdown --output results.md
    python search_export.py "search query" --format html --output results.html
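    python search_export.py "search query" --category Philosophy --limit 50 --output results.csv
    python search_export.py "search query" --format bibtex --output references.bib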
"""

import argparse
import csv
import json
import sys
import html as html_module
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Any, Optional

# Add this script's own directory to sys.path so modules located alongside it (e.g. db_utils) can be imported
sys.path.insert(0, str(Path(__file__).parent))

from db_utils import get_db_connection, execute_query


def search_documents(
    query: str,
    limit: int = 100,
    category: Optional[str] = None,
    year_from: Optional[int] = None,
    year_to: Optional[int] = None,
    search_type: str = 'keyword'
) -> List[Dict[str, Any]]:
    """
    Search document chunks with PostgreSQL full-text search.

    Chunks are matched with ``plainto_tsquery('english', query)`` against
    ``chunks.chunk_text_tsv`` and ranked with ``ts_rank``. Documents whose
    ``quality_status`` is ``'archived'`` are always excluded.

    Args:
        query: Search query string
        limit: Maximum results to return
        category: Filter by ``documents.primary_category`` (ignored when
            empty or None)
        year_from: Filter by minimum publication year (inclusive)
        year_to: Filter by maximum publication year (inclusive)
        search_type: Type of search (keyword, semantic, hybrid). Accepted for
            interface compatibility only; this function does not read it and
            always runs the keyword query.

    Returns:
        List of result dictionaries ordered by descending score, each with
        the keys ``chunk_id``, ``document_id``, ``title``, ``author``,
        ``year``, ``category``, ``snippet``, ``full_text``, ``chunk_index``
        and ``score``.
    """
    # Build the search query using PostgreSQL full-text search
    base_query = """
        SELECT
            c.chunk_id,
            c.document_id,
            d.title,
            d.author,
            d.publication_year,
            d.primary_category,
            c.chunk_text,
            c.chunk_index,
            ts_rank(c.chunk_text_tsv, plainto_tsquery('english', %s)) as score
        FROM chunks c
        JOIN documents d ON c.document_id = d.document_id
        WHERE c.chunk_text_tsv @@ plainto_tsquery('english', %s)
    """

    # The query text is bound twice: once for ranking, once for matching.
    params: List[Any] = [query, query]

    # Optional filters. All values go through placeholders, never into the
    # SQL text itself.
    if category:
        base_query += " AND d.primary_category = %s"
        params.append(category)

    # Compare against None so that a bound of 0 is still applied.
    if year_from is not None:
        base_query += " AND d.publication_year >= %s"
        params.append(year_from)

    if year_to is not None:
        base_query += " AND d.publication_year <= %s"
        params.append(year_to)

    # Exclude archived documents
    base_query += " AND d.quality_status != 'archived'"

    # Order and limit
    base_query += " ORDER BY score DESC LIMIT %s"
    params.append(limit)

    # Treat a None result as "no rows" so the loop below cannot crash.
    rows = execute_query(base_query, tuple(params), fetch='all') or []

    formatted = []
    for row in rows:
        # Guard against a NULL chunk_text so slicing and len() are safe.
        text = row['chunk_text'] or ''

        # Snippet is the first 300 characters, with an ellipsis if truncated.
        snippet = text[:300]
        if len(text) > 300:
            snippet += '...'

        formatted.append({
            'chunk_id': row['chunk_id'],
            'document_id': row['document_id'],
            'title': row['title'] or 'Untitled',
            'author': row['author'] or 'Unknown',
            'year': row['publication_year'],
            'category': row['primary_category'] or 'Uncategorized',
            'snippet': snippet,
            'full_text': text,
            'chunk_index': row['chunk_index'],
            'score': float(row['score']) if row['score'] else 0.0
        })

    return formatted


def export_csv(results: List[Dict], output_path: Path, include_full_text: bool = False):
    """Write search results to ``output_path`` as a UTF-8 CSV file.

    Args:
        results: Result dictionaries; keys missing from a result are written
            as empty cells, keys outside the column list are dropped.
        output_path: Destination file (overwritten if it exists).
        include_full_text: When true, append a ``full_text`` column.

    Nothing is written when ``results`` is empty; a notice is printed instead.
    """
    if not results:
        print("No results to export.")
        return

    columns = ['title', 'author', 'year', 'category', 'score', 'snippet', 'document_id', 'chunk_id']
    if include_full_text:
        columns = columns + ['full_text']

    # newline='' lets the csv module control line endings itself.
    with open(output_path, 'w', newline='', encoding='utf-8') as handle:
        csv_out = csv.DictWriter(handle, fieldnames=columns, extrasaction='ignore')
        csv_out.writeheader()
        csv_out.writerows(
            {column: item.get(column, '') for column in columns}
            for item in results
        )

    print(f"Exported {len(results)} results to {output_path}")


def export_json(results: List[Dict], output_path: Path, query: str):
    """Write search results to ``output_path`` as an indented JSON document.

    The document is an object with the keys ``query``, ``exported_at``
    (local time, ISO 8601), ``total_results`` and ``results``. Non-ASCII
    characters are written as-is rather than ``\\u`` escaped.
    """
    payload = dict(
        query=query,
        exported_at=datetime.now().isoformat(),
        total_results=len(results),
        results=results,
    )

    with open(output_path, 'w', encoding='utf-8') as handle:
        json.dump(payload, handle, indent=2, ensure_ascii=False)

    print(f"Exported {len(results)} results to {output_path}")


def export_markdown(results: List[Dict], output_path: Path, query: str):
    """Export results to Markdown format.

    Writes a header (query, local export time, result count) followed by one
    section per result containing title, author, optional year, category,
    score, the snippet as a blockquote, and the source identifiers.

    Args:
        results: Result dictionaries with the keys ``title``, ``author``,
            ``category``, ``score``, ``snippet``, ``document_id`` and
            ``chunk_id``; ``year`` is optional and omitted when falsy.
        output_path: Destination file (overwritten if it exists).
        query: The search query, shown in the document heading.
    """
    parts = [
        f"# Search Results: {query}\n\n",
        f"**Exported:** {datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n",
        f"**Total Results:** {len(results)}\n\n",
        "---\n\n",
    ]

    for i, r in enumerate(results, 1):
        parts.append(f"## {i}. {r['title']}\n\n")
        parts.append(f"**Author:** {r['author']}\n\n")
        if r.get('year'):
            parts.append(f"**Year:** {r['year']}\n\n")
        parts.append(f"**Category:** {r['category']}\n\n")
        parts.append(f"**Relevance Score:** {r['score']:.3f}\n\n")

        # Prefix every snippet line with '>' so that a snippet containing
        # blank lines (paragraph breaks) stays inside a single blockquote
        # instead of spilling into the surrounding document.
        snippet_lines = str(r['snippet']).splitlines()
        quoted = '\n'.join(
            f"> {line}" if line.strip() else ">" for line in snippet_lines
        ) or "> "
        parts.append(f"{quoted}\n\n")

        parts.append(f"*Source: {r['document_id']}, Chunk: {r['chunk_id']}*\n\n")
        parts.append("---\n\n")

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(''.join(parts))

    print(f"Exported {len(results)} results to {output_path}")


def export_html(results: List[Dict], output_path: Path, query: str):
    """Export results to a standalone HTML page.

    Every value taken from ``results`` or ``query`` is converted to text and
    HTML-escaped before being embedded, so non-string identifiers (for
    example integer chunk IDs) are handled and markup inside titles or
    snippets is shown literally.

    Args:
        results: Result dictionaries with the keys ``title``, ``author``,
            ``category``, ``score``, ``snippet``, ``document_id`` and
            ``chunk_id``; a missing or None ``year`` is shown as ``N/A``.
        output_path: Destination file (overwritten if it exists).
        query: The search query, shown in the page title and heading.
    """
    def esc(value: Any) -> str:
        # html.escape() only accepts str; IDs and years may be ints.
        return html_module.escape(str(value))

    safe_query = esc(query)

    parts = [f"""<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Search Results: {safe_query}</title>
    <style>
        body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif; max-width: 900px; margin: 0 auto; padding: 20px; line-height: 1.6; }}
        h1 {{ color: #333; border-bottom: 2px solid #007acc; padding-bottom: 10px; }}
        .meta {{ color: #666; margin-bottom: 20px; }}
        .result {{ background: #f9f9f9; border-left: 4px solid #007acc; padding: 15px; margin-bottom: 20px; }}
        .result h2 {{ margin-top: 0; color: #333; }}
        .result-meta {{ color: #666; font-size: 0.9em; margin-bottom: 10px; }}
        .snippet {{ background: #fff; padding: 10px; border-radius: 4px; font-style: italic; }}
        .score {{ background: #007acc; color: white; padding: 2px 8px; border-radius: 12px; font-size: 0.8em; }}
        .source {{ font-size: 0.85em; color: #888; margin-top: 10px; }}
    </style>
</head>
<body>
    <h1>Search Results: {safe_query}</h1>
    <div class="meta">
        <p>Exported: {datetime.now().strftime('%Y-%m-%d %H:%M')}</p>
        <p>Total Results: {len(results)}</p>
    </div>
"""]

    for i, r in enumerate(results, 1):
        # The 'year' key may be present with a None value, so a .get()
        # default alone would render "None"; fall back explicitly.
        year = r.get('year')
        year_text = esc(year) if year is not None else 'N/A'

        parts.append(f"""
    <div class="result">
        <h2>{i}. {esc(r['title'])}</h2>
        <div class="result-meta">
            <strong>Author:</strong> {esc(r['author'])} |
            <strong>Year:</strong> {year_text} |
            <strong>Category:</strong> {esc(r['category'])} |
            <span class="score">{r['score']:.3f}</span>
        </div>
        <div class="snippet">{esc(r['snippet'])}</div>
        <div class="source">Source: {esc(r['document_id'])}, Chunk: {esc(r['chunk_id'])}</div>
    </div>
""")

    parts.append("""
</body>
</html>
""")

    # Assemble once instead of growing a string with repeated +=.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(''.join(parts))

    print(f"Exported {len(results)} results to {output_path}")


def _citation_key_part(text: str) -> str:
    """Return ``text`` lower-cased with every non-alphanumeric character removed."""
    return ''.join(ch for ch in text if ch.isalnum()).lower()


def export_bibtex(results: List[Dict], output_path: Path):
    """Export unique documents as BibTeX references.

    Results are de-duplicated by ``document_id`` (the first result for each
    document wins) and written as ``@book`` entries. The citation key is
    ``<first author word><year or 'nd'><first 20 title chars>``, reduced to
    lower-case alphanumerics so punctuation such as the comma in
    "Steiner, Rudolf" cannot terminate the key early. When two documents
    would produce the same key, later ones get a ``-2``, ``-3``, ... suffix.

    Field values (author, title, note) are written verbatim; characters
    that are special to BibTeX/LaTeX are not escaped.

    Args:
        results: Result dictionaries with the keys ``document_id``,
            ``author`` and ``title``; ``year`` is optional.
        output_path: Destination file (overwritten if it exists).
    """
    # Keep the first result seen for each document, in encounter order.
    unique_by_id: Dict[Any, Dict] = {}
    for r in results:
        unique_by_id.setdefault(r['document_id'], r)
    unique_docs = list(unique_by_id.values())

    key_counts: Dict[str, int] = {}

    with open(output_path, 'w', encoding='utf-8') as f:
        for doc in unique_docs:
            # First whitespace-separated word of the author; an author that
            # is empty or has no alphanumerics falls back to 'unknown'.
            author_words = str(doc['author']).split()
            author_key = _citation_key_part(author_words[0]) if author_words else ''
            author_key = author_key or 'unknown'
            year_key = doc.get('year', 'nd') or 'nd'
            title_key = _citation_key_part(str(doc['title'])[:20])
            base_key = f"{author_key}{year_key}{title_key}"

            # Disambiguate repeated keys; base keys never contain '-', so a
            # suffixed key cannot clash with another document's base key.
            occurrence = key_counts.get(base_key, 0) + 1
            key_counts[base_key] = occurrence
            cite_key = base_key if occurrence == 1 else f"{base_key}-{occurrence}"

            f.write(f"@book{{{cite_key},\n")
            f.write(f"  author = {{{doc['author']}}},\n")
            f.write(f"  title = {{{doc['title']}}},\n")
            if doc.get('year'):
                f.write(f"  year = {{{doc['year']}}},\n")
            f.write(f"  note = {{Document ID: {doc['document_id']}}}\n")
            f.write("}\n\n")

    print(f"Exported {len(unique_docs)} unique documents to {output_path}")


def main():
    """Command-line entry point: parse arguments, run the search, export.

    With ``--summary`` the top results are printed to stdout and no file is
    written, so ``--output`` is only required for the file-export path.
    Invalid argument combinations are reported through ``parser.error``,
    which prints a usage message and exits with status 2.
    """
    parser = argparse.ArgumentParser(
        description='Export search results to various formats',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python search_export.py "consciousness" --output results.csv
  python search_export.py "philosophy of freedom" --format markdown --output research.md
  python search_export.py "alchemy" --format json --limit 50 --output alchemy.json
  python search_export.py "ritual" --category "Religion/Spirituality" --format html --output ritual.html
  python search_export.py "steiner" --format bibtex --output references.bib
        """
    )

    parser.add_argument('query', help='Search query')
    parser.add_argument('--output', '-o',
                        help='Output file path (required unless --summary is given)')
    parser.add_argument('--format', '-f', choices=['csv', 'json', 'markdown', 'html', 'bibtex'],
                        default='csv', help='Output format (default: csv)')
    parser.add_argument('--limit', '-l', type=int, default=100, help='Maximum results (default: 100)')
    parser.add_argument('--category', '-c', help='Filter by category')
    parser.add_argument('--year-from', type=int, help='Minimum publication year')
    parser.add_argument('--year-to', type=int, help='Maximum publication year')
    parser.add_argument('--full-text', action='store_true', help='Include full chunk text (CSV only)')
    parser.add_argument('--summary', action='store_true',
                        help='Token-efficient output: titles and scores only (for Claude Code)')

    args = parser.parse_args()

    # Validate before touching the database so bad input fails fast.
    if args.limit < 1:
        parser.error('--limit must be a positive integer')
    if not args.summary and not args.output:
        parser.error('--output/-o is required unless --summary is given')

    # Run search
    print(f"Searching for: {args.query}")
    results = search_documents(
        query=args.query,
        limit=args.limit,
        category=args.category,
        year_from=args.year_from,
        year_to=args.year_to
    )

    if not results:
        print("No results found.")
        return

    print(f"Found {len(results)} results")

    # Summary mode: token-efficient output for Claude Code
    if args.summary:
        print("\n--- SUMMARY MODE ---")
        for i, r in enumerate(results[:20], 1):
            score_str = f"({r['score']:.3f})" if r['score'] else ""
            print(f"{i}. {r['title'][:60]} {score_str} [{r['document_id']}]")
        if len(results) > 20:
            print(f"... and {len(results) - 20} more results")
        print("\nUse without --summary to see full details or export to file.")
        return

    # Export based on format
    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # One entry per --format choice; each callable closes over the shared
    # arguments because the exporters take slightly different parameters.
    exporters = {
        'csv': lambda: export_csv(results, output_path, args.full_text),
        'json': lambda: export_json(results, output_path, args.query),
        'markdown': lambda: export_markdown(results, output_path, args.query),
        'html': lambda: export_html(results, output_path, args.query),
        'bibtex': lambda: export_bibtex(results, output_path),
    }
    exporters[args.format]()


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
