#!/usr/bin/env python3
"""
Document Search Tool
Search through all converted text documents with context.
"""

import re
import sys
from pathlib import Path
from typing import List, Optional, Tuple

# Directory containing this script; paths below are resolved relative to it
# rather than to the current working directory.
BASE_DIR = Path(__file__).resolve().parent
# Directory that is scanned for *.txt documents.
TEXT_DIR = BASE_DIR / "text"
# Number of lines of context captured before and after each matching line.
CONTEXT_LINES = 3


def search_file(
    file_path: Path,
    pattern: str,
    case_sensitive: bool = False,
    context_lines: Optional[int] = None,
) -> List[Tuple[int, str, List[str]]]:
    """
    Search a text file line by line for a regular expression.

    Args:
        file_path: File to read. It is decoded as UTF-8; undecodable bytes
            are dropped.
        pattern: Regular expression, applied to each line with re.search.
        case_sensitive: When False (the default) matching ignores case.
        context_lines: Number of lines to include before and after each
            matching line. None (the default) uses the module-level
            CONTEXT_LINES; negative values are treated as 0.

    Returns:
        One (line_number, matched_line, context) tuple per matching line.
        line_number is 1-based, matched_line has surrounding whitespace
        stripped, and context is the slice of raw lines (line endings kept)
        around the match, including the match itself. An empty list is
        returned, after printing a message to stderr, if the pattern is not
        a valid regular expression or the file cannot be read.
    """
    if context_lines is None:
        context_lines = CONTEXT_LINES
    # A negative width would produce empty or misaligned slices.
    context_lines = max(0, context_lines)

    flags = 0 if case_sensitive else re.IGNORECASE

    # Compile once, and report a bad pattern as such rather than letting it
    # masquerade as a file read error.
    try:
        regex = re.compile(pattern, flags)
    except re.error as e:
        print(f"Invalid pattern {pattern!r}: {e}", file=sys.stderr)
        return []

    # Keep the try body limited to the I/O that can actually fail.
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            lines = f.readlines()
    except OSError as e:
        print(f"Error reading {file_path}: {e}", file=sys.stderr)
        return []

    matches = []
    for i, line in enumerate(lines):
        if regex.search(line):
            # Clamp the context window to the bounds of the file.
            start = max(0, i - context_lines)
            end = min(len(lines), i + context_lines + 1)
            matches.append((i + 1, line.strip(), lines[start:end]))

    return matches


def format_match(file_path: Path, line_num: int, line: str, context: List[str], show_context: bool = True):
    """
    Print a single search hit to stdout.

    The output is a blank line, a "<file name>:<line number>" header, a
    70-character rule, and then the hit itself indented by two spaces:
    every entry of context (trailing whitespace removed) when show_context
    is true, otherwise just line.
    """
    separator = "─" * 70

    if show_context:
        body = [entry.rstrip() for entry in context]
    else:
        body = [line]

    output = [f"\n{file_path.name}:{line_num}", separator]
    output.extend(f"  {text}" for text in body)
    print("\n".join(output))


def main():
    """
    Command-line entry point: search every *.txt file in TEXT_DIR.

    Usage: search_docs.py <pattern> [--case-sensitive] [--no-context]

    The pattern is the first argument that is not one of the two recognised
    flags, so the flags may appear before or after it. Matches are printed
    to stdout followed by a summary; usage and error messages go to stderr.

    Exits with status 1 when no pattern is given, when the pattern is not a
    valid regular expression, when TEXT_DIR does not exist, or when it
    contains no *.txt files.
    """
    known_flags = ('--case-sensitive', '--no-context')
    args = sys.argv[1:]
    # Separate flags from positionals so a leading flag is not mistaken for
    # the search pattern.
    positionals = [arg for arg in args if arg not in known_flags]

    if not positionals:
        print("Usage: search_docs.py <pattern> [--case-sensitive] [--no-context]", file=sys.stderr)
        print("\nExamples:", file=sys.stderr)
        print("  search_docs.py 'Ancient.*Rite'", file=sys.stderr)
        print("  search_docs.py 'degree' --case-sensitive", file=sys.stderr)
        print("  search_docs.py 'ceremony' --no-context", file=sys.stderr)
        sys.exit(1)

    pattern = positionals[0]
    case_sensitive = '--case-sensitive' in args
    show_context = '--no-context' not in args

    # Reject a malformed regex once, up front, instead of failing on every
    # file during the search.
    try:
        re.compile(pattern)
    except re.error as e:
        print(f"Error: invalid regular expression {pattern!r}: {e}", file=sys.stderr)
        sys.exit(1)

    if not TEXT_DIR.exists():
        print(f"Error: Text directory not found: {TEXT_DIR}", file=sys.stderr)
        print("Run process_pdfs.py first to convert PDFs to text.", file=sys.stderr)
        sys.exit(1)

    # Sorted so results are reported in a stable, alphabetical file order.
    text_files = sorted(TEXT_DIR.glob("*.txt"))

    if not text_files:
        print(f"No text files found in {TEXT_DIR}", file=sys.stderr)
        sys.exit(1)

    print(f"Searching {len(text_files)} documents for: {pattern}")
    print("=" * 70)

    total_matches = 0
    files_with_matches = 0

    for file_path in text_files:
        matches = search_file(file_path, pattern, case_sensitive)
        if not matches:
            continue

        files_with_matches += 1
        total_matches += len(matches)

        for line_num, line, context in matches:
            format_match(file_path, line_num, line, context, show_context)

    # Summary
    print("\n" + "=" * 70)
    print(f"Found {total_matches} matches in {files_with_matches} documents")
    print("=" * 70)


# Run the search only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
