#!/usr/bin/env python3
"""
DOCX Formatting Script for Research Development Framework.

This script converts markdown to properly formatted Word documents:
1. Convert markdown to DOCX via pandoc
2. Apply professional formatting
3. Fix common conversion issues
4. Ensure TOC-compatible heading styles

Usage:
    python format_docx.py input.md                    # Convert with defaults
    python format_docx.py input.md -o output.docx    # Custom output
    python format_docx.py input.md --format-only     # Format existing DOCX
"""

import os
import sys
import argparse
import subprocess
from pathlib import Path
from typing import Optional

try:
    from docx import Document
    from docx.shared import Inches, Pt, RGBColor
    from docx.enum.text import WD_ALIGN_PARAGRAPH
    from docx.enum.style import WD_STYLE_TYPE
    HAS_DOCX = True
except ImportError:
    HAS_DOCX = False
    print("Warning: python-docx not installed. Run: pip install python-docx")


class DocxFormatter:
    """Load a DOCX file and apply formatting/cleanup passes to it.

    The document is opened once in ``__init__``. Every pass mutates
    ``self.doc`` in memory; nothing is written back until :meth:`save`
    is called.
    """

    def __init__(self, doc_path: str):
        """Open ``doc_path`` and set the default formatting parameters.

        Raises:
            ImportError: if python-docx could not be imported.
        """
        if not HAS_DOCX:
            raise ImportError("python-docx is required for formatting")

        self.doc_path = Path(doc_path)
        self.doc = Document(str(self.doc_path))

        # Default formatting settings
        self.font_name = "Times New Roman"
        self.font_size_body = Pt(12)
        self.font_size_h1 = Pt(18)
        self.font_size_h2 = Pt(14)
        self.font_size_h3 = Pt(12)
        self.line_spacing = 1.5

    def remove_blue_text(self):
        """Convert blue/link-colored run text to black.

        Only runs in body paragraphs that carry an explicit RGB color are
        examined. A color is treated as blue-ish when its blue channel is
        greater than its red channel or greater than its green channel.
        """
        black = RGBColor(0, 0, 0)
        for paragraph in self.doc.paragraphs:
            for run in paragraph.runs:
                color = run.font.color.rgb
                # ``rgb`` is None when the run has no explicit color and may
                # be a non-RGB value in other cases; only a real RGBColor can
                # be split into channels.
                if not isinstance(color, RGBColor):
                    continue
                # RGBColor is an (r, g, b) tuple; it has no named channel
                # attributes, so unpack it instead.
                red, green, blue = color
                if blue > red or blue > green:
                    run.font.color.rgb = black

    def format_title_page(self):
        """Format the document title.

        Looks at the first five paragraphs and formats the first one whose
        style name starts with ``Heading 1`` or whose text starts with ``#``:
        centered, 24pt bold in the configured font, with 72pt of space after.
        """
        if not self.doc.paragraphs:
            return

        # Find and format title (usually first H1)
        for para in self.doc.paragraphs[:5]:
            if para.style.name.startswith('Heading 1') or para.text.startswith('#'):
                para.alignment = WD_ALIGN_PARAGRAPH.CENTER
                for run in para.runs:
                    run.font.size = Pt(24)
                    run.font.bold = True
                    run.font.name = self.font_name

                # Add spacing after title
                para.paragraph_format.space_after = Pt(72)  # 1 inch
                break

    def apply_heading_styles(self):
        """Apply font, size, bold and black color to Heading 1-3 runs.

        Paragraph styles are left untouched so that headings stay usable for
        TOC generation; only run-level character formatting is set.
        """
        heading_map = {
            'Heading 1': {'size': self.font_size_h1, 'bold': True},
            'Heading 2': {'size': self.font_size_h2, 'bold': True},
            'Heading 3': {'size': self.font_size_h3, 'bold': True},
        }

        for paragraph in self.doc.paragraphs:
            style_name = paragraph.style.name

            if style_name in heading_map:
                settings = heading_map[style_name]
                for run in paragraph.runs:
                    run.font.name = self.font_name
                    run.font.size = settings['size']
                    run.font.bold = settings['bold']
                    run.font.color.rgb = RGBColor(0, 0, 0)

    def apply_body_formatting(self):
        """Apply the body font, size, black color and line spacing.

        Every paragraph whose style name does not start with ``Heading`` is
        treated as body text.
        """
        for paragraph in self.doc.paragraphs:
            # Skip headings
            if paragraph.style.name.startswith('Heading'):
                continue

            # Apply font settings
            for run in paragraph.runs:
                run.font.name = self.font_name
                run.font.size = self.font_size_body
                run.font.color.rgb = RGBColor(0, 0, 0)

            # Apply paragraph settings
            paragraph.paragraph_format.line_spacing = self.line_spacing

    def add_page_breaks_before_chapters(self):
        """Insert a page break before each ``Heading 1`` paragraph.

        The very first paragraph of the document is skipped so the output
        does not begin with an empty page. This pass is not part of
        :meth:`format_all`; call it explicitly when chapter breaks are wanted.
        """
        # Imported locally so the method works when this module is imported,
        # not only when it is executed as a script.
        from docx.enum.text import WD_BREAK

        # The paragraph sequence is fetched once before the loop starts, so
        # the break paragraphs inserted below are not visited again.
        for index, paragraph in enumerate(self.doc.paragraphs):
            if index == 0:
                continue

            if paragraph.style.name == 'Heading 1':
                # The break lives in its own new paragraph placed directly
                # before the heading.
                break_run = paragraph.insert_paragraph_before().add_run()
                break_run.add_break(WD_BREAK.PAGE)

    def fix_tables(self):
        """Set the configured font at 10pt on every run in every table cell."""
        for table in self.doc.tables:
            for row in table.rows:
                for cell in row.cells:
                    for paragraph in cell.paragraphs:
                        for run in paragraph.runs:
                            run.font.name = self.font_name
                            run.font.size = Pt(10)

    def format_all(self):
        """Apply all formatting fixes.

        Order matters: the title pass runs after the heading and body passes,
        because those passes reset run sizes and would otherwise overwrite
        the title's 24pt size.
        """
        print("Applying formatting...")

        print("  - Removing blue text")
        self.remove_blue_text()

        print("  - Applying heading styles")
        self.apply_heading_styles()

        print("  - Applying body formatting")
        self.apply_body_formatting()

        print("  - Formatting title page")
        self.format_title_page()

        print("  - Formatting tables")
        self.fix_tables()

    def save(self, output_path: Optional[str] = None):
        """Save the formatted document.

        Args:
            output_path: Destination path. When omitted (or empty), the
                document is written back to the path it was loaded from.
        """
        save_path = output_path or str(self.doc_path)
        self.doc.save(save_path)
        print(f"Saved: {save_path}")


def convert_markdown_to_docx(
    input_path: str,
    output_path: str,
    reference_doc: Optional[str] = None
) -> bool:
    """
    Convert markdown to DOCX using pandoc.

    Args:
        input_path: Path of the markdown file to read.
        output_path: Path of the DOCX file pandoc should write.
        reference_doc: Optional DOCX passed to pandoc via ``--reference-doc``.
            If the path does not exist, a warning is printed and the
            conversion proceeds without it.

    Returns True if successful. Returns False (after printing a message)
    when pandoc exits with a non-zero status or is not installed.
    """
    cmd = [
        'pandoc',
        input_path,
        '-f', 'markdown+smart',
        '-t', 'docx',
        '-o', output_path,
        '--standalone',
        '--highlight-style=tango',
        '--toc',
        '--toc-depth=2'
    ]

    if reference_doc:
        if Path(reference_doc).exists():
            cmd.extend(['--reference-doc', reference_doc])
        else:
            # Do not drop the option silently: a mistyped path would
            # otherwise produce default styling with no hint as to why.
            print(f"Warning: reference doc not found, ignoring: {reference_doc}")

    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            print(f"Pandoc error: {result.stderr}")
            return False
        return True
    except FileNotFoundError:
        # Raised by subprocess when the pandoc executable cannot be found.
        print("Error: pandoc not found. Please install pandoc.")
        return False


def convert_markdown_to_epub(input_path: str, output_path: str) -> bool:
    """
    Run pandoc to turn a markdown file into an EPUB 3 book.

    Args:
        input_path: Markdown file to read.
        output_path: EPUB file pandoc should write.

    Returns True when pandoc exits with status 0. Otherwise prints a
    message (pandoc's stderr, or a hint that pandoc is missing) and
    returns False.
    """
    pandoc_args = ['pandoc', input_path]
    pandoc_args += ['-f', 'markdown+smart', '-t', 'epub3', '-o', output_path]
    pandoc_args += ['--standalone', '--toc', '--toc-depth=2']

    try:
        completed = subprocess.run(pandoc_args, capture_output=True, text=True)
    except FileNotFoundError:
        # The pandoc executable itself could not be launched.
        print("Error: pandoc not found. Please install pandoc.")
        return False

    if completed.returncode == 0:
        return True

    print(f"Pandoc error: {completed.stderr}")
    return False


def main():
    """Command-line entry point: convert markdown to DOCX and format it.

    Behavior by input:
      * ``.md`` input without ``--format-only``: converted to the output
        path with pandoc (plus an EPUB next to it when ``--epub`` is given),
        then the resulting DOCX is formatted in place.
      * ``.docx`` input: the input file itself is formatted and the result
        is saved to the output path (the same file when ``-o`` is omitted).
      * anything else (including ``.md`` with ``--format-only``): the DOCX
        already present at the output path is formatted in place.

    Exits with status 1 when the input file is missing, the pandoc DOCX
    conversion fails, or the DOCX to format does not exist.
    """
    parser = argparse.ArgumentParser(
        description='Convert and format documents for the Research Development Framework'
    )
    parser.add_argument(
        'input',
        type=str,
        help='Input file (markdown or docx)'
    )
    parser.add_argument(
        '-o', '--output',
        type=str,
        help='Output file path'
    )
    parser.add_argument(
        '--format-only',
        action='store_true',
        help='Only format existing DOCX (no conversion)'
    )
    parser.add_argument(
        '--reference-doc',
        type=str,
        help='Reference DOCX for styling'
    )
    parser.add_argument(
        '--epub',
        action='store_true',
        help='Also generate EPUB format'
    )

    args = parser.parse_args()

    input_path = Path(args.input)

    if not input_path.exists():
        print(f"Error: File not found: {input_path}")
        sys.exit(1)

    # Determine output path
    if args.output:
        output_path = Path(args.output)
    else:
        output_path = input_path.with_suffix('.docx')

    # The DOCX that will be loaded for formatting. By default it is whatever
    # sits at the output path (freshly converted, or already existing there).
    source_path = output_path

    # Handle markdown input
    if input_path.suffix.lower() == '.md' and not args.format_only:
        print(f"Converting: {input_path} -> {output_path}")

        if not convert_markdown_to_docx(str(input_path), str(output_path), args.reference_doc):
            sys.exit(1)

        print("Conversion complete!")

        # Generate EPUB if requested
        if args.epub:
            epub_path = output_path.with_suffix('.epub')
            print(f"\nGenerating EPUB: {epub_path}")
            convert_markdown_to_epub(str(input_path), str(epub_path))
    elif input_path.suffix.lower() == '.docx':
        # The input is itself the document to format. Loading it (rather
        # than the output path) lets `in.docx -o out.docx` work even though
        # out.docx does not exist yet.
        source_path = input_path

    # Format the DOCX
    if HAS_DOCX:
        if not source_path.exists():
            print(f"Error: File not found: {source_path}")
            sys.exit(1)

        if source_path == output_path:
            print(f"\nFormatting: {output_path}")
        else:
            print(f"\nFormatting: {source_path} -> {output_path}")
        formatter = DocxFormatter(str(source_path))
        formatter.format_all()
        formatter.save(str(output_path))
    else:
        print("\nSkipping formatting (python-docx not installed)")

    print("\nDone!")


if __name__ == '__main__':
    # Script entry point.
    # This import binds the top-level name `docx` in this module so that
    # code referring to `docx.enum.text...` by its fully qualified name can
    # resolve it (used for page breaks). Note it only happens when the file
    # is executed as a script, not when it is imported.
    try:
        import docx.enum.text
    except ImportError:
        # Deliberately ignored here: the missing python-docx dependency is
        # already reported by the guarded import at the top of the file.
        pass

    main()
