#!/usr/bin/env python3
"""
RDF Entity Management CLI

Provides commands for managing knowledge graph entities:
- Find duplicate entities
- Merge entities (with optional queue review)
- Create/manage aliases
- Entity health check
- Show entity details
- List entities (sortable, optionally filtered by category)

Usage:
    rdf entity duplicates [--threshold 80] [--format json]
    rdf entity merge <primary_id> <secondary_id> [--queue]
    rdf entity alias <entity_id> <alias_name>
    rdf entity show <entity_id>
    rdf entity health [--format json]
    rdf entity list [--limit 50] [--sort name]
"""

import argparse
import json
import sys
from pathlib import Path
from typing import Dict, Any, List, Optional

# Setup path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
sys.path.insert(0, str(Path(__file__).parent))

from pipeline.cli_utils import success_response, error_response, ErrorCodes, StructuredResponse

# Import existing merge_concepts functionality
try:
    from merge_concepts import (
        get_all_concepts,
        get_concept_by_name,
        get_concept_by_id,
        get_concept_documents,
        find_similar_concepts,
        suggest_primary_concept,
        merge_concepts,
        FUZZY_AVAILABLE
    )
    MERGE_AVAILABLE = True
except ImportError as e:
    MERGE_AVAILABLE = False
    MERGE_ERROR = str(e)
    FUZZY_AVAILABLE = False

# Import review queue for queue-based merges
try:
    from review_queue import (
        ReviewQueue, ConceptMergeQueue, QueueType,
        ConceptMergeReviewItem, ReviewPriority
    )
    QUEUE_AVAILABLE = True
except ImportError:
    QUEUE_AVAILABLE = False


def cmd_duplicates(args) -> StructuredResponse:
    """Report entity pairs whose names look alike under fuzzy matching.

    Reads ``args.threshold`` and forwards it to ``find_similar_concepts``.
    Every ``(concept, concept, score)`` triple that comes back is turned into
    a record holding a summary of both entities plus the one that
    ``suggest_primary_concept`` would keep and the one it would merge away.

    Returns:
        A CONFIGURATION_REQUIRED error when the merge helpers failed to
        import or fuzzy matching is unavailable; otherwise a success response
        carrying the threshold, the pair count and the pair records.
    """
    if not MERGE_AVAILABLE:
        return error_response(
            ErrorCodes.CONFIGURATION_REQUIRED,
            f"Entity management not available: {MERGE_ERROR}"
        )

    if not FUZZY_AVAILABLE:
        return error_response(
            ErrorCodes.CONFIGURATION_REQUIRED,
            "Fuzzy matching requires 'rapidfuzz' package. Install with: pip install rapidfuzz"
        )

    def _summary(concept):
        # Compact view of one concept as it appears in the report.
        return {
            'id': concept['concept_id'],
            'name': concept['name'],
            'category': concept.get('category'),
            'document_count': concept.get('document_count', 0),
            'chunk_count': concept.get('chunk_count', 0),
        }

    def _pair_record(first, second, score):
        # The concept that is not the suggested primary is the merge candidate.
        keep = suggest_primary_concept([first, second])
        drop = second if keep == first else first
        return {
            'similarity_score': score,
            'entity_a': _summary(first),
            'entity_b': _summary(second),
            'suggested_primary': keep['name'],
            'suggested_primary_id': keep['concept_id'],
            'suggested_merge': drop['name'],
            'suggested_merge_id': drop['concept_id'],
        }

    min_score = args.threshold
    pairs = [
        _pair_record(first, second, score)
        for first, second, score in find_similar_concepts(threshold=min_score)
    ]
    pair_count = len(pairs)

    return success_response(
        f"Found {pair_count} potential duplicate pairs",
        data={
            'threshold': min_score,
            'duplicate_count': pair_count,
            'duplicates': pairs
        }
    )


def cmd_merge(args) -> StructuredResponse:
    """Merge a secondary entity into a primary one, directly or via the review queue.

    Reads ``args.primary_id``, ``args.secondary_id``, ``args.queue``,
    ``args.reason`` and ``args.dry_run``.

    Behaviour:
        * Both entities must exist and must differ, otherwise a NOT_FOUND or
          INVALID_INPUT error is returned.
        * With ``--queue`` the merge is submitted to ``ConceptMergeQueue``
          instead of being executed; the name similarity (0.0-1.0) is passed
          along when fuzzy matching is available.
        * With ``--dry-run`` nothing is modified: neither the review queue
          nor the entities are touched, and a preview is returned instead.
        * Otherwise ``merge_concepts`` is executed and its result returned.

    Returns:
        A success response describing what was (or would be) done, or an
        error response when a precondition fails or the merge reports failure.
    """
    if not MERGE_AVAILABLE:
        return error_response(
            ErrorCodes.CONFIGURATION_REQUIRED,
            f"Entity management not available: {MERGE_ERROR}"
        )

    primary_id = args.primary_id
    secondary_id = args.secondary_id

    # Look up both entities; "not found" takes precedence over the
    # self-merge check, primary before secondary.
    primary = get_concept_by_id(primary_id)
    if not primary:
        return error_response(
            ErrorCodes.NOT_FOUND,
            f"Primary entity not found: {primary_id}"
        )

    secondary = get_concept_by_id(secondary_id)
    if not secondary:
        return error_response(
            ErrorCodes.NOT_FOUND,
            f"Secondary entity not found: {secondary_id}"
        )

    if primary_id == secondary_id:
        return error_response(
            ErrorCodes.INVALID_INPUT,
            "Cannot merge entity into itself"
        )

    # Queue mode - add to review queue instead of immediate merge
    if args.queue:
        if not QUEUE_AVAILABLE:
            return error_response(
                ErrorCodes.CONFIGURATION_REQUIRED,
                "Review queue not available"
            )

        # A dry run must not have side effects, so preview the request
        # instead of writing an item into the review queue.
        if args.dry_run:
            return success_response(
                f"[DRY RUN] Would queue merge of '{secondary['name']}' into '{primary['name']}' for review",
                data={
                    'dry_run': True,
                    'queued': False,
                    'primary': primary['name'],
                    'secondary': secondary['name'],
                    'affected_docs': secondary.get('document_count', 0),
                }
            )

        base_dir = Path(__file__).parent.parent / "review_queues"
        queue = ConceptMergeQueue(base_dir)

        # Name similarity doubles as the confidence handed to the queue;
        # fuzz.ratio yields 0-100, the queue receives 0.0-1.0.
        similarity = 0.0
        if FUZZY_AVAILABLE:
            from rapidfuzz import fuzz
            similarity = fuzz.ratio(primary['name'], secondary['name']) / 100.0

        item_id, auto_approved = queue.suggest_merge(
            source_concept=secondary['name'],
            target_concept=primary['name'],
            source_id=secondary_id,
            target_id=primary_id,
            similarity=similarity,
            reason=args.reason or "manual_request",
            affected_docs=secondary.get('document_count', 0)
        )

        return success_response(
            "Merge request queued for review",
            data={
                'item_id': item_id,
                'auto_approved': auto_approved,
                'primary': primary['name'],
                'secondary': secondary['name'],
                'status': 'auto_approved' if auto_approved else 'pending_review'
            }
        )

    # Direct merge mode
    if args.dry_run:
        docs = get_concept_documents(secondary_id)
        return success_response(
            f"[DRY RUN] Would merge '{secondary['name']}' into '{primary['name']}'",
            data={
                'dry_run': True,
                'primary': primary['name'],
                'secondary': secondary['name'],
                'documents_to_relink': len(docs),
            }
        )

    # Execute merge
    result = merge_concepts(primary_id, [secondary_id], dry_run=False)

    # .get() so a result without a 'status' key is reported as a failed
    # merge rather than surfacing as a KeyError.
    if result.get('status') == 'success':
        return success_response(
            f"Merged '{secondary['name']}' into '{primary['name']}'",
            data=result
        )

    return error_response(
        ErrorCodes.UNKNOWN_ERROR,
        result.get('error', 'Merge failed')
    )


def cmd_alias(args) -> StructuredResponse:
    """Add an alias to an entity.

    Reads ``args.entity_id`` and ``args.alias_name``.  The alias is appended
    to the ``aliases`` array of the matching row in ``concepts``.

    Returns:
        * INVALID_INPUT error when the alias is empty or whitespace-only.
        * NOT_FOUND error when no entity has the given ID.
        * Success with ``status: already_exists`` when the entity already
          carries the alias (no write is performed).
        * Success with the updated alias list after a write.
        * UNKNOWN_ERROR when the database update fails.
    """
    if not MERGE_AVAILABLE:
        return error_response(
            ErrorCodes.CONFIGURATION_REQUIRED,
            f"Entity management not available: {MERGE_ERROR}"
        )

    entity_id = args.entity_id
    alias_name = args.alias_name

    # A blank alias carries no information and would only pollute the array.
    if not alias_name or not alias_name.strip():
        return error_response(
            ErrorCodes.INVALID_INPUT,
            "Alias name must not be empty"
        )

    entity = get_concept_by_id(entity_id)
    if not entity:
        return error_response(
            ErrorCodes.NOT_FOUND,
            f"Entity not found: {entity_id}"
        )

    # Check if alias already exists (aliases may be NULL/None in the store)
    existing_aliases = entity.get('aliases') or []
    if alias_name in existing_aliases:
        return success_response(
            f"Alias '{alias_name}' already exists for '{entity['name']}'",
            data={
                'entity_id': entity_id,
                'entity_name': entity['name'],
                'alias': alias_name,
                'status': 'already_exists'
            }
        )

    # Add alias
    try:
        # Imported lazily so a missing db_utils is reported through the
        # error response below rather than at module import time.
        from db_utils import get_db_connection
        with get_db_connection() as conn:
            with conn.cursor() as cur:
                # COALESCE turns a NULL aliases column into an empty array
                # so array_append has something to append to.
                cur.execute("""
                    UPDATE concepts
                    SET aliases = array_append(COALESCE(aliases, ARRAY[]::text[]), %s)
                    WHERE concept_id = %s
                """, (alias_name, entity_id))
                conn.commit()

        return success_response(
            f"Added alias '{alias_name}' to '{entity['name']}'",
            data={
                'entity_id': entity_id,
                'entity_name': entity['name'],
                'alias_added': alias_name,
                'all_aliases': existing_aliases + [alias_name]
            }
        )
    except Exception as e:
        return error_response(
            ErrorCodes.UNKNOWN_ERROR,
            f"Failed to add alias: {e}"
        )


def cmd_show(args) -> StructuredResponse:
    """Show the details of one entity together with its linked documents.

    ``args.entity_id`` may be a numeric ID or an entity name: it is first
    tried as an integer ID and, if that yields nothing, looked up by name.

    Returns:
        A NOT_FOUND error when neither lookup matches; otherwise a success
        response with the entity fields, document/mention statistics and at
        most the first 20 linked documents (``has_more_documents`` signals
        truncation).
    """
    if not MERGE_AVAILABLE:
        return error_response(
            ErrorCodes.CONFIGURATION_REQUIRED,
            f"Entity management not available: {MERGE_ERROR}"
        )

    entity_id = args.entity_id

    # Try as ID first, then as name
    try:
        entity = get_concept_by_id(int(entity_id))
    except (ValueError, TypeError):
        entity = None

    if not entity:
        entity = get_concept_by_name(str(entity_id))

    if not entity:
        return error_response(
            ErrorCodes.NOT_FOUND,
            f"Entity not found: {entity_id}"
        )

    # Get linked documents
    docs = get_concept_documents(entity['concept_id'])

    # A missing or None timestamp becomes '' instead of the string 'None'.
    created_at = entity.get('created_at')
    created_text = '' if created_at is None else str(created_at)

    # Treat a None mention_count as zero so one incomplete row cannot
    # break the whole summary.
    total_mentions = sum((d.get('mention_count') or 0) for d in docs)

    return success_response(
        f"Entity: {entity['name']}",
        data={
            'entity': {
                'id': entity['concept_id'],
                'name': entity['name'],
                'category': entity.get('category'),
                'description': entity.get('description'),
                'aliases': entity.get('aliases') or [],
                'created_at': created_text,
            },
            'statistics': {
                'document_count': len(docs),
                'total_mentions': total_mentions,
            },
            'linked_documents': docs[:20],
            'has_more_documents': len(docs) > 20,
        }
    )


def cmd_health(args) -> StructuredResponse:
    """Summarise the overall condition of the entity store.

    Counts entities that have aliases, a category or a description, collects
    entities whose document and chunk counts are both zero, and - when fuzzy
    matching is available - counts similar-name pairs at a threshold of 80.
    Each finding becomes an entry in ``issues``.  The final ``status`` is
    ``healthy`` with no issues, ``needs_attention`` with one or two, and
    ``unhealthy`` otherwise.

    ``args`` is not read by this command.
    """
    if not MERGE_AVAILABLE:
        return error_response(
            ErrorCodes.CONFIGURATION_REQUIRED,
            f"Entity management not available: {MERGE_ERROR}"
        )

    all_concepts = get_all_concepts()
    entity_total = len(all_concepts)

    def _count_with(field):
        # Number of concepts whose ``field`` holds a truthy value.
        return sum(1 for concept in all_concepts if concept.get(field))

    with_aliases = _count_with('aliases')
    with_category = _count_with('category')
    with_description = _count_with('description')

    # Orphans: no document links and no chunk links.
    unlinked = [
        {'id': concept['concept_id'], 'name': concept['name']}
        for concept in all_concepts
        if concept.get('document_count', 0) == 0 and concept.get('chunk_count', 0) == 0
    ]

    issues = []
    recommendations = []
    duplicate_pairs = 0

    # Duplicate detection depends on the optional fuzzy matcher.
    if not FUZZY_AVAILABLE:
        issues.append({
            'type': 'fuzzy_unavailable',
            'message': "Cannot check for duplicates - install rapidfuzz package"
        })
    else:
        similar_pairs = find_similar_concepts(threshold=80)
        duplicate_pairs = len(similar_pairs)
        if similar_pairs:
            issues.append({
                'type': 'potential_duplicates',
                'count': duplicate_pairs,
                'message': f"Found {duplicate_pairs} potential duplicate entity pairs"
            })
            recommendations.append(
                f"Review {duplicate_pairs} potential duplicate entities with: rdf entity duplicates"
            )

    if unlinked:
        issues.append({
            'type': 'orphan_entities',
            'count': len(unlinked),
            'message': f"Found {len(unlinked)} entities with no document/chunk links",
            'examples': unlinked[:5]
        })

    uncategorised = entity_total - with_category
    if uncategorised > 0:
        issues.append({
            'type': 'missing_category',
            'count': uncategorised,
            'message': f"{uncategorised} entities have no category assigned"
        })

    # Overall status is derived purely from how many issue entries exist.
    issue_total = len(issues)
    if issue_total == 0:
        overall = 'healthy'
    elif issue_total <= 2:
        overall = 'needs_attention'
    else:
        overall = 'unhealthy'

    report = {
        'total_entities': entity_total,
        'entities_with_aliases': with_aliases,
        'entities_with_category': with_category,
        'entities_with_description': with_description,
        'orphan_entities': len(unlinked),
        'potential_duplicates': duplicate_pairs,
        'issues': issues,
        'recommendations': recommendations,
        'status': overall,
    }

    return success_response(
        f"Entity health check: {report['status']}",
        data=report
    )


def cmd_list(args) -> StructuredResponse:
    """List entities, optionally filtered by category, sorted and limited.

    Reads ``args.sort`` (``name``, ``docs`` or ``mentions``),
    ``args.category`` and ``args.limit``.

    * The category filter is applied first, then the sort, then the limit.
    * ``name`` sorts case-insensitively ascending; ``docs`` and ``mentions``
      sort descending.  Missing/None counts are treated as zero.
    * A limit of 0 (or None) means "no limit"; a negative limit is rejected
      with INVALID_INPUT rather than silently dropping rows from the end.

    Returns:
        A success response whose ``total_entities`` is the number of
        entities matching the filter and whose ``entities`` holds the
        (possibly truncated) rows.
    """
    if not MERGE_AVAILABLE:
        return error_response(
            ErrorCodes.CONFIGURATION_REQUIRED,
            f"Entity management not available: {MERGE_ERROR}"
        )

    limit = args.limit
    if limit is not None and limit < 0:
        return error_response(
            ErrorCodes.INVALID_INPUT,
            f"Limit must be zero or a positive integer, got {limit}"
        )

    def _mentions(concept):
        # Combined document + chunk mentions; either side may be None.
        return (concept.get('total_doc_mentions') or 0) + (concept.get('total_chunk_mentions') or 0)

    concepts = get_all_concepts()

    # Filter before sorting: same result (the sort is stable), less to sort.
    if args.category:
        concepts = [c for c in concepts if c.get('category') == args.category]

    # sorted() returns a new list, so the list handed out by
    # get_all_concepts() is never reordered in place.
    if args.sort == 'name':
        concepts = sorted(concepts, key=lambda c: c['name'].lower())
    elif args.sort == 'docs':
        concepts = sorted(concepts, key=lambda c: c.get('document_count') or 0, reverse=True)
    elif args.sort == 'mentions':
        concepts = sorted(concepts, key=_mentions, reverse=True)

    # Limit
    total = len(concepts)
    if limit:
        concepts = concepts[:limit]

    # Format for output
    entities = [
        {
            'id': c['concept_id'],
            'name': c['name'],
            'category': c.get('category'),
            'document_count': c.get('document_count') or 0,
            'chunk_count': c.get('chunk_count') or 0,
            'total_mentions': _mentions(c),
            'aliases': c.get('aliases') or [],
        }
        for c in concepts
    ]

    return success_response(
        f"Listed {len(entities)} of {total} entities",
        data={
            'total_entities': total,
            'returned': len(entities),
            'sort': args.sort,
            'category_filter': args.category,
            'entities': entities
        }
    )


def _build_parser():
    """Create the argument parser with one sub-command per entity operation."""
    parser = argparse.ArgumentParser(
        description="RDF Entity Management",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Find potential duplicate entities
  rdf entity duplicates --threshold 80 --format json

  # Merge two entities directly
  rdf entity merge 123 456

  # Queue merge for review
  rdf entity merge 123 456 --queue

  # Add alias to entity
  rdf entity alias 123 "R. Steiner"

  # Show entity details
  rdf entity show 123

  # Entity health check
  rdf entity health --format json

  # List all entities
  rdf entity list --sort docs --limit 50
        """
    )

    subparsers = parser.add_subparsers(dest="command", help="Commands")

    # duplicates command
    dup_parser = subparsers.add_parser("duplicates", help="Find potential duplicate entities")
    dup_parser.add_argument("--threshold", "-t", type=int, default=80,
                           help="Similarity threshold (0-100, default: 80)")
    dup_parser.add_argument("--format", "-f", choices=["json", "text"], default="json")

    # merge command
    merge_parser = subparsers.add_parser("merge", help="Merge entities")
    merge_parser.add_argument("primary_id", type=int, help="Primary entity ID (to keep)")
    merge_parser.add_argument("secondary_id", type=int, help="Secondary entity ID (to merge)")
    merge_parser.add_argument("--queue", "-q", action="store_true",
                             help="Add to review queue instead of immediate merge")
    merge_parser.add_argument("--reason", help="Reason for merge (for queue)")
    merge_parser.add_argument("--dry-run", "-n", action="store_true",
                             help="Preview without executing")
    merge_parser.add_argument("--format", "-f", choices=["json", "text"], default="json")

    # alias command
    alias_parser = subparsers.add_parser("alias", help="Add alias to entity")
    alias_parser.add_argument("entity_id", type=int, help="Entity ID")
    alias_parser.add_argument("alias_name", help="Alias to add")
    alias_parser.add_argument("--format", "-f", choices=["json", "text"], default="json")

    # show command
    show_parser = subparsers.add_parser("show", help="Show entity details")
    show_parser.add_argument("entity_id", help="Entity ID or name")
    show_parser.add_argument("--format", "-f", choices=["json", "text"], default="json")

    # health command
    health_parser = subparsers.add_parser("health", help="Entity health check")
    health_parser.add_argument("--format", "-f", choices=["json", "text"], default="json")

    # list command
    list_parser = subparsers.add_parser("list", help="List all entities")
    list_parser.add_argument("--sort", "-s", choices=["name", "docs", "mentions"], default="name")
    list_parser.add_argument("--category", "-c", help="Filter by category")
    list_parser.add_argument("--limit", "-l", type=int, default=50, help="Limit results")
    list_parser.add_argument("--format", "-f", choices=["json", "text"], default="json")

    return parser


def _print_text_details(command, data):
    """Print the command-specific text body for a successful result.

    Commands without a dedicated layout (merge, alias) print nothing here;
    their response message is the whole output.
    """
    if command == "duplicates":
        dups = data.get('duplicates', [])
        if dups:
            print(f"\n{'Score':>5} {'Entity A':<35} {'Entity B':<35}")
            print("-" * 80)
            for d in dups:
                # Print the suggested primary in the first column so the
                # footnote below is actually true.
                keep = d['suggested_primary'][:33]
                drop = d['suggested_merge'][:33]
                print(f"{d['similarity_score']:>5}% {keep:<35} {drop:<35}")
            print("\n* Suggested primary shown first")
        else:
            print("No potential duplicates found.")

    elif command == "list":
        entities = data.get('entities', [])
        print(f"\n{'ID':>6} {'Name':<40} {'Cat':<12} {'Docs':>5}")
        print("-" * 70)
        for e in entities:
            cat = (e.get('category') or '-')[:12]
            name = e['name'][:38]
            print(f"{e['id']:>6} {name:<40} {cat:<12} {e['document_count']:>5}")

    elif command == "health":
        print(f"\nStatus: {data.get('status', 'unknown').upper()}")
        print(f"Total entities: {data.get('total_entities', 0)}")
        print(f"Potential duplicates: {data.get('potential_duplicates', 0)}")
        print(f"Orphan entities: {data.get('orphan_entities', 0)}")
        if data.get('issues'):
            print("\nIssues:")
            for issue in data['issues']:
                print(f"  - {issue.get('message')}")

    elif command == "show":
        e = data.get('entity', {})
        print(f"\nID: {e.get('id')}")
        print(f"Name: {e.get('name')}")
        print(f"Category: {e.get('category') or '-'}")
        print(f"Description: {e.get('description') or '-'}")
        print(f"Aliases: {', '.join(e.get('aliases', [])) or '-'}")
        stats = data.get('statistics', {})
        print(f"Documents: {stats.get('document_count', 0)}")
        print(f"Total mentions: {stats.get('total_mentions', 0)}")


def main():
    """Parse the command line, run the selected entity command and print it.

    Returns:
        Process exit code: 0 when help was printed or the command succeeded,
        1 when the command returned an error response - in both JSON and
        text output modes.
    """
    parser = _build_parser()
    args = parser.parse_args()

    # No sub-command given: show usage and exit cleanly.
    if not args.command:
        parser.print_help()
        return 0

    # Dispatch to command
    handlers = {
        "duplicates": cmd_duplicates,
        "merge": cmd_merge,
        "alias": cmd_alias,
        "show": cmd_show,
        "health": cmd_health,
        "list": cmd_list,
    }
    result = handlers[args.command](args)
    succeeded = result.status == 'success'

    # Output formatting
    if getattr(args, 'format', None) == "json":
        result.print_json()
    elif succeeded:
        print(result.message)
        _print_text_details(args.command, result.data or {})
    else:
        print(f"Error: {result.message}")

    # Failures must be visible to callers regardless of the output format.
    return 0 if succeeded else 1


# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
