#!/usr/bin/env python3
"""
AEI Photo Listing - Remediation Script

Fixes existing data issues on the remote server (18.225.0.90):
  1. Moves misplaced photos from 2025 Customers/ to 2026 Customers/ (premeasure year bug)
  2. Backfills folder_path in meter_files for rows with NULL
  3. Regenerates missing WebP files from originals in scheduler/uploads/

Usage:
  python3.6 remediate_photos.py --dry-run          # Preview changes (default)
  python3.6 remediate_photos.py --execute           # Apply changes
  python3.6 remediate_photos.py --execute --step 2  # Run only step 2

Must be run on the remote server (18.225.0.90) as a user with access to
/mnt/dropbox/ and the mandhdesign_schedular database.
"""

import argparse
import os
import re
import shutil
import subprocess
import sys

try:
    import pymysql
    pymysql.install_as_MySQLdb()
    import pymysql as MySQLdb
except ImportError:
    print("ERROR: pip3.6 install pymysql")
    sys.exit(1)

# ── Database config ──────────────────────────────────────────────────
# Connection parameters handed to MySQLdb.connect() by get_db().
# NOTE(review): the password is hard-coded in source. Consider loading it
# from the environment or a protected config file, and rotating it if this
# file has ever been shared or committed.
DB_HOST = "localhost"
DB_USER = "schedular"
DB_PASS = "M1gif9!6"
DB_NAME = "mandhdesign_schedular"

# ── Paths ────────────────────────────────────────────────────────────
# Root under which the "<YEAR> Customers" folder trees live.
DROPBOX_BASE = "/mnt/dropbox"
# Directory holding the original uploads; step 3 reads originals from here
# and copies regenerated WebP files into its "webp" subdirectory.
SCHEDULER_UPLOADS = "/var/www/vhosts/aeihawaii.com/httpdocs/scheduler/uploads"
# Conversion script that step 3 runs as a subprocess, using PYTHON_PATH as
# the interpreter.
GENERATE_WEBP = "/var/www/vhosts/aeihawaii.com/httpdocs/photoapi/generate_webp.py"
PYTHON_PATH = "/usr/local/bin/python3.6"

# Survey job types
# Job-type initials that step 2 files under "Survey"; every other value is
# filed under "Installation".
SURVEY_TYPES = {"PM", "WM", "AS", "RPM", "GCPM"}


def get_db():
    """Open and return a new database connection.

    The connection is built from the module-level DB_HOST / DB_USER /
    DB_PASS / DB_NAME settings with a utf8 character set. The caller owns
    the returned connection and is responsible for closing it.
    """
    connection_settings = {
        "host": DB_HOST,
        "user": DB_USER,
        "passwd": DB_PASS,
        "db": DB_NAME,
        "charset": "utf8",
    }
    return MySQLdb.connect(**connection_settings)


# ── Step 1: Move misplaced photos ────────────────────────────────────

def _iter_dated_subfolders(source_base, date_pattern):
    """Yield ((letter, customer, photo_type, subfolder), path) for each
    sub-directory whose name matches date_pattern.

    Expected layout:
    source_base / {Letter} / {Customer} / {Survey|Installation} / {subfolder}
    Entries are visited in sorted order; non-directories and letter folders
    whose name is not exactly one character are ignored.
    """
    for letter in sorted(os.listdir(source_base)):
        letter_dir = os.path.join(source_base, letter)
        if len(letter) != 1 or not os.path.isdir(letter_dir):
            continue

        for customer in sorted(os.listdir(letter_dir)):
            customer_dir = os.path.join(letter_dir, customer)
            if not os.path.isdir(customer_dir):
                continue

            for photo_type in ("Survey", "Installation"):
                type_dir = os.path.join(customer_dir, photo_type)
                if not os.path.isdir(type_dir):
                    continue

                for subfolder in sorted(os.listdir(type_dir)):
                    subfolder_path = os.path.join(type_dir, subfolder)
                    if (os.path.isdir(subfolder_path)
                            and date_pattern.search(subfolder)):
                        yield ((letter, customer, photo_type, subfolder),
                               subfolder_path)


def _merge_tree(src_dir, dest_dir):
    """Recursively copy everything under src_dir into the existing dest_dir.

    Files are copied with shutil.copy2 and overwrite same-named files in the
    destination. Directories that already exist in the destination are
    merged recursively rather than skipped, so nothing under src_dir is left
    uncopied before the caller removes src_dir.
    """
    for entry in os.listdir(src_dir):
        src_entry = os.path.join(src_dir, entry)
        dest_entry = os.path.join(dest_dir, entry)
        if os.path.isfile(src_entry):
            shutil.copy2(src_entry, dest_entry)
        elif os.path.isdir(src_entry):
            if os.path.isdir(dest_entry):
                _merge_tree(src_entry, dest_entry)
            else:
                shutil.copytree(src_entry, dest_entry)


def step1_move_misplaced(execute=False):
    """Find Survey/Installation subfolders under 2025 Customers/ that have
    a 2026 date in their name, and move them to 2026 Customers/.

    Args:
        execute: when False (default) only print what would be moved; when
            True perform the move. If the destination folder already
            exists, the source is merged into it recursively and then
            removed.

    Returns:
        The number of matching folders found (moved when execute is True).

    NOTE(review): this step only touches the filesystem; any database rows
    that reference the old 2025 location are not rewritten here.
    """
    print("\n" + "=" * 60)
    print("  STEP 1: Move misplaced 2026 photos from 2025 folder")
    print("=" * 60)

    source_base = os.path.join(DROPBOX_BASE, "2025 Customers")
    dest_base = os.path.join(DROPBOX_BASE, "2026 Customers")

    if not os.path.isdir(source_base):
        print(f"  Source not found: {source_base}")
        return 0

    # Date pattern: folders end with MM-DD-YYYY where YYYY=2026
    date_2026 = re.compile(r'\d{2}-\d{2}-2026$')

    moved = 0
    for rel_parts, subfolder_path in _iter_dated_subfolders(source_base,
                                                            date_2026):
        # Same relative location, but under 2026 Customers
        dest_path = os.path.join(dest_base, *rel_parts)

        # Top-level files only; used purely for the progress message.
        file_count = sum(
            1 for f in os.listdir(subfolder_path)
            if os.path.isfile(os.path.join(subfolder_path, f))
        )

        print(f"  MOVE: {subfolder_path}")
        print(f"    -> {dest_path}  ({file_count} files)")

        if execute:
            if os.path.isdir(dest_path):
                # Destination already exists: merge recursively so that
                # nested folders (e.g. webp/) present on both sides keep
                # the source's files, then drop the emptied source.
                _merge_tree(subfolder_path, dest_path)
                shutil.rmtree(subfolder_path)
            else:
                dest_parent = os.path.dirname(dest_path)
                if not os.path.isdir(dest_parent):
                    os.makedirs(dest_parent, 0o777)
                shutil.move(subfolder_path, dest_path)

        moved += 1

    print(f"\n  Total folders to move: {moved}")
    return moved


# ── Step 2: Backfill folder_path ─────────────────────────────────────

def _expected_folder_path(first_name, last_name, job_date, initials):
    """Build the folder path a job's photos are expected to live in.

    job_date may be a date/datetime object or a string starting with
    YYYY-MM-DD (any trailing time component is ignored). Raises ValueError
    when a string date cannot be parsed. The returned path always ends
    with "/".
    """
    from datetime import datetime

    if hasattr(job_date, "strftime"):
        job_day = job_date
    else:
        job_day = datetime.strptime(str(job_date)[:10], "%Y-%m-%d")
    job_year = job_day.strftime("%Y")
    job_date_str = job_day.strftime("%m-%d-%Y")

    customer_name = last_name + ", " + first_name
    index_letter = last_name[0].upper()

    if initials in SURVEY_TYPES:
        photo_type_dir = "Survey"
        photo_s = "S"
    else:
        photo_type_dir = "Installation"
        photo_s = "I"

    return os.path.join(
        DROPBOX_BASE,
        job_year + " Customers",
        index_letter,
        customer_name,
        photo_type_dir,
        customer_name + ", " + initials + "-" + photo_s + ", " + job_date_str
    ) + "/"


def step2_backfill_folder_path(execute=False):
    """Fill in NULL folder_path values in meter_files by reconstructing
    the path from jobs/customers/job_types data. Only processes 2025+ jobs.

    Args:
        execute: when False (default) only print the reconstructed paths;
            when True write the path back for every row whose folder
            exists on disk, then commit once at the end.

    Returns:
        The number of rows updated (always 0 in dry-run mode).

    Rows lacking a job date, last name or job-type initials, or whose date
    cannot be parsed, are counted as skipped instead of aborting the run.
    The cursor and connection are closed even if an error occurs.
    """
    print("\n" + "=" * 60)
    print("  STEP 2: Backfill folder_path in meter_files (2025+ only)")
    print("=" * 60)

    # Only backfill 2025+ rows (folder_path feature didn't exist before)
    query = """
        SELECT mf.id, mf.job_id, cs.first_name, cs.last_name,
               js.job_date, jt.intials
        FROM meter_files mf
        JOIN jobs js ON mf.job_id = js.job_pid
        JOIN customers cs ON js.customer_id = cs.id
        JOIN job_types jt ON js.job_type_id = jt.id
        WHERE mf.file_type = 99
          AND (mf.folder_path IS NULL OR mf.folder_path = '')
          AND js.job_date >= '2025-01-01'
        ORDER BY mf.id
    """

    updated = 0
    skipped = 0

    db = get_db()
    try:
        cur = db.cursor()
        try:
            cur.execute(query)
            rows = cur.fetchall()
            print(f"  Found {len(rows)} rows with NULL folder_path (2025+)")

            for row in rows:
                mf_id, job_pid, first_name, last_name, job_date, initials = row
                # A NULL in any of these makes the path impossible to build.
                if not job_date or not last_name or initials is None:
                    skipped += 1
                    continue

                try:
                    # A NULL first name is treated like an empty one.
                    folder_path = _expected_folder_path(
                        first_name or "", last_name, job_date, initials
                    )
                except ValueError:
                    print(f"  SKIP mf.id={mf_id}: unparseable job_date: {job_date!r}")
                    skipped += 1
                    continue

                exists = os.path.isdir(folder_path)

                print(f"  mf.id={mf_id} job_pid={job_pid}: {folder_path}  exists={exists}")

                # Only record paths that are really present on disk.
                if execute and exists:
                    cur.execute(
                        "UPDATE meter_files SET folder_path = %s WHERE id = %s",
                        (folder_path, mf_id)
                    )
                    updated += 1

            if execute:
                db.commit()
        finally:
            cur.close()
    finally:
        db.close()

    print(f"\n  Total candidates: {len(rows)}, skipped: {skipped}, updated: {updated}")
    return updated


# ── Step 3: Regenerate missing WebP ──────────────────────────────────

def _generate_webp(source_path, webp_path, scheduler_webp):
    """Regenerate one WebP file and mirror it to scheduler_webp.

    Runs generate_webp.py (quality 80) to produce webp_path from
    source_path, creating the target directory if needed. Returns True when
    webp_path exists afterwards. A failure to mirror the file to
    scheduler_webp is reported as a warning but still counts as success,
    because the primary file was produced. All failures are printed rather
    than raised so one bad file cannot abort the whole run.
    """
    webp_dir = os.path.dirname(webp_path)
    try:
        # Ensure webp/ directory exists
        if not os.path.isdir(webp_dir):
            os.makedirs(webp_dir, 0o777)
        # Generate WebP using the same script the upload pipeline uses
        subprocess.check_call([
            PYTHON_PATH, GENERATE_WEBP,
            source_path, webp_path, "80"
        ])
    except subprocess.CalledProcessError as e:
        print(f"    ERROR: generate_webp.py failed: {e}")
        return False
    except OSError as e:
        # e.g. interpreter missing, or the directory could not be created
        print(f"    ERROR: could not run generate_webp.py: {e}")
        return False

    if not os.path.isfile(webp_path):
        print(f"    ERROR: generate_webp.py produced no file at {webp_path}")
        return False

    # Copy to scheduler/uploads/webp/ too
    try:
        shutil.copy2(webp_path, scheduler_webp)
    except OSError as e:
        print(f"    WARNING: could not copy to {scheduler_webp}: {e}")
    return True


def step3_regenerate_webp(execute=False):
    """For meter_files rows where the WebP file is missing from disk,
    regenerate from the original JPEG in scheduler/uploads/.

    Args:
        execute: when False (default) only report which files would be
            regenerated; when True run the conversion for each of them.

    Returns:
        The number of WebP files regenerated (always 0 in dry-run mode, or
        when generate_webp.py is not installed).

    The database is only needed to fetch the candidate rows, so the
    connection is closed before any conversion work starts.

    NOTE(review): a missing folder_path directory is created on demand, so
    rows that still point at a relocated folder would recreate it.
    """
    print("\n" + "=" * 60)
    print("  STEP 3: Regenerate missing WebP files")
    print("=" * 60)

    if not os.path.isfile(GENERATE_WEBP):
        print(f"  ERROR: generate_webp.py not found at {GENERATE_WEBP}")
        return 0

    # Get all photo meter_files rows with a webpfilename and folder_path
    db = get_db()
    try:
        cur = db.cursor()
        try:
            cur.execute("""
                SELECT mf.id, mf.unique_filename, mf.webpfilename, mf.folder_path
                FROM meter_files mf
                WHERE mf.file_type = 99
                  AND mf.webpfilename IS NOT NULL
                  AND mf.webpfilename != ''
                  AND mf.folder_path IS NOT NULL
                  AND mf.folder_path != ''
                ORDER BY mf.id
            """)
            rows = cur.fetchall()
        finally:
            cur.close()
    finally:
        db.close()

    print(f"  Checking {len(rows)} meter_files rows for missing WebP...")

    regenerated = 0
    missing = 0

    for mf_id, unique_filename, webpfilename, folder_path in rows:
        # Expected WebP location
        webp_dir = os.path.join(folder_path.rstrip("/"), "webp")
        webp_path = os.path.join(webp_dir, webpfilename)

        if os.path.isfile(webp_path):
            continue  # WebP exists, skip

        missing += 1

        # The query does not filter on unique_filename, so it may be NULL.
        if not unique_filename:
            print(f"  SKIP mf.id={mf_id}: no unique_filename recorded")
            continue

        # Source: original file in scheduler/uploads/
        source_path = os.path.join(SCHEDULER_UPLOADS, unique_filename)
        if not os.path.isfile(source_path):
            print(f"  SKIP mf.id={mf_id}: source missing: {source_path}")
            continue

        # Also copy to scheduler/uploads/webp/
        scheduler_webp = os.path.join(SCHEDULER_UPLOADS, "webp", webpfilename)

        print(f"  REGEN mf.id={mf_id}: {webpfilename}")
        print(f"    source: {source_path}")
        print(f"    dest:   {webp_path}")

        if execute and _generate_webp(source_path, webp_path, scheduler_webp):
            regenerated += 1

    print(f"\n  Missing WebP files: {missing}, regenerated: {regenerated}")
    return regenerated


# ── Main ─────────────────────────────────────────────────────────────

def main():
    """Command-line entry point.

    Parses --dry-run / --execute (mutually exclusive) and an optional
    --step selector, prints a banner describing the run mode, then runs the
    selected remediation steps in numeric order. Without --execute nothing
    is modified.
    """
    cli = argparse.ArgumentParser(
        description="AEI Photo Listing -Remediation Script"
    )
    run_mode = cli.add_mutually_exclusive_group()
    run_mode.add_argument(
        "--dry-run", action="store_true", default=True,
        help="Preview changes without modifying anything (default)",
    )
    run_mode.add_argument(
        "--execute", action="store_true",
        help="Apply changes for real",
    )
    cli.add_argument(
        "--step", type=int, choices=[1, 2, 3],
        help="Run only a specific step (1, 2, or 3)",
    )
    options = cli.parse_args()

    # Only --execute changes behaviour; the dry-run flag is informational.
    execute = options.execute
    if execute:
        mode_label = "EXECUTE"
    else:
        mode_label = "DRY RUN"

    print(f"\nAEI Photo Remediation -{mode_label}")
    print(f"Server: {os.uname()[1]}")
    print(f"Dropbox: {DROPBOX_BASE}")

    if not execute:
        print("\n  *** DRY RUN -no changes will be made ***")
        print("  *** Use --execute to apply changes ***\n")

    # No --step means "run everything".
    if options.step:
        selected = [options.step]
    else:
        selected = [1, 2, 3]

    step_table = (
        (1, step1_move_misplaced),
        (2, step2_backfill_folder_path),
        (3, step3_regenerate_webp),
    )
    for number, run_step in step_table:
        if number in selected:
            run_step(execute)

    rule = "=" * 60
    print("\n" + rule)
    print(f"  Done ({mode_label})")
    print(rule + "\n")


if __name__ == "__main__":
    main()
