#!/usr/bin/env python3
"""
Backfill Thumbnails for AEI Photo System (PHOTO-017)

Batch processes all existing photos in meter_files to generate:
  - 200x200 thumbnail WebP (Q70) in uploads/thumbs/
  - 1024px max standard WebP (Q80) in uploads/webp/
  - 2048px max hi-res WebP (Q82) in uploads/hi-res/

Two-pass operation:
  Pass 1 (default): Generate missing derivatives
  Pass 2 (--verify): Verify all derivatives exist, log any issues

Resumable: skips files that already exist.
Python 3.6 compatible.

Usage:
    python3.6 backfill_thumbnails.py [--workers N] [--batch N] [--dry-run] [--limit N]
    python3.6 backfill_thumbnails.py --verify

Run as:
    nohup /usr/local/bin/python3.6 backfill_thumbnails.py --workers 4 > /tmp/backfill.log 2>&1 &

Author: AEI Photo System
Version: 2.0 (PHOTO-017 3-tier derivatives)
Date: 2026-02-19
"""

import os
import sys
import time
import json
import hashlib
import argparse
from multiprocessing import Pool
from PIL import Image, ImageOps

# Python 3.6 compatibility: no f-strings used

# Configuration
# Legacy JPEGs remain in UPLOADS_DIR — untouched by backfill.
# Backfill generates all 3 derivative tiers: thumb, standard, hi-res.
UPLOADS_DIR = '/var/www/vhosts/aeihawaii.com/httpdocs/scheduler/uploads'
WEBP_DIR = os.path.join(UPLOADS_DIR, 'webp')      # standard (1024px max) — primary serving tier
THUMBS_DIR = os.path.join(UPLOADS_DIR, 'thumbs')   # 200x200 thumbnails
HIRES_DIR = os.path.join(UPLOADS_DIR, 'hi-res')    # hi-res (2048px max) — future downloads/print

# Bounding box for thumbnails and longest-side limits (pixels) for the
# standard and hi-res tiers, followed by the WebP quality used for each tier.
THUMBNAIL_SIZE = (200, 200)
MAX_LARGE = 1024
MAX_HIRES = 2048
WEBP_THUMB_QUALITY = 70
WEBP_LARGE_QUALITY = 80
WEBP_HIRES_QUALITY = 82

# Database connection settings. Each value may be overridden through the
# environment (BACKFILL_DB_HOST / BACKFILL_DB_USER / BACKFILL_DB_PASS /
# BACKFILL_DB_NAME) so credentials do not have to live in the source tree.
# The literals below are kept only as backward-compatible defaults.
# NOTE(review): the default password is committed in plain text — consider
# rotating it and supplying it via the environment only.
DB_HOST = os.environ.get('BACKFILL_DB_HOST', 'localhost')
DB_USER = os.environ.get('BACKFILL_DB_USER', 'schedular')
DB_PASS = os.environ.get('BACKFILL_DB_PASS', 'M1gif9!6')
DB_NAME = os.environ.get('BACKFILL_DB_NAME', 'mandhdesign_schedular')

# JSON-lines file that log_failure() appends to (one object per failure).
FAILURE_LOG = '/tmp/backfill_failures.jsonl'


def get_db_connection():
    """Open and return a new MySQL connection.

    MySQLdb is tried first; if that module cannot be imported, pymysql is
    used instead. Both drivers receive the same connection parameters, taken
    from the module-level DB_* settings.
    """
    connect_kwargs = {
        'host': DB_HOST,
        'user': DB_USER,
        'passwd': DB_PASS,
        'db': DB_NAME,
        'charset': 'utf8',
    }
    try:
        import MySQLdb as driver
        return driver.connect(**connect_kwargs)
    except ImportError:
        # Preferred driver not installed: fall back to pymysql.
        import pymysql as fallback_driver
        return fallback_driver.connect(**connect_kwargs)


def load_image(file_path):
    """Load an image with its EXIF orientation applied.

    Args:
        file_path: Path of the image file to open.

    Returns:
        The image produced by ``ImageOps.exif_transpose``.

    Raises:
        Whatever ``Image.open`` / ``ImageOps.exif_transpose`` raise for a
        missing, unreadable or unidentifiable file.
    """
    # Image.open() is lazy and keeps the file handle open. exif_transpose()
    # hands back a separate image (a transposed one, or a copy when there is
    # no orientation tag), so the source handle can be released right away
    # instead of lingering until garbage collection.
    with Image.open(file_path) as source:
        return ImageOps.exif_transpose(source)


def generate_webp_name(unique_filename):
    """Build a webpfilename for a record that has none.

    The result is the original name without its extension, an underscore,
    the first 12 hex characters of the MD5 of the full original name, and a
    ``.webp`` suffix. The same input always yields the same name.
    """
    stem, _extension = os.path.splitext(unique_filename)
    digest = hashlib.md5(unique_filename.encode()).hexdigest()
    return '%s_%s.webp' % (stem, digest[:12])


def log_failure(record_id, unique_filename, error_type, detail):
    """Record one failure as a JSON object on its own line in FAILURE_LOG.

    The object carries the record id, the file name, the error type, a
    free-form detail string and a local-time timestamp. The log is opened in
    append mode, so earlier entries are preserved.
    """
    stamp = time.strftime('%Y-%m-%d %H:%M:%S')
    payload = dict([
        ('id', record_id),
        ('file', unique_filename),
        ('type', error_type),
        ('detail', detail),
        ('ts', stamp),
    ])
    with open(FAILURE_LOG, 'a') as log_file:
        log_file.write(json.dumps(payload) + '\n')


def _scaled_size(width, height, max_side):
    """Return (w, h) with the longer side set to max_side, aspect ratio kept."""
    if width > height:
        # Clamp to 1 so extreme aspect ratios never truncate to a 0-pixel side.
        return (max_side, max(1, int(height * (max_side / float(width)))))
    return (max(1, int(width * (max_side / float(height)))), max_side)


def _fit_copy(img, max_side):
    """Return a copy of img that fits in max_side x max_side (never upscaled)."""
    width, height = img.size
    if width <= max_side and height <= max_side:
        return img.copy()
    return img.resize(_scaled_size(width, height, max_side), Image.LANCZOS)


def _save_webp_atomic(image, dest_path, quality):
    """Write image as WebP to dest_path via a temp file and an atomic rename."""
    # The pid suffix keeps parallel workers from colliding on the temp name.
    tmp_path = '%s.%d.tmp' % (dest_path, os.getpid())
    try:
        image.save(tmp_path, 'WEBP', quality=quality)
        os.chmod(tmp_path, 0o644)
        # The final name only ever appears once the file is complete, so an
        # interrupted run cannot leave a truncated derivative that later runs
        # would skip as "already exists".
        os.replace(tmp_path, dest_path)
    except BaseException:
        try:
            os.remove(tmp_path)
        except OSError:
            pass  # temp file was never created or is already gone
        raise


def process_record(record):
    """
    Process a single meter_files record, creating any missing derivatives.

    record = (id, unique_filename, webpfilename)
    Returns (record_id, status, webpfilename_update_or_None, error_type, error_detail)

    status is one of:
      'processed' - at least one derivative file was written
      'skipped'   - nothing had to be written
      'error'     - see error_type / error_detail

    error_type is 'source_missing' (no usable source file), 'corrupt_image'
    (an IOError was raised; on Python 3 IOError is an alias of OSError, so
    filesystem write errors are reported under this type too) or 'pil_error'
    (any other exception).

    webpfilename_update is set only when the record had no webpfilename and
    a generated one should be stored in the DB by the caller.
    """
    record_id, unique_filename, webpfilename = record

    # A NULL/empty filename would make os.path.join() raise outside the try
    # block below and take the whole worker pool down with it.
    if not unique_filename:
        return (record_id, 'error', None, 'source_missing', 'empty unique_filename')

    source_path = os.path.join(UPLOADS_DIR, unique_filename)
    if not os.path.isfile(source_path):
        return (record_id, 'error', None, 'source_missing', unique_filename)

    needs_db_update = False
    created_any = False

    try:
        # Step 1: Ensure webpfilename is set (generate one if missing)
        if not webpfilename:
            webpfilename = generate_webp_name(unique_filename)
            needs_db_update = True

        # Backfill generates all 3 derivative tiers.
        # Original JPEG stays in uploads/ untouched.
        thumb_path = os.path.join(THUMBS_DIR, webpfilename)
        large_path = os.path.join(WEBP_DIR, webpfilename)
        hires_path = os.path.join(HIRES_DIR, webpfilename)

        # Check what already exists
        has_thumb = os.path.isfile(thumb_path)
        has_large = os.path.isfile(large_path)
        has_hires = os.path.isfile(hires_path)

        # Load image once (need dimensions to decide on hi-res)
        img = load_image(source_path)
        width, height = img.size
        # Hi-res is only worth storing when the source exceeds the standard tier.
        needs_hires = width > MAX_LARGE or height > MAX_LARGE

        if has_thumb and has_large and (has_hires or not needs_hires) and not needs_db_update:
            return (record_id, 'skipped', None, None, None)

        # Generate thumbnail if missing (fits in 200x200, Q70)
        if not has_thumb:
            thumb = img.copy()
            thumb.thumbnail(THUMBNAIL_SIZE, Image.LANCZOS)
            _save_webp_atomic(thumb, thumb_path, WEBP_THUMB_QUALITY)
            created_any = True

        # Generate standard WebP if missing (1024px max, Q80) -> webp/ dir
        if not has_large:
            _save_webp_atomic(_fit_copy(img, MAX_LARGE), large_path, WEBP_LARGE_QUALITY)
            created_any = True

        # Generate hi-res WebP if missing (2048px max, Q82) -> hi-res/ dir
        # Skip if source is not larger than standard tier (no point duplicating)
        if not has_hires and needs_hires:
            _save_webp_atomic(_fit_copy(img, MAX_HIRES), hires_path, WEBP_HIRES_QUALITY)
            created_any = True

        update_name = webpfilename if needs_db_update else None
        status = 'processed' if created_any else 'skipped'
        return (record_id, status, update_name, None, None)

    except IOError as e:
        return (record_id, 'error', None, 'corrupt_image', str(e))
    except Exception as e:
        return (record_id, 'error', None, 'pil_error', str(e))


def run_verify(args):
    """Pass 2: Verify all derivatives exist for every meter_files record.

    Walks the image records in id order, 1000 at a time, and for each one
    checks the source file, the stored webpfilename and the thumb / standard /
    hi-res derivative files. Every problem is appended to FAILURE_LOG via
    log_failure() and a summary is printed at the end. Nothing is created or
    modified on disk or in the DB.

    Args:
        args: Parsed command-line arguments (not used by this pass).
    """
    print("=== VERIFICATION PASS ===")
    db = get_db_connection()
    cursor = None
    try:
        cursor = db.cursor()

        cursor.execute("SELECT COUNT(*) FROM meter_files WHERE file_type IN (1,2,3,12,99)")
        total = cursor.fetchone()[0]
        print("Total records: %d" % total)

        missing_source = 0
        unreadable_source = 0
        missing_thumb = 0
        missing_large = 0
        missing_hires = 0
        no_webpname = 0
        ok = 0
        offset = 0

        while True:
            cursor.execute(
                "SELECT id, unique_filename, webpfilename "
                "FROM meter_files WHERE file_type IN (1,2,3,12,99) "
                "ORDER BY id ASC LIMIT 1000 OFFSET %s",
                (offset,)
            )
            rows = cursor.fetchall()
            if not rows:
                break
            offset += len(rows)

            for (rec_id, uname, wname) in rows:
                # A NULL/empty filename cannot be joined into a path; treat it
                # like any other record whose source file is absent.
                source = os.path.join(UPLOADS_DIR, uname) if uname else None
                if source is None or not os.path.isfile(source):
                    missing_source += 1
                    log_failure(rec_id, uname, 'source_missing', 'Verify pass')
                    continue

                if not wname:
                    no_webpname += 1
                    log_failure(rec_id, uname, 'no_webpfilename', 'DB record has NULL webpfilename')
                    continue

                issues = []
                source_readable = True
                if not os.path.isfile(os.path.join(THUMBS_DIR, wname)):
                    missing_thumb += 1
                    issues.append('thumb')
                if not os.path.isfile(os.path.join(WEBP_DIR, wname)):
                    missing_large += 1
                    issues.append('standard(webp/)')
                # Hi-res only expected when source is larger than standard tier
                if not os.path.isfile(os.path.join(HIRES_DIR, wname)):
                    try:
                        # Only the dimensions are needed; the context manager
                        # releases the handle even if reading them fails.
                        with Image.open(source) as src_img:
                            w, h = src_img.size
                    except Exception as e:
                        # Size unknown, so the hi-res expectation cannot be
                        # decided. Report it instead of counting the record OK.
                        source_readable = False
                        unreadable_source += 1
                        log_failure(rec_id, uname, 'source_unreadable', str(e))
                    else:
                        if w > MAX_LARGE or h > MAX_LARGE:
                            missing_hires += 1
                            issues.append('hires(hi-res/)')

                if issues:
                    log_failure(rec_id, uname, 'missing_derivatives', ','.join(issues))
                elif source_readable:
                    ok += 1

            # Simple progress
            print("Checked: %d/%d" % (offset, total))

        print("---")
        print("VERIFICATION RESULTS:")
        print("  OK (all derivatives present): %d" % ok)
        print("  Source file missing: %d" % missing_source)
        print("  Source unreadable (hi-res check skipped): %d" % unreadable_source)
        print("  No webpfilename in DB: %d" % no_webpname)
        print("  Missing thumb (thumbs/): %d" % missing_thumb)
        print("  Missing standard (webp/): %d" % missing_large)
        print("  Missing hi-res (hi-res/): %d" % missing_hires)
        print("Failures logged to: %s" % FAILURE_LOG)
    finally:
        # Release DB resources even when a query or a log write fails.
        if cursor is not None:
            cursor.close()
        db.close()


def run_generate(args):
    """Pass 1: Generate missing derivatives.

    Reads image records from meter_files in id order, args.batch at a time,
    runs process_record() on each (in a worker pool when args.workers > 1),
    stores newly generated webpfilename values back into the DB and prints a
    progress line per batch. Failures are appended to FAILURE_LOG.

    Args:
        args: Parsed command-line arguments. Uses ``workers``, ``batch``,
            ``dry_run`` and ``limit`` (0 means no limit).

    With ``dry_run`` set, records are only listed: no directories or files
    are created and the DB is not updated.
    """
    print("=== GENERATION PASS ===")
    print("Workers: %d | Batch: %d | Dry run: %s | Limit: %s" % (
        args.workers, args.batch, args.dry_run, args.limit or 'all'))
    print("Standard: %dpx Q%d | Hi-res: %dpx Q%d | Thumb: %dx%d Q%d" % (
        MAX_LARGE, WEBP_LARGE_QUALITY, MAX_HIRES, WEBP_HIRES_QUALITY,
        THUMBNAIL_SIZE[0], THUMBNAIL_SIZE[1], WEBP_THUMB_QUALITY))
    print("Failure log: %s" % FAILURE_LOG)
    print("---")

    # Ensure output directories exist (a dry run must not touch the disk).
    # NOTE(review): mode 0o777 (before umask) is kept from the original,
    # presumably so another account can write here too — confirm it is needed.
    if not args.dry_run:
        for d in [THUMBS_DIR, WEBP_DIR, HIRES_DIR]:
            if not os.path.isdir(d):
                os.makedirs(d, mode=0o777)
                print("Created directory: %s" % d)

    pool = None
    db = None
    cursor = None
    try:
        # One pool for the whole run, started before the DB connection is
        # opened, rather than a fresh pool for every batch.
        if args.workers > 1 and not args.dry_run:
            pool = Pool(processes=args.workers)

        db = get_db_connection()
        cursor = db.cursor()

        cursor.execute("SELECT COUNT(*) FROM meter_files WHERE file_type IN (1,2,3,12,99)")
        total = cursor.fetchone()[0]
        print("Total meter_files image records: %d" % total)

        offset = 0
        processed = 0
        skipped = 0
        errors = 0
        webp_updates = 0
        start_time = time.time()

        if args.limit > 0:
            total = min(total, args.limit)

        while True:
            fetch_limit = args.batch
            if args.limit > 0:
                remaining = args.limit - (processed + skipped + errors)
                if remaining <= 0:
                    break
                fetch_limit = min(fetch_limit, remaining)

            cursor.execute(
                "SELECT id, unique_filename, webpfilename "
                "FROM meter_files WHERE file_type IN (1,2,3,12,99) "
                "ORDER BY id ASC LIMIT %s OFFSET %s",
                (fetch_limit, offset)
            )
            rows = cursor.fetchall()
            if not rows:
                break

            offset += len(rows)

            if args.dry_run:
                for row in rows:
                    rec_id, uname, wname = row
                    status = 'has_webp' if wname else 'needs_webp'
                    source = os.path.join(UPLOADS_DIR, uname)
                    exists = 'exists' if os.path.isfile(source) else 'MISSING'
                    print("[DRY] id=%d unique=%s webp=%s source=%s" % (rec_id, uname, status, exists))
                    skipped += 1
                continue

            # Process records
            if pool is not None:
                results = pool.map(process_record, rows)
            else:
                results = [process_record(r) for r in rows]

            # Collect DB updates and stats. Both pool.map() and the list
            # comprehension keep results in input order, so each result can
            # be paired with its source row directly.
            db_updates = []
            for row, result in zip(rows, results):
                rec_id, status, update_name, err_type, err_detail = result
                if status == 'processed':
                    processed += 1
                elif status == 'skipped':
                    skipped += 1
                elif status == 'error':
                    errors += 1
                    log_failure(rec_id, row[1], err_type, err_detail)
                    print("ERROR id=%d [%s]: %s" % (rec_id, err_type, err_detail))

                if update_name:
                    db_updates.append((update_name, rec_id))

            # Batch update webpfilename in DB
            if db_updates:
                update_cursor = db.cursor()
                try:
                    update_cursor.executemany(
                        "UPDATE meter_files SET webpfilename = %s WHERE id = %s",
                        db_updates
                    )
                finally:
                    update_cursor.close()
                db.commit()
                webp_updates += len(db_updates)

            elapsed = time.time() - start_time
            done = processed + skipped + errors
            rate = done / elapsed if elapsed > 0 else 0
            eta = (total - done) / rate if rate > 0 else 0
            print("Progress: %d/%d (%.1f%%) | Processed: %d | Skipped: %d | Errors: %d | DB updates: %d | %.1f rec/s | ETA: %dm" % (
                done, total, 100.0 * done / total if total > 0 else 0,
                processed, skipped, errors, webp_updates, rate, eta / 60
            ))

        elapsed = time.time() - start_time
        print("---")
        print("DONE in %.1f minutes" % (elapsed / 60))
        print("Processed: %d | Skipped: %d | Errors: %d | DB updates: %d" % (
            processed, skipped, errors, webp_updates))
        if errors > 0:
            print("Failures logged to: %s" % FAILURE_LOG)
    finally:
        # Always release workers and DB resources, including on errors.
        if pool is not None:
            # pool.map() is synchronous, so no task is pending at this point.
            pool.terminate()
            pool.join()
        if cursor is not None:
            cursor.close()
        if db is not None:
            db.close()


def main():
    """Parse the command line and run the verification or generation pass.

    ``--verify`` selects run_verify(); otherwise run_generate() is executed.
    An invalid ``--batch`` value ends the program with an argparse usage
    error (exit status 2).
    """
    parser = argparse.ArgumentParser(description='Backfill thumbnails for existing photos')
    parser.add_argument('--workers', type=int, default=1, help='Number of parallel workers (default: 1)')
    parser.add_argument('--batch', type=int, default=500, help='DB fetch batch size (default: 500)')
    parser.add_argument('--dry-run', action='store_true', help='Show what would be done without doing it')
    parser.add_argument('--limit', type=int, default=0, help='Process only N records (0 = all)')
    parser.add_argument('--verify', action='store_true', help='Run verification pass (check all derivatives exist)')
    args = parser.parse_args()

    # The batch size is used as a SQL LIMIT: 0 would fetch nothing, so the
    # run would end at once with no work done, and a negative value would be
    # sent to the server as an invalid LIMIT. Reject both up front.
    if args.batch < 1:
        parser.error('--batch must be a positive integer')

    print("Backfill Thumbnails v2.0 - PHOTO-017 (3-tier)")

    if args.verify:
        run_verify(args)
    else:
        run_generate(args)


if __name__ == "__main__":
    main()
