#!/usr/bin/env python3
"""
DPP Permit Sync - Salesforce UI API → AEI Production DB

Syncs permit data from DPP Salesforce (honolulu.my.site.com) to the AEI
scheduler production database using Playwright browser sessions and the
Salesforce UI API via in-browser fetch().

Architecture:
  - Runs on WSL2 (Python 3.12 + Playwright + Chromium)
  - Authenticates via Playwright browser login
  - Fetches data via page.evaluate() → fetch() UI API calls
  - Writes to remote production MySQL (18.225.0.90)

Usage:
  python3 dpp_permit_sync.py                  # Sync active permits
  python3 dpp_permit_sync.py --full           # Include completed permits
  python3 dpp_permit_sync.py --dry-run        # Log only, no DB writes
  python3 dpp_permit_sync.py --permit 189630  # Sync single permit
  python3 dpp_permit_sync.py --verbose        # Detailed logging
"""

import argparse
import json
import logging
import re
import sys
import time
from datetime import datetime

import mysql.connector
from playwright.sync_api import sync_playwright

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# DPP community-site endpoints. API calls are made relative to the logged-in
# origin via the Salesforce UI API base path below.
SF_LOGIN_URL = "https://honolulu.my.site.com/s/login/"
SF_HOME_URL = "https://honolulu.my.site.com/s/"
SF_API_BASE = "/services/data/v62.0/ui-api"

# FIXME(security): plaintext Salesforce credentials committed in source —
# move to environment variables or a secrets manager.
SF_USERNAME = "permits@aeihawaii.com"
SF_PASSWORD = "p455word"

# FIXME(security): production MySQL credentials are also hard-coded here.
DB_CONFIG = {
    "host": "18.225.0.90",
    "user": "AEI_User",
    "password": "P@55w02d7777",
    "database": "mandhdesign_schedular",
}

# Fields to fetch for each permit record
# (fully-qualified "Object.Field" names, as the UI API ?fields= param expects)
PERMIT_FIELDS = [
    "MUSW__Permit2__c.Name",
    "MUSW__Permit2__c.MUSW__Phase__c",
    "MUSW__Permit2__c.MUSW__Status__c",
    "MUSW__Permit2__c.MUSW__Type2__c",
    "MUSW__Permit2__c.MUSW__Work_Type2__c",
    "MUSW__Permit2__c.MUSW__Use_Class__c",
    "MUSW__Permit2__c.MUSW__Use_Type__c",
    "MUSW__Permit2__c.MUSW__Valuation__c",
    "MUSW__Permit2__c.Application_Accepted_Date__c",
    "MUSW__Permit2__c.Application_Expiration_Date__c",
    "MUSW__Permit2__c.MUSW__Issue_Date__c",
    "MUSW__Permit2__c.MUSW__DateCompleted__c",
    "MUSW__Permit2__c.MUSW__Expiration_Date__c",
    "MUSW__Permit2__c.MUSW__Total_Fees__c",
    "MUSW__Permit2__c.MUSW__Total_Payments__c",
    "MUSW__Permit2__c.MUSW__Total_Balance__c",
    "MUSW__Permit2__c.MUSW__Address__c",
    "MUSW__Permit2__c.CO_Required__c",
    "MUSW__Permit2__c.Final_CO_Issued__c",
    "MUSW__Permit2__c.MUSW__Description__c",
]

# Comma-joined form used directly in the ?fields= query parameter.
PERMIT_FIELDS_CSV = ",".join(PERMIT_FIELDS)

# Delay between API calls (seconds) to avoid rate limiting
API_DELAY = 0.2

# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------

# Module-level logger shared by every function/class in this script.
log = logging.getLogger("dpp_sync")


def setup_logging(verbose=False):
    """Configure root logging: DEBUG when *verbose* is true, else INFO."""
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )


# ---------------------------------------------------------------------------
# Database
# ---------------------------------------------------------------------------

class Database:
    """Thin wrapper around the production MySQL connection.

    Reads always hit the live database. When *dry_run* is True, write
    statements (``execute``) are logged at DEBUG level instead of executed.
    """

    def __init__(self, dry_run=False):
        # When True, execute() logs the SQL instead of running it.
        self.dry_run = dry_run
        self.conn = None  # populated by connect()

    def connect(self):
        """Open the connection described by module-level DB_CONFIG."""
        self.conn = mysql.connector.connect(**DB_CONFIG)
        log.info("Connected to production DB (%s)", DB_CONFIG["host"])

    def close(self):
        """Close the connection, if connect() was ever called."""
        if self.conn:
            self.conn.close()

    def execute(self, sql, params=None):
        """Run a write statement and commit.

        Returns the cursor (left open so callers can read ``lastrowid``),
        or None in dry-run mode. Callers should close the returned cursor
        when they are done with it.
        """
        if self.dry_run:
            log.debug("[DRY RUN] SQL: %s | params: %s", sql[:200], params)
            return None
        cursor = self.conn.cursor()
        cursor.execute(sql, params)
        self.conn.commit()
        return cursor

    def fetchall(self, sql, params=None):
        """Run a read query; return all rows as dicts. Closes its cursor."""
        cursor = self.conn.cursor(dictionary=True)
        try:
            cursor.execute(sql, params)
            return cursor.fetchall()
        finally:
            # Fix: cursors were previously never closed and accumulated on
            # the connection for the lifetime of the sync run.
            cursor.close()

    def fetchone(self, sql, params=None):
        """Run a read query; return the first row as a dict (or None).

        Uses a buffered cursor so closing it is safe even when the query
        returned more than one row.
        """
        cursor = self.conn.cursor(dictionary=True, buffered=True)
        try:
            cursor.execute(sql, params)
            return cursor.fetchone()
        finally:
            cursor.close()

    def log_action(self, permit_sync_id, action, message):
        """Append a row to permit_sync_log (message truncated to 2000 chars)."""
        cursor = self.execute(
            "INSERT INTO permit_sync_log (permit_sync_id, action, message) VALUES (%s, %s, %s)",
            (permit_sync_id, action, message[:2000] if message else None),
        )
        if cursor:  # None in dry-run mode
            cursor.close()

    def get_aei_jobs_with_permits(self, include_completed=False):
        """Get all AEI jobs that have building permit numbers.

        When include_completed is False, only jobs whose
        building_permit_status is empty/NULL are returned.
        """
        sql = """
            SELECT id, building_permit, building_permit_status
            FROM jobs
            WHERE building_permit > 0
        """
        if not include_completed:
            sql += " AND (building_permit_status IS NULL OR building_permit_status = '')"
        return self.fetchall(sql)

    def match_job_by_address(self, sf_address, sf_city=None, sf_zip=None, parcel_tmk=None):
        """Try to match an SF permit to an AEI job via address.

        Strategy:
        1. First try TMK match (exact, if TMK available)
        2. Then try normalized street address match
        Returns job_id or 0 if no match.

        NOTE(review): sf_city is accepted but currently unused; kept for
        interface compatibility with callers.
        """
        # Strategy 1: TMK match (most reliable when available)
        if parcel_tmk:
            rows = self.fetchall(
                """SELECT j.id as job_id, c.address, c.cust_tmk
                   FROM jobs j
                   JOIN customers c ON j.customer_id = c.id
                   WHERE c.cust_tmk = %s AND j.building_permit > 0
                   ORDER BY j.id DESC LIMIT 1""",
                (parcel_tmk,),
            )
            if rows:
                log.debug("  TMK match: %s → job %d", parcel_tmk, rows[0]["job_id"])
                return rows[0]["job_id"]

        # Strategy 2: Normalized street address match
        if not sf_address:
            return 0

        # Extract street number for initial DB filter (fast LIKE prefix)
        street_num = extract_street_number(sf_address)
        if not street_num:
            return 0

        norm_sf = normalize_address(sf_address)
        if not norm_sf:
            return 0

        # Get candidate jobs whose address starts with the same street number
        rows = self.fetchall(
            """SELECT j.id as job_id, c.address, c.city, c.zip_code
               FROM jobs j
               JOIN customers c ON j.customer_id = c.id
               WHERE j.building_permit > 0
                 AND c.address LIKE %s
               ORDER BY j.id DESC""",
            (street_num + "%",),
        )

        best_job = 0
        best_score = 0

        for row in rows:
            norm_aei = normalize_address(row["address"])
            if not norm_aei:
                continue

            # Exact normalized match wins immediately
            if norm_sf == norm_aei:
                log.debug("  Address exact match: '%s' → job %d", row["address"], row["job_id"])
                return row["job_id"]

            # Partial match: same street number + first street-name word
            sf_parts = norm_sf.split()
            aei_parts = norm_aei.split()
            if len(sf_parts) >= 2 and len(aei_parts) >= 2:
                if sf_parts[0] == aei_parts[0] and sf_parts[1] == aei_parts[1]:
                    score = 2
                    # Bonus for zip match
                    if sf_zip and row.get("zip_code") and sf_zip == str(row["zip_code"]).strip():
                        score += 1
                    if score > best_score:
                        best_score = score
                        best_job = row["job_id"]

        if best_job:
            log.debug("  Address partial match (score %d) → job %d", best_score, best_job)
        return best_job

    def get_existing_sync(self, sf_record_id):
        """Get existing permit_sync record by SF record ID, or None."""
        return self.fetchone(
            "SELECT * FROM permit_sync WHERE sf_record_id = %s", (sf_record_id,)
        )

    def upsert_permit(self, data):
        """Insert or update a permit_sync record. Returns (permit_sync_id, changed)."""
        existing = self.get_existing_sync(data["sf_record_id"])
        now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        if existing:
            # Compare the fields whose changes we care to report
            changed = False
            for key in ["phase", "status", "total_fees", "total_payments", "total_balance",
                        "issue_date", "completion_date", "expiration_date"]:
                old_val = existing.get(key)
                new_val = data.get(key)
                # Normalize to strings so Decimal/date vs str compare sanely
                if old_val is not None:
                    old_val = str(old_val)
                if new_val is not None:
                    new_val = str(new_val)
                if old_val != new_val:
                    changed = True
                    log.info(
                        "  Change detected: %s [%s] -> [%s]",
                        key, old_val, new_val
                    )

            # Always update (refreshes last_synced/raw_json even if unchanged)
            cursor = self.execute(
                """UPDATE permit_sync SET
                    phase=%s, status=%s, permit_type=%s, work_type=%s,
                    use_class=%s, use_type=%s,
                    application_accepted=%s, application_expiration=%s,
                    issue_date=%s, completion_date=%s, expiration_date=%s,
                    total_fees=%s, total_payments=%s, total_balance=%s,
                    valuation=%s, address_display=%s, parcel_tmk=%s,
                    co_required=%s, co_issued=%s,
                    job_id=%s,
                    last_synced=%s, sync_status='ok', sync_error=NULL,
                    updated=%s, raw_json=%s
                WHERE id=%s""",
                (
                    data.get("phase"), data.get("status"),
                    data.get("permit_type"), data.get("work_type"),
                    data.get("use_class"), data.get("use_type"),
                    data.get("application_accepted"), data.get("application_expiration"),
                    data.get("issue_date"), data.get("completion_date"),
                    data.get("expiration_date"),
                    data.get("total_fees"), data.get("total_payments"),
                    data.get("total_balance"), data.get("valuation"),
                    data.get("address_display"), data.get("parcel_tmk"),
                    data.get("co_required"), data.get("co_issued"),
                    data.get("job_id"),
                    now, now, data.get("raw_json"),
                    existing["id"],
                ),
            )
            if cursor:
                cursor.close()
            return existing["id"], changed
        else:
            # Insert new record
            cursor = self.execute(
                """INSERT INTO permit_sync (
                    job_id, permit_number, sf_record_id, sf_permit_name,
                    phase, status, permit_type, work_type, use_class, use_type,
                    application_accepted, application_expiration,
                    issue_date, completion_date, expiration_date,
                    total_fees, total_payments, total_balance, valuation,
                    address_display, parcel_tmk,
                    co_required, co_issued,
                    last_synced, sync_status, raw_json
                ) VALUES (
                    %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
                    %s, %s, %s, %s, %s, %s, %s, %s, %s,
                    %s, %s, %s, %s,
                    %s, 'ok', %s
                )""",
                (
                    data.get("job_id"), data.get("permit_number"),
                    data.get("sf_record_id"), data.get("sf_permit_name"),
                    data.get("phase"), data.get("status"),
                    data.get("permit_type"), data.get("work_type"),
                    data.get("use_class"), data.get("use_type"),
                    data.get("application_accepted"), data.get("application_expiration"),
                    data.get("issue_date"), data.get("completion_date"),
                    data.get("expiration_date"),
                    data.get("total_fees"), data.get("total_payments"),
                    data.get("total_balance"), data.get("valuation"),
                    data.get("address_display"), data.get("parcel_tmk"),
                    data.get("co_required"), data.get("co_issued"),
                    now, data.get("raw_json"),
                ),
            )
            # Fix: read the id from the INSERT cursor's lastrowid instead of
            # issuing a separate SELECT LAST_INSERT_ID() — the extra query
            # was redundant, and in dry-run mode it hit the live connection
            # and returned a stale/zero id.
            new_id = cursor.lastrowid if cursor else None
            if cursor:
                cursor.close()
            return new_id, True


# ---------------------------------------------------------------------------
# Salesforce Browser Session
# ---------------------------------------------------------------------------

class SalesforceSession:
    """Playwright-driven browser session against the DPP Salesforce site.

    All API traffic is issued as fetch() calls evaluated inside the
    logged-in page, so the browser's session cookies are reused.
    """

    def __init__(self):
        self.playwright = None
        self.browser = None
        self.context = None
        self.page = None

    def start(self):
        """Launch headless Chromium and open a fresh page."""
        self.playwright = sync_playwright().start()
        self.browser = self.playwright.chromium.launch(
            headless=True,
            args=["--no-sandbox", "--disable-gpu", "--disable-dev-shm-usage"],
        )
        self.context = self.browser.new_context()
        self.page = self.context.new_page()
        log.info("Browser started")

    def login(self):
        """Log in through the LWC login form.

        Raises RuntimeError when the post-login page title does not look
        like the home page or no ``sid`` session cookie was issued.
        """
        log.info("Logging in to Salesforce as %s...", SF_USERNAME)
        self.page.goto(SF_LOGIN_URL, timeout=60000)

        # Wait for LWC login form to render (inputs have no name attr)
        self.page.wait_for_selector('input[placeholder="Username"]', timeout=60000)
        time.sleep(2)  # Let LWC finish rendering

        # Use click + type (not fill) to trigger LWC data binding
        username_input = self.page.locator('input[placeholder="Username"]')
        username_input.click()
        username_input.type(SF_USERNAME, delay=30)

        password_input = self.page.locator('input[placeholder="Password"]')
        password_input.click()
        password_input.type(SF_PASSWORD, delay=30)

        # Click the form's login button (not the nav bar one)
        self.page.locator("button.loginButton").click()

        # Wait for redirect to home page
        try:
            self.page.wait_for_url("**/s/", timeout=60000)
        except Exception:
            pass  # Check title below

        time.sleep(5)  # Let home page load

        title = self.page.title()
        url = self.page.url
        if "Home" not in title and "home" not in title.lower():
            raise RuntimeError(f"Login failed - title: {title}, URL: {url}")

        # Verify sid cookie exists
        cookies = self.page.context.cookies()
        has_sid = any(c["name"] == "sid" for c in cookies)
        if not has_sid:
            raise RuntimeError("Login succeeded but no sid cookie found")

        log.info("Login successful (title: %s)", title)

    def api_fetch(self, endpoint):
        """Execute a UI API GET via fetch() inside the browser context.

        Returns {"ok": True, "data": ...} on success, otherwise
        {"ok": False, "error": ..., ["status": ...]}.
        """
        # Fix: pass the endpoint as an evaluate() argument instead of
        # interpolating it into the JS source with an f-string — a quote
        # or backslash in the URL would previously have broken (or been
        # injected into) the script.
        return self.page.evaluate(
            """async (endpoint) => {
                const sidMatch = document.cookie.match(/sid=([^;]+)/);
                if (!sidMatch) return {ok: false, error: 'No sid cookie'};
                const sid = sidMatch[1];
                try {
                    const r = await fetch(endpoint, {
                        headers: {'Authorization': 'Bearer ' + sid}
                    });
                    if (r.status !== 200) {
                        const text = await r.text();
                        return {ok: false, status: r.status, error: text};
                    }
                    const j = await r.json();
                    return {ok: true, data: j};
                } catch (e) {
                    return {ok: false, error: e.message};
                }
            }""",
            endpoint,
        )

    def fetch_permit_list(self, page_token=0, page_size=50):
        """Fetch a page of permits from the My_Permits list view."""
        endpoint = (
            f"{SF_API_BASE}/list-records/MUSW__Permit2__c/My_Permits"
            f"?pageSize={page_size}&pageToken={page_token}"
        )
        return self.api_fetch(endpoint)

    def fetch_permit_detail(self, sf_record_id):
        """Fetch full details (PERMIT_FIELDS) for a single permit record."""
        endpoint = f"{SF_API_BASE}/records/{sf_record_id}?fields={PERMIT_FIELDS_CSV}"
        return self.api_fetch(endpoint)

    def fetch_address_record(self, address_ref_id):
        """Fetch structured address data from a SF Address reference ID.

        Returns dict with street_number, street_name, city, state, zip, tmk,
        unit, or None if the id is empty or the fetch fails.
        """
        if not address_ref_id:
            return None
        endpoint = f"{SF_API_BASE}/records/{address_ref_id}?layoutTypes=Full"
        result = self.api_fetch(endpoint)
        if not result.get("ok"):
            return None

        fields = result["data"].get("fields", {})

        def fval(name):
            # UI API wraps each field as {"value": ...}
            f = fields.get(name, {})
            return f.get("value") if isinstance(f, dict) else f

        # Extract TMK from the nested Parcel relationship
        tmk = None
        parcel_r = fields.get("MUSW__Parcel__r", {})
        if isinstance(parcel_r, dict):
            pval = parcel_r.get("value")
            if isinstance(pval, dict):
                pfields = pval.get("fields", {})
                pname = pfields.get("Name", {})
                if isinstance(pname, dict):
                    tmk = pname.get("value")

        return {
            "street_number": fval("MUSW__Street_Number__c") or "",
            "street_name": fval("MUSW__Street_Name__c") or "",
            "city": fval("MUSW__City__c") or "",
            "state": fval("MUSW__State__c") or "",
            "zip_code": fval("MUSW__Zip__c") or "",
            "unit": fval("MUSW__Unit_Suite__c") or "",
            "tmk": tmk or "",
            "full_address": fval("Name") or "",
        }

    def close(self):
        """Shut down the browser and the Playwright driver."""
        if self.browser:
            self.browser.close()
        if self.playwright:
            self.playwright.stop()
        log.info("Browser closed")


# ---------------------------------------------------------------------------
# Sync Logic
# ---------------------------------------------------------------------------

def normalize_address(addr):
    """Normalize an address string for fuzzy comparison.

    Strips unit/apt suffixes, standardizes abbreviations, removes punctuation.
    Returns lowercase normalized string, or empty string if input is empty.
    """
    if not addr:
        return ""
    addr = addr.strip().upper()

    # Remove unit/apt/suite designators and everything after.
    # Fix: designators must end on a word boundary — the old pattern matched
    # prefixes of real street names ("APTOS" via APT, "STREET" via STE) and
    # deleted them from the address.
    addr = re.sub(r'\s+(APT|UNIT|STE|SUITE|RM|ROOM)\b\s*\.?\s*\S*|\s+#\s*\.?\s*\S*', '', addr)

    # Common abbreviations
    replacements = {
        r'\bST\.?\b': 'ST', r'\bSTREET\b': 'ST',
        r'\bAVE\.?\b': 'AVE', r'\bAVENUE\b': 'AVE',
        r'\bDR\.?\b': 'DR', r'\bDRIVE\b': 'DR',
        r'\bRD\.?\b': 'RD', r'\bROAD\b': 'RD',
        r'\bLN\.?\b': 'LN', r'\bLANE\b': 'LN',
        r'\bCT\.?\b': 'CT', r'\bCOURT\b': 'CT',
        r'\bPL\.?\b': 'PL', r'\bPLACE\b': 'PL',
        r'\bBLVD\.?\b': 'BLVD', r'\bBOULEVARD\b': 'BLVD',
        r'\bCIR\.?\b': 'CIR', r'\bCIRCLE\b': 'CIR',
        r'\bWAY\b': 'WAY',
        r'\bPKWY\.?\b': 'PKWY', r'\bPARKWAY\b': 'PKWY',
        r'\bHWY\.?\b': 'HWY', r'\bHIGHWAY\b': 'HWY',
    }
    for pattern, repl in replacements.items():
        addr = re.sub(pattern, repl, addr)

    # Remove punctuation (periods, commas)
    addr = re.sub(r'[.,]', '', addr)
    # Collapse whitespace
    addr = re.sub(r'\s+', ' ', addr).strip()
    return addr.lower()


def extract_street_number(addr):
    """Extract the leading street number from an address.

    Handles Hawaii hyphenated numbers ("95-1234") and number-only input
    ("503"); the previous pattern required a trailing space and a
    single-character suffix, so both of those returned "" and such
    addresses could never be matched. Returns "" when no number leads.
    """
    if not addr:
        return ""
    # digits, optional "-suffix" (e.g. 95-1234, 1234-A), optional letter
    m = re.match(r'^(\d+(?:-\w+)?\w?)(?:\s|$)', addr.strip())
    return m.group(1).upper() if m else ""


def parse_sf_description(description):
    """Parse TMK and address out of an SF Description free-text field.

    Example description:
      "(Quick Permit) [TMK: 37015056] (1/21) 503 ANOLANI ST Honolulu / Waialae Kahala, HI 96821 // Mamoru Fujita..."

    Returns a dict with 'tmk', 'address', 'city', 'zip_code' (any may be None).
    """
    out = {"tmk": None, "address": None, "city": None, "zip_code": None}
    if not description:
        return out

    # TMK appears as "[TMK: 12345678]"
    tmk_m = re.search(r'\[TMK:\s*(\d+)\]', description)
    if tmk_m:
        out["tmk"] = tmk_m.group(1)

    # Address text sits between the "(m/d)" date marker after the TMK
    # bracket and the "//" applicant separator.
    m = re.search(r'\]\s*\(\d+/\d+\)\s*(.+?)(?:\s*//|$)', description)
    if m is None:
        # Fallback: any street-number-looking run before "//"
        m = re.search(r'(?:\)\s*)?(\d+[-\w]*\s+.+?)(?:\s*//|$)', description)
    if m is None:
        return out

    text = m.group(1).strip()
    hi_m = re.search(r'^(.+?),?\s*HI\s+(\d{5})', text)
    if hi_m is None:
        out["address"] = text
        return out

    out["zip_code"] = hi_m.group(2)
    addr_city = hi_m.group(1).strip()
    # City (really the DPP area name) follows the last "/" when present
    left, sep, right = addr_city.rpartition('/')
    if sep:
        out["address"] = left.strip().rstrip(',').strip()
        out["city"] = right.strip()
    else:
        out["address"] = addr_city
    return out


def extract_permit_number(sf_name):
    """Return the full SF permit name, used verbatim as the permit number.

    DPP Salesforce issues new-format numbers (BP-2026-984, A2020-05-0707)
    that do NOT correspond to legacy AEI building_permit numbers (889536),
    so the whole SF name is stored as-is.
    """
    return sf_name


def parse_permit_fields(fields):
    """Flatten an SF UI API fields dict into permit_sync column values.

    UI API responses wrap each field as {"value": ...}; bare values are
    passed through unchanged. TMK/address fall back to parsing the
    free-text description.
    """
    def val(field_name):
        short = field_name.split(".")[-1]  # drop any "Object." prefix
        entry = fields.get(short, {})
        return entry.get("value") if isinstance(entry, dict) else entry

    # Our column name -> SF API field name
    column_map = {
        "sf_permit_name": "Name",
        "phase": "MUSW__Phase__c",
        "status": "MUSW__Status__c",
        "permit_type": "MUSW__Type2__c",
        "work_type": "MUSW__Work_Type2__c",
        "use_class": "MUSW__Use_Class__c",
        "use_type": "MUSW__Use_Type__c",
        "valuation": "MUSW__Valuation__c",
        "application_accepted": "Application_Accepted_Date__c",
        "application_expiration": "Application_Expiration_Date__c",
        "issue_date": "MUSW__Issue_Date__c",
        "completion_date": "MUSW__DateCompleted__c",
        "expiration_date": "MUSW__Expiration_Date__c",
        "total_fees": "MUSW__Total_Fees__c",
        "total_payments": "MUSW__Total_Payments__c",
        "total_balance": "MUSW__Total_Balance__c",
        "co_required": "CO_Required__c",
        "description": "MUSW__Description__c",
        "address_ref_id": "MUSW__Address__c",  # SF Address record reference
    }
    data = {col: val(sf_field) for col, sf_field in column_map.items()}
    data["co_issued"] = 1 if val("Final_CO_Issued__c") else 0

    # Description-derived fallbacks for TMK and address
    parsed = parse_sf_description(data.get("description"))
    data["parcel_tmk"] = parsed["tmk"]
    data["sf_address"] = parsed["address"]
    data["sf_city"] = parsed["city"]
    data["sf_zip"] = parsed["zip_code"]

    # Build a display address from the parsed description pieces
    if parsed["address"]:
        display = [parsed["address"]]
        if parsed["city"]:
            display.append(parsed["city"])
        if parsed["zip_code"]:
            display.append("HI " + parsed["zip_code"])
        data["address_display"] = ", ".join(display)

    return data


def harvest_all_permits(sf):
    """Page through the SF My_Permits list view and collect summary rows.

    Args:
        sf: an authenticated SalesforceSession.

    Returns:
        list of dicts: {sf_id, name, phase, status, type, permit_number}.
    """
    def list_val(rec_fields, field_name):
        # UI API wraps each field as {"value": ...}; anything else -> ""
        entry = rec_fields.get(field_name)
        return entry.get("value", "") if isinstance(entry, dict) else ""

    all_permits = []
    token = 0

    while True:
        log.debug("Fetching permit list page (token=%d)...", token)
        result = sf.fetch_permit_list(page_token=token)

        if not result.get("ok"):
            log.error("List fetch failed at token %d: %s", token, result.get("error", ""))
            break

        data = result["data"]
        records = data.get("records", [])
        if not records:
            break

        for rec in records:
            fields = rec.get("fields", {})
            name_val = list_val(fields, "Name")
            all_permits.append({
                "sf_id": rec.get("id"),
                "name": name_val,
                "phase": list_val(fields, "MUSW__Phase__c"),
                "status": list_val(fields, "MUSW__Status__c"),
                "type": list_val(fields, "MUSW__Type2__c"),
                "permit_number": extract_permit_number(name_val),
            })

        if not data.get("nextPageUrl"):
            break

        token += 50
        # Fix: Salesforce caps list pagination at offset 2000; the old
        # "token > 2050" check still issued requests past that cap.
        if token >= 2000:
            break

        time.sleep(API_DELAY)

    log.info("Harvested %d permits from Salesforce", len(all_permits))
    return all_permits


def run_sync(args):
    """Main sync orchestration.

    Strategy: Harvest all SF permits and store them in permit_sync.
    DPP Salesforce uses a new numbering system (BP-2026-984) that does NOT
    match old AEI building_permit numbers (889536). Matching to AEI jobs
    is done separately (manual or address-based) via the job_id column.

    Args:
        args: parsed CLI namespace with full, dry_run, permit, verbose.

    Side effects: drives a headless browser session, and (unless
    args.dry_run) writes permit_sync / permit_sync_log rows.
    """
    setup_logging(args.verbose)
    log.info("=" * 60)
    log.info("DPP Permit Sync starting (full=%s, dry_run=%s)", args.full, args.dry_run)
    log.info("=" * 60)

    db = Database(dry_run=args.dry_run)
    sf = SalesforceSession()

    # Run counters for the final summary.
    # NOTE(review): "matched" is never incremented in this function — job
    # matching happens in run_match_only — so the summary always reports 0.
    stats = {
        "sf_permits": 0,
        "synced": 0,
        "inserted": 0,
        "changed": 0,
        "unchanged": 0,
        "matched": 0,
        "errors": 0,
        "skipped": 0,
    }

    try:
        # Step 1: Connect to DB
        db.connect()
        db.log_action(None, "start", f"Sync started (full={args.full}, dry_run={args.dry_run})")

        # Step 2: Start browser and login
        sf.start()
        sf.login()
        db.log_action(None, "login", "Salesforce login successful")

        # Step 3: Harvest all permits from SF list view
        if args.permit:
            # Single-permit mode still harvests the full list, then filters
            # by substring match on the SF permit name.
            log.info("Single permit mode: looking for %s", args.permit)
            sf_permits = harvest_all_permits(sf)
            sf_permits = [p for p in sf_permits if args.permit in (p["name"] or "")]
            if not sf_permits:
                log.error("Permit %s not found in Salesforce", args.permit)
                return
        else:
            sf_permits = harvest_all_permits(sf)

        stats["sf_permits"] = len(sf_permits)

        # Step 4: Sync each permit
        for i, sf_permit in enumerate(sf_permits):
            sf_id = sf_permit["sf_id"]
            name = sf_permit["name"]

            if not name:
                stats["skipped"] += 1
                continue

            # Skip completed permits on regular (non-full) sync
            if not args.full and sf_permit["phase"] == "Closed" and sf_permit["status"] == "Completed":
                stats["skipped"] += 1
                continue

            # Progress heartbeat: every 100th permit, or every one if verbose
            if (i + 1) % 100 == 0 or args.verbose:
                log.info("[%d/%d] Syncing %s...", i + 1, len(sf_permits), name)

            # Fetch full details for this permit
            detail_result = sf.fetch_permit_detail(sf_id)
            if not detail_result.get("ok"):
                log.error("  Detail fetch failed for %s: %s", name, detail_result.get("error", "")[:200])
                stats["errors"] += 1
                continue

            fields = detail_result["data"].get("fields", {})
            permit_data = parse_permit_fields(fields)
            permit_data["permit_number"] = name  # Full SF name (e.g., BP-2026-984)
            permit_data["sf_record_id"] = sf_id
            # Keep the raw field payload for auditing/debugging; default=str
            # stringifies any non-JSON-serializable values.
            permit_data["raw_json"] = json.dumps(
                {k: v for k, v in fields.items()}, default=str
            )

            # Fetch structured address from SF Address record (preferred source)
            addr_ref = permit_data.get("address_ref_id")
            if addr_ref:
                addr_data = sf.fetch_address_record(addr_ref)
                if addr_data:
                    # Use structured address data (overrides description-parsed)
                    snum = addr_data["street_number"]
                    sname = addr_data["street_name"]
                    if snum and sname:
                        street = f"{snum} {sname}"
                        if addr_data["unit"]:
                            street += f" {addr_data['unit']}"
                        parts = [street]
                        if addr_data["city"]:
                            parts.append(addr_data["city"])
                        if addr_data["zip_code"]:
                            parts.append("HI " + addr_data["zip_code"])
                        permit_data["address_display"] = ", ".join(parts)
                    if addr_data["tmk"]:
                        permit_data["parcel_tmk"] = addr_data["tmk"]
                    time.sleep(API_DELAY)

            # Preserve existing job_id if already linked; otherwise 0 (unmatched)
            existing = db.get_existing_sync(sf_id)
            if existing and existing.get("job_id"):
                permit_data["job_id"] = existing["job_id"]
            else:
                permit_data["job_id"] = 0

            # Upsert to database
            try:
                sync_id, changed = db.upsert_permit(permit_data)
                stats["synced"] += 1
                if existing is None:
                    stats["inserted"] += 1
                elif changed:
                    stats["changed"] += 1
                    db.log_action(
                        sync_id, "change",
                        f"{name}: phase={permit_data['phase']}, status={permit_data['status']}"
                    )
                else:
                    stats["unchanged"] += 1
            except Exception as e:
                # Keep going: one bad row should not abort the whole run
                log.error("  DB write failed for %s: %s", name, e)
                stats["errors"] += 1
                db.log_action(None, "error", f"DB write failed for {name}: {e}")

            time.sleep(API_DELAY)

        # Step 5: Summary
        log.info("=" * 60)
        log.info("Sync complete!")
        log.info("  SF permits found:  %d", stats["sf_permits"])
        log.info("  Synced to DB:      %d", stats["synced"])
        log.info("  New inserts:       %d", stats["inserted"])
        log.info("  Changed:           %d", stats["changed"])
        log.info("  Unchanged:         %d", stats["unchanged"])
        log.info("  Matched to job:    %d", stats["matched"])
        log.info("  Errors:            %d", stats["errors"])
        log.info("  Skipped:           %d", stats["skipped"])
        log.info("=" * 60)

        db.log_action(None, "complete", json.dumps(stats))

    except Exception as e:
        log.error("Sync failed: %s", e, exc_info=True)
        try:
            # Best-effort audit entry; the DB itself may be the failure
            db.log_action(None, "error", f"Sync failed: {e}")
        except Exception:
            pass
        raise
    finally:
        # Always release the browser and DB connection, even on failure
        sf.close()
        db.close()


# ---------------------------------------------------------------------------
# Match-only mode (re-run address matching on existing data)
# ---------------------------------------------------------------------------

def run_match_only(args):
    """Re-run address matching on existing permit_sync records without SF fetch.

    Args:
        args: parsed CLI namespace (dry_run, verbose).

    Side effects: sets permit_sync.job_id on newly matched rows (unless
    args.dry_run).
    """
    setup_logging(args.verbose)
    log.info("=" * 60)
    log.info("Address matching mode (no Salesforce fetch)")
    log.info("=" * 60)

    db = Database(dry_run=args.dry_run)
    db.connect()

    # Get all unmatched permits (job_id = 0) that have an address
    permits = db.fetchall(
        """SELECT id, permit_number, address_display, parcel_tmk, job_id
           FROM permit_sync
           WHERE job_id = 0
             AND (address_display IS NOT NULL AND address_display != '')"""
    )
    log.info("Found %d unmatched permits with addresses", len(permits))

    matched = 0
    for p in permits:
        # Re-wrap the stored display address in the description format so
        # parse_sf_description can extract city/zip from it.
        parsed = parse_sf_description(
            "[TMK: %s] (1/1) %s" % (p["parcel_tmk"] or "0", p["address_display"] or "")
        )
        # Also try direct normalized matching
        sf_addr = p["address_display"] or ""
        # Strip city/zip from display for matching (street part is before
        # the first comma)
        sf_addr_parts = sf_addr.split(",")
        sf_street = sf_addr_parts[0].strip() if sf_addr_parts else ""

        job_id = db.match_job_by_address(
            sf_street,
            parsed.get("city"),
            parsed.get("zip_code"),
            p["parcel_tmk"],
        )
        if job_id:
            matched += 1
            log.info("  Matched %s → job %d (addr: %s)", p["permit_number"], job_id, sf_street[:60])
            # NOTE(review): db.execute already no-ops in dry-run mode, so
            # this guard is belt-and-braces.
            if not args.dry_run:
                db.execute(
                    "UPDATE permit_sync SET job_id = %s WHERE id = %s",
                    (job_id, p["id"]),
                )

    log.info("=" * 60)
    log.info("Matching complete!")
    log.info("  Total unmatched:   %d", len(permits))
    log.info("  Newly matched:     %d", matched)
    log.info("  Still unmatched:   %d", len(permits) - matched)
    log.info("=" * 60)

    db.close()


# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------

def main():
    """CLI entry point: parse arguments and dispatch to the requested mode."""
    parser = argparse.ArgumentParser(description="DPP Permit Sync - Salesforce → AEI DB")
    parser.add_argument("--full", action="store_true", help="Include completed permits")
    parser.add_argument("--dry-run", action="store_true", help="Log only, no DB writes")
    parser.add_argument("--permit", type=str, help="Sync a single permit number (e.g., 189630)")
    parser.add_argument("--match-only", action="store_true",
                        help="Re-run address matching on existing permits (no SF fetch)")
    parser.add_argument("--verbose", action="store_true", help="Debug logging")
    opts = parser.parse_args()

    # --match-only skips the browser entirely; everything else is a sync run
    runner = run_match_only if opts.match_only else run_sync
    runner(opts)


# Standard entry guard — keeps the module importable without side effects.
if __name__ == "__main__":
    main()
