#!/usr/bin/env python3
"""
AEI System Operational Audit — Automated Health Check

51 checks across remote (AWS), local (WSL2), and aei-webserv2 servers.
Read-only — no uploads, no writes, no file modifications.

Phases:
  1  Remote Services (1-5)        — httpd, mysqld, crond, postfix, python3.6
  2  Remote Cron Health (6-11)    — scheduler cron, retry queue, ticketcron, PDF cleanup
  3  Remote Disk & Storage (12-15)— disk free, log sizes, uploads size
  4  Remote Security (16-19)      — known exposures, SSL, PHP extensions
  5  Remote Photo API (20-25)     — upload.php, getimagelisting, fetch_image, delete endpoints
  6  Remote Database (26-28)      — connection, meter_files, jobs table
  7  Local Services (29-33)       — apache, mariadb, fail2ban, cron, /mnt/dropbox
  8  Local Firewall (34-38)       — ipset, fail2ban, jail count, HTTP, SSL
  9  Local Cron Health (39-42)    — DB sync, process_queue, backups, disk
  10 Cross-System (43-44)         — scheduler login, photo API
  11 aei-webserv2 Health (45-51)  — apache, mysql, endpoints, DB, dropbox mount

Usage:
  python3 audit/system_audit.py                 # Full audit (both servers)
  python3 audit/system_audit.py --remote-only   # Remote only
  python3 audit/system_audit.py --local-only    # Local only
  python3 audit/system_audit.py --json          # JSON output
  python3 audit/system_audit.py --warn-only     # Only WARN + FAIL

Prerequisites:
  - pip3 install requests paramiko
  - SSH key at /root/.ssh/aei_production.pem
  - Network access to aeihawaii.com and upload.aeihawaii.com
"""

import argparse
import json
import os
import re
import subprocess
import sys
import time
import warnings
from datetime import datetime, timedelta, timezone

# Suppress InsecureRequestWarning for self-signed local certs
warnings.filterwarnings("ignore", message="Unverified HTTPS request")

try:
    import requests
except ImportError:
    print("ERROR: pip3 install requests")
    sys.exit(1)

# ── Configuration ──────────────────────────────────────────────────

SSH_KEY = "/root/.ssh/aei_production.pem"
SSH_USER = "Julian"
SSH_HOST = "18.225.0.90"
AWS_IP = "18.225.0.90"
DB_USER = "schedular"
DB_PASS = "M1gif9!6"
DB_NAME = "mandhdesign_schedular"
REMOTE_API_BASE = "https://aeihawaii.com/photoapi"
REMOTE_SCHEDULER_URL = "https://aeihawaii.com/scheduler/login.php"
LOCAL_URL = "https://upload.aeihawaii.com"
REMOTE_PHOTOAPI = "/var/www/vhosts/aeihawaii.com/httpdocs/photoapi"
REMOTE_SCHEDULER = "/var/www/vhosts/aeihawaii.com/httpdocs/scheduler"
AUTH_TOKEN = "aei@89806849"

# aei-webserv2 (upload.aeihawaii.com / 192.168.141.219)
LOCAL_SSH_USER = "aeiuser"
LOCAL_SSH_HOST = "192.168.141.219"
LOCAL_SSH_PORT = "55222"
WEBSERV2_UPLOAD_DIR = "/var/www/html/upload"
WEBSERV2_DB_USER = "upload_user"
WEBSERV2_DB_PASS = "P@55w02d778899"
WEBSERV2_DB_NAME = "Schedular"

# ── Helpers ────────────────────────────────────────────────────────

class Colors:
    PASS = "\033[92m"
    FAIL = "\033[91m"
    WARN = "\033[93m"
    INFO = "\033[94m"
    RESET = "\033[0m"
    BOLD = "\033[1m"

class Result:
    """Single check result."""
    def __init__(self, step, name, status, detail="", phase=""):
        self.step = step
        self.name = name
        self.status = status  # "PASS", "WARN", "FAIL", "SKIP"
        self.detail = detail
        self.phase = phase

    def to_dict(self):
        return {
            "step": self.step,
            "name": self.name,
            "status": self.status,
            "detail": self.detail,
            "phase": self.phase,
        }

def ssh_cmd(cmd, timeout=30):
    """Run command on remote server via SSH."""
    try:
        result = subprocess.run(
            ["ssh", "-i", SSH_KEY, "-o", "ConnectTimeout=10",
             "-o", "StrictHostKeyChecking=no", "-o", "BatchMode=yes",
             f"{SSH_USER}@{SSH_HOST}", cmd],
            capture_output=True, text=True, timeout=timeout
        )
        return result.stdout.strip(), result.stderr.strip(), result.returncode
    except subprocess.TimeoutExpired:
        return "", "SSH command timed out", 1
    except Exception as e:
        return "", str(e), 1

def local_cmd(cmd, timeout=30):
    """Run command locally."""
    try:
        result = subprocess.run(
            cmd, shell=True, capture_output=True, text=True, timeout=timeout
        )
        return result.stdout.strip(), result.stderr.strip(), result.returncode
    except subprocess.TimeoutExpired:
        return "", "Command timed out", 1
    except Exception as e:
        return "", str(e), 1

def local_server_ssh_cmd(cmd, timeout=30):
    """Run command on aei-webserv2 (192.168.141.219) via SSH."""
    try:
        result = subprocess.run(
            ["ssh", "-p", LOCAL_SSH_PORT,
             "-o", "ConnectTimeout=10",
             "-o", "StrictHostKeyChecking=no", "-o", "BatchMode=yes",
             f"{LOCAL_SSH_USER}@{LOCAL_SSH_HOST}", cmd],
            capture_output=True, text=True, timeout=timeout
        )
        return result.stdout.strip(), result.stderr.strip(), result.returncode
    except subprocess.TimeoutExpired:
        return "", "SSH command timed out", 1
    except Exception as e:
        return "", str(e), 1

_quiet_mode = False

def print_result(r, warn_only=False):
    """Print a formatted result line."""
    if _quiet_mode:
        return
    if warn_only and r.status == "PASS":
        return
    color = {
        "PASS": Colors.PASS, "FAIL": Colors.FAIL,
        "WARN": Colors.WARN, "SKIP": Colors.INFO,
    }.get(r.status, Colors.RESET)
    detail_str = f" — {r.detail}" if r.detail else ""
    print(f"  [{color}{r.status:4s}{Colors.RESET}] {r.step:>2}. {r.name}{detail_str}")

def print_phase(name):
    """Print phase header."""
    if _quiet_mode:
        return
    print(f"\n{Colors.BOLD}{name}{Colors.RESET}")

def parse_size_mb(size_str):
    """Parse du/df output to MB. Handles K, M, G suffixes."""
    size_str = size_str.strip()
    if not size_str:
        return 0
    # du -sh output like "275G", "1.2M", "450K"
    m = re.match(r'^([\d.]+)([KMGTP]?)$', size_str, re.IGNORECASE)
    if m:
        val = float(m.group(1))
        unit = m.group(2).upper()
        multiplier = {"": 1, "K": 0.001, "M": 1, "G": 1024, "T": 1048576, "P": 1073741824}
        return val * multiplier.get(unit, 1)
    # Might be raw KB from df
    try:
        return float(size_str) / 1024
    except ValueError:
        return 0

# ── Phase 1: Remote Services ──────────────────────────────────────

def phase_remote_services(results):
    """Checks 1-5: Core services running on remote."""
    print_phase("Phase 1 — Remote Services (1-5)")

    # Batch all service checks in one SSH call
    # Note: remote pgrep doesn't support -c, use ps + grep + wc
    cmd = (
        "echo '===HTTPD==='; "
        "ps aux | grep httpd | grep -v grep | wc -l; "
        "echo '===MYSQL==='; "
        "ps aux | grep mysqld | grep -v grep | wc -l; "
        "echo '===CROND==='; "
        "ps aux | grep crond | grep -v grep | wc -l; "
        "echo '===POSTFIX==='; "
        "ps aux | grep 'postfix/master' | grep -v grep | wc -l; "
        "echo '===PYTHON==='; "
        "/usr/local/bin/python3.6 --version 2>&1"
    )
    out, err, rc = ssh_cmd(cmd, timeout=15)

    if rc != 0 and "timed out" in err:
        for i, name in enumerate(["Apache httpd", "MySQL mysqld", "crond", "Postfix", "Python 3.6"], 1):
            results.append(Result(i, name, "FAIL", "SSH connection failed", "Remote Services"))
        return

    sections = out.split("===")
    def section(name):
        for i, s in enumerate(sections):
            if s.strip() == name and i + 1 < len(sections):
                return sections[i + 1].strip()
        return ""

    # 1. Apache
    count = section("HTTPD")
    ok = count.isdigit() and int(count) > 0
    results.append(Result(1, "Apache httpd running", "PASS" if ok else "FAIL",
                          f"{count} processes" if ok else "not running", "Remote Services"))

    # 2. MySQL
    count = section("MYSQL")
    ok = count.isdigit() and int(count) > 0
    results.append(Result(2, "MySQL mysqld running", "PASS" if ok else "FAIL",
                          f"{count} processes" if ok else "not running", "Remote Services"))

    # 3. crond
    count = section("CROND")
    ok = count.isdigit() and int(count) > 0
    results.append(Result(3, "crond running", "PASS" if ok else "FAIL",
                          f"{count} processes" if ok else "not running", "Remote Services"))

    # 4. Postfix
    count = section("POSTFIX")
    ok = count.isdigit() and int(count) > 0
    results.append(Result(4, "Postfix running", "PASS" if ok else "FAIL",
                          f"{count} processes" if ok else "not running", "Remote Services"))

    # 5. Python 3.6
    pyver = section("PYTHON")
    ok = "3.6" in pyver
    results.append(Result(5, "Python 3.6 available", "PASS" if ok else "FAIL",
                          pyver if ok else "not found", "Remote Services"))

# ── Phase 2: Remote Cron Health ───────────────────────────────────

def phase_remote_cron(results):
    """Checks 6-11: Cron jobs running on schedule."""
    print_phase("Phase 2 — Remote Cron Health (6-11)")

    cmd = (
        "echo '===SCHEDULER_CRON==='; "
        "sudo grep -a 'scheduler/cron' /var/log/cron 2>/dev/null | tail -1; "
        "echo '===RETRY_QUEUE==='; "
        "sudo grep -a 'process_retry_queue' /var/log/cron 2>/dev/null | tail -1; "
        "echo '===TICKETCRON==='; "
        "sudo grep -a 'ticketcron' /var/log/cron 2>/dev/null | tail -1; "
        "echo '===PDF_CLEANUP==='; "
        "crontab -l 2>/dev/null | grep -iE 'pdf|cleanup|tmp' || echo ''; "
        "echo '===FAILED_QUEUE==='; "
        "find /var/www/vhosts/aeihawaii.com/httpdocs/photoapi/queue/failed/ -type f -mtime +7 2>/dev/null | wc -l; "
        "echo '===QUEUE_SIZE==='; "
        "ls /var/www/vhosts/aeihawaii.com/httpdocs/photoapi/queue/ 2>/dev/null | grep -v failed | grep -c '.json' 2>/dev/null || echo '0'"
    )
    out, err, rc = ssh_cmd(cmd, timeout=15)

    if rc != 0 and "timed out" in err:
        for i in range(6, 12):
            results.append(Result(i, f"Cron check {i}", "FAIL", "SSH failed", "Remote Cron"))
        return

    sections = out.split("===")
    def section(name):
        for i, s in enumerate(sections):
            if s.strip() == name and i + 1 < len(sections):
                return sections[i + 1].strip()
        return ""

    # 6. scheduler/cron last ran within 10 min
    cron_line = section("SCHEDULER_CRON")
    if cron_line:
        # Parse cron log timestamp: "Feb 17 10:30:01"
        try:
            parts = cron_line.split()
            ts_str = f"{parts[0]} {parts[1]} {parts[2]}"
            now = datetime.now()
            cron_time = datetime.strptime(f"{now.year} {ts_str}", "%Y %b %d %H:%M:%S")
            age_min = (now - cron_time).total_seconds() / 60
            if age_min < 10:
                results.append(Result(6, "scheduler/cron last ran", "PASS", f"{age_min:.0f} min ago", "Remote Cron"))
            else:
                results.append(Result(6, "scheduler/cron last ran", "WARN", f"{age_min:.0f} min ago (>10 min)", "Remote Cron"))
        except (ValueError, IndexError):
            results.append(Result(6, "scheduler/cron last ran", "WARN", f"parse error: {cron_line[:60]}", "Remote Cron"))
    else:
        results.append(Result(6, "scheduler/cron last ran", "FAIL", "no cron log entry found", "Remote Cron"))

    # 7. process_retry_queue.py within 20 min
    retry_line = section("RETRY_QUEUE")
    if retry_line:
        try:
            parts = retry_line.split()
            ts_str = f"{parts[0]} {parts[1]} {parts[2]}"
            now = datetime.now()
            cron_time = datetime.strptime(f"{now.year} {ts_str}", "%Y %b %d %H:%M:%S")
            age_min = (now - cron_time).total_seconds() / 60
            if age_min < 20:
                results.append(Result(7, "process_retry_queue.py last ran", "PASS", f"{age_min:.0f} min ago", "Remote Cron"))
            else:
                results.append(Result(7, "process_retry_queue.py last ran", "WARN", f"{age_min:.0f} min ago (>20 min)", "Remote Cron"))
        except (ValueError, IndexError):
            results.append(Result(7, "process_retry_queue.py last ran", "WARN", f"parse error", "Remote Cron"))
    else:
        results.append(Result(7, "process_retry_queue.py last ran", "FAIL", "no cron log entry found", "Remote Cron"))

    # 8. ticketcron within 10 min
    ticket_line = section("TICKETCRON")
    if ticket_line:
        try:
            parts = ticket_line.split()
            ts_str = f"{parts[0]} {parts[1]} {parts[2]}"
            now = datetime.now()
            cron_time = datetime.strptime(f"{now.year} {ts_str}", "%Y %b %d %H:%M:%S")
            age_min = (now - cron_time).total_seconds() / 60
            if age_min < 10:
                results.append(Result(8, "ticketcron last ran", "PASS", f"{age_min:.0f} min ago", "Remote Cron"))
            else:
                results.append(Result(8, "ticketcron last ran", "WARN", f"{age_min:.0f} min ago (>10 min)", "Remote Cron"))
        except (ValueError, IndexError):
            results.append(Result(8, "ticketcron last ran", "WARN", "parse error", "Remote Cron"))
    else:
        results.append(Result(8, "ticketcron last ran", "WARN", "no cron log entry found", "Remote Cron"))

    # 9. PDF cleanup cron exists
    pdf_cron = section("PDF_CLEANUP")
    if pdf_cron:
        results.append(Result(9, "PDF cleanup cron exists", "PASS", pdf_cron[:60], "Remote Cron"))
    else:
        results.append(Result(9, "PDF cleanup cron exists", "WARN", "not found in Julian's crontab", "Remote Cron"))

    # 10. No failed queue items > 7 days
    failed_old = section("FAILED_QUEUE")
    try:
        count = int(failed_old)
        if count == 0:
            results.append(Result(10, "No stale failed queue items", "PASS", "none >7 days old", "Remote Cron"))
        else:
            results.append(Result(10, "No stale failed queue items", "WARN", f"{count} items >7 days old", "Remote Cron"))
    except ValueError:
        results.append(Result(10, "No stale failed queue items", "WARN", "could not check", "Remote Cron"))

    # 11. Retry queue size
    queue_size = section("QUEUE_SIZE")
    try:
        count = int(queue_size)
        if count <= 10:
            results.append(Result(11, "Retry queue size", "PASS", f"{count} items", "Remote Cron"))
        else:
            results.append(Result(11, "Retry queue size", "WARN", f"{count} items (>10)", "Remote Cron"))
    except ValueError:
        results.append(Result(11, "Retry queue size", "PASS", "0 items", "Remote Cron"))

# ── Phase 3: Remote Disk & Storage ────────────────────────────────

def phase_remote_disk(results):
    """Checks 12-15: Disk space and log sizes on remote."""
    print_phase("Phase 3 — Remote Disk & Storage (12-15)")

    cmd = """
    echo '===DISK_FREE==='
    df -BG / 2>/dev/null | awk 'NR==2{print $4}'

    echo '===MAILLOG==='
    du -sh /var/log/maillog 2>/dev/null | awk '{print $1}'

    echo '===UPLOADS==='
    du -sh /var/www/vhosts/aeihawaii.com/httpdocs/scheduler/uploads/ 2>/dev/null | awk '{print $1}'

    echo '===TMP_FREE==='
    df -BG /tmp 2>/dev/null | awk 'NR==2{print $4}'
    """
    out, err, rc = ssh_cmd(cmd, timeout=30)

    if rc != 0 and "timed out" in err:
        for i in range(12, 16):
            results.append(Result(i, f"Disk check {i}", "FAIL", "SSH failed", "Remote Disk"))
        return

    sections = out.split("===")
    def section(name):
        for i, s in enumerate(sections):
            if s.strip() == name and i + 1 < len(sections):
                return sections[i + 1].strip()
        return ""

    # 12. Disk free > 50GB
    disk_free = section("DISK_FREE").replace("G", "")
    try:
        gb = float(disk_free)
        if gb >= 50:
            results.append(Result(12, "Disk free > 50GB", "PASS", f"{gb:.0f}GB free", "Remote Disk"))
        elif gb >= 20:
            results.append(Result(12, "Disk free > 50GB", "WARN", f"{gb:.0f}GB free (<50GB)", "Remote Disk"))
        else:
            results.append(Result(12, "Disk free > 50GB", "FAIL", f"{gb:.0f}GB free (<20GB)", "Remote Disk"))
    except ValueError:
        results.append(Result(12, "Disk free > 50GB", "WARN", f"parse error: {disk_free}", "Remote Disk"))

    # 13. Maillog < 1GB
    maillog = section("MAILLOG")
    ml_mb = parse_size_mb(maillog)
    if ml_mb < 1024:
        results.append(Result(13, "Maillog size < 1GB", "PASS", maillog or "0", "Remote Disk"))
    else:
        results.append(Result(13, "Maillog size < 1GB", "WARN", f"{maillog} (>=1GB)", "Remote Disk"))

    # 14. Uploads size (informational)
    uploads = section("UPLOADS")
    results.append(Result(14, "scheduler/uploads/ size", "PASS", uploads or "unknown", "Remote Disk"))

    # 15. /tmp > 1GB free
    tmp_free = section("TMP_FREE").replace("G", "")
    try:
        gb = float(tmp_free)
        if gb >= 1:
            results.append(Result(15, "/tmp free > 1GB", "PASS", f"{gb:.0f}GB", "Remote Disk"))
        else:
            results.append(Result(15, "/tmp free > 1GB", "WARN", f"{gb:.0f}GB (<1GB)", "Remote Disk"))
    except ValueError:
        results.append(Result(15, "/tmp free > 1GB", "WARN", f"parse error: {tmp_free}", "Remote Disk"))

# ── Phase 4: Remote Security ──────────────────────────────────────

def phase_remote_security(results):
    """Checks 16-19: Known security exposures and SSL."""
    print_phase("Phase 4 — Remote Security (16-19)")

    cmd = """
    echo '===SLSQL==='
    [ -f /var/www/vhosts/aeihawaii.com/httpdocs/photoapi/sl.sql ] && echo 'EXISTS' || echo 'GONE'

    echo '===MYSQLDBNE==='
    [ -d /var/www/vhosts/aeihawaii.com/httpdocs/mysqldbne ] && echo 'EXISTS' || echo 'GONE'

    echo '===SSL==='
    echo | openssl s_client -servername aeihawaii.com -connect aeihawaii.com:443 2>/dev/null | openssl x509 -noout -enddate 2>/dev/null

    echo '===PHP_EXT==='
    php -m 2>/dev/null | grep -iE '^(mysqli|curl|gd|json|mbstring)$' | sort | tr '\\n' ','
    """
    out, err, rc = ssh_cmd(cmd, timeout=20)

    if rc != 0 and "timed out" in err:
        for i in range(16, 20):
            results.append(Result(i, f"Security check {i}", "FAIL", "SSH failed", "Remote Security"))
        return

    sections = out.split("===")
    def section(name):
        for i, s in enumerate(sections):
            if s.strip() == name and i + 1 < len(sections):
                return sections[i + 1].strip()
        return ""

    # 16. sl.sql removed
    slsql = section("SLSQL")
    if slsql == "GONE":
        results.append(Result(16, "photoapi/sl.sql removed", "PASS", "not present", "Remote Security"))
    else:
        results.append(Result(16, "photoapi/sl.sql removed", "FAIL", "STILL EXISTS — publicly accessible SQL dump!", "Remote Security"))

    # 17. mysqldbne removed
    dbne = section("MYSQLDBNE")
    if dbne == "GONE":
        results.append(Result(17, "mysqldbne/ removed", "PASS", "not present", "Remote Security"))
    else:
        results.append(Result(17, "mysqldbne/ removed", "FAIL", "STILL EXISTS — old DB admin tool!", "Remote Security"))

    # 18. SSL cert > 30 days
    ssl_line = section("SSL")
    if ssl_line and "notAfter=" in ssl_line:
        try:
            date_str = ssl_line.split("=", 1)[1]
            expiry = datetime.strptime(date_str, "%b %d %H:%M:%S %Y %Z")
            days_left = (expiry - datetime.now(timezone.utc).replace(tzinfo=None)).days
            if days_left > 30:
                results.append(Result(18, "SSL cert > 30 days", "PASS", f"{days_left} days left", "Remote Security"))
            elif days_left > 0:
                results.append(Result(18, "SSL cert > 30 days", "WARN", f"{days_left} days left (<30)", "Remote Security"))
            else:
                results.append(Result(18, "SSL cert > 30 days", "FAIL", "EXPIRED", "Remote Security"))
        except (ValueError, IndexError):
            results.append(Result(18, "SSL cert > 30 days", "WARN", f"parse error: {ssl_line[:60]}", "Remote Security"))
    else:
        results.append(Result(18, "SSL cert > 30 days", "WARN", "could not check SSL", "Remote Security"))

    # 19. PHP extensions
    exts = section("PHP_EXT").rstrip(",").split(",")
    expected = {"mysqli", "curl", "gd", "json", "mbstring"}
    found = {e.strip().lower() for e in exts if e.strip()}
    missing = expected - found
    if not missing:
        results.append(Result(19, "PHP extensions present", "PASS", ", ".join(sorted(found)), "Remote Security"))
    else:
        results.append(Result(19, "PHP extensions present", "FAIL", f"missing: {', '.join(sorted(missing))}", "Remote Security"))

# ── Phase 5: Remote Photo API ─────────────────────────────────────

def phase_remote_photo_api(results):
    """Checks 20-25: Photo API endpoints respond correctly."""
    print_phase("Phase 5 — Remote Photo API (20-25)")

    # 20. upload.php returns JSON on POST (empty/bad request — not an actual upload)
    try:
        resp = requests.post(f"{REMOTE_API_BASE}/upload.php",
                             json={"auth_token": "invalid_test"},
                             timeout=15, verify=True)
        try:
            data = resp.json()
            results.append(Result(20, "upload.php returns JSON", "PASS",
                                  f"HTTP {resp.status_code}, JSON response", "Remote Photo API"))
        except ValueError:
            if resp.status_code == 200:
                results.append(Result(20, "upload.php returns JSON", "WARN",
                                      f"HTTP {resp.status_code} but not JSON", "Remote Photo API"))
            else:
                results.append(Result(20, "upload.php returns JSON", "FAIL",
                                      f"HTTP {resp.status_code}, not JSON", "Remote Photo API"))
    except requests.RequestException as e:
        results.append(Result(20, "upload.php returns JSON", "FAIL", str(e)[:80], "Remote Photo API"))

    # 21. getimagelisting.php responds
    try:
        resp = requests.post(f"{REMOTE_API_BASE}/getimagelisting.php",
                             data={"auth_token": AUTH_TOKEN, "job_id": "0"},
                             timeout=15, verify=True)
        try:
            data = resp.json()
            results.append(Result(21, "getimagelisting.php responds", "PASS",
                                  f"HTTP {resp.status_code}, JSON", "Remote Photo API"))
        except ValueError:
            results.append(Result(21, "getimagelisting.php responds", "WARN",
                                  f"HTTP {resp.status_code}, not JSON", "Remote Photo API"))
    except requests.RequestException as e:
        results.append(Result(21, "getimagelisting.php responds", "FAIL", str(e)[:80], "Remote Photo API"))

    # 22. fetch_image.php responds (no file — expect error JSON)
    try:
        resp = requests.get(f"{REMOTE_API_BASE}/fetch_image.php",
                            params={"auth_token": AUTH_TOKEN, "filename": "nonexistent.jpg"},
                            timeout=15, verify=True)
        if resp.status_code in (200, 404):
            results.append(Result(22, "fetch_image.php responds", "PASS",
                                  f"HTTP {resp.status_code}", "Remote Photo API"))
        else:
            results.append(Result(22, "fetch_image.php responds", "WARN",
                                  f"HTTP {resp.status_code}", "Remote Photo API"))
    except requests.RequestException as e:
        results.append(Result(22, "fetch_image.php responds", "FAIL", str(e)[:80], "Remote Photo API"))

    # 23. delete.php rejects bad auth
    try:
        resp = requests.post(f"{REMOTE_API_BASE}/delete.php",
                             json={"auth_token": "bad_token", "filename": "test.jpg"},
                             timeout=15, verify=True)
        try:
            data = resp.json()
            if data.get("success") is not True:
                results.append(Result(23, "delete.php rejects bad auth", "PASS",
                                      "rejected invalid token", "Remote Photo API"))
            else:
                results.append(Result(23, "delete.php rejects bad auth", "FAIL",
                                      "accepted invalid token!", "Remote Photo API"))
        except ValueError:
            results.append(Result(23, "delete.php rejects bad auth", "WARN",
                                  f"HTTP {resp.status_code}, not JSON", "Remote Photo API"))
    except requests.RequestException as e:
        results.append(Result(23, "delete.php rejects bad auth", "FAIL", str(e)[:80], "Remote Photo API"))

    # 24. delete_image.php rejects bad auth (batch delete endpoint)
    try:
        resp = requests.post(f"{REMOTE_API_BASE}/delete_image.php",
                             json={"auth_token": "bad_token", "filenames": ["test.jpg"]},
                             timeout=15, verify=True)
        try:
            data = resp.json()
            if data.get("success") is not True:
                results.append(Result(24, "delete_image.php rejects bad auth", "PASS",
                                      "rejected invalid token", "Remote Photo API"))
            else:
                results.append(Result(24, "delete_image.php rejects bad auth", "FAIL",
                                      "accepted invalid token!", "Remote Photo API"))
        except ValueError:
            # Non-JSON response is acceptable — endpoint exists
            results.append(Result(24, "delete_image.php rejects bad auth", "PASS",
                                  f"HTTP {resp.status_code}, non-JSON rejection", "Remote Photo API"))
    except requests.RequestException as e:
        results.append(Result(24, "delete_image.php rejects bad auth", "FAIL", str(e)[:80], "Remote Photo API"))

    # 25. delete_local_photo.php rejects bad auth
    try:
        resp = requests.get(f"https://upload.aeihawaii.com/upload/delete_local_photo.php",
                            params={"auth_token": "bad_token", "filename": "test.jpg"},
                            timeout=15, verify=False)
        try:
            data = resp.json()
            if data.get("success") is not True:
                results.append(Result(25, "delete_local_photo.php rejects bad auth", "PASS",
                                      "rejected invalid token", "Remote Photo API"))
            else:
                results.append(Result(25, "delete_local_photo.php rejects bad auth", "FAIL",
                                      "accepted invalid token!", "Remote Photo API"))
        except ValueError:
            results.append(Result(25, "delete_local_photo.php rejects bad auth", "PASS",
                                  f"HTTP {resp.status_code}, non-JSON rejection", "Remote Photo API"))
    except requests.RequestException as e:
        results.append(Result(25, "delete_local_photo.php rejects bad auth", "FAIL", str(e)[:80], "Remote Photo API"))

# ── Phase 6: Remote Database ──────────────────────────────────────

def phase_remote_database(results):
    """Checks 26-28: Database connectivity."""
    print_phase("Phase 6 — Remote Database (26-28)")

    cmd = f"""
    echo '===SELECT1==='
    mysql -u {DB_USER} -p'{DB_PASS}' {DB_NAME} -N -e "SELECT 1;" 2>&1

    echo '===METER_FILES==='
    mysql -u {DB_USER} -p'{DB_PASS}' {DB_NAME} -N -e "SELECT COUNT(*) FROM meter_files;" 2>&1

    echo '===JOBS==='
    mysql -u {DB_USER} -p'{DB_PASS}' {DB_NAME} -N -e "SELECT COUNT(*) FROM jobs;" 2>&1
    """
    out, err, rc = ssh_cmd(cmd, timeout=15)

    sections = out.split("===")
    def section(name):
        for i, s in enumerate(sections):
            if s.strip() == name and i + 1 < len(sections):
                return sections[i + 1].strip()
        return ""

    # 26. MySQL connection
    sel1 = section("SELECT1")
    if sel1.strip() == "1":
        results.append(Result(26, "MySQL connection works", "PASS", "SELECT 1 OK", "Remote Database"))
    else:
        results.append(Result(26, "MySQL connection works", "FAIL", sel1[:80], "Remote Database"))

    # 27. meter_files accessible
    mf = section("METER_FILES")
    try:
        count = int(mf)
        results.append(Result(27, "meter_files table accessible", "PASS", f"{count:,} rows", "Remote Database"))
    except ValueError:
        results.append(Result(27, "meter_files table accessible", "FAIL", mf[:80], "Remote Database"))

    # 28. jobs accessible
    jobs = section("JOBS")
    try:
        count = int(jobs)
        results.append(Result(28, "jobs table accessible", "PASS", f"{count:,} rows", "Remote Database"))
    except ValueError:
        results.append(Result(28, "jobs table accessible", "FAIL", jobs[:80], "Remote Database"))

# ── Phase 7: Local Services ───────────────────────────────────────

def phase_local_services(results):
    """Checks 29-33: Core services on local server."""
    print_phase("Phase 7 — Local Services (29-33)")

    # 29. Apache
    out, _, rc = local_cmd("pgrep -c apache2 2>/dev/null || pgrep -c httpd 2>/dev/null || echo 0")
    count = out.strip()
    ok = count.isdigit() and int(count) > 0
    results.append(Result(29, "Apache running", "PASS" if ok else "FAIL",
                          f"{count} processes" if ok else "not running", "Local Services"))

    # 30. MariaDB
    out, _, rc = local_cmd("pgrep -c mariadbd 2>/dev/null || pgrep -c mysqld 2>/dev/null || echo 0")
    count = out.strip()
    ok = count.isdigit() and int(count) > 0
    results.append(Result(30, "MariaDB running", "PASS" if ok else "FAIL",
                          f"{count} processes" if ok else "not running", "Local Services"))

    # 31. fail2ban
    out, _, rc = local_cmd("pgrep -c fail2ban 2>/dev/null || echo 0")
    count = out.strip()
    ok = count.isdigit() and int(count) > 0
    results.append(Result(31, "fail2ban running", "PASS" if ok else "FAIL",
                          f"{count} processes" if ok else "not running", "Local Services"))

    # 32. cron
    out, _, rc = local_cmd("pgrep -c cron 2>/dev/null || echo 0")
    count = out.strip()
    ok = count.isdigit() and int(count) > 0
    results.append(Result(32, "cron running", "PASS" if ok else "FAIL",
                          f"{count} processes" if ok else "not running", "Local Services"))

    # 33. /mnt/dropbox/ accessible
    out, _, rc = local_cmd("[ -d /mnt/dropbox ] && ls /mnt/dropbox/ | head -3 | wc -l")
    ok = rc == 0 and out.strip() != "0"
    results.append(Result(33, "/mnt/dropbox/ accessible", "PASS" if ok else "FAIL",
                          "mounted and readable" if ok else "not accessible", "Local Services"))

# ── Phase 8: Local Firewall & Security ────────────────────────────

def phase_local_firewall(results):
    """Checks 34-38: Firewall, fail2ban, HTTP, SSL on local."""
    print_phase("Phase 8 — Local Firewall & Security (34-38)")

    # 34. AWS IP in trusted_whitelist
    out, _, rc = local_cmd(f"sudo ipset test trusted_whitelist {AWS_IP} 2>&1")
    ok = "is in set" in out
    results.append(Result(34, "AWS IP in trusted_whitelist", "PASS" if ok else "FAIL",
                          f"{AWS_IP} whitelisted" if ok else f"{AWS_IP} NOT in ipset — sync will fail!",
                          "Local Firewall"))

    # 35. AWS IP not banned
    out, _, rc = local_cmd(f"sudo fail2ban-client status 2>/dev/null | grep 'Jail list' | sed 's/.*://;s/ //g'")
    jails = [j.strip() for j in out.split(",") if j.strip()]
    banned = False
    banned_in = []
    for jail in jails[:30]:  # cap to avoid too many checks
        j_out, _, _ = local_cmd(f"sudo fail2ban-client status {jail} 2>/dev/null | grep {AWS_IP}")
        if AWS_IP in j_out:
            banned = True
            banned_in.append(jail)
    if not banned:
        results.append(Result(35, "AWS IP not banned in fail2ban", "PASS",
                              f"checked {len(jails)} jails", "Local Firewall"))
    else:
        results.append(Result(35, "AWS IP not banned in fail2ban", "FAIL",
                              f"BANNED in: {', '.join(banned_in)}", "Local Firewall"))

    # 36. fail2ban jail count
    jail_count = len(jails)
    if jail_count >= 20:
        results.append(Result(36, "fail2ban jail count", "PASS", f"{jail_count} jails", "Local Firewall"))
    elif jail_count >= 10:
        results.append(Result(36, "fail2ban jail count", "WARN", f"{jail_count} jails (expected ~22)", "Local Firewall"))
    else:
        results.append(Result(36, "fail2ban jail count", "FAIL", f"{jail_count} jails (expected ~22)", "Local Firewall"))

    # 37. upload.aeihawaii.com responds
    try:
        resp = requests.get(LOCAL_URL, timeout=10, verify=False)
        results.append(Result(37, "upload.aeihawaii.com responds", "PASS",
                              f"HTTP {resp.status_code}", "Local Firewall"))
    except requests.RequestException as e:
        results.append(Result(37, "upload.aeihawaii.com responds", "FAIL",
                              str(e)[:80], "Local Firewall"))

    # 38. Local SSL cert > 30 days
    out, _, rc = local_cmd(
        "echo | openssl s_client -servername upload.aeihawaii.com -connect localhost:443 2>/dev/null "
        "| openssl x509 -noout -enddate 2>/dev/null"
    )
    if "notAfter=" in out:
        try:
            date_str = out.split("=", 1)[1].strip()
            expiry = datetime.strptime(date_str, "%b %d %H:%M:%S %Y %Z")
            days_left = (expiry - datetime.now(timezone.utc).replace(tzinfo=None)).days
            if days_left > 30:
                results.append(Result(38, "Local SSL cert > 30 days", "PASS",
                                      f"{days_left} days left", "Local Firewall"))
            elif days_left > 0:
                results.append(Result(38, "Local SSL cert > 30 days", "WARN",
                                      f"{days_left} days left (<30)", "Local Firewall"))
            else:
                results.append(Result(38, "Local SSL cert > 30 days", "FAIL", "EXPIRED", "Local Firewall"))
        except (ValueError, IndexError):
            results.append(Result(38, "Local SSL cert > 30 days", "WARN", f"parse error: {out[:60]}", "Local Firewall"))
    else:
        results.append(Result(38, "Local SSL cert > 30 days", "WARN", "could not check", "Local Firewall"))

# ── Phase 9: Local Cron Health ────────────────────────────────────

def phase_local_cron(results):
    """Checks 39-42: Local cron jobs, disk, and database."""
    print_phase("Phase 9 — Local Cron Health (39-42)")

    # 39. daily_database_sync.py ran within 24h
    out, _, rc = local_cmd(
        "find /var/www/html/map_dropbox/logs/ -name 'sync*.log' -mtime -1 2>/dev/null | head -1 || "
        "find /var/log/ -name '*database*sync*' -mtime -1 2>/dev/null | head -1"
    )
    if out:
        results.append(Result(39, "daily_database_sync ran <24h", "PASS", os.path.basename(out), "Local Cron"))
    else:
        # Check map_dropbox cron via syslog
        out2, _, _ = local_cmd("grep -a 'map_dropbox\\|database_sync' /var/log/syslog 2>/dev/null | tail -1")
        if out2:
            results.append(Result(39, "daily_database_sync ran <24h", "WARN", f"last log: {out2[:60]}", "Local Cron"))
        else:
            results.append(Result(39, "daily_database_sync ran <24h", "WARN", "no recent log found", "Local Cron"))

    # 40. process_queue.sh running (security queue)
    out, _, rc = local_cmd("pgrep -f 'process_queue' 2>/dev/null | head -1")
    if out:
        results.append(Result(40, "process_queue.sh running", "PASS", f"PID {out}", "Local Cron"))
    else:
        # Check if it ran recently
        out2, _, _ = local_cmd("grep -a 'process_queue' /var/log/syslog 2>/dev/null | tail -1")
        results.append(Result(40, "process_queue.sh running", "WARN",
                              "not currently running" + (f" — last: {out2[:40]}" if out2 else ""), "Local Cron"))

    # 41. DB backup < 24h old
    out, _, rc = local_cmd(
        "find /var/www/SQL_backups/ -name '*.sql*' -mtime -1 2>/dev/null | head -1 || "
        "ls -lt /var/www/SQL_backups/ 2>/dev/null | head -2 | tail -1"
    )
    if out and rc == 0:
        basename = os.path.basename(out) if "/" in out else out.strip()[:60]
        results.append(Result(41, "DB backup < 24h old", "PASS", basename, "Local Cron"))
    else:
        results.append(Result(41, "DB backup < 24h old", "WARN", "no recent backup found", "Local Cron"))

    # 42. Local disk free > 50GB
    out, _, rc = local_cmd("df -BG / | awk 'NR==2{print $4}'")
    disk_free = out.replace("G", "")
    try:
        gb = float(disk_free)
        if gb >= 50:
            results.append(Result(42, "Local disk free > 50GB", "PASS", f"{gb:.0f}GB", "Local Cron"))
        elif gb >= 20:
            results.append(Result(42, "Local disk free > 50GB", "WARN", f"{gb:.0f}GB (<50GB)", "Local Cron"))
        else:
            results.append(Result(42, "Local disk free > 50GB", "FAIL", f"{gb:.0f}GB (<20GB)", "Local Cron"))
    except ValueError:
        results.append(Result(42, "Local disk free > 50GB", "WARN", f"parse error: {disk_free}", "Local Cron"))

# ── Phase 10: Cross-System Connectivity ───────────────────────────

def phase_cross_system(results):
    """Checks 43-44: Cross-server connectivity."""
    print_phase("Phase 10 — Cross-System Connectivity (43-44)")

    # 43. Scheduler login page responds
    try:
        resp = requests.get(REMOTE_SCHEDULER_URL, timeout=15, verify=True)
        if resp.status_code == 200:
            results.append(Result(43, "Scheduler login page responds", "PASS",
                                  f"HTTP {resp.status_code}", "Cross-System"))
        else:
            results.append(Result(43, "Scheduler login page responds", "WARN",
                                  f"HTTP {resp.status_code}", "Cross-System"))
    except requests.RequestException as e:
        results.append(Result(43, "Scheduler login page responds", "FAIL",
                              str(e)[:80], "Cross-System"))

    # 44. Photo API health (POST to upload.php, expect JSON — no actual upload)
    try:
        resp = requests.post(f"{REMOTE_API_BASE}/upload.php",
                             json={"auth_token": AUTH_TOKEN},
                             timeout=15, verify=True)
        try:
            data = resp.json()
            results.append(Result(44, "Photo API responds (JSON)", "PASS",
                                  f"HTTP {resp.status_code}", "Cross-System"))
        except ValueError:
            results.append(Result(44, "Photo API responds (JSON)", "WARN",
                                  f"HTTP {resp.status_code}, not JSON", "Cross-System"))
    except requests.RequestException as e:
        results.append(Result(44, "Photo API responds (JSON)", "FAIL",
                              str(e)[:80], "Cross-System"))

# ── Phase 11: aei-webserv2 Health ─────────────────────────────────

def phase_webserv2_health(results):
    """Checks 45-51: aei-webserv2 (upload.aeihawaii.com / 192.168.141.219)."""
    print_phase("Phase 11 — aei-webserv2 Health (45-51)")

    # Batch service + file checks in one SSH call
    cmd = (
        "echo '===APACHE==='; "
        "pgrep -c apache2 2>/dev/null || echo 0; "
        "echo '===MYSQL==='; "
        "pgrep -c mysqld 2>/dev/null || pgrep -c mariadbd 2>/dev/null || echo 0; "
        "echo '===UPLOAD_PHP==='; "
        f"[ -f {WEBSERV2_UPLOAD_DIR}/uploadlocallat_kuldeep.php ] && echo 'EXISTS' || echo 'MISSING'; "
        "echo '===DELETE_LOCAL==='; "
        f"[ -f {WEBSERV2_UPLOAD_DIR}/delete_local_photo.php ] && echo 'EXISTS' || echo 'MISSING'; "
        "echo '===CHECK_PHOTOS==='; "
        f"[ -f {WEBSERV2_UPLOAD_DIR}/check_photos.php ] && echo 'EXISTS' || echo 'MISSING'; "
        "echo '===LOCAL_PHOTOS==='; "
        f"mysql -u {WEBSERV2_DB_USER} -p'{WEBSERV2_DB_PASS}' {WEBSERV2_DB_NAME} -N -e 'SELECT COUNT(*) FROM local_photos;' 2>&1; "
        "echo '===DROPBOX==='; "
        "ls /mnt/dropbox/ 2>&1 | head -3 | wc -l"
    )
    out, err, rc = local_server_ssh_cmd(cmd, timeout=20)

    if rc != 0 and ("timed out" in err or "Connection refused" in err or "No route" in err):
        for i in range(45, 52):
            results.append(Result(i, f"webserv2 check {i}", "FAIL",
                                  f"SSH to {LOCAL_SSH_HOST} failed: {err[:60]}", "webserv2 Health"))
        return

    sections = out.split("===")
    def section(name):
        for i, s in enumerate(sections):
            if s.strip() == name and i + 1 < len(sections):
                return sections[i + 1].strip()
        return ""

    # 45. Apache running on aei-webserv2
    count = section("APACHE")
    ok = count.isdigit() and int(count) > 0
    results.append(Result(45, "Apache running (webserv2)", "PASS" if ok else "FAIL",
                          f"{count} processes" if ok else "not running", "webserv2 Health"))

    # 46. MySQL running on aei-webserv2
    count = section("MYSQL")
    ok = count.isdigit() and int(count) > 0
    results.append(Result(46, "MySQL running (webserv2)", "PASS" if ok else "FAIL",
                          f"{count} processes" if ok else "not running", "webserv2 Health"))

    # 47. uploadlocallat_kuldeep.php exists
    status = section("UPLOAD_PHP")
    ok = status == "EXISTS"
    results.append(Result(47, "uploadlocallat_kuldeep.php exists", "PASS" if ok else "FAIL",
                          "present" if ok else "MISSING", "webserv2 Health"))

    # 48. delete_local_photo.php exists
    status = section("DELETE_LOCAL")
    ok = status == "EXISTS"
    results.append(Result(48, "delete_local_photo.php exists", "PASS" if ok else "FAIL",
                          "present" if ok else "MISSING", "webserv2 Health"))

    # 49. check_photos.php exists
    status = section("CHECK_PHOTOS")
    ok = status == "EXISTS"
    results.append(Result(49, "check_photos.php exists", "PASS" if ok else "FAIL",
                          "present" if ok else "MISSING", "webserv2 Health"))

    # 50. local_photos table accessible
    lp = section("LOCAL_PHOTOS")
    try:
        count = int(lp)
        results.append(Result(50, "local_photos table accessible", "PASS", f"{count:,} rows", "webserv2 Health"))
    except ValueError:
        results.append(Result(50, "local_photos table accessible", "FAIL", lp[:80], "webserv2 Health"))

    # 51. /mnt/dropbox accessible from aei-webserv2
    dropbox = section("DROPBOX")
    try:
        count = int(dropbox)
        ok = count > 0
    except ValueError:
        ok = False
    results.append(Result(51, "/mnt/dropbox accessible (webserv2)", "PASS" if ok else "FAIL",
                          "mounted and readable" if ok else "not accessible", "webserv2 Health"))

# ── Main ──────────────────────────────────────────────────────────

def main():
    parser = argparse.ArgumentParser(description="AEI System Operational Audit")
    parser.add_argument("--remote-only", action="store_true", help="Skip local checks (phases 7-9, 11)")
    parser.add_argument("--local-only", action="store_true", help="Skip remote checks (phases 1-6)")
    parser.add_argument("--json", action="store_true", help="Output JSON instead of text")
    parser.add_argument("--warn-only", action="store_true", help="Only show WARN and FAIL results")
    args = parser.parse_args()

    global _quiet_mode
    _quiet_mode = args.json

    results = []
    start_time = time.time()

    if not args.json:
        print(f"\n{Colors.BOLD}AEI System Operational Audit{Colors.RESET}")
        print(f"{'─' * 60}")
        print(f"  Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        if args.remote_only:
            print(f"  Mode: Remote only")
        elif args.local_only:
            print(f"  Mode: Local only")
        else:
            print(f"  Mode: Full audit (both servers)")

    # Remote phases (1-6)
    if not args.local_only:
        phase_remote_services(results)
        if not args.json:
            for r in results[-5:]:
                print_result(r, args.warn_only)

        phase_remote_cron(results)
        if not args.json:
            for r in results[-6:]:
                print_result(r, args.warn_only)

        phase_remote_disk(results)
        if not args.json:
            for r in results[-4:]:
                print_result(r, args.warn_only)

        phase_remote_security(results)
        if not args.json:
            for r in results[-4:]:
                print_result(r, args.warn_only)

        phase_remote_photo_api(results)
        if not args.json:
            for r in results[-6:]:
                print_result(r, args.warn_only)

        phase_remote_database(results)
        if not args.json:
            for r in results[-3:]:
                print_result(r, args.warn_only)

    # Local phases (7-9)
    if not args.remote_only:
        phase_local_services(results)
        if not args.json:
            for r in results[-5:]:
                print_result(r, args.warn_only)

        phase_local_firewall(results)
        if not args.json:
            for r in results[-5:]:
                print_result(r, args.warn_only)

        phase_local_cron(results)
        if not args.json:
            for r in results[-4:]:
                print_result(r, args.warn_only)

    # Cross-system (always runs unless single-server mode)
    if not args.remote_only and not args.local_only:
        phase_cross_system(results)
        if not args.json:
            for r in results[-2:]:
                print_result(r, args.warn_only)

    # aei-webserv2 (runs with local or full audit)
    if not args.remote_only:
        phase_webserv2_health(results)
        if not args.json:
            for r in results[-7:]:
                print_result(r, args.warn_only)

    elapsed = time.time() - start_time

    # Summary
    pass_count = sum(1 for r in results if r.status == "PASS")
    warn_count = sum(1 for r in results if r.status == "WARN")
    fail_count = sum(1 for r in results if r.status == "FAIL")
    skip_count = sum(1 for r in results if r.status == "SKIP")
    total = len(results)

    if args.json:
        output = {
            "timestamp": datetime.now().isoformat(),
            "elapsed_seconds": round(elapsed, 1),
            "summary": {
                "total": total,
                "pass": pass_count,
                "warn": warn_count,
                "fail": fail_count,
                "skip": skip_count,
            },
            "results": [r.to_dict() for r in results],
        }
        print(json.dumps(output, indent=2))
    else:
        print(f"\n{'─' * 60}")
        print(f"  {Colors.BOLD}Summary:{Colors.RESET} {total} checks in {elapsed:.1f}s")
        print(f"    {Colors.PASS}PASS: {pass_count}{Colors.RESET}  "
              f"{Colors.WARN}WARN: {warn_count}{Colors.RESET}  "
              f"{Colors.FAIL}FAIL: {fail_count}{Colors.RESET}"
              + (f"  SKIP: {skip_count}" if skip_count else ""))

        if fail_count > 0:
            print(f"\n  {Colors.FAIL}Failed checks:{Colors.RESET}")
            for r in results:
                if r.status == "FAIL":
                    print(f"    {r.step:>2}. {r.name} — {r.detail}")

        if warn_count > 0 and not args.warn_only:
            print(f"\n  {Colors.WARN}Warnings:{Colors.RESET}")
            for r in results:
                if r.status == "WARN":
                    print(f"    {r.step:>2}. {r.name} — {r.detail}")

        print()

    sys.exit(1 if fail_count > 0 else 0)

if __name__ == "__main__":
    main()
