#!/usr/bin/env python3
"""
SCH-033: Eliminate All /dev/scheduler/ References
Applies all replacements and creates original/modified backups.
"""
import os
import shutil
import re

# Root of the SCHEDULER source tree whose files are rewritten in place.
BASE = '/var/www/html/dev_scheduler/SCHEDULER'
# Working directory of this enhancement; holds the before/after copies.
ENH_DIR = '/var/www/html/dev_scheduler/ENHANCEMENTS/SCH-033_dev_scheduler_cleanup'
# Copy of each file as it was before its first modification.
ORIG_DIR = os.path.join(ENH_DIR, 'original')
# Copy of each file as it looks after modification.
MOD_DIR = os.path.join(ENH_DIR, 'modified')

# Run-wide counters, updated while files are processed and printed in the
# summary at the end of main().
stats = {'files_backed_up': 0, 'files_modified': 0, 'replacements': 0, 'skipped_backup': 0}

def rel(path):
    """Return *path* expressed relative to the SCHEDULER root (``BASE``)."""
    relative = os.path.relpath(path, start=BASE)
    return relative

def backup_original(filepath):
    """Snapshot *filepath* under ``ORIG_DIR``, mirroring its location in the tree.

    An existing snapshot is never overwritten, so the copy taken before the
    first modification survives later passes; such calls are tallied in
    ``stats['skipped_backup']`` instead of ``stats['files_backed_up']``.
    """
    target = os.path.join(ORIG_DIR, rel(filepath))
    os.makedirs(os.path.dirname(target), exist_ok=True)

    if os.path.exists(target):
        stats['skipped_backup'] += 1
        return

    shutil.copy2(filepath, target)
    stats['files_backed_up'] += 1

def save_modified(filepath):
    """Mirror the current state of *filepath* under ``MOD_DIR``.

    The destination keeps the file's path relative to the SCHEDULER root and
    is overwritten on every call.
    """
    target = os.path.join(MOD_DIR, rel(filepath))
    target_dir = os.path.dirname(target)
    os.makedirs(target_dir, exist_ok=True)
    shutil.copy2(filepath, target)

def process_file(filepath, replacements):
    """Apply literal string replacements to a file in place.

    Args:
        filepath: Path of the file to rewrite.
        replacements: Iterable of ``(old, new)`` string pairs, applied in
            order; each pair replaces every occurrence of ``old``. Pairs with
            an empty ``old`` are ignored.

    Returns:
        The total number of occurrences replaced. ``0`` means the file was
        left untouched (nothing matched, or it could not be read).

    When at least one replacement is made, the original is backed up, the
    file is rewritten, a copy of the result is saved, and the global
    ``stats`` counters are updated.
    """
    # utf-8 + surrogateescape round-trips bytes that are not valid UTF-8
    # unchanged, and newline='' turns off newline translation, so the only
    # bytes that differ after rewriting are the replaced ones.
    text_io = {'encoding': 'utf-8', 'errors': 'surrogateescape', 'newline': ''}

    try:
        with open(filepath, 'r', **text_io) as f:
            content = f.read()
    except (OSError, ValueError) as e:
        print(f"  ERROR reading {filepath}: {e}")
        return 0

    total_replacements = 0
    for old, new in replacements:
        if not old:
            # str.replace('', x) would insert x between every character.
            continue
        count = content.count(old)
        if count > 0:
            content = content.replace(old, new)
            total_replacements += count

    if total_replacements > 0:
        # Back up before touching the file so the pristine copy always exists.
        backup_original(filepath)
        with open(filepath, 'w', **text_io) as f:
            f.write(content)
        save_modified(filepath)
        stats['files_modified'] += 1
        stats['replacements'] += total_replacements
        print(f"  Modified: {rel(filepath)} ({total_replacements} replacements)")

    return total_replacements


def find_files(directory, extensions, exclude_patterns=None):
    """Collect files below *directory* whose names end with one of *extensions*.

    Directories named ``_svn`` or ``.svn`` are never descended into. A file is
    dropped when any string in *exclude_patterns* occurs anywhere in its
    joined path. The matching paths are returned as a sorted list.
    """
    patterns = exclude_patterns or []
    suffixes = tuple(extensions)
    pruned_dirs = {'_svn', '.svn'}

    found = []
    for root, dirs, files in os.walk(directory):
        # In-place assignment so os.walk does not descend into pruned dirs.
        dirs[:] = [name for name in dirs if name not in pruned_dirs]

        candidates = (
            os.path.join(root, name)
            for name in files
            if name.endswith(suffixes)
        )
        found.extend(
            path for path in candidates
            if not any(pattern in path for pattern in patterns)
        )

    found.sort()
    return found


def is_backup_controller(filename):
    """Tell whether *filename* looks like a dated or ``_bak`` backup copy.

    Only the base name is inspected. It counts as a backup when it carries a
    day-month-year stamp (admin_24march2014.php, proposal_13dec2013.php),
    ends in ``_bak.php``, or carries a numeric ``-DD-MM-YYYY`` stamp
    (proposal-20-04-2013.php).
    """
    name = os.path.basename(filename)

    # Day + month name + year, matched case-insensitively.
    month_stamps = (
        r'\d{1,2}(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec|april|june|july|march|sept)\d{2,4}',
        r'_\d{1,2}(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d{2,4}',
    )
    if any(re.search(pattern, name, re.I) for pattern in month_stamps):
        return True

    has_bak_suffix = name.endswith('_bak.php')
    has_numeric_date = re.search(r'-\d{2}-\d{2}-\d{4}', name) is not None
    return has_bak_suffix or has_numeric_date


# Absolute filesystem prefix of the dev install hard-coded in PHP sources.
_DEV_FS_PATH = '/var/www/vhosts/aeihawaii.com/httpdocs/dev/scheduler/'
# Absolute URL of the dev install hard-coded in PHP, CSS and JS sources.
_DEV_URL = 'http://aeihawaii.com/dev/scheduler/'
# Number of characters before a match that are scanned to guess whether the
# match sits inside a PHP string literal.
_QUOTE_LOOKBACK = 200
# Text I/O settings that round-trip a file byte-for-byte: surrogateescape keeps
# bytes that are not valid UTF-8, newline='' disables newline translation.
_TEXT_IO = {'encoding': 'utf-8', 'errors': 'surrogateescape', 'newline': ''}


def _quote_context(preceding):
    """Return the quote character of the string literal left open by *preceding*.

    Scans *preceding* left to right, toggling on ``"`` and ``'`` and skipping
    the character after a backslash. Returns ``'"'`` or ``"'"`` when the text
    ends inside such a string, otherwise ``''``. This is a heuristic: the
    caller passes a bounded window, which may itself start inside a string.
    """
    in_double = False
    in_single = False
    escaped = False
    for ch in preceding:
        if escaped:
            escaped = False
        elif ch == '\\':
            escaped = True
        elif ch == '"' and not in_single:
            in_double = not in_double
        elif ch == "'" and not in_double:
            in_single = not in_single

    if in_double:
        return '"'
    if in_single:
        return "'"
    return ''


def _rewrite_fs_paths(content, dev_path=_DEV_FS_PATH):
    """Replace every *dev_path* in PHP source text with an ``FCPATH`` concatenation.

    Handles three situations per occurrence:

    * the path opens a string literal (``"PATH...`` or ``'PATH...``): the
      opening quote is moved behind the constant, giving ``FCPATH . "...``
      so ``$var`` interpolation in double-quoted strings keeps working;
    * the path sits in the middle of a string (e.g. inside an ``exec()``
      command line): the string is closed and reopened around the constant,
      giving ``" . FCPATH . "``;
    * no enclosing string is detected: ``FCPATH . "`` is emitted
      (``FCPATH . '`` when the preceding character is ``=``).

    Returns:
        ``(new_content, count)`` where *count* is the number of occurrences
        replaced.
    """
    parts = []
    pos = 0
    count = 0

    while True:
        idx = content.find(dev_path, pos)
        if idx == -1:
            parts.append(content[pos:])
            break

        before = content[pos:idx]
        pre_char = content[idx - 1] if idx > 0 else ''

        if pre_char == '"' or pre_char == "'":
            # `before` ends with the opening quote here; drop it and reopen
            # the string with the same quote type after the constant.
            parts.append(before[:-1])
            parts.append('FCPATH . ' + pre_char)
        else:
            parts.append(before)
            quote = _quote_context(content[max(0, idx - _QUOTE_LOOKBACK):idx])
            if quote:
                # Break out of the surrounding string and back into it.
                parts.append(f'{quote} . FCPATH . {quote}')
            elif pre_char == '=':
                parts.append("FCPATH . '")
            else:
                parts.append('FCPATH . "')

        count += 1
        pos = idx + len(dev_path)

    return ''.join(parts), count


def main():
    """Run the full cleanup: PHP filesystem paths, PHP URLs, CSS, JS, shell script.

    Every modified file is backed up under ``ORIG_DIR`` before it is written
    and mirrored under ``MOD_DIR`` afterwards; a summary of the ``stats``
    counters is printed at the end.
    """
    os.makedirs(ORIG_DIR, exist_ok=True)
    os.makedirs(MOD_DIR, exist_ok=True)

    rule = "=" * 70

    # =========================================================================
    # STEP 1: filesystem paths in PHP files -> FCPATH concatenation
    # =========================================================================
    print(rule)
    print("STEP 1: PHP filesystem path replacements")
    print(rule)

    active_php_count = 0
    backup_php_count = 0

    # Dated/backup controllers are rewritten too; they are only counted and
    # tagged separately in the output.
    for filepath in find_files(BASE, ['.php']):
        try:
            with open(filepath, 'r', **_TEXT_IO) as f:
                original_content = f.read()
        except OSError as e:
            print(f"  ERROR reading {filepath}: {e}")
            continue

        if _DEV_FS_PATH not in original_content:
            continue

        is_backup = is_backup_controller(filepath)
        if is_backup:
            backup_php_count += 1
        else:
            active_php_count += 1

        content, replacements_done = _rewrite_fs_paths(original_content)

        if content != original_content:
            backup_original(filepath)
            with open(filepath, 'w', **_TEXT_IO) as f:
                f.write(content)
            save_modified(filepath)
            stats['files_modified'] += 1
            stats['replacements'] += replacements_done
            tag = " [BACKUP]" if is_backup else ""
            print(f"  Modified: {rel(filepath)} ({replacements_done} replacements){tag}")

    print(f"\n  Active PHP files: {active_php_count}, Backup PHP files: {backup_php_count}")

    # =========================================================================
    # STEP 2: HTTP URLs in PHP files -> <?=base_url()?>
    # =========================================================================
    print("\n" + rule)
    print("STEP 2: HTTP URL replacements in PHP files")
    print(rule)

    http_replacement = (_DEV_URL, '<?=base_url()?>')
    http_count = 0
    for filepath in find_files(BASE, ['.php'], exclude_patterns=['_svn/', '.svn/']):
        http_count += process_file(filepath, [http_replacement])

    print(f"\n  HTTP URL replacements: {http_count}")

    # =========================================================================
    # STEP 3: CSS files -> paths relative to the stylesheet
    # =========================================================================
    print("\n" + rule)
    print("STEP 3: CSS file replacements")
    print(rule)

    img_rule = (_DEV_URL + 'assets/new_phototab/img/', '../img/')
    css_jobs = [
        ('assets/new_phototab/css/lightbox.css',
         [img_rule, (_DEV_URL + 'assets/image/', '../../image/')]),
        ('assets/new_phototab/css/style.css', [img_rule]),
        ('assets/new_phototab/css/default-skin.css', [img_rule]),
    ]
    for relpath, rules in css_jobs:
        css_path = os.path.join(BASE, relpath)
        if os.path.exists(css_path):
            process_file(css_path, rules)

    # =========================================================================
    # STEP 4: JS files -> /scheduler/ paths
    # =========================================================================
    print("\n" + rule)
    print("STEP 4: JS file replacements")
    print(rule)

    js_replacement = (_DEV_URL, '/scheduler/')
    js_relpaths = [
        'assets/js/web.js',
        'assets/js/multiupload.js',
        'assets/image-gallery/js/web.js',
        'assets/new_phototab/js/multiupload.js',
        'assets/new_phototab1/js/multiupload.js',
        'system/application/views/admin/multiupload.js',
    ]
    for relpath in js_relpaths:
        js_path = os.path.join(BASE, relpath)
        if os.path.exists(js_path):
            process_file(js_path, [js_replacement])

    # =========================================================================
    # STEP 5: shell script -> non-dev install path
    # =========================================================================
    print("\n" + rule)
    print("STEP 5: Shell script replacements")
    print(rule)

    sh_file = os.path.join(BASE, 'scripts/cleanup_pdftemp.sh')
    if os.path.exists(sh_file):
        process_file(sh_file, [
            (_DEV_FS_PATH, '/var/www/vhosts/aeihawaii.com/httpdocs/scheduler/'),
        ])

    # =========================================================================
    # SUMMARY
    # =========================================================================
    print("\n" + rule)
    print("SUMMARY")
    print(rule)
    print(f"  Files backed up:  {stats['files_backed_up']}")
    print(f"  Files modified:   {stats['files_modified']}")
    print(f"  Total replacements: {stats['replacements']}")
    print(f"  Skipped backups (already existed): {stats['skipped_backup']}")

# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
