#!/bin/bash
# cleanup_pdftemp.sh - Clean up old PDF temp files
#
# MAINT-001: PDF Temp Files Cleanup
# Created: 2026-01-07
#
# Purpose: Automatically remove old temporary HTML and PDF files
#          from the pdftemp directories to prevent disk space issues.
#
# Schedule: Run daily at 2 AM via cron
# Crontab:  0 2 * * * /var/www/vhosts/aeihawaii.com/httpdocs/scheduler/scripts/cleanup_pdftemp.sh
#
# Retention Policy:
#   - HTML files: Delete after 1 day (intermediate files only)
#   - PDF files: Delete after 7 days (keep recent for potential re-download)

# Strict mode:
#   -e  abort on the first command that fails outside a condition
#   -u  treat a reference to an unset variable as an error, so a mistyped
#       variable name cannot silently expand to an empty string
# NOTE: pipefail is intentionally not enabled; the helper pipelines in this
# script discard errors from their first stage and rely on the last stage's
# exit status.
set -eu

# Configuration: directories that are cleaned and the log file that is
# appended to. Declared readonly so nothing later in the script can
# redirect the cleanup to a different path by accident.
readonly PDFTEMP_DIR="/var/www/vhosts/aeihawaii.com/httpdocs/scheduler/pdftemp"
readonly ESIGN_PDFTEMP="/var/www/vhosts/aeihawaii.com/httpdocs/esign/upload/pdftemp"
readonly TEMPPDF_DIR="/var/www/vhosts/aeihawaii.com/httpdocs/scheduler/temppdf"
readonly LOG_FILE="/var/www/vhosts/aeihawaii.com/httpdocs/scheduler/logs/pdftemp_cleanup.log"

# Retention periods (in days)
readonly HTML_RETENTION=1
readonly PDF_RETENTION=7

# Append a timestamped message to the cleanup log.
#
# Globals:   LOG_FILE (read) - path of the log file that is appended to
# Arguments: $1 - message text
# Outputs:   one line "YYYY-MM-DD HH:MM:SS: <message>" appended to LOG_FILE;
#            if LOG_FILE cannot be written, the same line goes to stderr
# Returns:   0 when the line was written to either destination
log_message() {
    local line
    line="$(date '+%Y-%m-%d %H:%M:%S'): ${1-}"

    # A failed append (missing log directory, permissions, full disk) must
    # not abort the whole cleanup under `set -e`, so the write is tested
    # explicitly and the message falls back to stderr instead.
    if ! { printf '%s\n' "$line" >> "$LOG_FILE"; } 2>/dev/null; then
        printf '%s\n' "$line" >&2
    fi
}

# Count the regular files stored under a directory.
#
# The search is recursive and counts regular files only (hidden files
# included), so subdirectories are not reported as "files". Entries are
# counted NUL-delimited rather than by parsing `ls` output, so a file name
# containing a newline is still counted once.
#
# Arguments: $1 - directory to inspect
# Outputs:   the number of regular files on stdout; "0" if $1 is not a
#            directory
get_count() {
    local dir="$1"
    local total

    if [ ! -d "$dir" ]; then
        echo "0"
        return 0
    fi

    # find emits one NUL per matching file; tr keeps only those NUL bytes and
    # wc counts them. find's own errors are discarded (best-effort count).
    total=$(find "$dir" -type f -print0 2>/dev/null | tr -cd '\0' | wc -c)

    # Arithmetic expansion strips any padding some wc implementations add.
    printf '%s\n' "$(( total ))"
}

# Report the disk usage of a directory in human-readable form.
#
# Arguments: $1 - directory to measure
# Outputs:   first tab-separated field of `du -sh` (the size) on stdout,
#            or "0" when $1 is not a directory
get_size() {
    local target="$1"

    # Nothing to measure: report zero and stop here.
    if [ ! -d "$target" ]; then
        echo "0"
        return
    fi

    # du prints "<size><TAB><path>"; keep only the size column.
    du -sh "$target" 2>/dev/null | cut -f1
}

# Delete regular files under a directory that match a name pattern and are
# older than a retention period, and report how many were removed.
#
# Arguments: $1 - directory to search (recursively)
#            $2 - find -name pattern, e.g. "*.html"
#            $3 - retention period in whole days
# Outputs:   number of files deleted, on stdout
purge_older_than() {
    local dir="$1" pattern="$2" days="$3"
    local minutes removed

    # `-mtime +N` truncates a file's age to whole days, so it only matches
    # files that are at least N+1 days old. Comparing in minutes removes
    # files once they are older than N days, as the retention policy states.
    minutes=$(( days * 1440 ))

    # -print0 runs only for files -delete actually removed. Counting NUL
    # bytes keeps the tally correct for names that contain newlines.
    # find's errors are discarded on purpose: cleanup is best-effort.
    removed=$(find "$dir" -name "$pattern" -type f -mmin "+${minutes}" -delete -print0 2>/dev/null \
        | tr -cd '\0' | wc -c)

    printf '%s\n' "$(( removed ))"
}

# Clean one temp directory and log the outcome.
#
# Globals:   HTML_RETENTION, PDF_RETENTION (read)
# Arguments: $1 - label used as the prefix of every log line
#            $2 - directory to clean
#            $3 - optional; when non-empty, directory sizes are logged too
#                 and the report is split over three log lines
# Outputs:   log lines via log_message
process_directory() {
    local label="$1" dir="$2" detailed="${3:-}"
    local before_count after_count before_size after_size
    local html_deleted pdf_deleted

    if [ ! -d "$dir" ]; then
        log_message "$label directory not found: $dir"
        return 0
    fi

    before_count=$(get_count "$dir")
    if [ -n "$detailed" ]; then
        before_size=$(get_size "$dir")
        log_message "$label - Before: $before_count files, $before_size"
    fi

    # HTML files are intermediate artifacts; PDFs are kept longer.
    html_deleted=$(purge_older_than "$dir" "*.html" "$HTML_RETENTION")
    pdf_deleted=$(purge_older_than "$dir" "*.pdf" "$PDF_RETENTION")

    after_count=$(get_count "$dir")
    if [ -n "$detailed" ]; then
        after_size=$(get_size "$dir")
        log_message "$label - Deleted: $html_deleted HTML, $pdf_deleted PDF"
        log_message "$label - After: $after_count files, $after_size"
    else
        log_message "$label - Before: $before_count, After: $after_count (Deleted: $html_deleted HTML, $pdf_deleted PDF)"
    fi
}

# Start cleanup
log_message "=========================================="
log_message "Starting pdftemp cleanup"

# Main pdftemp directory: detailed report including sizes
process_directory "Main pdftemp" "$PDFTEMP_DIR" detailed

# Esign pdftemp directory: single-line summary
process_directory "Esign pdftemp" "$ESIGN_PDFTEMP"

# Temppdf directory: single-line summary
process_directory "Temppdf" "$TEMPPDF_DIR"

log_message "Cleanup complete"
log_message "=========================================="

exit 0
