
    LhiK                         d Z ddlZddlZddlZddlZddlZdddddZ ej                  d      Z	d1d	Z
d
 Zi dddddddddddddddddddddd d!d"d#d$d%d%d&d&d'd'Zh d(Zd) Zd* Zd+ Zd, Zd- Zd. Zd/ Zed0k(  r e        yy)2a  
DPP Permit Match - Link SF permits to AEI jobs via TMK and address matching.

This script matches permit_sync records (from Salesforce) to AEI jobs/customers
by trying:
  1. TMK match (normalized 8-digit TMK comparison)
  2. Address match (normalized SF address vs normalized AEI address)

When a match is found:
  - Sets permit_sync.job_id to the matched job
  - If the AEI customer is missing TMK but the SF permit has it,
    backfills customers.cust_tmk

Usage:
  python3 dpp_permit_match.py                  # Dry run (default)
  python3 dpp_permit_match.py --apply          # Apply matches to DB
  python3 dpp_permit_match.py --verbose        # Detailed logging
  python3 dpp_permit_match.py --limit 10       # Process first N unmatched
    Nz18.225.0.90AEI_UserzP@55w02d7777mandhdesign_schedular)hostuserpassworddatabase	dpp_matchc                 |    | rt         j                  nt         j                  }d}t        j                  ||d       y )Nz'%(asctime)s [%(levelname)s] %(message)sz%Y-%m-%d %H:%M:%S)levelformatdatefmt)loggingDEBUGINFObasicConfig)verboser   fmts      ;/var/www/html/AEI_REMOTE/DPP_SALESFORCE/dpp_permit_match.pysetup_loggingr   +   s+    $GMM',,E
3CeC9LM    c                 \   | sy| j                         } | syt        j                  d|       d   j                         } | j                  dd      }t        j                  dd|      }|syt        |      dk(  r|S t        |      dkD  r|dd S t        |      dk\  r|j                  dd	      S y)
uW  Extract the core 8-digit TMK from various formats.

    AEI TMK formats seen:
      '29066059'          → 29066059  (already 8 digits)
      '29066059:0000'     → 29066059  (8 digits + colon + condo unit)
      '440130330008'      → 44013033  (first 8 digits, trailing = unit)
      ' 94149095'         → 94149095  (leading spaces)
      '91092053  '        → 91092053  (trailing spaces)
      '1-1-1-038-071-0000'→ 11038071  (dashed format, strip leading zone/section)
      '9-4-007:085 (0022)'→ 94007085  (dashed + colon + parens)

    SF TMKs are always clean 8-digit strings.
     z[:\s(]r   -z[^0-9]   N   0)stripresplitreplacesublenljust)tmkdigits_onlys     r   normalize_tmkr&   5   s     
))+C ((9c
"1
%
+
+
-C ++c2&K &&B4K ;1 ;!2A ;1  C((r   STREETSTAVENUEAVEDRIVEDRROADRDLANELNCOURTCTPLACEPL	BOULEVARDBLVDCIRCLECIRPARKWAYPKWYHIGHWAYHWYTERRACETERTRAILTRLWAYLOOPPATH>7   ST LOUIS	EWA BEACH	SALT LAKE
HAWAII KAI
PEARL CITYROYAL KUNIAOCEAN POINTEVILLAGE PARKFOSTER VILLAGEWAIALAE KAHALAPACIFIC PALISADESEWAAIEALAIEHEEIAKUNIAMANOAHAUULAKAAAWAKAHALAKAHUKUKAILUAMAKAHAMAKIKIPALOLOWAIPIOHALEIWAHOOPILIKAHALUUKAIMUKIKANEOHEKAPOLEINEWTOWNWAHIAWAWAIALAEWAIALUAWAIANAEWAIKELEWAIKIKIWAIPAHUWHEELERAHUIMANUDOWNTOWNHONOLULUIROQUOISKALAELOAKAPAHULUMAKAKILOMILILANIMOANALUAMOILIILINANAKULI	CRESTVIEW	SCHOFIELD	WAIMANALOc                    | sy| j                         j                         } t        j                  dd|       } t        j                  dd|       } t        j                  dd|       } t        j                  dd|       } t        j                  dd	|       } t        j                  d
d|       } t        j                  dd|       } t        j                         D ]"  \  }}t        j                  d|z   dz   ||       } $ t        j                         D ]  }t        j                  d|z   dz   ||       } ! t        j                  dd|       } t        j                  dd|       j                         } | S )z>Normalize an address for comparison. Returns uppercase string.r   ^\{[^}]*\}\s*(\([^)]*\)\s*)*z\s*\([^)]*\)\s* z-\s+(APT|UNIT|STE|SUITE|#|RM|ROOM)\s*\.?\s*\S*z\bEAST\bEz\bWEST\bWz	\bNORTH\bNz	\bSOUTH\bSz\bz\b\.?z\.z[.,\'#]z\s+)r   upperr   r!   STREET_ABBREVSitemsvalues)addr	long_form
short_forms      r   normalize_addressr      sN   ::<D 6612t<D 66$c40D 66BBMD 66+sD)D66+sD)D66,T*D66,T*D "0!5!5!7 F	:vvei'(2JEF %++- D
vvej(50*dCD 66*b$'D66&#t$**,DKr   c                 h    | s| S d| v r)| j                  d      }dj                  d |D              S | S )u   Normalize a street number by stripping leading zeros from dash parts.

    "45-0248" → "45-248", "91-1501" → "91-1501", "503" → "503"
    r   c              3   F   K   | ]  }|j                  d       xs d   yw)r   N)lstrip).0ps     r   	<genexpr>z*normalize_street_number.<locals>.<genexpr>   s      <,,<s   !)r   join)numpartss     r   normalize_street_numberr      s:    
 

cz		#xx<e<<<Jr   c                     | sy| j                         } t        j                  d|       }|rt        |j	                  d            S y)u   Extract leading street number, including dash-prefixed Hawaiian addresses.

    Examples:
      "503 ANOLANI ST"      → "503"
      "91-1501 HALAHUA ST"  → "91-1501"
      "2578-C PACIFIC HTS"  → "2578"
      "45-0248A PAHIKAUA"   → "45-248"
    r   z^(\d+(?:-\d+)?)   )r   r   matchr   group)r   ms     r   extract_street_numberr      s?     ::<D
#T*A&qwwqz22r   c                 X   | s| S | j                         }t        t        t        d      D ]C  }|j	                  d|z         s| dt        |       t        |      z
  dz
   j                         c S  | j                         }t        |      dk\  r|d   j                         j                  d      }|d	   j                         j                  d      }t        t        j                               t        t        j                               z  }||v r||vrdj                  |dd       S | S )
zRemove city name appended to SF street addresses.

    SF format: "503 ANOLANI ST Honolulu" or "91-1501 HALAHUA ST Kapolei"
    We want: "503 ANOLANI ST" or "91-1501 HALAHUA ST"
    T)keyreverser}   Nr      .)r   sortedHAWAII_CITIESr"   endswithr   r   rstripsetr   r   keysr   )streetr   citywordslastsecond_lastall_abbrevss          r   strip_city_from_sf_streetr      s	    LLNE }#t< @>>#*%63v;T2Q67==??@ LLNE
5zQRy '',Bioo'..s3.//12S9L9L9N5OO+%$k*A88E#2J''Mr   c                 \   | sg S t        j                  d| j                         j                               }|rs|j	                  d      j                         }|rPt        |d         dk(  r?|d   j                         r,|dd }|r%t        |d         dk(  r|d   j                         r,|S g S )uz  Extract the street name words (after the number) from a normalized address.

    "91-1501 HALAHUA ST"     → ["HALAHUA", "ST"]
    "503 ANOLANI ST"         → ["ANOLANI", "ST"]
    "2578-C PACIFIC HTS RD"  → ["PACIFIC", "HTS", "RD"]
    "2578C PACIFIC HTS RD"   → ["PACIFIC", "HTS", "RD"]
    "47-496 A AHUIMANU RD"   → ["AHUIMANU", "RD"]  (skip single-letter unit)
    z ^\d+(?:-\d+)?(?:-?[A-Z])?\s+(.+)r   r   N)r   r   r   r   r   r   r"   isalpha)r   r   r   s      r   extract_street_wordsr      s     	
4djjl6H6H6JKA
  "E!H*uQx/?/?/A!"IE E!H*uQx/?/?/AIr   c           
         t        | j                         t        j                  d       t        j                  d| j                         t        j                  d       t        j                  j                  dHi t        }|j                  d      }d}| j                  r|d| j                   z  }|j                  |       |j                         }t        j                  dt        |             |j                  d       |j                         }t        j                  d	t        |             i }|D ]4  }t        |d
         }|s|j                  |g       j!                  |       6 i }	|D ]4  }t#        |d         }
|
s|	j                  |
g       j!                  |       6 t        j                  dt        |      t        |	             t        |      ddddddd}|D ]  }|d   }|d   }t        |d         }|d   xs dj%                         }|s|s|dxx   dz  cc<   Ed}d}d }|rL||v rH||   }t'        |d       }|d   }|d   }d}|dxx   dz  cc<   t        j)                  d||||d          |s~|r{|j+                  d      d   j%                         }t-        j.                  dd|      j%                         }t1        |      }d}t-        j2                  d |t,        j4                        rd}t-        j2                  d!|      rd}|r|d"xx   dz  cc<   Cd#|d d$ v rV|j+                  d#d      }t        |      d%k(  r'|d   j%                         r|d   j%                         }n|d"xx   dz  cc<   d&|j7                         v sd'|v rGt-        j8                  d(|      }|r |j;                  d      j%                         }n|d"xx   dz  cc<   t#        |      }t=        |      }t?        |      }|r||	v r|	|   }d}d }|D ]  }t=        |d         }|st?        |      } d}!||k(  rd)}!no|rm| rkd}"tA        t        |      t        |             }#tC        |#      D ]  }$||$   | |$   k(  r|"dz  }" n |"d%k\  rd*}!n!|"dk(  r|d   }%t        |%      d+k\  r|%d,vrd-}!nd+}!|!|kD  s|!}|} |rE|d.k\  r@|d   }|d   }d/| d0}|d1xx   dz  cc<   t        j)                  d2||d d3 ||d   d d3 |       |s:|d4xx   dz  cc<   | j                  rt        j)                  d5|||r|d d6 nd       V| j                  sd|j                  d7||f       |rm|rk|j                  d8|f       |jE                         }&|&rFt        |&d
         s8|j                  d9||f       |d:xx   dz  cc<   t        j                  d;||       |jG                           |d   |d1   z   }'t        j                  d       t        j                  d<       t        j                  d=|d>          t        j                  d?|d          t        j                  d@|d1          t        j                  dA|d:          t        j                  dB|d4          t        j                  dC|d"          t        j                  dD|d          t        j                  dE|'|d>   rdF|'z  |d>   z  nd       | j                  st        j                  dG       t        j                  d       |jI                          |jI                          y )INz<============================================================zDPP Permit Match (apply=%s)T)
dictionaryz}
        SELECT id, permit_number, address_display, parcel_tmk, job_id
        FROM permit_sync
        WHERE job_id = 0
    z LIMIT zFound %d unmatched permitsa  
        SELECT c.id as customer_id, c.cust_tmk, c.address, c.city, c.zip_code,
               j.id as job_id, j.building_permit
        FROM customers c
        JOIN jobs j ON j.customer_id = c.id
        WHERE c.address IS NOT NULL AND c.address != ''
    z!Loaded %d AEI jobs with addressescust_tmkaddresszBTMK index: %d unique TMKs, Address index: %d unique street numbersr   )totalmatched_tmkmatched_addrtmk_backfilledno_matchno_datano_real_addridpermit_number
parcel_tmkaddress_displayr   r   r   c                     | d   S )Njob_id )rs    r   <lambda>zrun_matching.<locals>.<lambda>I  s
    8 r   )r   r   customer_idTMKr   z'  TMK match: %s (TMK %s) -> job %d (%s),r|   FzN^\d+\s+(PV\b|AMPS?\b|module|panel|INVERTER|TO\s+EXISTING|BATTERY|SOLAR|WITH\b)z^\d+-\d+\s*-\s*\(r   ]      zJOB CANCELLEDz***z	\]\s*(.+)
   r   r   )	THEANDr*   r(   r,   r.   r0   r2   r4   r      zADDRESS(score=)r   z2  Address match: %s (%s) -> job %d (%s) [score=%d](   r   z   No match: %s (TMK=%s, addr=%s)2   z0UPDATE permit_sync SET job_id = %s WHERE id = %sz,SELECT cust_tmk FROM customers WHERE id = %sz0UPDATE customers SET cust_tmk = %s WHERE id = %sr   z$    Backfilled TMK %s -> customer %dzMatching complete!z  Total unmatched:     %dr   z  Matched by TMK:      %dz  Matched by address:  %dz  TMK backfilled:      %dz  No match found:      %dz6  No real address:     %d (description text, TMK-only)z  No address/TMK data: %dz"  Total matched:       %d (%.1f%%)g      Y@z6  (DRY RUN - no changes written. Use --apply to write)r   )%r   r   loginfoapplymysql	connectorconnect	DB_CONFIGcursorlimitexecutefetchallr"   r&   
setdefaultappendr   r   maxdebugr   r   r!   r   r   Ir   searchr   r   r   minrangefetchonecommitclose)(argsconnr   sql	unmatchedaei_jobs	tmk_indexrownorm
addr_indexr   statsr   	permit_idpermit_namesf_tmksf_addrmatched_job_idmatched_customer_idmatch_method
candidatesbest	sf_streetis_descriptionbracket_parttmk_addrsf_numnorm_sfsf_words
best_scorebest_rownorm_aei	aei_wordsscorematchingmin_leniwordcusttotal_matcheds(                                           r   run_matchingr     sZ   $,,HHXHH*DJJ7HHX??""/Y/D[[D[)FC
 zz%%
NN3!IHH)3y>: NN  	  HHH0#h-@ I 7S_-  r*11#67 J 7#C	N3!!#r*11#67
 HHQ^S_.
 YE  SdG	(q/&'-2446g)! f	)"6*Jz'<=D!(^N"&}"5 L- A% II?!6>4	?L 'c*1-335I>INTTVI1)<I
 #Nxx 12;RTTC!%xx,i8!%n%*%in$(sA6|$)l1o.C.C.E ,Q 5 5 7I.)Q.))//"33u	7I99\9= (q 1 7 7 9I.)Q.)*95F'	2G+G4H&J.'/

% $'C0Y@H#  4X >IE (* "!i $%"%c(mS^"D!&w &A'{il: (A %	& $q=$%E%] $,A;D"4yA~$>w2w()()z)%*
#&I$'L 
a%-h%7N*2=*A'%3J<q#AL.)Q.)IIR)9Sb>>&y1#26
D *"||		<%vwws|BP ::NNB+ -B(* (d:.> ?NNJ!45 *+q0+HHC#%8: KKMgSl -(5+@@MHHXHH!"HH(%.9HH(%*>?HH(%*?@HH(%0@*ABHH(%
*;<HHEu^G\]HH(%	*:;HH1=7<W~U]"U7^31N::IJHHX
LLNJJLr   c                      t        j                  d      } | j                  ddd       | j                  dt        d d	       | j                  d
dd       | j	                         }t        |       y )Nz.DPP Permit Match - Link SF permits to AEI jobs)descriptionz--apply
store_truez&Apply matches to DB (default: dry run))actionhelpz--limitz"Limit number of permits to process)typedefaultr  z	--verbosezDebug logging)argparseArgumentParseradd_argumentint
parse_argsr  )parserr   s     r   mainr    sv    $$1abF
	,E  G
	TA  C
LODr   __main__)F)__doc__r  r   r   sysmysql.connectorr   r   	getLoggerr   r   r&   r   r   r   r   r   r   r   r  r  __name__r   r   r   <module>r"     sI  (   	 
  '		 g$N/nde%,d4:D
D4!($0;V e  )25 ;DU U	 5	 #)&	 39&	D
&:4dN zF r   