All the cleaned cities for U.S. patents are stored in ptoassigneend_us_citycleaned. (# 3572605)
====Functions -- Simplified to Simplify SQL Code==== ===== Extraction ===== * Extract Postcode SQL function: CREATE OR REPLACE FUNCTION ExtractPostcode(adr text) RETURNS text AS $$ BEGIN RETURN SUBSTRING(adr, '\d{5}[-]\d{4}'); END; $$ LANGUAGE plpgsql; * Extract State SQL function: CREATE OR REPLACE FUNCTION ExtractState(adr text) RETURNS text AS $$ SELECT CASE WHEN adr ~* '([,]|[.])\s\w{2,}\s{0,}\w{0,}\s{1,}\d{5}[-]\d{4}' THEN LTRIM(RTRIM(replace(regexp_replace(SUBSTRING(adr, '[,]\s\w{2,}\s{0,}\w{0,}\s{1,}\d{5}[-]\d{4}'), '\d{5}-\d{4}', ''),',',''))) WHEN adr ~* '(^|\s)\w{2}\s{1}\d{5}[-]\d{4}' THEN regexp_replace(SUBSTRING(adr, '\w{2}\s{1,}\d{5}[-]\d{4}'), '\d{5}-\d{4}', '') WHEN adr ~* 'D[.]C[.]\s\d{5}-\d{4}' THEN 'D.C.' ELSE NULL END AS result; $$ LANGUAGE SQL; * Extract City SQL function: CREATE OR REPLACE FUNCTION ExtractCity(adr text) RETURNS text AS $$ SELECT CASE WHEN adr ~* '\s{2,}\w{1,}\s{0,}\w{0,}\s{0,}[,]\s{0,}\w{1,}\s{0,}\w{0,}\s{0,}\d{5}[-]\d{4}' THEN LTRIM(RTRIM(REPLACE(SUBSTRING(SUBSTRING(adr, '\s{2,}\w{1,}\s{0,}\w{0,}\s{0,}[,]\s{0,}\w{1,}\s{0,}\w{0,}\s{0,}\d{5}[-]\d{4}'),'.*[,]'),',',''))) WHEN adr ~* '[,]\s{0,}\w{1,}\s{0,}\w{0,}\s{0,}[,]\s{0,}\w{1,}\s{0,}\w{0,}\s{0,}\d{5}[-]\d{4}' THEN LTRIM(RTRIM(REPLACE(SUBSTRING(SUBSTRING(adr, '[,]\s{0,}\w{1,}\s{0,}\w{0,}\s{0,}[,]\s{0,}\w{1,}\s{0,}\w{0,}\s{0,}\d{5}[-]\d{4}'),'[,].*[,]'),',',''))) WHEN adr ~* '^\w{1,}\s{0,}\w{0,}\s{0,}[,]\s{0,}\w{1,}\s{0,}\w{0,}\s{0,}\d{5}[-]\d{4}' THEN REPLACE(SUBSTRING(SUBSTRING(adr, '^\w{1,}\s{0,}\w{0,}\s{0,}[,]\s{0,}\w{1,}\s{0,}\w{0,}\s{0,}\d{5}[-]\d{4}'), '.*[,]'),',', '') WHEN adr ~* '^\w{1,}\s{0,}\w{0,}\s{0,}\w{2}\s{0,}\d{5}[-]\d{4}' THEN regexp_replace(SUBSTRING(adr, '^\w{1,}\s{0,1}\w{0,}\s{1,}\w{2}\s{0,}\d{5}[-]\d{4}'), '\w{2}\s\d{5}[-]\d{4}', '') WHEN adr ~* 'BATON[,] ROUGE[,]\s{1}LA' THEN 'BATON ROUGE' WHEN adr ~* 'ST[.]\s{1}PAUL' THEN 'ST. PAUL' WHEN adr ~* 'ARLINGTON[,]\s{1}VA' THEN 'ARLINGTON' ELSE NULL END AS result; $$ LANGUAGE SQL;
====Issues====