A.*, B.latitude, B.longitude FROM companybasecore AS A LEFT JOIN Geocodesportco AS B ON A.coname=B.coname AND A.datefirstinv=B.datefirstinv AND A.statecode=B.statecode;
--48001
===Firms===
This process is largely the same as for companies. I found old firms that had already been geocoded and checked for accuracy.
DROP TABLE oldfirmcoords;
CREATE TABLE oldfirmcoords (
firmname varchar(255),
latitude numeric,
longitude numeric
);
\COPY oldfirmcoords FROM 'FirmCoords.txt' DELIMITER AS E'\t' HEADER NULL AS '' CSV
--5556
DROP TABLE firmoldfilter;
CREATE TABLE firmoldfilter AS
SELECT *, CASE
WHEN longitude < -125 OR longitude > -66 OR latitude < 24 OR latitude > 50 OR latitude IS NULL OR longitude IS NULL THEN 1::int ELSE
0::int END AS excludeflag FROM oldfirmcoords;
--5556
Since oldfirmcoords does not have state codes, we have to find a way to include state codes to add in companies based in Puerto Rico, Hawaii, and Alaska. I did this by matching the firmoldfilter table back to the firm base table.
DROP TABLE firmcoordsmatch1;
CREATE TABLE firmcoordsmatch1 AS SELECT
A.firmname, A.state, B.latitude, B.longitude, COALESCE(B.excludeflag, 1) AS excludeflag FROM firmbasecore AS A LEFT JOIN firmoldfilter AS B ON A.firmname=B.firmname;
--15437
Then the process of tagging the PR, HI, and AK companies and including only correctly tagged companies is the same as for companies.
DROP TABLE firmcoordsexternal;
CREATE TABLE firmcoordsexternal AS
SELECT *, CASE WHEN state='PR' AND latitude!=44.9331 AND longitude!=7.54012 THEN 1::int ELSE 0::int END as prflag,
CASE WHEN state='HI' AND latitude!=44.9331 AND longitude!=7.54012 THEN 1::int ELSE 0::int END as hiflag,
CASE WHEN state='AK' AND latitude!=44.9331 AND longitude!=7.54012 THEN 1::int ELSE 0::int END as akflag
FROM firmcoordsmatch1;
--15437
DROP TABLE goodfirmgeoold;
CREATE TABLE goodfirmgeoold AS SELECT
A.*, B.latitude, B.longitude, B.prflag, B.excludeflag, B.hiflag, B.akflag FROM firmcoreonedupremoved AS A LEFT JOIN firmcoordsexternal AS B ON A.firmname=B.firmname
WHERE excludeflag=0 or prflag=1 or hiflag=1 or akflag=1;
--5346
Find the remaining firms and run the geocode script on these firms
DROP TABLE remainingfirm;
CREATE TABLE remainingfirm AS SELECT A.firmname, A.addr1, A.addr2, A.city, A.state, A.zip FROM firmcoreonedupremoved AS A LEFT JOIN goodfirmgeoold AS B ON A.firmname=B.firmname
WHERE B.firmname IS NULL AND A.addr1 IS NOT NULL AND A.msacode!='9999';
--706
\COPY remainingfirm TO 'FirmGeoRemaining.txt' DELIMITER AS E'\t' HEADER NULL AS '' CSV
--706
DROP TABLE firmremainingcoords;
CREATE TABLE firmremainingcoords(
firmname varchar(255),
latitude numeric,
longitude numeric
);
\COPY firmremainingcoords FROM 'FirmRemainingCoords.txt' DELIMITER AS E'\t' HEADER NULL AS '' CSV
--706
Follow the same filtering process as above to get the good geocodes.
DROP TABLE firmnewfilter;
CREATE TABLE firmnewfilter AS
SELECT *, CASE
WHEN longitude < -125 OR longitude > -66 OR latitude < 24 OR latitude > 50 OR latitude IS NULL OR longitude IS NULL THEN 1::int ELSE
0::int END AS excludeflag FROM firmremainingcoords;
--706
DROP TABLE firmcoordsmatch2;
CREATE TABLE firmcoordsmatch2 AS SELECT
A.firmname, A.state, B.latitude, B.longitude, COALESCE(B.excludeflag, 1) AS excludeflag FROM firmcoreonedupremoved AS A LEFT JOIN firmnewfilter AS B ON A.firmname=B.firmname;
--15437
DROP TABLE firmcoordsexternalremaining;
CREATE TABLE firmcoordsexternalremaining AS
SELECT *, CASE WHEN state='PR' AND latitude!=44.9331 AND longitude!=7.54012 THEN 1::int ELSE 0::int END as prflag,
CASE WHEN state='HI' AND latitude!=44.9331 AND longitude!=7.54012 THEN 1::int ELSE 0::int END as hiflag,
CASE WHEN state='AK' AND latitude!=44.9331 AND longitude!=7.54012 THEN 1::int ELSE 0::int END as akflag
FROM firmcoordsmatch2;
--15437
DROP TABLE goodfirmgeonew;
CREATE TABLE goodfirmgeonew AS SELECT A.*, B.latitude, B.longitude, B.prflag, B.excludeflag, B.hiflag, B.akflag FROM firmcoreonedupremoved AS A LEFT JOIN firmcoordsexternalremaining AS B
ON A.firmname=B.firmname
WHERE excludeflag=0 or prflag=1 or hiflag=1 or akflag=1;
--703
Combine the old and new geocoded firms and match them to firm base to get a firm geo table.
DROP TABLE firmgeocoords;
CREATE TABLE firmgeocoords AS
SELECT * FROM goodfirmgeonew
UNION
SELECT * FROM goodfirmgeoold;
--6049
DROP TABLE firmgeocore;
CREATE TABLE firmgeocore AS
SELECT A.*, B.latitude, B.longitude FROM firmbasecore AS A LEFT JOIN firmgeocoords AS B ON A.firmname=B.firmname;
--15437
===Branch Offices===
I did not use old branch office data because I could not find it anywhere in the old data set.
First copy all of the needed data out of the database to do geocoding.
\COPY (SELECT A.firmname, A.boaddr1, A.boaddr2, A.bocity, A.bostate, A.bozip FROM bonound AS A WHERE A.boaddr1 IS NOT NULL) TO 'BranchOffices.txt' WITH DELIMITER AS E'\t' HEADER
NULL AS '' CSV
--2046
Then load the data into the database and follow the same filtering process as above.
DROP TABLE bogeo;
CREATE TABLE bogeo (
firmname varchar(255),
latitude numeric,
longitude numeric
);
\COPY bogeo FROM 'BranchOfficesGeo.txt' WITH DELIMITER AS E'\t' HEADER NULL AS '' CSV
--2046
DROP TABLE bogeo1;
CREATE TABLE bogeo1 AS SELECT *, CASE
WHEN longitude < -125 OR longitude > -66 OR latitude < 24 OR latitude > 50 OR latitude IS NULL OR longitude IS NULL THEN 1::int ELSE
0::int END AS excludeflag FROM bogeo;
--2046
DROP TABLE bomatchgeo;
CREATE TABLE bomatchgeo AS
SELECT A.*, B.latitude, B.longitude, COALESCE(B.excludeflag, 1) AS excludeflag FROM branchofficecore AS A LEFT JOIN bogeo1 AS B ON A.firmname=B.firmname;
--10032
DROP TABLE bogeo2;
CREATE TABLE bogeo2 AS
SELECT *, CASE WHEN bostate='PR' AND latitude!=44.9331 AND longitude!=7.54012 THEN 1::int ELSE 0::int END as prflag,
CASE WHEN bostate='HI' AND latitude!=44.9331 AND longitude!=7.54012 THEN 1::int ELSE 0::int END as hiflag,
CASE WHEN bostate='AK' AND latitude!=44.9331 AND longitude!=7.54012 THEN 1::int ELSE 0::int END as akflag
FROM bomatchgeo;
--10032
Match the correctly geocoded branch offices back to firm base to get the final table.
DROP TABLE bogeocore1;
CREATE TABLE bogeocore1 AS
SELECT * FROM bogeo2 WHERE excludeflag=0 or prflag=1 or hiflag=1 or akflag=1;
--1161
DROP TABLE firmbogeo;
CREATE TABLE firmbogeo AS
SELECT A.*, B.latitude AS BOLatitude, B.longitude AS BOLongitude FROM firmgeocore AS A LEFT JOIN bogeocore1 AS B ON A.firmname=B.firmname;
--15437