Changes

Jump to navigation Jump to search
6,423 bytes added ,  14:06, 7 August 2018
A.*, B.latitude, B.longitude FROM companybasecore AS A LEFT JOIN Geocodesportco AS B ON A.coname=B.coname AND A.datefirstinv=B.datefirstinv AND A.statecode=B.statecode;
--48001
 
===Firms===
This process is largely the same as for companies. I found old firms that had already been geocoded and checked for accuracy.
 
DROP TABLE oldfirmcoords;
CREATE TABLE oldfirmcoords (
firmname varchar(255),
latitude numeric,
longitude numeric
);
\COPY oldfirmcoords FROM 'FirmCoords.txt' DELIMITER AS E'\t' HEADER NULL AS '' CSV
--5556
 
DROP TABLE firmoldfilter;
CREATE TABLE firmoldfilter AS
SELECT *, CASE
WHEN longitude < -125 OR longitude > -66 OR latitude < 24 OR latitude > 50 OR latitude IS NULL OR longitude IS NULL THEN 1::int ELSE
0::int END AS excludeflag FROM oldfirmcoords;
--5556
 
Since oldfirmcoords does not have state codes, we have to find a way to include state codes to add in companies based in Puerto Rico, Hawaii, and Alaska. I did this by matching the firmoldfilter table back to the firm base table.
 
DROP TABLE firmcoordsmatch1;
CREATE TABLE firmcoordsmatch1 AS SELECT
A.firmname, A.state, B.latitude, B.longitude, COALESCE(B.excludeflag, 1) AS excludeflag FROM firmbasecore AS A LEFT JOIN firmoldfilter AS B ON A.firmname=B.firmname;
--15437
 
Then the process of tagging the PR, HI, and AK companies and including only correctly tagged companies is the same as for companies.
 
DROP TABLE firmcoordsexternal;
CREATE TABLE firmcoordsexternal AS
SELECT *, CASE WHEN state='PR' AND latitude!=44.9331 AND longitude!=7.54012 THEN 1::int ELSE 0::int END as prflag,
CASE WHEN state='HI' AND latitude!=44.9331 AND longitude!=7.54012 THEN 1::int ELSE 0::int END as hiflag,
CASE WHEN state='AK' AND latitude!=44.9331 AND longitude!=7.54012 THEN 1::int ELSE 0::int END as akflag
FROM firmcoordsmatch1;
--15437
 
DROP TABLE goodfirmgeoold;
CREATE TABLE goodfirmgeoold AS SELECT
A.*, B.latitude, B.longitude, B.prflag, B.excludeflag, B.hiflag, B.akflag FROM firmcoreonedupremoved AS A LEFT JOIN firmcoordsexternal AS B ON A.firmname=B.firmname
WHERE excludeflag=0 or prflag=1 or hiflag=1 or akflag=1;
--5346
 
Find the remaining firms and run the geocode script on these firms
 
DROP TABLE remainingfirm;
CREATE TABLE remainingfirm AS SELECT A.firmname, A.addr1, A.addr2, A.city, A.state, A.zip FROM firmcoreonedupremoved AS A LEFT JOIN goodfirmgeoold AS B ON A.firmname=B.firmname
WHERE B.firmname IS NULL AND A.addr1 IS NOT NULL AND A.msacode!='9999';
--706
 
\COPY remainingfirm TO 'FirmGeoRemaining.txt' DELIMITER AS E'\t' HEADER NULL AS '' CSV
--706
 
DROP TABLE firmremainingcoords;
CREATE TABLE firmremainingcoords(
firmname varchar(255),
latitude numeric,
longitude numeric
);
 
\COPY firmremainingcoords FROM 'FirmRemainingCoords.txt' DELIMITER AS E'\t' HEADER NULL AS '' CSV
--706
 
Follow the same filtering process as above to get the good geocodes.
 
DROP TABLE firmnewfilter;
CREATE TABLE firmnewfilter AS
SELECT *, CASE
WHEN longitude < -125 OR longitude > -66 OR latitude < 24 OR latitude > 50 OR latitude IS NULL OR longitude IS NULL THEN 1::int ELSE
0::int END AS excludeflag FROM firmremainingcoords;
--706
 
DROP TABLE firmcoordsmatch2;
CREATE TABLE firmcoordsmatch2 AS SELECT
A.firmname, A.state, B.latitude, B.longitude, COALESCE(B.excludeflag, 1) AS excludeflag FROM firmcoreonedupremoved AS A LEFT JOIN firmnewfilter AS B ON A.firmname=B.firmname;
--15437
 
DROP TABLE firmcoordsexternalremaining;
CREATE TABLE firmcoordsexternalremaining AS
SELECT *, CASE WHEN state='PR' AND latitude!=44.9331 AND longitude!=7.54012 THEN 1::int ELSE 0::int END as prflag,
CASE WHEN state='HI' AND latitude!=44.9331 AND longitude!=7.54012 THEN 1::int ELSE 0::int END as hiflag,
CASE WHEN state='AK' AND latitude!=44.9331 AND longitude!=7.54012 THEN 1::int ELSE 0::int END as akflag
FROM firmcoordsmatch2;
--15437
 
DROP TABLE goodfirmgeonew;
CREATE TABLE goodfirmgeonew AS SELECT A.*, B.latitude, B.longitude, B.prflag, B.excludeflag, B.hiflag, B.akflag FROM firmcoreonedupremoved AS A LEFT JOIN firmcoordsexternalremaining AS B
ON A.firmname=B.firmname
WHERE excludeflag=0 or prflag=1 or hiflag=1 or akflag=1;
--703
 
Combine the old and new geocoded firms and match them to firm base to get a firm geo table.
 
DROP TABLE firmgeocoords;
CREATE TABLE firmgeocoords AS
SELECT * FROM goodfirmgeonew
UNION
SELECT * FROM goodfirmgeoold;
--6049
 
DROP TABLE firmgeocore;
CREATE TABLE firmgeocore AS
SELECT A.*, B.latitude, B.longitude FROM firmbasecore AS A LEFT JOIN firmgeocoords AS B ON A.firmname=B.firmname;
--15437
 
===Branch Offices===
I did not use old branch office data because I could not find it anywhere in the old data set.
 
First copy all of the needed data out of the database to do geocoding.
 
\COPY (SELECT A.firmname, A.boaddr1, A.boaddr2, A.bocity, A.bostate, A.bozip FROM bonound AS A WHERE A.boaddr1 IS NOT NULL) TO 'BranchOffices.txt' WITH DELIMITER AS E'\t' HEADER
NULL AS '' CSV
--2046
 
Then load the data into the database and follow the same filtering process as above.
 
DROP TABLE bogeo;
CREATE TABLE bogeo (
firmname varchar(255),
latitude numeric,
longitude numeric
);
 
\COPY bogeo FROM 'BranchOfficesGeo.txt' WITH DELIMITER AS E'\t' HEADER NULL AS '' CSV
--2046
 
DROP TABLE bogeo1;
CREATE TABLE bogeo1 AS SELECT *, CASE
WHEN longitude < -125 OR longitude > -66 OR latitude < 24 OR latitude > 50 OR latitude IS NULL OR longitude IS NULL THEN 1::int ELSE
0::int END AS excludeflag FROM bogeo;
--2046
 
DROP TABLE bomatchgeo;
CREATE TABLE bomatchgeo AS
SELECT A.*, B.latitude, B.longitude, COALESCE(B.excludeflag, 1) AS excludeflag FROM branchofficecore AS A LEFT JOIN bogeo1 AS B ON A.firmname=B.firmname;
--10032
 
DROP TABLE bogeo2;
CREATE TABLE bogeo2 AS
SELECT *, CASE WHEN bostate='PR' AND latitude!=44.9331 AND longitude!=7.54012 THEN 1::int ELSE 0::int END as prflag,
CASE WHEN bostate='HI' AND latitude!=44.9331 AND longitude!=7.54012 THEN 1::int ELSE 0::int END as hiflag,
CASE WHEN bostate='AK' AND latitude!=44.9331 AND longitude!=7.54012 THEN 1::int ELSE 0::int END as akflag
FROM bomatchgeo;
--10032
 
Match the correctly geocoded branch offices back to firm base to get the final table.
 
DROP TABLE bogeocore1;
CREATE TABLE bogeocore1 AS
SELECT * FROM bogeo2 WHERE excludeflag=0 or prflag=1 or hiflag=1 or akflag=1;
--1161
 
DROP TABLE firmbogeo;
CREATE TABLE firmbogeo AS
SELECT A.*, B.latitude AS BOLatitude, B.longitude AS BOLongitude FROM firmgeocore AS A LEFT JOIN bogeocore1 AS B ON A.firmname=B.firmname;
--15437
158

edits

Navigation menu