--42093
You can see that there are many keys that do not exist in the other datasets.
==Redoing the companybase with the new SDC company data==
Since we did another pull in SDC to get the correct city and addresses. We need to update the companybasecore table which means we need to clean the new companybase. Then this will recreate the roundplus and roundlevel outputs.
SELECT COUNT(*)
FROM (SELECT DISTINCT coname, statecode, datefirstinv FROM sdccompanybase)a;
--44996
SELECT COUNT(*)
FROM (SELECT coname, statecode, datefirstinv FROM sdccompanybase)a;
--44997
DROP TABLE sdccompanybase1;
CREATE TABLE sdccompanybase1 AS
SELECT *,
CASE
WHEN nationcode = 'US' THEN 1::int
ELSE 0::int
END AS alwaysusflag,
CASE
WHEN coname = 'Undisclosed Company' THEN 1::int
ELSE 0::int
END AS undisclosedflag
FROM sdccompanybase;
--44997
SELECT COUNT(*)
FROM (SELECT coname, statecode, datefirstinv FROM sdccompanybase1 WHERE alwaysusflag = 1 AND undisclosedflag = 0)a;
--44966
SELECT COUNT(*)
FROM (SELECT DISTINCT coname, statecode, datefirstinv FROM sdccompanybase1 WHERE alwaysusflag = 1 AND undisclosedflag = 0)a;
--44966
DROP TABLE sdccompanybasecore;
CREATE TABLE sdccompanybasecore AS
SELECT DISTINCT
coname,lastupdated,foundingdate,datelastinv,datefirstinv,investedk,city,description,msa,msacode,nationcode,statecode,addr1,addr2,indcla
ss,indsubgroup,indminorgroup,url,zip
FROM sdccompanybase1 WHERE nationcode = 'US' AND undisclosedflag=0;
--44966
SELECT COUNT(*)
FROM (SELECT coname, statecode, datefirstinv FROM sdccompanybasecore)a;
SELECT COUNT(*)
FROM (SELECT DISTINCT coname, statecode, datefirstinv FROM sdccompanybasecore)a;
DROP TABLE roundplus;
CREATE TABLE roundplus AS
SELECT roundcore.*, c.city, seedflag, earlyflag, laterflag, growthflag, transactionflag, excludeflag,
CASE WHEN roundcore.datefirstinv=roundcore.rounddate THEN 1::int ELSE 0::int END as dealflag,
CASE WHEN SELFlagbase.coname IS NOT NULL THEN 1::int ELSE 0::int END AS hadgrowthvc,
extract(year from roundcore.rounddate) as roundyear,
CASE WHEN rndamtdisck IS NOT NULL THEN rndamtdisck/1000 WHEN rndamtdisck IS NULL AND rndamtestk IS NOT NULL THEN rndamtestk/1000 ELSE
NULL::real END as roundamtm
FROM roundcore
LEFT JOIN SelFlagBase ON SelFlagBase.coname=roundcore.coname AND SelFlagBase.statecode=roundcore.statecode AND
SelFlagBase.datefirstinv=roundcore.datefirstinv
LEFT JOIN stageflags ON stageflags.coname=roundcore.coname AND stageflags.statecode=roundcore.statecode AND
stageflags.datefirstinv=roundcore.datefirstinv AND stageflags.rounddate=roundcore.rounddate
LEFT JOIN sdccompanybasecore AS c ON c.coname = roundcore.coname AND c.statecode = roundcore.statecode AND c.datefirstinv =
roundcore.datefirstinv;
--142999
DROP TABLE roundleveloutput;
CREATE TABLE roundleveloutput AS
SELECT city, statecode, roundyear as year,
sum(roundamtm*seedflag) AS seedamtm,
sum(roundamtm*earlyflag) AS earlyamtm,
sum(roundamtm*laterflag) AS lateramtm,
sum(roundamtm*growthflag) AS selamtm,
sum(seedflag) AS numseeds,
sum(earlyflag) AS numearly,
sum(laterflag) AS numlater,
sum(growthflag) AS numsel,
sum(dealflag) AS numdeals
FROM roundplus WHERE hadgrowthvc=1 GROUP BY city, statecode, roundyear ORDER BY city, statecode, roundyear;
--22374
DROP TABLE roundleveloutput2;
CREATE TABLE roundleveloutput2 AS
SELECT roundleveloutput.*, numalive
FROM roundleveloutput
LEFT JOIN alivecount ON alivecount.city=roundleveloutput.city AND alivecount.statecode=roundleveloutput.statecode AND
alivecount.year=roundleveloutput.year;
--22374
\COPY roundleveloutput2 TO 'roundleveloutput2.txt' WITH DELIMITER AS E'\t' HEADER NULL AS '' CSV
==Cleaning roundline==