Changes
Jump to navigation
Jump to search
*Chairman
*CEO (Chief Exec. Officer)
*CFO
*COO
*CIO/CTO?
*On the Board:
**Board Member (may inc. VCs, etc.)
**Director (may inc. VCs, etc.)
**All the C's
**Exec Officer (may inc. VCs, etc.)
*Senior position:
**President ??? Fast look at co's that have presidents, do they also have C's?
**Vice-president
no edit summary
=The Research=
*SQL info can also be found on researcher@128.42.44.181 in the Women folder. Called "women.sql".
==creating basic tables==
DROP TABLE companyreal;
CREATE TABLE companyreal(
datefounded date,
companyname varchar(100),
prefix varchar(5),
firstname varchar(100),
lastname varchar(100),
jobtitle varchar(100),
companystate varchar(100),
companycity varchar(100),
industrygroup varchar(100),
amtinvestedthous float,
rounds int
);
\COPY companyreal FROM 'womencompanieslast-normal.txt' WITH DELIMITER AS E'\t' HEADER NULL AS '' CSV
--129536
DROP TABLE fundreal;
CREATE TABLE fundreal(
datefounded date,
fundname varchar(100),
prefix varchar(5),
firstname varchar(100),
lastname varchar(100),
jobtitle varchar(100),
fundstate varchar(100),
fundcity varchar(100),
fundtargetsizemil float,
firmname varchar(100)
);
\COPY fundreal FROM 'womenfundslast-normal.txt' WITH DELIMITER AS E'\t' HEADER NULL AS '' CSV
--156005
==adding variable where if person is woman, adds value of 1 to their name==
DROP TABLE manwomanfund;
CREATE TABLE manwomanfund AS
SELECT prefix, lastname, datefounded, jobtitle, fundname, fundcity, fundstate,
removeinitial(firstname) as firstname,
CASE WHEN prefix='Ms' THEN 1::int
WHEN prefix='Mr' THEN 0::int
ELSE Null::int END AS womanman FROM fundreal;
--156005
DROP TABLE manwomancompany;
CREATE TABLE manwomancompany AS
SELECT prefix, lastname, datefounded, jobtitle, companyname, companycity, companystate, industrygroup,
removeinitial(firstname) as firstname, amtinvestedthous, rounds,
CASE WHEN prefix='Ms' THEN 1::int
WHEN prefix='Mr' THEN 0::int
ELSE Null::int END AS womanman FROM companyreal;
--129536
==for people without a prefix, trying to assign gender variable to them based on list of common names==
DROP TABLE manwomanfund2;
CREATE TABLE manwomanfund2 AS
SELECT datefounded, prefix, firstname, lastname, fundname, jobtitle, womanman AS origwomanman, mf,
fundcity, fundstate,
CASE WHEN womanman IS NULL AND mf IS NOT NULL THEN mf
WHEN womanman IS NOT NULL THEN womanman
ELSE NULL::int END AS womanman
FROM manwomanfund LEFT JOIN commonnamesclean ON firstname = name
WHERE firstname IS NOT NULL;
--152819
DROP TABLE manwomancompany2;
CREATE TABLE manwomancompany2 AS
SELECT datefounded, prefix, firstname, lastname, companyname, jobtitle, womanman AS origwomanman, mf,
companycity, companystate, industrygroup, rounds, amtinvestedthous,
CASE WHEN womanman IS NULL AND mf IS NOT NULL THEN mf
WHEN womanman IS NOT NULL THEN womanman
ELSE NULL::int END AS womanman
FROM manwomancompany LEFT JOIN commonnamesclean ON lower(firstname) = lower(name)
WHERE firstname IS NOT NULL;
--123278
--%, state, city, round no
==me playing around, these tables not relevant==
DROP TABLE fundWomenCEOs;
CREATE TABLE fundwomentot AS
SELECT EXTRACT(YEAR FROM datefounded), AVG(womanman) AS PercentWomen
FROM manwomanfund2 GROUP BY EXTRACT(YEAR FROM datefounded);
--37
DROP TABLE companyWomen;
CREATE TABLE companywomentot AS
SELECT EXTRACT(YEAR FROM datefounded), AVG(womanman) AS PercentWomen
FROM manwomancompany2 GROUP BY EXTRACT(YEAR FROM datefounded);
--36
DROP TABLE womencompanystate;
CREATE TABLE womencompanystate AS
SELECT companystate, AVG(womanman) AS WomenPerState
FROM manwomancompany2 GROUP BY companystate;
--51
DROP TABLE womenfundstate;
CREATE TABLE womencfundstate AS
SELECT fundstate, AVG(womanman) AS WomenPerState
FROM manwomanfund2 GROUP BY fundstate;
--51
DROP TABLE womencompanycity;
CREATE TABLE womencompanycity AS
SELECT companycity, AVG(womanman) AS WomenPerCity
FROM manwomancompany2 GROUP BY companycity;
==assigning variable to titles==
DROP TABLE titles01;
CREATE TABLE titles01(
title varchar(100),
chairman int,
CEO int,
CFO int,
COO int,
CIO int,
CTO int,
boardmember int,
president int,
vicepresident int,
founder int,
director int
);
\COPY titles01 FROM 'impotitleslast.txt' WITH DELIMITER AS E'\t' HEADER NULL AS '' CSV
DROP TABLE title02;
==massive conglomeration table==
DROP TABLE ManWomanCompany3;
CREATE TABLE ManWomanCompany3 AS
SELECT Manwomancompany2.*,
CASE WHEN chairman IS NOT NULL THEN 1::int ELSE 0::int END AS chairman,
CASE WHEN CEO IS NOT NULL THEN 1::int ELSE 0::int END AS CEO,
CASE WHEN CFO IS NOT NULL THEN 1::int ELSE 0::int END AS CFO,
CASE WHEN COO IS NOT NULL THEN 1::int ELSE 0::int END AS COO,
CASE WHEN CIO IS NOT NULL THEN 1::int ELSE 0::int END AS CIO,
CASE WHEN CTO IS NOT NULL THEN 1::int ELSE 0::int END AS CTO,
CASE WHEN boardmember IS NOT NULL THEN 1::int ELSE 0::int END AS boardmember,
CASE WHEN president IS NOT NULL THEN 1::int ELSE 0::int END AS president,
CASE WHEN vicepresident IS NOT NULL THEN 1::int ELSE 0::int END AS vicepresident,
CASE WHEN founder IS NOT NULL THEN 1::int ELSE 0::int END AS founder,
CASE WHEN director IS NOT NULL THEN 1::int ELSE 0::int END AS director
FROM ManwomanCompany2
LEFT JOIN titles01 ON manwomancompany2.jobtitle=titles01.title;
==Example output data==
*Data on likelihood of follow-on round
==Variables and tablesstill to do:== Table Entrep2:*Prefix*FirstName*Title Todo:*Rename #this table to something meaningful! - DONE*Get distinct prefixes data still discounts a (Mr, Mrs, Drsmall) - DONE*Get distinct titles**Choose the titles! - DONE**Either make a lookup table or find the patterns**Build a new variable to add the percentage of data (doctors who didn'clean title'*Create t have a man/women variable (0 if man, 1 if womencommon name) - DONE**Load up your census names - DONE**Find the distinct man/women names -DONE. built SQL table w name plus 0#have data organized,1 value.**Write an SQL query now we need to figure out how to JOIN a man/woman variable when title = DR*Build the output table extract it in SQL CREATE TABLE WomenCEOs AS SELECT year, SUM(manwoman)/COUNT(manwoman) AS PercentWomenCEOs FROM tablename WHERE cleantitle='CEO' GROUP BY Year; Table Entrep1:*Prefix*FirstName*Job Title Todo:*Same as above, but different lookups meaningful terms for job titlethe report. ===Key Titles=== On the company side:
=The Paper=