Patent Data Processing - SQL Steps
Citations
To Extract Patents with Numbers Only and to Ignore Other RegExes CREATE OR REPLACE FUNCTION cleanpatno (text) RETURNS bigint AS $$ if ($_[0]) { my $var=$_[0]; if ($var=~/^\d*$/) {return $var;} return undef; } return undef; $$ LANGUAGE plperl;
- Columns *********************
patentdata:
Column | Type | Modifiers
+-------------------+-----------
patent | integer | cit_date | date | cit_name | character varying | cit_kind | character varying | cit_country | character varying | citation | integer | category | character varying | citseq | integer |
SELECT patent as citingpatentnumber, citation AS citedpatentnumber INTO citations_merged FROM citations; --SELECT 38452957
patent_2015:
Column | Type | Modifiers
+---------+-----------
citingpatentnumber | integer | citingpatentcountry | text | citedpatentnumber | text | citedpatentcountry | text |
SELECT CAST(citingpatentnumber AS bigint), CAST(cleanpatno( citedpatentnumber) AS bigint) as citedpatentnumber INTO citations_merged FROM citations; -- RESULT : SELECT 59227881
FINAL TABLE:
CREATE TABLE citations ( citingpatentnumber bigint, citedpatentnumber bigint );
Copy Statements:
patentdata: \COPY citations_merged TO '/tmp/merged_citations_export1.txt' DELIMITER AS E'\t' HEADER NULL AS CSV; --COPY 38452957
patent_2015: \COPY citations_merged TO '/tmp/merged_citations_export.txt' DELIMITER AS E'\t' HEADER NULL AS CSV; -- RESULT : COPY 59227881
allpatent: \COPY citations FROM '/tmp/merged_citations_export.txt' DELIMITER AS E'\t' HEADER NULL AS CSV; --RESULT : COPY 59227881
\COPY citations FROM '/tmp/merged_citations_export1.txt' DELIMITER AS E'\t' HEADER NULL AS CSV; --RESULT: COPY 38452957
CLONING:
CREATE DATABASE allpatentsProcessed WITH TEMPLATE allpatent OWNER researcher;