1,956 bytes added
, 16:33, 14 June 2016
Citations
To Extract Patents with Numbers Only and to Ignore Other RegExes
CREATE OR REPLACE FUNCTION cleanpatno (text) RETURNS bigint AS $$
if ($_[0]) {
my $var=$_[0];
if ($var=~/^\d*$/) {return $var;}
return undef;
}
return undef;
$$ LANGUAGE plperl;
************ Columns *********************
patentdata:
Column | Type | Modifiers
-------------+-------------------+-----------
patent | integer |
cit_date | date |
cit_name | character varying |
cit_kind | character varying |
cit_country | character varying |
citation | integer |
category | character varying |
citseq | integer |
SELECT patent as citingpatentnumber, citation AS citedpatentnumber
INTO citations_merged
FROM citations;
--SELECT 38452957
patent_2015:
Column | Type | Modifiers
---------------------+---------+-----------
citingpatentnumber | integer |
citingpatentcountry | text |
citedpatentnumber | text |
citedpatentcountry | text |
SELECT CAST(citingpatentnumber AS bigint), CAST(cleanpatno( citedpatentnumber) AS bigint) as citedpatentnumber
INTO citations_merged
FROM citations;
-- RESULT : SELECT 59227881
FINAL TABLE:
CREATE TABLE citations
(
citingpatentnumber bigint,
citedpatentnumber bigint
);
Copy Statements:
patentdata:
\COPY citations_merged TO '/tmp/merged_citations_export1.txt' DELIMITER AS E'\t' HEADER NULL AS '' CSV;
--COPY 38452957
patent_2015:
\COPY citations_merged TO '/tmp/merged_citations_export.txt' DELIMITER AS E'\t' HEADER NULL AS '' CSV;
-- RESULT : COPY 59227881
allpatent:
\COPY citations FROM '/tmp/merged_citations_export.txt' DELIMITER AS E'\t' HEADER NULL AS '' CSV;
--RESULT : COPY 59227881
\COPY citations FROM '/tmp/merged_citations_export1.txt' DELIMITER AS E'\t' HEADER NULL AS '' CSV;
--RESULT: COPY 38452957
CLONING:
CREATE DATABASE allpatentsProcessed WITH TEMPLATE allpatent OWNER researcher;