1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
|
"""
Seed raw-layer reference tables with initial values.
Idempotent — safe to run repeatedly. Uses ON CONFLICT DO UPDATE
so display_name/notes stay current if you change them here.
Usage: python scripts/seed.py
"""
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from scripts.common.db import execute
def seed_source_system():
execute("""
INSERT INTO raw.source_system (code, display_name, url, notes) VALUES
('irs_xml', 'IRS Bulk XML E-Files', 'https://www.irs.gov/charities-non-profits/form-990-series-downloads', NULL),
('irs_pdf', 'IRS PDF Filings', NULL, NULL),
('irs_bmf', 'IRS EO BMF Extract', 'https://www.irs.gov/charities-non-profits/exempt-organizations-business-master-file-extract-eo-bmf', NULL),
('ny_ag', 'New York Attorney General', 'https://www.charitiesnys.com/RegistrySearch/search_charities.jsp', NULL),
('ca_ag', 'California Attorney General', 'https://rct.doj.ca.gov/Verification/Web/Search.aspx', NULL),
('fl_ag', 'Florida Charities', NULL, NULL),
('tx_ag', 'Texas Secretary of State', NULL, NULL),
('nj_ag', 'New Jersey Charities', NULL, NULL),
('il_ag', 'Illinois Attorney General', NULL, NULL),
('pa_ag', 'Pennsylvania Charities', NULL, NULL),
('tn_ag', 'Tennessee Secretary of State', NULL, NULL)
ON CONFLICT (code) DO UPDATE SET
display_name = EXCLUDED.display_name,
url = EXCLUDED.url;
""")
def seed_form_type():
execute("""
INSERT INTO raw.form_type (code, display_name, notes) VALUES
('990', 'Form 990', NULL),
('990PF', 'Form 990-PF', NULL),
('990EZ', 'Form 990-EZ', NULL),
('990O', 'Form 990-O', NULL),
('990T', 'Form 990-T', NULL),
('990A', 'Form 990 (Amended)', NULL),
('990PA', 'Form 990-PF (Amended)', NULL),
('990EA', 'Form 990-EZ (Amended)', NULL)
ON CONFLICT (code) DO UPDATE SET
display_name = EXCLUDED.display_name;
""")
def seed_grant_detail_status():
execute("""
INSERT INTO raw.grant_detail_status (code, display_name, notes) VALUES
('complete', 'Complete', 'All grants are in raw grant rows'),
('see_attached', 'See Attached', 'Filing says SEE ATTACHED for grant detail'),
('schedule_o', 'Schedule O', 'Grant detail is in Schedule O narrative'),
('placeholder_only', 'Placeholder Only', 'Only placeholder rows (VARIOUS, SEE STATEMENT, etc.)'),
('no_grants', 'No Grants', 'Filing has no grant schedule or zero grants reported'),
('unresolved', 'Unresolved', 'Completeness not yet determined'),
('supplemented', 'Supplemented', 'Detail recovered from supplemental source (PDF, state AG)')
ON CONFLICT (code) DO UPDATE SET
display_name = EXCLUDED.display_name,
notes = EXCLUDED.notes;
""")
def main():
print("Seeding reference tables...")
seed_source_system()
print(" source_system: done")
seed_form_type()
print(" form_type: done")
seed_grant_detail_status()
print(" grant_detail_status: done")
print("All reference tables seeded.")
if __name__ == "__main__":
main()
|