aboutsummaryrefslogtreecommitdiff
path: root/scripts/seed.py
diff options
context:
space:
mode:
authorbenj <benj@rse8.com>2026-04-10 11:13:57 +0800
committerbenj <benj@rse8.com>2026-04-10 11:13:57 +0800
commit6605e2cc428e3bdaa174ccc432941eab8c5d61cb (patch)
tree52f9d176c2ce1a80adb2ea2ac31cd12d3a29c0db /scripts/seed.py
parent493746b14c1251a45b061d2e3edd9160c929d2b9 (diff)
downloadtidyindex-6605e2cc428e3bdaa174ccc432941eab8c5d61cb.tar
tidyindex-6605e2cc428e3bdaa174ccc432941eab8c5d61cb.tar.gz
tidyindex-6605e2cc428e3bdaa174ccc432941eab8c5d61cb.tar.bz2
tidyindex-6605e2cc428e3bdaa174ccc432941eab8c5d61cb.tar.lz
tidyindex-6605e2cc428e3bdaa174ccc432941eab8c5d61cb.tar.xz
tidyindex-6605e2cc428e3bdaa174ccc432941eab8c5d61cb.tar.zst
tidyindex-6605e2cc428e3bdaa174ccc432941eab8c5d61cb.zip
ensure parsers do not parse and store raw XML fields
Diffstat (limited to 'scripts/seed.py')
-rw-r--r--scripts/seed.py81
1 files changed, 81 insertions, 0 deletions
diff --git a/scripts/seed.py b/scripts/seed.py
new file mode 100644
index 0000000..72c12bd
--- /dev/null
+++ b/scripts/seed.py
@@ -0,0 +1,81 @@
+"""
+Seed raw-layer reference tables with initial values.
+
+Idempotent — safe to run repeatedly. Uses ON CONFLICT DO UPDATE
+so display_name/notes stay current if you change them here.
+
+Usage: python scripts/seed.py
+"""
+
+import sys
+import os
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+from scripts.common.db import execute
+
+
+def seed_source_system():
+ execute("""
+ INSERT INTO raw.source_system (code, display_name, url, notes) VALUES
+ ('irs_xml', 'IRS Bulk XML E-Files', 'https://www.irs.gov/charities-non-profits/form-990-series-downloads', NULL),
+ ('irs_pdf', 'IRS PDF Filings', NULL, NULL),
+ ('irs_bmf', 'IRS EO BMF Extract', 'https://www.irs.gov/charities-non-profits/exempt-organizations-business-master-file-extract-eo-bmf', NULL),
+ ('ny_ag', 'New York Attorney General', 'https://www.charitiesnys.com/RegistrySearch/search_charities.jsp', NULL),
+ ('ca_ag', 'California Attorney General', 'https://rct.doj.ca.gov/Verification/Web/Search.aspx', NULL),
+ ('fl_ag', 'Florida Charities', NULL, NULL),
+ ('tx_ag', 'Texas Secretary of State', NULL, NULL),
+ ('nj_ag', 'New Jersey Charities', NULL, NULL),
+ ('il_ag', 'Illinois Attorney General', NULL, NULL),
+ ('pa_ag', 'Pennsylvania Charities', NULL, NULL),
+ ('tn_ag', 'Tennessee Secretary of State', NULL, NULL)
+ ON CONFLICT (code) DO UPDATE SET
+ display_name = EXCLUDED.display_name,
+ url = EXCLUDED.url;
+ """)
+
+
+def seed_form_type():
+ execute("""
+ INSERT INTO raw.form_type (code, display_name, notes) VALUES
+ ('990', 'Form 990', NULL),
+ ('990PF', 'Form 990-PF', NULL),
+ ('990EZ', 'Form 990-EZ', NULL),
+ ('990O', 'Form 990-O', NULL),
+ ('990T', 'Form 990-T', NULL),
+ ('990A', 'Form 990 (Amended)', NULL),
+ ('990PA', 'Form 990-PF (Amended)', NULL),
+ ('990EA', 'Form 990-EZ (Amended)', NULL)
+ ON CONFLICT (code) DO UPDATE SET
+ display_name = EXCLUDED.display_name;
+ """)
+
+
+def seed_grant_detail_status():
+ execute("""
+ INSERT INTO raw.grant_detail_status (code, display_name, notes) VALUES
+ ('complete', 'Complete', 'All grants are in raw grant rows'),
+ ('see_attached', 'See Attached', 'Filing says SEE ATTACHED for grant detail'),
+ ('schedule_o', 'Schedule O', 'Grant detail is in Schedule O narrative'),
+ ('placeholder_only', 'Placeholder Only', 'Only placeholder rows (VARIOUS, SEE STATEMENT, etc.)'),
+ ('no_grants', 'No Grants', 'Filing has no grant schedule or zero grants reported'),
+ ('unresolved', 'Unresolved', 'Completeness not yet determined'),
+ ('supplemented', 'Supplemented', 'Detail recovered from supplemental source (PDF, state AG)')
+ ON CONFLICT (code) DO UPDATE SET
+ display_name = EXCLUDED.display_name,
+ notes = EXCLUDED.notes;
+ """)
+
+
+def main():
+ print("Seeding reference tables...")
+ seed_source_system()
+ print(" source_system: done")
+ seed_form_type()
+ print(" form_type: done")
+ seed_grant_detail_status()
+ print(" grant_detail_status: done")
+ print("All reference tables seeded.")
+
+
+if __name__ == "__main__":
+ main()