aboutsummaryrefslogtreecommitdiff
path: root/scripts/common/__init__.py
diff options
context:
space:
mode:
authorbenj <benj@rse8.com>2026-04-10 11:13:57 +0800
committerbenj <benj@rse8.com>2026-04-10 11:13:57 +0800
commit6605e2cc428e3bdaa174ccc432941eab8c5d61cb (patch)
tree52f9d176c2ce1a80adb2ea2ac31cd12d3a29c0db /scripts/common/__init__.py
parent493746b14c1251a45b061d2e3edd9160c929d2b9 (diff)
downloadtidyindex-6605e2cc428e3bdaa174ccc432941eab8c5d61cb.tar
tidyindex-6605e2cc428e3bdaa174ccc432941eab8c5d61cb.tar.gz
tidyindex-6605e2cc428e3bdaa174ccc432941eab8c5d61cb.tar.bz2
tidyindex-6605e2cc428e3bdaa174ccc432941eab8c5d61cb.tar.lz
tidyindex-6605e2cc428e3bdaa174ccc432941eab8c5d61cb.tar.xz
tidyindex-6605e2cc428e3bdaa174ccc432941eab8c5d61cb.tar.zst
tidyindex-6605e2cc428e3bdaa174ccc432941eab8c5d61cb.zip
ensure parsers do not parse and store raw XML fields
Diffstat (limited to 'scripts/common/__init__.py')
-rw-r--r--scripts/common/__init__.py28
1 files changed, 28 insertions, 0 deletions
diff --git a/scripts/common/__init__.py b/scripts/common/__init__.py
new file mode 100644
index 0000000..f06ee26
--- /dev/null
+++ b/scripts/common/__init__.py
@@ -0,0 +1,28 @@
+"""
+Shared infrastructure for the 990 data pipeline (v2).
+
+This package provides the single authoritative implementation of normalization,
+XML helpers, DB access, ingest tracking, and raw-filing helpers for all parsers
+under scripts/parse/, scripts/fetch/, and scripts/extract/.
+
+Old parsers in scripts/ still use scripts/parse_common.py directly.
+"""
+
+import zipfile_deflate64 # noqa: F401
+
+from scripts.common.db import (
+ execute, execute_scalar, execute_all, execute_transaction, copy_rows,
+ # Legacy (shell-based, for old parsers)
+ psql, psql_scalar, psql_query_values, insert_rows,
+)
+from scripts.common.normalize import normalize_ein, parse_numeric, map_form_type, is_placeholder
+from scripts.common.xml import (
+ text, strip_ns, leaf_paths, extract_filing_metadata,
+ derive_source_document_id,
+)
+from scripts.common.ingest import (
+ start_ingest_run, finish_ingest_run, fail_ingest_run, log_ingest_error,
+)
+from scripts.common.filing import (
+ upsert_raw_filing, record_raw_filing_source,
+)