diff options
| author | benj <benj@rse8.com> | 2026-04-10 11:13:57 +0800 |
|---|---|---|
| committer | benj <benj@rse8.com> | 2026-04-10 11:13:57 +0800 |
| commit | 6605e2cc428e3bdaa174ccc432941eab8c5d61cb (patch) | |
| tree | 52f9d176c2ce1a80adb2ea2ac31cd12d3a29c0db /scripts/common/__init__.py | |
| parent | 493746b14c1251a45b061d2e3edd9160c929d2b9 (diff) | |
| download | tidyindex-6605e2cc428e3bdaa174ccc432941eab8c5d61cb.tar tidyindex-6605e2cc428e3bdaa174ccc432941eab8c5d61cb.tar.gz tidyindex-6605e2cc428e3bdaa174ccc432941eab8c5d61cb.tar.bz2 tidyindex-6605e2cc428e3bdaa174ccc432941eab8c5d61cb.tar.lz tidyindex-6605e2cc428e3bdaa174ccc432941eab8c5d61cb.tar.xz tidyindex-6605e2cc428e3bdaa174ccc432941eab8c5d61cb.tar.zst tidyindex-6605e2cc428e3bdaa174ccc432941eab8c5d61cb.zip | |
ensure parsers do not parse and store raw XML fields
Diffstat (limited to 'scripts/common/__init__.py')
| -rw-r--r-- | scripts/common/__init__.py | 28 |
1 file changed, 28 insertions, 0 deletions
```diff
diff --git a/scripts/common/__init__.py b/scripts/common/__init__.py
new file mode 100644
index 0000000..f06ee26
--- /dev/null
+++ b/scripts/common/__init__.py
@@ -0,0 +1,28 @@
+"""
+Shared infrastructure for the 990 data pipeline (v2).
+
+This package provides the single authoritative implementation of
+normalization, XML helpers, DB access, and ingest tracking for
+all parsers under scripts/parse/, scripts/fetch/, and scripts/extract/.
+
+Old parsers in scripts/ still use scripts/parse_common.py directly.
+"""
+
+import zipfile_deflate64  # noqa: F401
+
+from scripts.common.db import (
+    execute, execute_scalar, execute_all, execute_transaction, copy_rows,
+    # Legacy (shell-based, for old parsers)
+    psql, psql_scalar, psql_query_values, insert_rows,
+)
+from scripts.common.normalize import normalize_ein, parse_numeric, map_form_type, is_placeholder
+from scripts.common.xml import (
+    text, strip_ns, leaf_paths, extract_filing_metadata,
+    derive_source_document_id,
+)
+from scripts.common.ingest import (
+    start_ingest_run, finish_ingest_run, fail_ingest_run, log_ingest_error,
+)
+from scripts.common.filing import (
+    upsert_raw_filing, record_raw_filing_source,
+)
```
