aboutsummaryrefslogtreecommitdiff
path: root/scripts/common/filing.py
diff options
context:
space:
mode:
author benj <benj@rse8.com> 2026-05-01 09:36:21 +0800
committer benj <benj@rse8.com> 2026-05-01 09:36:21 +0800
commit 850f4f826b536d913235e174dc07aef74e51bf60 (patch)
tree a2806da6c0ed5c48d21178e0c6c280d5a40ccd38 /scripts/common/filing.py
parent 6605e2cc428e3bdaa174ccc432941eab8c5d61cb (diff)
download tidyindex-850f4f826b536d913235e174dc07aef74e51bf60.tar
tidyindex-850f4f826b536d913235e174dc07aef74e51bf60.tar.gz
tidyindex-850f4f826b536d913235e174dc07aef74e51bf60.tar.bz2
tidyindex-850f4f826b536d913235e174dc07aef74e51bf60.tar.lz
tidyindex-850f4f826b536d913235e174dc07aef74e51bf60.tar.xz
tidyindex-850f4f826b536d913235e174dc07aef74e51bf60.tar.zst
tidyindex-850f4f826b536d913235e174dc07aef74e51bf60.zip
irs 990 doc parsers and some web stuff HEAD master
Diffstat (limited to 'scripts/common/filing.py')
-rw-r--r-- scripts/common/filing.py 11
1 files changed, 10 insertions, 1 deletions
diff --git a/scripts/common/filing.py b/scripts/common/filing.py
index 44bcabc..3c7aaf3 100644
--- a/scripts/common/filing.py
+++ b/scripts/common/filing.py
@@ -5,7 +5,7 @@ Provides upsert/record operations for raw.filing and raw.filing_source.
Form-specific child row operations live in the parsers themselves.
"""
-from scripts.common.db import execute_scalar
+from scripts.common.db import execute_all, execute_scalar
def _execute_scalar(conn, sql, params=None):
@@ -91,3 +91,12 @@ def record_raw_filing_source(raw_filing_id, ingest_run_id, source_archive, sourc
f"already mapped to raw_filing_id={existing_filing_id}, "
f"but trying to map to {raw_filing_id}"
)
+
+
+def get_seen_source_paths(source_archive):
+ """Return the set of source_path values already committed for an archive."""
+ rows = execute_all(
+ "SELECT source_path FROM raw.filing_source WHERE source_archive = %s",
+ (source_archive,),
+ )
+ return {row[0] for row in rows}