diff options
| author | benj <benj@rse8.com> | 2026-05-01 09:36:21 +0800 |
|---|---|---|
| committer | benj <benj@rse8.com> | 2026-05-01 09:36:21 +0800 |
| commit | 850f4f826b536d913235e174dc07aef74e51bf60 (patch) | |
| tree | a2806da6c0ed5c48d21178e0c6c280d5a40ccd38 /scripts/common | |
| parent | 6605e2cc428e3bdaa174ccc432941eab8c5d61cb (diff) | |
| download | tidyindex-master.tar tidyindex-master.tar.gz tidyindex-master.tar.bz2 tidyindex-master.tar.lz tidyindex-master.tar.xz tidyindex-master.tar.zst tidyindex-master.zip | |
Diffstat (limited to '')
| -rw-r--r-- | scripts/common/filing.py | 11 |
1 files changed, 10 insertions, 1 deletions
```diff
diff --git a/scripts/common/filing.py b/scripts/common/filing.py
index 44bcabc..3c7aaf3 100644
--- a/scripts/common/filing.py
+++ b/scripts/common/filing.py
@@ -5,7 +5,7 @@ Provides upsert/record operations for raw.filing and raw.filing_source.
 Form-specific child row operations live in the parsers themselves.
 """
 
-from scripts.common.db import execute_scalar
+from scripts.common.db import execute_all, execute_scalar
 
 
 def _execute_scalar(conn, sql, params=None):
@@ -91,3 +91,12 @@ def record_raw_filing_source(raw_filing_id, ingest_run_id, source_archive, sourc
             f"already mapped to raw_filing_id={existing_filing_id}, "
             f"but trying to map to {raw_filing_id}"
         )
+
+
+def get_seen_source_paths(source_archive):
+    """Return the set of source_path values already committed for an archive."""
+    rows = execute_all(
+        "SELECT source_path FROM raw.filing_source WHERE source_archive = %s",
+        (source_archive,),
+    )
+    return {row[0] for row in rows}
```
