about | summary | refs | log | tree | commit | diff
path: root/scripts/common/filing.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- scripts/common/filing.py 11
1 files changed, 10 insertions, 1 deletions
diff --git a/scripts/common/filing.py b/scripts/common/filing.py
index 44bcabc..3c7aaf3 100644
--- a/scripts/common/filing.py
+++ b/scripts/common/filing.py
@@ -5,7 +5,7 @@ Provides upsert/record operations for raw.filing and raw.filing_source.
Form-specific child row operations live in the parsers themselves.
"""
-from scripts.common.db import execute_scalar
+from scripts.common.db import execute_all, execute_scalar
def _execute_scalar(conn, sql, params=None):
@@ -91,3 +91,12 @@ def record_raw_filing_source(raw_filing_id, ingest_run_id, source_archive, sourc
f"already mapped to raw_filing_id={existing_filing_id}, "
f"but trying to map to {raw_filing_id}"
)
+
+
+def get_seen_source_paths(source_archive):
+    """Build the set of source_path values stored for the given source_archive."""
+    query = "SELECT source_path FROM raw.filing_source WHERE source_archive = %s"
+    seen = set()
+    for record in execute_all(query, (source_archive,)):
+        seen.add(record[0])  # single-column SELECT: index 0 is source_path
+    return seen