Add Disaggregated COT data support (2019–2026)
Integrates the CFTC Disaggregated Commitments of Traders reports (com_disagg_txt_YYYY.zip) which break positions down by Producer/Merchant, Swap Dealers, Managed Money, and Other Reportables — a different report type from the existing legacy COT data. - schema.sql: add disagg_reports, disagg_positions, disagg_concentration tables - parser.py: add DisaggPositionRow/DisaggBlock dataclasses and parse_disagg_csv_text()/parse_disagg_zip_file() for c_year.txt format - importer.py: add import_disagg_block(), import_disagg_zip_file(), run_disagg_historical_import() for 2019–2026 yearly ZIPs - cli.py: add import-disagg-history subcommand - docker-compose.yaml: run import-disagg-history on startup (idempotent via import_log, so re-deploys skip already-imported years) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
495e0c9314
commit
2c28ac3b0a
@ -63,6 +63,13 @@ def cmd_import_zip(args):
|
|||||||
print(f"{result.rows_inserted} inserted, {result.rows_skipped} skipped")
|
print(f"{result.rows_inserted} inserted, {result.rows_skipped} skipped")
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_import_disagg_history(args):
|
||||||
|
from app.ingestion.importer import run_disagg_historical_import
|
||||||
|
print(f"Importing Disaggregated COT data {args.start_year}–{args.end_year}...")
|
||||||
|
run_disagg_historical_import(start_year=args.start_year, end_year=args.end_year, verbose=True)
|
||||||
|
print("Done.")
|
||||||
|
|
||||||
|
|
||||||
def cmd_download_and_import(args):
|
def cmd_download_and_import(args):
|
||||||
from app.ingestion.importer import download_and_import
|
from app.ingestion.importer import download_and_import
|
||||||
result = download_and_import()
|
result = download_and_import()
|
||||||
@ -123,6 +130,11 @@ def main():
|
|||||||
p = sub.add_parser("import-zip", help="Import a single ZIP file")
|
p = sub.add_parser("import-zip", help="Import a single ZIP file")
|
||||||
p.add_argument("file")
|
p.add_argument("file")
|
||||||
|
|
||||||
|
p = sub.add_parser("import-disagg-history",
|
||||||
|
help="Download and import Disaggregated COT yearly ZIPs (2019–2026)")
|
||||||
|
p.add_argument("--start-year", type=int, default=2019)
|
||||||
|
p.add_argument("--end-year", type=int, default=2026)
|
||||||
|
|
||||||
sub.add_parser("download-and-import", help="Download latest weekly report and import it")
|
sub.add_parser("download-and-import", help="Download latest weekly report and import it")
|
||||||
sub.add_parser("status", help="Show database statistics")
|
sub.add_parser("status", help="Show database statistics")
|
||||||
|
|
||||||
@ -133,6 +145,7 @@ def main():
|
|||||||
"import-history": cmd_import_history,
|
"import-history": cmd_import_history,
|
||||||
"import-html": cmd_import_html,
|
"import-html": cmd_import_html,
|
||||||
"import-zip": cmd_import_zip,
|
"import-zip": cmd_import_zip,
|
||||||
|
"import-disagg-history": cmd_import_disagg_history,
|
||||||
"download-and-import": cmd_download_and_import,
|
"download-and-import": cmd_download_and_import,
|
||||||
"status": cmd_status,
|
"status": cmd_status,
|
||||||
}
|
}
|
||||||
|
|||||||
@ -18,6 +18,7 @@ from app.ingestion.parser import (
|
|||||||
CommodityBlock,
|
CommodityBlock,
|
||||||
parse_html_file,
|
parse_html_file,
|
||||||
parse_zip_file,
|
parse_zip_file,
|
||||||
|
parse_disagg_zip_file,
|
||||||
)
|
)
|
||||||
|
|
||||||
HEADERS = {
|
HEADERS = {
|
||||||
@ -357,6 +358,216 @@ def run_historical_import(start_year: int = 1995, end_year: int = 2026,
|
|||||||
f"{result.rows_skipped} skipped — {status}")
|
f"{result.rows_skipped} skipped — {status}")
|
||||||
|
|
||||||
|
|
||||||
|
def import_disagg_block(conn: sqlite3.Connection, block, source: str) -> tuple[int, int]:
|
||||||
|
"""
|
||||||
|
Insert one DisaggBlock into the disaggregated tables.
|
||||||
|
Returns (rows_inserted, rows_skipped).
|
||||||
|
"""
|
||||||
|
commodity_id = _upsert_commodity(conn, block)
|
||||||
|
|
||||||
|
cur = conn.execute(
|
||||||
|
"""
|
||||||
|
INSERT OR IGNORE INTO disagg_reports
|
||||||
|
(commodity_id, report_date, source_file)
|
||||||
|
VALUES (?, ?, ?)
|
||||||
|
""",
|
||||||
|
(commodity_id, block.report_date, source),
|
||||||
|
)
|
||||||
|
if cur.rowcount == 0:
|
||||||
|
return 0, 1
|
||||||
|
report_id = cur.lastrowid
|
||||||
|
|
||||||
|
inserted = 0
|
||||||
|
for pos in block.rows:
|
||||||
|
sfx = pos.row_type
|
||||||
|
chg = (
|
||||||
|
pos.chg_open_interest,
|
||||||
|
pos.chg_prod_merc_long, pos.chg_prod_merc_short,
|
||||||
|
pos.chg_swap_long, pos.chg_swap_short, pos.chg_swap_spread,
|
||||||
|
pos.chg_m_money_long, pos.chg_m_money_short, pos.chg_m_money_spread,
|
||||||
|
pos.chg_other_rept_long, pos.chg_other_rept_short,pos.chg_other_rept_spread,
|
||||||
|
pos.chg_tot_rept_long, pos.chg_tot_rept_short,
|
||||||
|
pos.chg_nonrept_long, pos.chg_nonrept_short,
|
||||||
|
) if sfx == 'All' else (None,) * 16
|
||||||
|
|
||||||
|
conn.execute(
|
||||||
|
"""
|
||||||
|
INSERT OR IGNORE INTO disagg_positions (
|
||||||
|
report_id, row_type,
|
||||||
|
open_interest,
|
||||||
|
prod_merc_long, prod_merc_short,
|
||||||
|
swap_long, swap_short, swap_spread,
|
||||||
|
m_money_long, m_money_short, m_money_spread,
|
||||||
|
other_rept_long, other_rept_short, other_rept_spread,
|
||||||
|
tot_rept_long, tot_rept_short,
|
||||||
|
nonrept_long, nonrept_short,
|
||||||
|
chg_open_interest,
|
||||||
|
chg_prod_merc_long, chg_prod_merc_short,
|
||||||
|
chg_swap_long, chg_swap_short, chg_swap_spread,
|
||||||
|
chg_m_money_long, chg_m_money_short, chg_m_money_spread,
|
||||||
|
chg_other_rept_long, chg_other_rept_short, chg_other_rept_spread,
|
||||||
|
chg_tot_rept_long, chg_tot_rept_short,
|
||||||
|
chg_nonrept_long, chg_nonrept_short,
|
||||||
|
pct_open_interest,
|
||||||
|
pct_prod_merc_long, pct_prod_merc_short,
|
||||||
|
pct_swap_long, pct_swap_short, pct_swap_spread,
|
||||||
|
pct_m_money_long, pct_m_money_short, pct_m_money_spread,
|
||||||
|
pct_other_rept_long, pct_other_rept_short, pct_other_rept_spread,
|
||||||
|
pct_tot_rept_long, pct_tot_rept_short,
|
||||||
|
pct_nonrept_long, pct_nonrept_short,
|
||||||
|
traders_total,
|
||||||
|
traders_prod_merc_long, traders_prod_merc_short,
|
||||||
|
traders_swap_long, traders_swap_short, traders_swap_spread,
|
||||||
|
traders_m_money_long, traders_m_money_short, traders_m_money_spread,
|
||||||
|
traders_other_rept_long, traders_other_rept_short, traders_other_rept_spread,
|
||||||
|
traders_tot_rept_long, traders_tot_rept_short
|
||||||
|
) VALUES (
|
||||||
|
?, ?,
|
||||||
|
?,
|
||||||
|
?, ?,
|
||||||
|
?, ?, ?,
|
||||||
|
?, ?, ?,
|
||||||
|
?, ?, ?,
|
||||||
|
?, ?,
|
||||||
|
?, ?,
|
||||||
|
?,
|
||||||
|
?, ?,
|
||||||
|
?, ?, ?,
|
||||||
|
?, ?, ?,
|
||||||
|
?, ?, ?,
|
||||||
|
?, ?,
|
||||||
|
?, ?,
|
||||||
|
?,
|
||||||
|
?, ?,
|
||||||
|
?, ?, ?,
|
||||||
|
?, ?, ?,
|
||||||
|
?, ?, ?,
|
||||||
|
?, ?,
|
||||||
|
?, ?,
|
||||||
|
?,
|
||||||
|
?, ?,
|
||||||
|
?, ?, ?,
|
||||||
|
?, ?, ?,
|
||||||
|
?, ?, ?,
|
||||||
|
?, ?
|
||||||
|
)
|
||||||
|
""",
|
||||||
|
(
|
||||||
|
report_id, sfx,
|
||||||
|
pos.open_interest,
|
||||||
|
pos.prod_merc_long, pos.prod_merc_short,
|
||||||
|
pos.swap_long, pos.swap_short, pos.swap_spread,
|
||||||
|
pos.m_money_long, pos.m_money_short, pos.m_money_spread,
|
||||||
|
pos.other_rept_long, pos.other_rept_short,pos.other_rept_spread,
|
||||||
|
pos.tot_rept_long, pos.tot_rept_short,
|
||||||
|
pos.nonrept_long, pos.nonrept_short,
|
||||||
|
*chg,
|
||||||
|
pos.pct_open_interest,
|
||||||
|
pos.pct_prod_merc_long, pos.pct_prod_merc_short,
|
||||||
|
pos.pct_swap_long, pos.pct_swap_short, pos.pct_swap_spread,
|
||||||
|
pos.pct_m_money_long, pos.pct_m_money_short, pos.pct_m_money_spread,
|
||||||
|
pos.pct_other_rept_long, pos.pct_other_rept_short,pos.pct_other_rept_spread,
|
||||||
|
pos.pct_tot_rept_long, pos.pct_tot_rept_short,
|
||||||
|
pos.pct_nonrept_long, pos.pct_nonrept_short,
|
||||||
|
pos.traders_total,
|
||||||
|
pos.traders_prod_merc_long, pos.traders_prod_merc_short,
|
||||||
|
pos.traders_swap_long, pos.traders_swap_short, pos.traders_swap_spread,
|
||||||
|
pos.traders_m_money_long, pos.traders_m_money_short, pos.traders_m_money_spread,
|
||||||
|
pos.traders_other_rept_long, pos.traders_other_rept_short, pos.traders_other_rept_spread,
|
||||||
|
pos.traders_tot_rept_long, pos.traders_tot_rept_short,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
inserted += 1
|
||||||
|
|
||||||
|
conc = block.concentration.get(sfx)
|
||||||
|
if conc:
|
||||||
|
conn.execute(
|
||||||
|
"""
|
||||||
|
INSERT OR IGNORE INTO disagg_concentration (
|
||||||
|
report_id, row_type,
|
||||||
|
conc_gross_long_4, conc_gross_short_4,
|
||||||
|
conc_gross_long_8, conc_gross_short_8,
|
||||||
|
conc_net_long_4, conc_net_short_4,
|
||||||
|
conc_net_long_8, conc_net_short_8
|
||||||
|
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
""",
|
||||||
|
(
|
||||||
|
report_id, sfx,
|
||||||
|
conc.conc_gross_long_4, conc.conc_gross_short_4,
|
||||||
|
conc.conc_gross_long_8, conc.conc_gross_short_8,
|
||||||
|
conc.conc_net_long_4, conc.conc_net_short_4,
|
||||||
|
conc.conc_net_long_8, conc.conc_net_short_8,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
return inserted, 0
|
||||||
|
|
||||||
|
|
||||||
|
def import_disagg_zip_file(zip_path: str, source_label: Optional[str] = None) -> ImportResult:
|
||||||
|
"""Import a Disaggregated COT ZIP file."""
|
||||||
|
source = source_label or Path(zip_path).name
|
||||||
|
result = ImportResult(source=source)
|
||||||
|
try:
|
||||||
|
with get_db() as conn:
|
||||||
|
for block in parse_disagg_zip_file(zip_path):
|
||||||
|
ins, skp = import_disagg_block(conn, block, source)
|
||||||
|
result.rows_inserted += ins
|
||||||
|
result.rows_skipped += skp
|
||||||
|
conn.commit()
|
||||||
|
except Exception as e:
|
||||||
|
result.error = str(e)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def run_disagg_historical_import(start_year: int = 2019, end_year: int = 2026,
|
||||||
|
verbose: bool = True) -> None:
|
||||||
|
"""
|
||||||
|
Download and import the Disaggregated COT yearly ZIP files.
|
||||||
|
Uses import_log to skip already-completed sources.
|
||||||
|
"""
|
||||||
|
for year in range(start_year, end_year + 1):
|
||||||
|
label = f"com_disagg_txt_{year}.zip"
|
||||||
|
url = f"{HISTORICAL_BASE}/com_disagg_txt_{year}.zip"
|
||||||
|
|
||||||
|
with get_db() as conn:
|
||||||
|
if _already_imported(conn, label):
|
||||||
|
if verbose:
|
||||||
|
print(f" [skip] {label} (already imported)")
|
||||||
|
continue
|
||||||
|
|
||||||
|
if verbose:
|
||||||
|
print(f" [download] {label} ...")
|
||||||
|
|
||||||
|
with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as tmp:
|
||||||
|
tmp_path = Path(tmp.name)
|
||||||
|
|
||||||
|
if not _download_zip(url, tmp_path):
|
||||||
|
if verbose:
|
||||||
|
print(f" [error] Failed to download {url}")
|
||||||
|
with get_db() as conn:
|
||||||
|
_log_start(conn, label, 'disagg_annual_zip')
|
||||||
|
_log_done(conn, label, 0, 0, f"Download failed: {url}")
|
||||||
|
tmp_path.unlink(missing_ok=True)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if verbose:
|
||||||
|
print(f" [import] {label} ...")
|
||||||
|
|
||||||
|
with get_db() as conn:
|
||||||
|
_log_start(conn, label, 'disagg_annual_zip')
|
||||||
|
|
||||||
|
result = import_disagg_zip_file(str(tmp_path), source_label=label)
|
||||||
|
tmp_path.unlink(missing_ok=True)
|
||||||
|
|
||||||
|
with get_db() as conn:
|
||||||
|
_log_done(conn, label, result.rows_inserted, result.rows_skipped, result.error)
|
||||||
|
|
||||||
|
if verbose:
|
||||||
|
status = "ERROR: " + result.error if result.error else "OK"
|
||||||
|
print(f" [done] {label}: {result.rows_inserted} inserted, "
|
||||||
|
f"{result.rows_skipped} skipped — {status}")
|
||||||
|
|
||||||
|
|
||||||
def download_and_import() -> ImportResult:
|
def download_and_import() -> ImportResult:
|
||||||
"""
|
"""
|
||||||
Download the current weekly report and import it.
|
Download the current weekly report and import it.
|
||||||
|
|||||||
@ -616,3 +616,243 @@ def parse_csv_text(text: str) -> Iterator[CommodityBlock]:
|
|||||||
block = _csv_row_to_block(row)
|
block = _csv_row_to_block(row)
|
||||||
if block:
|
if block:
|
||||||
yield block
|
yield block
|
||||||
|
|
||||||
|
|
||||||
|
# ── Disaggregated COT format (com_disagg_txt_YYYY.zip / c_year.txt) ─────────
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class DisaggPositionRow:
|
||||||
|
row_type: str
|
||||||
|
open_interest: Optional[int] = None
|
||||||
|
prod_merc_long: Optional[int] = None
|
||||||
|
prod_merc_short: Optional[int] = None
|
||||||
|
swap_long: Optional[int] = None
|
||||||
|
swap_short: Optional[int] = None
|
||||||
|
swap_spread: Optional[int] = None
|
||||||
|
m_money_long: Optional[int] = None
|
||||||
|
m_money_short: Optional[int] = None
|
||||||
|
m_money_spread: Optional[int] = None
|
||||||
|
other_rept_long: Optional[int] = None
|
||||||
|
other_rept_short: Optional[int] = None
|
||||||
|
other_rept_spread: Optional[int] = None
|
||||||
|
tot_rept_long: Optional[int] = None
|
||||||
|
tot_rept_short: Optional[int] = None
|
||||||
|
nonrept_long: Optional[int] = None
|
||||||
|
nonrept_short: Optional[int] = None
|
||||||
|
chg_open_interest: Optional[int] = None
|
||||||
|
chg_prod_merc_long: Optional[int] = None
|
||||||
|
chg_prod_merc_short: Optional[int] = None
|
||||||
|
chg_swap_long: Optional[int] = None
|
||||||
|
chg_swap_short: Optional[int] = None
|
||||||
|
chg_swap_spread: Optional[int] = None
|
||||||
|
chg_m_money_long: Optional[int] = None
|
||||||
|
chg_m_money_short: Optional[int] = None
|
||||||
|
chg_m_money_spread: Optional[int] = None
|
||||||
|
chg_other_rept_long: Optional[int] = None
|
||||||
|
chg_other_rept_short: Optional[int] = None
|
||||||
|
chg_other_rept_spread: Optional[int] = None
|
||||||
|
chg_tot_rept_long: Optional[int] = None
|
||||||
|
chg_tot_rept_short: Optional[int] = None
|
||||||
|
chg_nonrept_long: Optional[int] = None
|
||||||
|
chg_nonrept_short: Optional[int] = None
|
||||||
|
pct_open_interest: Optional[float] = None
|
||||||
|
pct_prod_merc_long: Optional[float] = None
|
||||||
|
pct_prod_merc_short: Optional[float] = None
|
||||||
|
pct_swap_long: Optional[float] = None
|
||||||
|
pct_swap_short: Optional[float] = None
|
||||||
|
pct_swap_spread: Optional[float] = None
|
||||||
|
pct_m_money_long: Optional[float] = None
|
||||||
|
pct_m_money_short: Optional[float] = None
|
||||||
|
pct_m_money_spread: Optional[float] = None
|
||||||
|
pct_other_rept_long: Optional[float] = None
|
||||||
|
pct_other_rept_short: Optional[float] = None
|
||||||
|
pct_other_rept_spread: Optional[float] = None
|
||||||
|
pct_tot_rept_long: Optional[float] = None
|
||||||
|
pct_tot_rept_short: Optional[float] = None
|
||||||
|
pct_nonrept_long: Optional[float] = None
|
||||||
|
pct_nonrept_short: Optional[float] = None
|
||||||
|
traders_total: Optional[int] = None
|
||||||
|
traders_prod_merc_long: Optional[int] = None
|
||||||
|
traders_prod_merc_short: Optional[int] = None
|
||||||
|
traders_swap_long: Optional[int] = None
|
||||||
|
traders_swap_short: Optional[int] = None
|
||||||
|
traders_swap_spread: Optional[int] = None
|
||||||
|
traders_m_money_long: Optional[int] = None
|
||||||
|
traders_m_money_short: Optional[int] = None
|
||||||
|
traders_m_money_spread: Optional[int] = None
|
||||||
|
traders_other_rept_long: Optional[int] = None
|
||||||
|
traders_other_rept_short: Optional[int] = None
|
||||||
|
traders_other_rept_spread: Optional[int] = None
|
||||||
|
traders_tot_rept_long: Optional[int] = None
|
||||||
|
traders_tot_rept_short: Optional[int] = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class DisaggBlock:
|
||||||
|
cftc_code: str
|
||||||
|
name: str
|
||||||
|
exchange: str
|
||||||
|
exchange_abbr: str
|
||||||
|
contract_unit: str
|
||||||
|
report_date: str
|
||||||
|
rows: list # list of DisaggPositionRow (one per row_type)
|
||||||
|
concentration: dict # row_type -> ConcentrationRow
|
||||||
|
|
||||||
|
|
||||||
|
def _di(row: dict, col: str) -> Optional[int]:
|
||||||
|
v = row.get(col, '').strip()
|
||||||
|
if not v:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return int(float(v))
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _df(row: dict, col: str) -> Optional[float]:
|
||||||
|
v = row.get(col, '').strip()
|
||||||
|
if not v:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return float(v)
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_disagg_position_row(csv_row: dict, sfx: str) -> DisaggPositionRow:
|
||||||
|
pos = DisaggPositionRow(row_type=sfx)
|
||||||
|
pos.open_interest = _di(csv_row, f'Open_Interest_{sfx}')
|
||||||
|
pos.prod_merc_long = _di(csv_row, f'Prod_Merc_Positions_Long_{sfx}')
|
||||||
|
pos.prod_merc_short = _di(csv_row, f'Prod_Merc_Positions_Short_{sfx}')
|
||||||
|
# NB: CFTC uses double underscore for Swap Short/Spread columns
|
||||||
|
pos.swap_long = _di(csv_row, f'Swap_Positions_Long_{sfx}')
|
||||||
|
pos.swap_short = _di(csv_row, f'Swap__Positions_Short_{sfx}')
|
||||||
|
pos.swap_spread = _di(csv_row, f'Swap__Positions_Spread_{sfx}')
|
||||||
|
pos.m_money_long = _di(csv_row, f'M_Money_Positions_Long_{sfx}')
|
||||||
|
pos.m_money_short = _di(csv_row, f'M_Money_Positions_Short_{sfx}')
|
||||||
|
pos.m_money_spread = _di(csv_row, f'M_Money_Positions_Spread_{sfx}')
|
||||||
|
pos.other_rept_long = _di(csv_row, f'Other_Rept_Positions_Long_{sfx}')
|
||||||
|
pos.other_rept_short = _di(csv_row, f'Other_Rept_Positions_Short_{sfx}')
|
||||||
|
pos.other_rept_spread= _di(csv_row, f'Other_Rept_Positions_Spread_{sfx}')
|
||||||
|
pos.tot_rept_long = _di(csv_row, f'Tot_Rept_Positions_Long_{sfx}')
|
||||||
|
pos.tot_rept_short = _di(csv_row, f'Tot_Rept_Positions_Short_{sfx}')
|
||||||
|
pos.nonrept_long = _di(csv_row, f'NonRept_Positions_Long_{sfx}')
|
||||||
|
pos.nonrept_short = _di(csv_row, f'NonRept_Positions_Short_{sfx}')
|
||||||
|
|
||||||
|
if sfx == 'All':
|
||||||
|
pos.chg_open_interest = _di(csv_row, 'Change_in_Open_Interest_All')
|
||||||
|
pos.chg_prod_merc_long = _di(csv_row, 'Change_in_Prod_Merc_Long_All')
|
||||||
|
pos.chg_prod_merc_short = _di(csv_row, 'Change_in_Prod_Merc_Short_All')
|
||||||
|
pos.chg_swap_long = _di(csv_row, 'Change_in_Swap_Long_All')
|
||||||
|
pos.chg_swap_short = _di(csv_row, 'Change_in_Swap_Short_All')
|
||||||
|
pos.chg_swap_spread = _di(csv_row, 'Change_in_Swap_Spread_All')
|
||||||
|
pos.chg_m_money_long = _di(csv_row, 'Change_in_M_Money_Long_All')
|
||||||
|
pos.chg_m_money_short = _di(csv_row, 'Change_in_M_Money_Short_All')
|
||||||
|
pos.chg_m_money_spread = _di(csv_row, 'Change_in_M_Money_Spread_All')
|
||||||
|
pos.chg_other_rept_long = _di(csv_row, 'Change_in_Other_Rept_Long_All')
|
||||||
|
pos.chg_other_rept_short = _di(csv_row, 'Change_in_Other_Rept_Short_All')
|
||||||
|
pos.chg_other_rept_spread= _di(csv_row, 'Change_in_Other_Rept_Spread_All')
|
||||||
|
pos.chg_tot_rept_long = _di(csv_row, 'Change_in_Tot_Rept_Long_All')
|
||||||
|
pos.chg_tot_rept_short = _di(csv_row, 'Change_in_Tot_Rept_Short_All')
|
||||||
|
pos.chg_nonrept_long = _di(csv_row, 'Change_in_NonRept_Long_All')
|
||||||
|
pos.chg_nonrept_short = _di(csv_row, 'Change_in_NonRept_Short_All')
|
||||||
|
|
||||||
|
pos.pct_open_interest = _df(csv_row, f'Pct_of_Open_Interest_{sfx}')
|
||||||
|
pos.pct_prod_merc_long = _df(csv_row, f'Pct_of_OI_Prod_Merc_Long_{sfx}')
|
||||||
|
pos.pct_prod_merc_short = _df(csv_row, f'Pct_of_OI_Prod_Merc_Short_{sfx}')
|
||||||
|
pos.pct_swap_long = _df(csv_row, f'Pct_of_OI_Swap_Long_{sfx}')
|
||||||
|
pos.pct_swap_short = _df(csv_row, f'Pct_of_OI_Swap_Short_{sfx}')
|
||||||
|
pos.pct_swap_spread = _df(csv_row, f'Pct_of_OI_Swap_Spread_{sfx}')
|
||||||
|
pos.pct_m_money_long = _df(csv_row, f'Pct_of_OI_M_Money_Long_{sfx}')
|
||||||
|
pos.pct_m_money_short = _df(csv_row, f'Pct_of_OI_M_Money_Short_{sfx}')
|
||||||
|
pos.pct_m_money_spread = _df(csv_row, f'Pct_of_OI_M_Money_Spread_{sfx}')
|
||||||
|
pos.pct_other_rept_long = _df(csv_row, f'Pct_of_OI_Other_Rept_Long_{sfx}')
|
||||||
|
pos.pct_other_rept_short = _df(csv_row, f'Pct_of_OI_Other_Rept_Short_{sfx}')
|
||||||
|
pos.pct_other_rept_spread= _df(csv_row, f'Pct_of_OI_Other_Rept_Spread_{sfx}')
|
||||||
|
pos.pct_tot_rept_long = _df(csv_row, f'Pct_of_OI_Tot_Rept_Long_{sfx}')
|
||||||
|
pos.pct_tot_rept_short = _df(csv_row, f'Pct_of_OI_Tot_Rept_Short_{sfx}')
|
||||||
|
pos.pct_nonrept_long = _df(csv_row, f'Pct_of_OI_NonRept_Long_{sfx}')
|
||||||
|
pos.pct_nonrept_short = _df(csv_row, f'Pct_of_OI_NonRept_Short_{sfx}')
|
||||||
|
|
||||||
|
pos.traders_total = _di(csv_row, f'Traders_Tot_{sfx}')
|
||||||
|
pos.traders_prod_merc_long = _di(csv_row, f'Traders_Prod_Merc_Long_{sfx}')
|
||||||
|
pos.traders_prod_merc_short = _di(csv_row, f'Traders_Prod_Merc_Short_{sfx}')
|
||||||
|
pos.traders_swap_long = _di(csv_row, f'Traders_Swap_Long_{sfx}')
|
||||||
|
pos.traders_swap_short = _di(csv_row, f'Traders_Swap_Short_{sfx}')
|
||||||
|
pos.traders_swap_spread = _di(csv_row, f'Traders_Swap_Spread_{sfx}')
|
||||||
|
pos.traders_m_money_long = _di(csv_row, f'Traders_M_Money_Long_{sfx}')
|
||||||
|
pos.traders_m_money_short = _di(csv_row, f'Traders_M_Money_Short_{sfx}')
|
||||||
|
pos.traders_m_money_spread = _di(csv_row, f'Traders_M_Money_Spread_{sfx}')
|
||||||
|
pos.traders_other_rept_long = _di(csv_row, f'Traders_Other_Rept_Long_{sfx}')
|
||||||
|
pos.traders_other_rept_short = _di(csv_row, f'Traders_Other_Rept_Short_{sfx}')
|
||||||
|
pos.traders_other_rept_spread= _di(csv_row, f'Traders_Other_Rept_Spread_{sfx}')
|
||||||
|
pos.traders_tot_rept_long = _di(csv_row, f'Traders_Tot_Rept_Long_{sfx}')
|
||||||
|
pos.traders_tot_rept_short = _di(csv_row, f'Traders_Tot_Rept_Short_{sfx}')
|
||||||
|
return pos
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_disagg_concentration(csv_row: dict, sfx: str) -> ConcentrationRow:
|
||||||
|
return ConcentrationRow(
|
||||||
|
conc_gross_long_4 =_df(csv_row, f'Conc_Gross_LE_4_TDR_Long_{sfx}'),
|
||||||
|
conc_gross_short_4=_df(csv_row, f'Conc_Gross_LE_4_TDR_Short_{sfx}'),
|
||||||
|
conc_gross_long_8 =_df(csv_row, f'Conc_Gross_LE_8_TDR_Long_{sfx}'),
|
||||||
|
conc_gross_short_8=_df(csv_row, f'Conc_Gross_LE_8_TDR_Short_{sfx}'),
|
||||||
|
conc_net_long_4 =_df(csv_row, f'Conc_Net_LE_4_TDR_Long_{sfx}'),
|
||||||
|
conc_net_short_4 =_df(csv_row, f'Conc_Net_LE_4_TDR_Short_{sfx}'),
|
||||||
|
conc_net_long_8 =_df(csv_row, f'Conc_Net_LE_8_TDR_Long_{sfx}'),
|
||||||
|
conc_net_short_8 =_df(csv_row, f'Conc_Net_LE_8_TDR_Short_{sfx}'),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _disagg_csv_row_to_block(csv_row: dict) -> Optional[DisaggBlock]:
|
||||||
|
full_name = csv_row.get('Market_and_Exchange_Names', '').strip()
|
||||||
|
report_date = csv_row.get('Report_Date_as_YYYY-MM-DD', '').strip()
|
||||||
|
cftc_code = csv_row.get('CFTC_Contract_Market_Code', '').strip()
|
||||||
|
contract_unit= csv_row.get('Contract_Units', '').strip()
|
||||||
|
|
||||||
|
if not full_name or not report_date or not cftc_code:
|
||||||
|
return None
|
||||||
|
|
||||||
|
if ' - ' in full_name:
|
||||||
|
name, exchange = full_name.split(' - ', 1)
|
||||||
|
else:
|
||||||
|
name, exchange = full_name, ''
|
||||||
|
name = name.strip()
|
||||||
|
exchange = exchange.strip()
|
||||||
|
exchange_abbr = EXCHANGE_ABBR.get(exchange.upper(),
|
||||||
|
exchange[:6].upper().replace(' ', ''))
|
||||||
|
|
||||||
|
rows = [_parse_disagg_position_row(csv_row, sfx) for sfx in ('All', 'Old', 'Other')]
|
||||||
|
concentration = {sfx: _parse_disagg_concentration(csv_row, sfx)
|
||||||
|
for sfx in ('All', 'Old', 'Other')}
|
||||||
|
|
||||||
|
return DisaggBlock(
|
||||||
|
cftc_code=cftc_code,
|
||||||
|
name=name,
|
||||||
|
exchange=exchange,
|
||||||
|
exchange_abbr=exchange_abbr,
|
||||||
|
contract_unit=contract_unit,
|
||||||
|
report_date=report_date,
|
||||||
|
rows=rows,
|
||||||
|
concentration=concentration,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_disagg_csv_text(text: str) -> Iterator[DisaggBlock]:
|
||||||
|
"""Parse a CFTC Disaggregated COT CSV file (c_year.txt format)."""
|
||||||
|
import csv as _csv
|
||||||
|
reader = _csv.DictReader(text.splitlines())
|
||||||
|
for row in reader:
|
||||||
|
block = _disagg_csv_row_to_block(row)
|
||||||
|
if block:
|
||||||
|
yield block
|
||||||
|
|
||||||
|
|
||||||
|
def parse_disagg_zip_file(zip_path: str) -> Iterator[DisaggBlock]:
|
||||||
|
"""Parse a CFTC Disaggregated COT ZIP archive (com_disagg_txt_YYYY.zip)."""
|
||||||
|
with zipfile.ZipFile(zip_path) as zf:
|
||||||
|
txt_files = [n for n in zf.namelist() if n.lower().endswith('.txt')]
|
||||||
|
for fname in txt_files:
|
||||||
|
with zf.open(fname) as f:
|
||||||
|
text = f.read().decode('latin-1')
|
||||||
|
yield from parse_disagg_csv_text(text)
|
||||||
|
|||||||
@ -10,6 +10,7 @@ services:
|
|||||||
- DB_PATH=/data/cot.db
|
- DB_PATH=/data/cot.db
|
||||||
command: >
|
command: >
|
||||||
sh -c "python3 -m app.ingestion.cli init-db &&
|
sh -c "python3 -m app.ingestion.cli init-db &&
|
||||||
|
python3 -m app.ingestion.cli import-disagg-history &&
|
||||||
uvicorn app.api.main:app --host 0.0.0.0 --port 8000"
|
uvicorn app.api.main:app --host 0.0.0.0 --port 8000"
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
healthcheck:
|
healthcheck:
|
||||||
|
|||||||
135
schema.sql
135
schema.sql
@ -142,6 +142,141 @@ CREATE TABLE IF NOT EXISTS import_log (
|
|||||||
error_message TEXT
|
error_message TEXT
|
||||||
);
|
);
|
||||||
|
|
||||||
|
-- ----------------------------------------------------------------
|
||||||
|
-- disagg_reports: one row per (commodity x report_date), disaggregated COT
|
||||||
|
-- ----------------------------------------------------------------
|
||||||
|
CREATE TABLE IF NOT EXISTS disagg_reports (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
commodity_id INTEGER NOT NULL REFERENCES commodities(id),
|
||||||
|
report_date TEXT NOT NULL,
|
||||||
|
source_file TEXT,
|
||||||
|
imported_at TEXT DEFAULT (datetime('now')),
|
||||||
|
UNIQUE (commodity_id, report_date)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_disagg_reports_date ON disagg_reports(report_date);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_disagg_reports_commodity ON disagg_reports(commodity_id);
|
||||||
|
|
||||||
|
-- ----------------------------------------------------------------
|
||||||
|
-- disagg_positions: disaggregated position data per (report x row_type)
|
||||||
|
-- row_type: 'All', 'Old', 'Other'
|
||||||
|
-- Includes positions, week-over-week changes, % of OI, and trader counts.
|
||||||
|
-- ----------------------------------------------------------------
|
||||||
|
CREATE TABLE IF NOT EXISTS disagg_positions (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
report_id INTEGER NOT NULL REFERENCES disagg_reports(id),
|
||||||
|
row_type TEXT NOT NULL CHECK (row_type IN ('All', 'Old', 'Other')),
|
||||||
|
|
||||||
|
-- Open interest
|
||||||
|
open_interest INTEGER,
|
||||||
|
|
||||||
|
-- Producer/Merchant/Processor/User
|
||||||
|
prod_merc_long INTEGER,
|
||||||
|
prod_merc_short INTEGER,
|
||||||
|
|
||||||
|
-- Swap Dealers
|
||||||
|
swap_long INTEGER,
|
||||||
|
swap_short INTEGER,
|
||||||
|
swap_spread INTEGER,
|
||||||
|
|
||||||
|
-- Managed Money
|
||||||
|
m_money_long INTEGER,
|
||||||
|
m_money_short INTEGER,
|
||||||
|
m_money_spread INTEGER,
|
||||||
|
|
||||||
|
-- Other Reportable
|
||||||
|
other_rept_long INTEGER,
|
||||||
|
other_rept_short INTEGER,
|
||||||
|
other_rept_spread INTEGER,
|
||||||
|
|
||||||
|
-- Total Reportable
|
||||||
|
tot_rept_long INTEGER,
|
||||||
|
tot_rept_short INTEGER,
|
||||||
|
|
||||||
|
-- Non-Reportable
|
||||||
|
nonrept_long INTEGER,
|
||||||
|
nonrept_short INTEGER,
|
||||||
|
|
||||||
|
-- Week-over-week changes (stored on All rows only)
|
||||||
|
chg_open_interest INTEGER,
|
||||||
|
chg_prod_merc_long INTEGER,
|
||||||
|
chg_prod_merc_short INTEGER,
|
||||||
|
chg_swap_long INTEGER,
|
||||||
|
chg_swap_short INTEGER,
|
||||||
|
chg_swap_spread INTEGER,
|
||||||
|
chg_m_money_long INTEGER,
|
||||||
|
chg_m_money_short INTEGER,
|
||||||
|
chg_m_money_spread INTEGER,
|
||||||
|
chg_other_rept_long INTEGER,
|
||||||
|
chg_other_rept_short INTEGER,
|
||||||
|
chg_other_rept_spread INTEGER,
|
||||||
|
chg_tot_rept_long INTEGER,
|
||||||
|
chg_tot_rept_short INTEGER,
|
||||||
|
chg_nonrept_long INTEGER,
|
||||||
|
chg_nonrept_short INTEGER,
|
||||||
|
|
||||||
|
-- Percent of open interest
|
||||||
|
pct_open_interest REAL,
|
||||||
|
pct_prod_merc_long REAL,
|
||||||
|
pct_prod_merc_short REAL,
|
||||||
|
pct_swap_long REAL,
|
||||||
|
pct_swap_short REAL,
|
||||||
|
pct_swap_spread REAL,
|
||||||
|
pct_m_money_long REAL,
|
||||||
|
pct_m_money_short REAL,
|
||||||
|
pct_m_money_spread REAL,
|
||||||
|
pct_other_rept_long REAL,
|
||||||
|
pct_other_rept_short REAL,
|
||||||
|
pct_other_rept_spread REAL,
|
||||||
|
pct_tot_rept_long REAL,
|
||||||
|
pct_tot_rept_short REAL,
|
||||||
|
pct_nonrept_long REAL,
|
||||||
|
pct_nonrept_short REAL,
|
||||||
|
|
||||||
|
-- Number of traders
|
||||||
|
traders_total INTEGER,
|
||||||
|
traders_prod_merc_long INTEGER,
|
||||||
|
traders_prod_merc_short INTEGER,
|
||||||
|
traders_swap_long INTEGER,
|
||||||
|
traders_swap_short INTEGER,
|
||||||
|
traders_swap_spread INTEGER,
|
||||||
|
traders_m_money_long INTEGER,
|
||||||
|
traders_m_money_short INTEGER,
|
||||||
|
traders_m_money_spread INTEGER,
|
||||||
|
traders_other_rept_long INTEGER,
|
||||||
|
traders_other_rept_short INTEGER,
|
||||||
|
traders_other_rept_spread INTEGER,
|
||||||
|
traders_tot_rept_long INTEGER,
|
||||||
|
traders_tot_rept_short INTEGER,
|
||||||
|
|
||||||
|
UNIQUE (report_id, row_type)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_disagg_positions_report ON disagg_positions(report_id);
|
||||||
|
|
||||||
|
-- ----------------------------------------------------------------
|
||||||
|
-- disagg_concentration: largest-trader concentration for disaggregated reports
|
||||||
|
-- row_type: 'All', 'Old', 'Other'
|
||||||
|
-- ----------------------------------------------------------------
|
||||||
|
CREATE TABLE IF NOT EXISTS disagg_concentration (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
report_id INTEGER NOT NULL REFERENCES disagg_reports(id),
|
||||||
|
row_type TEXT NOT NULL CHECK (row_type IN ('All', 'Old', 'Other')),
|
||||||
|
|
||||||
|
conc_gross_long_4 REAL,
|
||||||
|
conc_gross_short_4 REAL,
|
||||||
|
conc_gross_long_8 REAL,
|
||||||
|
conc_gross_short_8 REAL,
|
||||||
|
conc_net_long_4 REAL,
|
||||||
|
conc_net_short_4 REAL,
|
||||||
|
conc_net_long_8 REAL,
|
||||||
|
conc_net_short_8 REAL,
|
||||||
|
|
||||||
|
UNIQUE (report_id, row_type)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_disagg_concentration_report ON disagg_concentration(report_id);
|
||||||
|
|
||||||
-- ----------------------------------------------------------------
|
-- ----------------------------------------------------------------
|
||||||
-- v_net_positions: convenience view for common analytical queries
|
-- v_net_positions: convenience view for common analytical queries
|
||||||
-- ----------------------------------------------------------------
|
-- ----------------------------------------------------------------
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user