diff --git a/app/ingestion/cli.py b/app/ingestion/cli.py index fc042c9..d6fa223 100644 --- a/app/ingestion/cli.py +++ b/app/ingestion/cli.py @@ -63,6 +63,13 @@ def cmd_import_zip(args): print(f"{result.rows_inserted} inserted, {result.rows_skipped} skipped") +def cmd_import_disagg_history(args): + from app.ingestion.importer import run_disagg_historical_import + print(f"Importing Disaggregated COT data {args.start_year}–{args.end_year}...") + run_disagg_historical_import(start_year=args.start_year, end_year=args.end_year, verbose=True) + print("Done.") + + def cmd_download_and_import(args): from app.ingestion.importer import download_and_import result = download_and_import() @@ -123,6 +130,11 @@ def main(): p = sub.add_parser("import-zip", help="Import a single ZIP file") p.add_argument("file") + p = sub.add_parser("import-disagg-history", + help="Download and import Disaggregated COT yearly ZIPs (2019–2026)") + p.add_argument("--start-year", type=int, default=2019) + p.add_argument("--end-year", type=int, default=2026) + sub.add_parser("download-and-import", help="Download latest weekly report and import it") sub.add_parser("status", help="Show database statistics") @@ -133,6 +145,7 @@ def main(): "import-history": cmd_import_history, "import-html": cmd_import_html, "import-zip": cmd_import_zip, + "import-disagg-history": cmd_import_disagg_history, "download-and-import": cmd_download_and_import, "status": cmd_status, } diff --git a/app/ingestion/importer.py b/app/ingestion/importer.py index c40cd10..265c1f1 100644 --- a/app/ingestion/importer.py +++ b/app/ingestion/importer.py @@ -18,6 +18,7 @@ from app.ingestion.parser import ( CommodityBlock, parse_html_file, parse_zip_file, + parse_disagg_zip_file, ) HEADERS = { @@ -357,6 +358,216 @@ def run_historical_import(start_year: int = 1995, end_year: int = 2026, f"{result.rows_skipped} skipped — {status}") +def import_disagg_block(conn: sqlite3.Connection, block, source: str) -> tuple[int, int]: + """ + Insert one DisaggBlock into the disaggregated tables. + Returns (rows_inserted, rows_skipped). + """ + commodity_id = _upsert_commodity(conn, block) + + cur = conn.execute( + """ + INSERT OR IGNORE INTO disagg_reports + (commodity_id, report_date, source_file) + VALUES (?, ?, ?) + """, + (commodity_id, block.report_date, source), + ) + if cur.rowcount == 0: + return 0, 1 + report_id = cur.lastrowid + + inserted = 0 + for pos in block.rows: + sfx = pos.row_type + chg = ( + pos.chg_open_interest, + pos.chg_prod_merc_long, pos.chg_prod_merc_short, + pos.chg_swap_long, pos.chg_swap_short, pos.chg_swap_spread, + pos.chg_m_money_long, pos.chg_m_money_short, pos.chg_m_money_spread, + pos.chg_other_rept_long, pos.chg_other_rept_short,pos.chg_other_rept_spread, + pos.chg_tot_rept_long, pos.chg_tot_rept_short, + pos.chg_nonrept_long, pos.chg_nonrept_short, + ) if sfx == 'All' else (None,) * 16 + + conn.execute( + """ + INSERT OR IGNORE INTO disagg_positions ( + report_id, row_type, + open_interest, + prod_merc_long, prod_merc_short, + swap_long, swap_short, swap_spread, + m_money_long, m_money_short, m_money_spread, + other_rept_long, other_rept_short, other_rept_spread, + tot_rept_long, tot_rept_short, + nonrept_long, nonrept_short, + chg_open_interest, + chg_prod_merc_long, chg_prod_merc_short, + chg_swap_long, chg_swap_short, chg_swap_spread, + chg_m_money_long, chg_m_money_short, chg_m_money_spread, + chg_other_rept_long, chg_other_rept_short, chg_other_rept_spread, + chg_tot_rept_long, chg_tot_rept_short, + chg_nonrept_long, chg_nonrept_short, + pct_open_interest, + pct_prod_merc_long, pct_prod_merc_short, + pct_swap_long, pct_swap_short, pct_swap_spread, + pct_m_money_long, pct_m_money_short, pct_m_money_spread, + pct_other_rept_long, pct_other_rept_short, pct_other_rept_spread, + pct_tot_rept_long, pct_tot_rept_short, + pct_nonrept_long, pct_nonrept_short, + traders_total, + traders_prod_merc_long, traders_prod_merc_short, + traders_swap_long, traders_swap_short, traders_swap_spread, + traders_m_money_long, traders_m_money_short, traders_m_money_spread, + traders_other_rept_long, traders_other_rept_short, traders_other_rept_spread, + traders_tot_rept_long, traders_tot_rept_short + ) VALUES ( + ?, ?, + ?, + ?, ?, + ?, ?, ?, + ?, ?, ?, + ?, ?, ?, + ?, ?, + ?, ?, + ?, + ?, ?, + ?, ?, ?, + ?, ?, ?, + ?, ?, ?, + ?, ?, + ?, ?, + ?, + ?, ?, + ?, ?, ?, + ?, ?, ?, + ?, ?, ?, + ?, ?, + ?, ?, + ?, + ?, ?, + ?, ?, ?, + ?, ?, ?, + ?, ?, ?, + ?, ? + ) + """, + ( + report_id, sfx, + pos.open_interest, + pos.prod_merc_long, pos.prod_merc_short, + pos.swap_long, pos.swap_short, pos.swap_spread, + pos.m_money_long, pos.m_money_short, pos.m_money_spread, + pos.other_rept_long, pos.other_rept_short,pos.other_rept_spread, + pos.tot_rept_long, pos.tot_rept_short, + pos.nonrept_long, pos.nonrept_short, + *chg, + pos.pct_open_interest, + pos.pct_prod_merc_long, pos.pct_prod_merc_short, + pos.pct_swap_long, pos.pct_swap_short, pos.pct_swap_spread, + pos.pct_m_money_long, pos.pct_m_money_short, pos.pct_m_money_spread, + pos.pct_other_rept_long, pos.pct_other_rept_short,pos.pct_other_rept_spread, + pos.pct_tot_rept_long, pos.pct_tot_rept_short, + pos.pct_nonrept_long, pos.pct_nonrept_short, + pos.traders_total, + pos.traders_prod_merc_long, pos.traders_prod_merc_short, + pos.traders_swap_long, pos.traders_swap_short, pos.traders_swap_spread, + pos.traders_m_money_long, pos.traders_m_money_short, pos.traders_m_money_spread, + pos.traders_other_rept_long, pos.traders_other_rept_short, pos.traders_other_rept_spread, + pos.traders_tot_rept_long, pos.traders_tot_rept_short, + ), + ) + inserted += 1 + + conc = block.concentration.get(sfx) + if conc: + conn.execute( + """ + INSERT OR IGNORE INTO disagg_concentration ( + report_id, row_type, + conc_gross_long_4, conc_gross_short_4, + conc_gross_long_8, conc_gross_short_8, + conc_net_long_4, conc_net_short_4, + conc_net_long_8, conc_net_short_8 + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + report_id, sfx, + conc.conc_gross_long_4, conc.conc_gross_short_4, + conc.conc_gross_long_8, conc.conc_gross_short_8, + conc.conc_net_long_4, conc.conc_net_short_4, + conc.conc_net_long_8, conc.conc_net_short_8, + ), + ) + + return inserted, 0 + + +def import_disagg_zip_file(zip_path: str, source_label: Optional[str] = None) -> ImportResult: + """Import a Disaggregated COT ZIP file.""" + source = source_label or Path(zip_path).name + result = ImportResult(source=source) + try: + with get_db() as conn: + for block in parse_disagg_zip_file(zip_path): + ins, skp = import_disagg_block(conn, block, source) + result.rows_inserted += ins + result.rows_skipped += skp + conn.commit() + except Exception as e: + result.error = str(e) + return result + + +def run_disagg_historical_import(start_year: int = 2019, end_year: int = 2026, + verbose: bool = True) -> None: + """ + Download and import the Disaggregated COT yearly ZIP files. + Uses import_log to skip already-completed sources. + """ + for year in range(start_year, end_year + 1): + label = f"com_disagg_txt_{year}.zip" + url = f"{HISTORICAL_BASE}/com_disagg_txt_{year}.zip" + + with get_db() as conn: + if _already_imported(conn, label): + if verbose: + print(f" [skip] {label} (already imported)") + continue + + if verbose: + print(f" [download] {label} ...") + + with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as tmp: + tmp_path = Path(tmp.name) + + if not _download_zip(url, tmp_path): + if verbose: + print(f" [error] Failed to download {url}") + with get_db() as conn: + _log_start(conn, label, 'disagg_annual_zip') + _log_done(conn, label, 0, 0, f"Download failed: {url}") + tmp_path.unlink(missing_ok=True) + continue + + if verbose: + print(f" [import] {label} ...") + + with get_db() as conn: + _log_start(conn, label, 'disagg_annual_zip') + + result = import_disagg_zip_file(str(tmp_path), source_label=label) + tmp_path.unlink(missing_ok=True) + + with get_db() as conn: + _log_done(conn, label, result.rows_inserted, result.rows_skipped, result.error) + + if verbose: + status = "ERROR: " + result.error if result.error else "OK" + print(f" [done] {label}: {result.rows_inserted} inserted, " + f"{result.rows_skipped} skipped — {status}") + + def download_and_import() -> ImportResult: """ Download the current weekly report and import it. diff --git a/app/ingestion/parser.py b/app/ingestion/parser.py index 229695d..b3b47e9 100644 --- a/app/ingestion/parser.py +++ b/app/ingestion/parser.py @@ -616,3 +616,243 @@ def parse_csv_text(text: str) -> Iterator[CommodityBlock]: block = _csv_row_to_block(row) if block: yield block + + +# ── Disaggregated COT format (com_disagg_txt_YYYY.zip / c_year.txt) ───────── + +@dataclass +class DisaggPositionRow: + row_type: str + open_interest: Optional[int] = None + prod_merc_long: Optional[int] = None + prod_merc_short: Optional[int] = None + swap_long: Optional[int] = None + swap_short: Optional[int] = None + swap_spread: Optional[int] = None + m_money_long: Optional[int] = None + m_money_short: Optional[int] = None + m_money_spread: Optional[int] = None + other_rept_long: Optional[int] = None + other_rept_short: Optional[int] = None + other_rept_spread: Optional[int] = None + tot_rept_long: Optional[int] = None + tot_rept_short: Optional[int] = None + nonrept_long: Optional[int] = None + nonrept_short: Optional[int] = None + chg_open_interest: Optional[int] = None + chg_prod_merc_long: Optional[int] = None + chg_prod_merc_short: Optional[int] = None + chg_swap_long: Optional[int] = None + chg_swap_short: Optional[int] = None + chg_swap_spread: Optional[int] = None + chg_m_money_long: Optional[int] = None + chg_m_money_short: Optional[int] = None + chg_m_money_spread: Optional[int] = None + chg_other_rept_long: Optional[int] = None + chg_other_rept_short: Optional[int] = None + chg_other_rept_spread: Optional[int] = None + chg_tot_rept_long: Optional[int] = None + chg_tot_rept_short: Optional[int] = None + chg_nonrept_long: Optional[int] = None + chg_nonrept_short: Optional[int] = None + pct_open_interest: Optional[float] = None + pct_prod_merc_long: Optional[float] = None + pct_prod_merc_short: Optional[float] = None + pct_swap_long: Optional[float] = None + pct_swap_short: Optional[float] = None + pct_swap_spread: Optional[float] = None + pct_m_money_long: Optional[float] = None + pct_m_money_short: Optional[float] = None + pct_m_money_spread: Optional[float] = None + pct_other_rept_long: Optional[float] = None + pct_other_rept_short: Optional[float] = None + pct_other_rept_spread: Optional[float] = None + pct_tot_rept_long: Optional[float] = None + pct_tot_rept_short: Optional[float] = None + pct_nonrept_long: Optional[float] = None + pct_nonrept_short: Optional[float] = None + traders_total: Optional[int] = None + traders_prod_merc_long: Optional[int] = None + traders_prod_merc_short: Optional[int] = None + traders_swap_long: Optional[int] = None + traders_swap_short: Optional[int] = None + traders_swap_spread: Optional[int] = None + traders_m_money_long: Optional[int] = None + traders_m_money_short: Optional[int] = None + traders_m_money_spread: Optional[int] = None + traders_other_rept_long: Optional[int] = None + traders_other_rept_short: Optional[int] = None + traders_other_rept_spread: Optional[int] = None + traders_tot_rept_long: Optional[int] = None + traders_tot_rept_short: Optional[int] = None + + +@dataclass +class DisaggBlock: + cftc_code: str + name: str + exchange: str + exchange_abbr: str + contract_unit: str + report_date: str + rows: list # list of DisaggPositionRow (one per row_type) + concentration: dict # row_type -> ConcentrationRow + + +def _di(row: dict, col: str) -> Optional[int]: + v = row.get(col, '').strip() + if not v: + return None + try: + return int(float(v)) + except ValueError: + return None + + +def _df(row: dict, col: str) -> Optional[float]: + v = row.get(col, '').strip() + if not v: + return None + try: + return float(v) + except ValueError: + return None + + +def _parse_disagg_position_row(csv_row: dict, sfx: str) -> DisaggPositionRow: + pos = DisaggPositionRow(row_type=sfx) + pos.open_interest = _di(csv_row, f'Open_Interest_{sfx}') + pos.prod_merc_long = _di(csv_row, f'Prod_Merc_Positions_Long_{sfx}') + pos.prod_merc_short = _di(csv_row, f'Prod_Merc_Positions_Short_{sfx}') + # NB: CFTC uses double underscore for Swap Short/Spread columns + pos.swap_long = _di(csv_row, f'Swap_Positions_Long_{sfx}') + pos.swap_short = _di(csv_row, f'Swap__Positions_Short_{sfx}') + pos.swap_spread = _di(csv_row, f'Swap__Positions_Spread_{sfx}') + pos.m_money_long = _di(csv_row, f'M_Money_Positions_Long_{sfx}') + pos.m_money_short = _di(csv_row, f'M_Money_Positions_Short_{sfx}') + pos.m_money_spread = _di(csv_row, f'M_Money_Positions_Spread_{sfx}') + pos.other_rept_long = _di(csv_row, f'Other_Rept_Positions_Long_{sfx}') + pos.other_rept_short = _di(csv_row, f'Other_Rept_Positions_Short_{sfx}') + pos.other_rept_spread= _di(csv_row, f'Other_Rept_Positions_Spread_{sfx}') + pos.tot_rept_long = _di(csv_row, f'Tot_Rept_Positions_Long_{sfx}') + pos.tot_rept_short = _di(csv_row, f'Tot_Rept_Positions_Short_{sfx}') + pos.nonrept_long = _di(csv_row, f'NonRept_Positions_Long_{sfx}') + pos.nonrept_short = _di(csv_row, f'NonRept_Positions_Short_{sfx}') + + if sfx == 'All': + pos.chg_open_interest = _di(csv_row, 'Change_in_Open_Interest_All') + pos.chg_prod_merc_long = _di(csv_row, 'Change_in_Prod_Merc_Long_All') + pos.chg_prod_merc_short = _di(csv_row, 'Change_in_Prod_Merc_Short_All') + pos.chg_swap_long = _di(csv_row, 'Change_in_Swap_Long_All') + pos.chg_swap_short = _di(csv_row, 'Change_in_Swap_Short_All') + pos.chg_swap_spread = _di(csv_row, 'Change_in_Swap_Spread_All') + pos.chg_m_money_long = _di(csv_row, 'Change_in_M_Money_Long_All') + pos.chg_m_money_short = _di(csv_row, 'Change_in_M_Money_Short_All') + pos.chg_m_money_spread = _di(csv_row, 'Change_in_M_Money_Spread_All') + pos.chg_other_rept_long = _di(csv_row, 'Change_in_Other_Rept_Long_All') + pos.chg_other_rept_short = _di(csv_row, 'Change_in_Other_Rept_Short_All') + pos.chg_other_rept_spread= _di(csv_row, 'Change_in_Other_Rept_Spread_All') + pos.chg_tot_rept_long = _di(csv_row, 'Change_in_Tot_Rept_Long_All') + pos.chg_tot_rept_short = _di(csv_row, 'Change_in_Tot_Rept_Short_All') + pos.chg_nonrept_long = _di(csv_row, 'Change_in_NonRept_Long_All') + pos.chg_nonrept_short = _di(csv_row, 'Change_in_NonRept_Short_All') + + pos.pct_open_interest = _df(csv_row, f'Pct_of_Open_Interest_{sfx}') + pos.pct_prod_merc_long = _df(csv_row, f'Pct_of_OI_Prod_Merc_Long_{sfx}') + pos.pct_prod_merc_short = _df(csv_row, f'Pct_of_OI_Prod_Merc_Short_{sfx}') + pos.pct_swap_long = _df(csv_row, f'Pct_of_OI_Swap_Long_{sfx}') + pos.pct_swap_short = _df(csv_row, f'Pct_of_OI_Swap_Short_{sfx}') + pos.pct_swap_spread = _df(csv_row, f'Pct_of_OI_Swap_Spread_{sfx}') + pos.pct_m_money_long = _df(csv_row, f'Pct_of_OI_M_Money_Long_{sfx}') + pos.pct_m_money_short = _df(csv_row, f'Pct_of_OI_M_Money_Short_{sfx}') + pos.pct_m_money_spread = _df(csv_row, f'Pct_of_OI_M_Money_Spread_{sfx}') + pos.pct_other_rept_long = _df(csv_row, f'Pct_of_OI_Other_Rept_Long_{sfx}') + pos.pct_other_rept_short = _df(csv_row, f'Pct_of_OI_Other_Rept_Short_{sfx}') + pos.pct_other_rept_spread= _df(csv_row, f'Pct_of_OI_Other_Rept_Spread_{sfx}') + pos.pct_tot_rept_long = _df(csv_row, f'Pct_of_OI_Tot_Rept_Long_{sfx}') + pos.pct_tot_rept_short = _df(csv_row, f'Pct_of_OI_Tot_Rept_Short_{sfx}') + pos.pct_nonrept_long = _df(csv_row, f'Pct_of_OI_NonRept_Long_{sfx}') + pos.pct_nonrept_short = _df(csv_row, f'Pct_of_OI_NonRept_Short_{sfx}') + + pos.traders_total = _di(csv_row, f'Traders_Tot_{sfx}') + pos.traders_prod_merc_long = _di(csv_row, f'Traders_Prod_Merc_Long_{sfx}') + pos.traders_prod_merc_short = _di(csv_row, f'Traders_Prod_Merc_Short_{sfx}') + pos.traders_swap_long = _di(csv_row, f'Traders_Swap_Long_{sfx}') + pos.traders_swap_short = _di(csv_row, f'Traders_Swap_Short_{sfx}') + pos.traders_swap_spread = _di(csv_row, f'Traders_Swap_Spread_{sfx}') + pos.traders_m_money_long = _di(csv_row, f'Traders_M_Money_Long_{sfx}') + pos.traders_m_money_short = _di(csv_row, f'Traders_M_Money_Short_{sfx}') + pos.traders_m_money_spread = _di(csv_row, f'Traders_M_Money_Spread_{sfx}') + pos.traders_other_rept_long = _di(csv_row, f'Traders_Other_Rept_Long_{sfx}') + pos.traders_other_rept_short = _di(csv_row, f'Traders_Other_Rept_Short_{sfx}') + pos.traders_other_rept_spread= _di(csv_row, f'Traders_Other_Rept_Spread_{sfx}') + pos.traders_tot_rept_long = _di(csv_row, f'Traders_Tot_Rept_Long_{sfx}') + pos.traders_tot_rept_short = _di(csv_row, f'Traders_Tot_Rept_Short_{sfx}') + return pos + + +def _parse_disagg_concentration(csv_row: dict, sfx: str) -> ConcentrationRow: + return ConcentrationRow( + conc_gross_long_4 =_df(csv_row, f'Conc_Gross_LE_4_TDR_Long_{sfx}'), + conc_gross_short_4=_df(csv_row, f'Conc_Gross_LE_4_TDR_Short_{sfx}'), + conc_gross_long_8 =_df(csv_row, f'Conc_Gross_LE_8_TDR_Long_{sfx}'), + conc_gross_short_8=_df(csv_row, f'Conc_Gross_LE_8_TDR_Short_{sfx}'), + conc_net_long_4 =_df(csv_row, f'Conc_Net_LE_4_TDR_Long_{sfx}'), + conc_net_short_4 =_df(csv_row, f'Conc_Net_LE_4_TDR_Short_{sfx}'), + conc_net_long_8 =_df(csv_row, f'Conc_Net_LE_8_TDR_Long_{sfx}'), + conc_net_short_8 =_df(csv_row, f'Conc_Net_LE_8_TDR_Short_{sfx}'), + ) + + +def _disagg_csv_row_to_block(csv_row: dict) -> Optional[DisaggBlock]: + full_name = csv_row.get('Market_and_Exchange_Names', '').strip() + report_date = csv_row.get('Report_Date_as_YYYY-MM-DD', '').strip() + cftc_code = csv_row.get('CFTC_Contract_Market_Code', '').strip() + contract_unit= csv_row.get('Contract_Units', '').strip() + + if not full_name or not report_date or not cftc_code: + return None + + if ' - ' in full_name: + name, exchange = full_name.split(' - ', 1) + else: + name, exchange = full_name, '' + name = name.strip() + exchange = exchange.strip() + exchange_abbr = EXCHANGE_ABBR.get(exchange.upper(), + exchange[:6].upper().replace(' ', '')) + + rows = [_parse_disagg_position_row(csv_row, sfx) for sfx in ('All', 'Old', 'Other')] + concentration = {sfx: _parse_disagg_concentration(csv_row, sfx) + for sfx in ('All', 'Old', 'Other')} + + return DisaggBlock( + cftc_code=cftc_code, + name=name, + exchange=exchange, + exchange_abbr=exchange_abbr, + contract_unit=contract_unit, + report_date=report_date, + rows=rows, + concentration=concentration, + ) + + +def parse_disagg_csv_text(text: str) -> Iterator[DisaggBlock]: + """Parse a CFTC Disaggregated COT CSV file (c_year.txt format).""" + import csv as _csv + reader = _csv.DictReader(text.splitlines()) + for row in reader: + block = _disagg_csv_row_to_block(row) + if block: + yield block + + +def parse_disagg_zip_file(zip_path: str) -> Iterator[DisaggBlock]: + """Parse a CFTC Disaggregated COT ZIP archive (com_disagg_txt_YYYY.zip).""" + with zipfile.ZipFile(zip_path) as zf: + txt_files = [n for n in zf.namelist() if n.lower().endswith('.txt')] + for fname in txt_files: + with zf.open(fname) as f: + text = f.read().decode('latin-1') + yield from parse_disagg_csv_text(text) diff --git a/docker-compose.yaml b/docker-compose.yaml index f87ec7a..5a32c09 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -10,6 +10,7 @@ services: - DB_PATH=/data/cot.db command: > sh -c "python3 -m app.ingestion.cli init-db && + python3 -m app.ingestion.cli import-disagg-history && uvicorn app.api.main:app --host 0.0.0.0 --port 8000" restart: unless-stopped healthcheck: diff --git a/schema.sql b/schema.sql index c7bdf1f..9c3c9d1 100644 --- a/schema.sql +++ b/schema.sql @@ -142,6 +142,141 @@ CREATE TABLE IF NOT EXISTS import_log ( error_message TEXT ); +-- ---------------------------------------------------------------- +-- disagg_reports: one row per (commodity x report_date), disaggregated COT +-- ---------------------------------------------------------------- +CREATE TABLE IF NOT EXISTS disagg_reports ( + id INTEGER PRIMARY KEY, + commodity_id INTEGER NOT NULL REFERENCES commodities(id), + report_date TEXT NOT NULL, + source_file TEXT, + imported_at TEXT DEFAULT (datetime('now')), + UNIQUE (commodity_id, report_date) +); + +CREATE INDEX IF NOT EXISTS idx_disagg_reports_date ON disagg_reports(report_date); +CREATE INDEX IF NOT EXISTS idx_disagg_reports_commodity ON disagg_reports(commodity_id); + +-- ---------------------------------------------------------------- +-- disagg_positions: disaggregated position data per (report x row_type) +-- row_type: 'All', 'Old', 'Other' +-- Includes positions, week-over-week changes, % of OI, and trader counts. +-- ---------------------------------------------------------------- +CREATE TABLE IF NOT EXISTS disagg_positions ( + id INTEGER PRIMARY KEY, + report_id INTEGER NOT NULL REFERENCES disagg_reports(id), + row_type TEXT NOT NULL CHECK (row_type IN ('All', 'Old', 'Other')), + + -- Open interest + open_interest INTEGER, + + -- Producer/Merchant/Processor/User + prod_merc_long INTEGER, + prod_merc_short INTEGER, + + -- Swap Dealers + swap_long INTEGER, + swap_short INTEGER, + swap_spread INTEGER, + + -- Managed Money + m_money_long INTEGER, + m_money_short INTEGER, + m_money_spread INTEGER, + + -- Other Reportable + other_rept_long INTEGER, + other_rept_short INTEGER, + other_rept_spread INTEGER, + + -- Total Reportable + tot_rept_long INTEGER, + tot_rept_short INTEGER, + + -- Non-Reportable + nonrept_long INTEGER, + nonrept_short INTEGER, + + -- Week-over-week changes (stored on All rows only) + chg_open_interest INTEGER, + chg_prod_merc_long INTEGER, + chg_prod_merc_short INTEGER, + chg_swap_long INTEGER, + chg_swap_short INTEGER, + chg_swap_spread INTEGER, + chg_m_money_long INTEGER, + chg_m_money_short INTEGER, + chg_m_money_spread INTEGER, + chg_other_rept_long INTEGER, + chg_other_rept_short INTEGER, + chg_other_rept_spread INTEGER, + chg_tot_rept_long INTEGER, + chg_tot_rept_short INTEGER, + chg_nonrept_long INTEGER, + chg_nonrept_short INTEGER, + + -- Percent of open interest + pct_open_interest REAL, + pct_prod_merc_long REAL, + pct_prod_merc_short REAL, + pct_swap_long REAL, + pct_swap_short REAL, + pct_swap_spread REAL, + pct_m_money_long REAL, + pct_m_money_short REAL, + pct_m_money_spread REAL, + pct_other_rept_long REAL, + pct_other_rept_short REAL, + pct_other_rept_spread REAL, + pct_tot_rept_long REAL, + pct_tot_rept_short REAL, + pct_nonrept_long REAL, + pct_nonrept_short REAL, + + -- Number of traders + traders_total INTEGER, + traders_prod_merc_long INTEGER, + traders_prod_merc_short INTEGER, + traders_swap_long INTEGER, + traders_swap_short INTEGER, + traders_swap_spread INTEGER, + traders_m_money_long INTEGER, + traders_m_money_short INTEGER, + traders_m_money_spread INTEGER, + traders_other_rept_long INTEGER, + traders_other_rept_short INTEGER, + traders_other_rept_spread INTEGER, + traders_tot_rept_long INTEGER, + traders_tot_rept_short INTEGER, + + UNIQUE (report_id, row_type) +); + +CREATE INDEX IF NOT EXISTS idx_disagg_positions_report ON disagg_positions(report_id); + +-- ---------------------------------------------------------------- +-- disagg_concentration: largest-trader concentration for disaggregated reports +-- row_type: 'All', 'Old', 'Other' +-- ---------------------------------------------------------------- +CREATE TABLE IF NOT EXISTS disagg_concentration ( + id INTEGER PRIMARY KEY, + report_id INTEGER NOT NULL REFERENCES disagg_reports(id), + row_type TEXT NOT NULL CHECK (row_type IN ('All', 'Old', 'Other')), + + conc_gross_long_4 REAL, + conc_gross_short_4 REAL, + conc_gross_long_8 REAL, + conc_gross_short_8 REAL, + conc_net_long_4 REAL, + conc_net_short_4 REAL, + conc_net_long_8 REAL, + conc_net_short_8 REAL, + + UNIQUE (report_id, row_type) +); + +CREATE INDEX IF NOT EXISTS idx_disagg_concentration_report ON disagg_concentration(report_id); + -- ---------------------------------------------------------------- -- v_net_positions: convenience view for common analytical queries -- ----------------------------------------------------------------