Add Disaggregated COT data support (2019–2026)

Integrates the CFTC Disaggregated Commitments of Traders reports
(com_disagg_txt_YYYY.zip) which break positions down by Producer/Merchant,
Swap Dealers, Managed Money, and Other Reportables — a different report
type from the existing legacy COT data.

- schema.sql: add disagg_reports, disagg_positions, disagg_concentration tables
- parser.py: add DisaggPositionRow/DisaggBlock dataclasses and
  parse_disagg_csv_text()/parse_disagg_zip_file() for c_year.txt format
- importer.py: add import_disagg_block(), import_disagg_zip_file(),
  run_disagg_historical_import() for 2019–2026 yearly ZIPs
- cli.py: add import-disagg-history subcommand
- docker-compose.yaml: run import-disagg-history on startup (idempotent
  via import_log, so re-deploys skip already-imported years)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Greg 2026-03-22 18:11:59 +01:00
parent 495e0c9314
commit 2c28ac3b0a
5 changed files with 600 additions and 0 deletions

View File

@ -63,6 +63,13 @@ def cmd_import_zip(args):
print(f"{result.rows_inserted} inserted, {result.rows_skipped} skipped") print(f"{result.rows_inserted} inserted, {result.rows_skipped} skipped")
def cmd_import_disagg_history(args):
from app.ingestion.importer import run_disagg_historical_import
print(f"Importing Disaggregated COT data {args.start_year}{args.end_year}...")
run_disagg_historical_import(start_year=args.start_year, end_year=args.end_year, verbose=True)
print("Done.")
def cmd_download_and_import(args): def cmd_download_and_import(args):
from app.ingestion.importer import download_and_import from app.ingestion.importer import download_and_import
result = download_and_import() result = download_and_import()
@ -123,6 +130,11 @@ def main():
p = sub.add_parser("import-zip", help="Import a single ZIP file") p = sub.add_parser("import-zip", help="Import a single ZIP file")
p.add_argument("file") p.add_argument("file")
p = sub.add_parser("import-disagg-history",
help="Download and import Disaggregated COT yearly ZIPs (20192026)")
p.add_argument("--start-year", type=int, default=2019)
p.add_argument("--end-year", type=int, default=2026)
sub.add_parser("download-and-import", help="Download latest weekly report and import it") sub.add_parser("download-and-import", help="Download latest weekly report and import it")
sub.add_parser("status", help="Show database statistics") sub.add_parser("status", help="Show database statistics")
@ -133,6 +145,7 @@ def main():
"import-history": cmd_import_history, "import-history": cmd_import_history,
"import-html": cmd_import_html, "import-html": cmd_import_html,
"import-zip": cmd_import_zip, "import-zip": cmd_import_zip,
"import-disagg-history": cmd_import_disagg_history,
"download-and-import": cmd_download_and_import, "download-and-import": cmd_download_and_import,
"status": cmd_status, "status": cmd_status,
} }

View File

@ -18,6 +18,7 @@ from app.ingestion.parser import (
CommodityBlock, CommodityBlock,
parse_html_file, parse_html_file,
parse_zip_file, parse_zip_file,
parse_disagg_zip_file,
) )
HEADERS = { HEADERS = {
@ -357,6 +358,216 @@ def run_historical_import(start_year: int = 1995, end_year: int = 2026,
f"{result.rows_skipped} skipped — {status}") f"{result.rows_skipped} skipped — {status}")
def import_disagg_block(conn: sqlite3.Connection, block, source: str) -> tuple[int, int]:
"""
Insert one DisaggBlock into the disaggregated tables.
Returns (rows_inserted, rows_skipped).
"""
commodity_id = _upsert_commodity(conn, block)
cur = conn.execute(
"""
INSERT OR IGNORE INTO disagg_reports
(commodity_id, report_date, source_file)
VALUES (?, ?, ?)
""",
(commodity_id, block.report_date, source),
)
if cur.rowcount == 0:
return 0, 1
report_id = cur.lastrowid
inserted = 0
for pos in block.rows:
sfx = pos.row_type
chg = (
pos.chg_open_interest,
pos.chg_prod_merc_long, pos.chg_prod_merc_short,
pos.chg_swap_long, pos.chg_swap_short, pos.chg_swap_spread,
pos.chg_m_money_long, pos.chg_m_money_short, pos.chg_m_money_spread,
pos.chg_other_rept_long, pos.chg_other_rept_short,pos.chg_other_rept_spread,
pos.chg_tot_rept_long, pos.chg_tot_rept_short,
pos.chg_nonrept_long, pos.chg_nonrept_short,
) if sfx == 'All' else (None,) * 16
conn.execute(
"""
INSERT OR IGNORE INTO disagg_positions (
report_id, row_type,
open_interest,
prod_merc_long, prod_merc_short,
swap_long, swap_short, swap_spread,
m_money_long, m_money_short, m_money_spread,
other_rept_long, other_rept_short, other_rept_spread,
tot_rept_long, tot_rept_short,
nonrept_long, nonrept_short,
chg_open_interest,
chg_prod_merc_long, chg_prod_merc_short,
chg_swap_long, chg_swap_short, chg_swap_spread,
chg_m_money_long, chg_m_money_short, chg_m_money_spread,
chg_other_rept_long, chg_other_rept_short, chg_other_rept_spread,
chg_tot_rept_long, chg_tot_rept_short,
chg_nonrept_long, chg_nonrept_short,
pct_open_interest,
pct_prod_merc_long, pct_prod_merc_short,
pct_swap_long, pct_swap_short, pct_swap_spread,
pct_m_money_long, pct_m_money_short, pct_m_money_spread,
pct_other_rept_long, pct_other_rept_short, pct_other_rept_spread,
pct_tot_rept_long, pct_tot_rept_short,
pct_nonrept_long, pct_nonrept_short,
traders_total,
traders_prod_merc_long, traders_prod_merc_short,
traders_swap_long, traders_swap_short, traders_swap_spread,
traders_m_money_long, traders_m_money_short, traders_m_money_spread,
traders_other_rept_long, traders_other_rept_short, traders_other_rept_spread,
traders_tot_rept_long, traders_tot_rept_short
) VALUES (
?, ?,
?,
?, ?,
?, ?, ?,
?, ?, ?,
?, ?, ?,
?, ?,
?, ?,
?,
?, ?,
?, ?, ?,
?, ?, ?,
?, ?, ?,
?, ?,
?, ?,
?,
?, ?,
?, ?, ?,
?, ?, ?,
?, ?, ?,
?, ?,
?, ?,
?,
?, ?,
?, ?, ?,
?, ?, ?,
?, ?, ?,
?, ?
)
""",
(
report_id, sfx,
pos.open_interest,
pos.prod_merc_long, pos.prod_merc_short,
pos.swap_long, pos.swap_short, pos.swap_spread,
pos.m_money_long, pos.m_money_short, pos.m_money_spread,
pos.other_rept_long, pos.other_rept_short,pos.other_rept_spread,
pos.tot_rept_long, pos.tot_rept_short,
pos.nonrept_long, pos.nonrept_short,
*chg,
pos.pct_open_interest,
pos.pct_prod_merc_long, pos.pct_prod_merc_short,
pos.pct_swap_long, pos.pct_swap_short, pos.pct_swap_spread,
pos.pct_m_money_long, pos.pct_m_money_short, pos.pct_m_money_spread,
pos.pct_other_rept_long, pos.pct_other_rept_short,pos.pct_other_rept_spread,
pos.pct_tot_rept_long, pos.pct_tot_rept_short,
pos.pct_nonrept_long, pos.pct_nonrept_short,
pos.traders_total,
pos.traders_prod_merc_long, pos.traders_prod_merc_short,
pos.traders_swap_long, pos.traders_swap_short, pos.traders_swap_spread,
pos.traders_m_money_long, pos.traders_m_money_short, pos.traders_m_money_spread,
pos.traders_other_rept_long, pos.traders_other_rept_short, pos.traders_other_rept_spread,
pos.traders_tot_rept_long, pos.traders_tot_rept_short,
),
)
inserted += 1
conc = block.concentration.get(sfx)
if conc:
conn.execute(
"""
INSERT OR IGNORE INTO disagg_concentration (
report_id, row_type,
conc_gross_long_4, conc_gross_short_4,
conc_gross_long_8, conc_gross_short_8,
conc_net_long_4, conc_net_short_4,
conc_net_long_8, conc_net_short_8
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
report_id, sfx,
conc.conc_gross_long_4, conc.conc_gross_short_4,
conc.conc_gross_long_8, conc.conc_gross_short_8,
conc.conc_net_long_4, conc.conc_net_short_4,
conc.conc_net_long_8, conc.conc_net_short_8,
),
)
return inserted, 0
def import_disagg_zip_file(zip_path: str, source_label: Optional[str] = None) -> ImportResult:
"""Import a Disaggregated COT ZIP file."""
source = source_label or Path(zip_path).name
result = ImportResult(source=source)
try:
with get_db() as conn:
for block in parse_disagg_zip_file(zip_path):
ins, skp = import_disagg_block(conn, block, source)
result.rows_inserted += ins
result.rows_skipped += skp
conn.commit()
except Exception as e:
result.error = str(e)
return result
def run_disagg_historical_import(start_year: int = 2019, end_year: int = 2026,
verbose: bool = True) -> None:
"""
Download and import the Disaggregated COT yearly ZIP files.
Uses import_log to skip already-completed sources.
"""
for year in range(start_year, end_year + 1):
label = f"com_disagg_txt_{year}.zip"
url = f"{HISTORICAL_BASE}/com_disagg_txt_{year}.zip"
with get_db() as conn:
if _already_imported(conn, label):
if verbose:
print(f" [skip] {label} (already imported)")
continue
if verbose:
print(f" [download] {label} ...")
with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as tmp:
tmp_path = Path(tmp.name)
if not _download_zip(url, tmp_path):
if verbose:
print(f" [error] Failed to download {url}")
with get_db() as conn:
_log_start(conn, label, 'disagg_annual_zip')
_log_done(conn, label, 0, 0, f"Download failed: {url}")
tmp_path.unlink(missing_ok=True)
continue
if verbose:
print(f" [import] {label} ...")
with get_db() as conn:
_log_start(conn, label, 'disagg_annual_zip')
result = import_disagg_zip_file(str(tmp_path), source_label=label)
tmp_path.unlink(missing_ok=True)
with get_db() as conn:
_log_done(conn, label, result.rows_inserted, result.rows_skipped, result.error)
if verbose:
status = "ERROR: " + result.error if result.error else "OK"
print(f" [done] {label}: {result.rows_inserted} inserted, "
f"{result.rows_skipped} skipped — {status}")
def download_and_import() -> ImportResult: def download_and_import() -> ImportResult:
""" """
Download the current weekly report and import it. Download the current weekly report and import it.

View File

@ -616,3 +616,243 @@ def parse_csv_text(text: str) -> Iterator[CommodityBlock]:
block = _csv_row_to_block(row) block = _csv_row_to_block(row)
if block: if block:
yield block yield block
# ── Disaggregated COT format (com_disagg_txt_YYYY.zip / c_year.txt) ─────────
@dataclass
class DisaggPositionRow:
row_type: str
open_interest: Optional[int] = None
prod_merc_long: Optional[int] = None
prod_merc_short: Optional[int] = None
swap_long: Optional[int] = None
swap_short: Optional[int] = None
swap_spread: Optional[int] = None
m_money_long: Optional[int] = None
m_money_short: Optional[int] = None
m_money_spread: Optional[int] = None
other_rept_long: Optional[int] = None
other_rept_short: Optional[int] = None
other_rept_spread: Optional[int] = None
tot_rept_long: Optional[int] = None
tot_rept_short: Optional[int] = None
nonrept_long: Optional[int] = None
nonrept_short: Optional[int] = None
chg_open_interest: Optional[int] = None
chg_prod_merc_long: Optional[int] = None
chg_prod_merc_short: Optional[int] = None
chg_swap_long: Optional[int] = None
chg_swap_short: Optional[int] = None
chg_swap_spread: Optional[int] = None
chg_m_money_long: Optional[int] = None
chg_m_money_short: Optional[int] = None
chg_m_money_spread: Optional[int] = None
chg_other_rept_long: Optional[int] = None
chg_other_rept_short: Optional[int] = None
chg_other_rept_spread: Optional[int] = None
chg_tot_rept_long: Optional[int] = None
chg_tot_rept_short: Optional[int] = None
chg_nonrept_long: Optional[int] = None
chg_nonrept_short: Optional[int] = None
pct_open_interest: Optional[float] = None
pct_prod_merc_long: Optional[float] = None
pct_prod_merc_short: Optional[float] = None
pct_swap_long: Optional[float] = None
pct_swap_short: Optional[float] = None
pct_swap_spread: Optional[float] = None
pct_m_money_long: Optional[float] = None
pct_m_money_short: Optional[float] = None
pct_m_money_spread: Optional[float] = None
pct_other_rept_long: Optional[float] = None
pct_other_rept_short: Optional[float] = None
pct_other_rept_spread: Optional[float] = None
pct_tot_rept_long: Optional[float] = None
pct_tot_rept_short: Optional[float] = None
pct_nonrept_long: Optional[float] = None
pct_nonrept_short: Optional[float] = None
traders_total: Optional[int] = None
traders_prod_merc_long: Optional[int] = None
traders_prod_merc_short: Optional[int] = None
traders_swap_long: Optional[int] = None
traders_swap_short: Optional[int] = None
traders_swap_spread: Optional[int] = None
traders_m_money_long: Optional[int] = None
traders_m_money_short: Optional[int] = None
traders_m_money_spread: Optional[int] = None
traders_other_rept_long: Optional[int] = None
traders_other_rept_short: Optional[int] = None
traders_other_rept_spread: Optional[int] = None
traders_tot_rept_long: Optional[int] = None
traders_tot_rept_short: Optional[int] = None
@dataclass
class DisaggBlock:
cftc_code: str
name: str
exchange: str
exchange_abbr: str
contract_unit: str
report_date: str
rows: list # list of DisaggPositionRow (one per row_type)
concentration: dict # row_type -> ConcentrationRow
def _di(row: dict, col: str) -> Optional[int]:
v = row.get(col, '').strip()
if not v:
return None
try:
return int(float(v))
except ValueError:
return None
def _df(row: dict, col: str) -> Optional[float]:
v = row.get(col, '').strip()
if not v:
return None
try:
return float(v)
except ValueError:
return None
def _parse_disagg_position_row(csv_row: dict, sfx: str) -> DisaggPositionRow:
pos = DisaggPositionRow(row_type=sfx)
pos.open_interest = _di(csv_row, f'Open_Interest_{sfx}')
pos.prod_merc_long = _di(csv_row, f'Prod_Merc_Positions_Long_{sfx}')
pos.prod_merc_short = _di(csv_row, f'Prod_Merc_Positions_Short_{sfx}')
# NB: CFTC uses double underscore for Swap Short/Spread columns
pos.swap_long = _di(csv_row, f'Swap_Positions_Long_{sfx}')
pos.swap_short = _di(csv_row, f'Swap__Positions_Short_{sfx}')
pos.swap_spread = _di(csv_row, f'Swap__Positions_Spread_{sfx}')
pos.m_money_long = _di(csv_row, f'M_Money_Positions_Long_{sfx}')
pos.m_money_short = _di(csv_row, f'M_Money_Positions_Short_{sfx}')
pos.m_money_spread = _di(csv_row, f'M_Money_Positions_Spread_{sfx}')
pos.other_rept_long = _di(csv_row, f'Other_Rept_Positions_Long_{sfx}')
pos.other_rept_short = _di(csv_row, f'Other_Rept_Positions_Short_{sfx}')
pos.other_rept_spread= _di(csv_row, f'Other_Rept_Positions_Spread_{sfx}')
pos.tot_rept_long = _di(csv_row, f'Tot_Rept_Positions_Long_{sfx}')
pos.tot_rept_short = _di(csv_row, f'Tot_Rept_Positions_Short_{sfx}')
pos.nonrept_long = _di(csv_row, f'NonRept_Positions_Long_{sfx}')
pos.nonrept_short = _di(csv_row, f'NonRept_Positions_Short_{sfx}')
if sfx == 'All':
pos.chg_open_interest = _di(csv_row, 'Change_in_Open_Interest_All')
pos.chg_prod_merc_long = _di(csv_row, 'Change_in_Prod_Merc_Long_All')
pos.chg_prod_merc_short = _di(csv_row, 'Change_in_Prod_Merc_Short_All')
pos.chg_swap_long = _di(csv_row, 'Change_in_Swap_Long_All')
pos.chg_swap_short = _di(csv_row, 'Change_in_Swap_Short_All')
pos.chg_swap_spread = _di(csv_row, 'Change_in_Swap_Spread_All')
pos.chg_m_money_long = _di(csv_row, 'Change_in_M_Money_Long_All')
pos.chg_m_money_short = _di(csv_row, 'Change_in_M_Money_Short_All')
pos.chg_m_money_spread = _di(csv_row, 'Change_in_M_Money_Spread_All')
pos.chg_other_rept_long = _di(csv_row, 'Change_in_Other_Rept_Long_All')
pos.chg_other_rept_short = _di(csv_row, 'Change_in_Other_Rept_Short_All')
pos.chg_other_rept_spread= _di(csv_row, 'Change_in_Other_Rept_Spread_All')
pos.chg_tot_rept_long = _di(csv_row, 'Change_in_Tot_Rept_Long_All')
pos.chg_tot_rept_short = _di(csv_row, 'Change_in_Tot_Rept_Short_All')
pos.chg_nonrept_long = _di(csv_row, 'Change_in_NonRept_Long_All')
pos.chg_nonrept_short = _di(csv_row, 'Change_in_NonRept_Short_All')
pos.pct_open_interest = _df(csv_row, f'Pct_of_Open_Interest_{sfx}')
pos.pct_prod_merc_long = _df(csv_row, f'Pct_of_OI_Prod_Merc_Long_{sfx}')
pos.pct_prod_merc_short = _df(csv_row, f'Pct_of_OI_Prod_Merc_Short_{sfx}')
pos.pct_swap_long = _df(csv_row, f'Pct_of_OI_Swap_Long_{sfx}')
pos.pct_swap_short = _df(csv_row, f'Pct_of_OI_Swap_Short_{sfx}')
pos.pct_swap_spread = _df(csv_row, f'Pct_of_OI_Swap_Spread_{sfx}')
pos.pct_m_money_long = _df(csv_row, f'Pct_of_OI_M_Money_Long_{sfx}')
pos.pct_m_money_short = _df(csv_row, f'Pct_of_OI_M_Money_Short_{sfx}')
pos.pct_m_money_spread = _df(csv_row, f'Pct_of_OI_M_Money_Spread_{sfx}')
pos.pct_other_rept_long = _df(csv_row, f'Pct_of_OI_Other_Rept_Long_{sfx}')
pos.pct_other_rept_short = _df(csv_row, f'Pct_of_OI_Other_Rept_Short_{sfx}')
pos.pct_other_rept_spread= _df(csv_row, f'Pct_of_OI_Other_Rept_Spread_{sfx}')
pos.pct_tot_rept_long = _df(csv_row, f'Pct_of_OI_Tot_Rept_Long_{sfx}')
pos.pct_tot_rept_short = _df(csv_row, f'Pct_of_OI_Tot_Rept_Short_{sfx}')
pos.pct_nonrept_long = _df(csv_row, f'Pct_of_OI_NonRept_Long_{sfx}')
pos.pct_nonrept_short = _df(csv_row, f'Pct_of_OI_NonRept_Short_{sfx}')
pos.traders_total = _di(csv_row, f'Traders_Tot_{sfx}')
pos.traders_prod_merc_long = _di(csv_row, f'Traders_Prod_Merc_Long_{sfx}')
pos.traders_prod_merc_short = _di(csv_row, f'Traders_Prod_Merc_Short_{sfx}')
pos.traders_swap_long = _di(csv_row, f'Traders_Swap_Long_{sfx}')
pos.traders_swap_short = _di(csv_row, f'Traders_Swap_Short_{sfx}')
pos.traders_swap_spread = _di(csv_row, f'Traders_Swap_Spread_{sfx}')
pos.traders_m_money_long = _di(csv_row, f'Traders_M_Money_Long_{sfx}')
pos.traders_m_money_short = _di(csv_row, f'Traders_M_Money_Short_{sfx}')
pos.traders_m_money_spread = _di(csv_row, f'Traders_M_Money_Spread_{sfx}')
pos.traders_other_rept_long = _di(csv_row, f'Traders_Other_Rept_Long_{sfx}')
pos.traders_other_rept_short = _di(csv_row, f'Traders_Other_Rept_Short_{sfx}')
pos.traders_other_rept_spread= _di(csv_row, f'Traders_Other_Rept_Spread_{sfx}')
pos.traders_tot_rept_long = _di(csv_row, f'Traders_Tot_Rept_Long_{sfx}')
pos.traders_tot_rept_short = _di(csv_row, f'Traders_Tot_Rept_Short_{sfx}')
return pos
def _parse_disagg_concentration(csv_row: dict, sfx: str) -> ConcentrationRow:
return ConcentrationRow(
conc_gross_long_4 =_df(csv_row, f'Conc_Gross_LE_4_TDR_Long_{sfx}'),
conc_gross_short_4=_df(csv_row, f'Conc_Gross_LE_4_TDR_Short_{sfx}'),
conc_gross_long_8 =_df(csv_row, f'Conc_Gross_LE_8_TDR_Long_{sfx}'),
conc_gross_short_8=_df(csv_row, f'Conc_Gross_LE_8_TDR_Short_{sfx}'),
conc_net_long_4 =_df(csv_row, f'Conc_Net_LE_4_TDR_Long_{sfx}'),
conc_net_short_4 =_df(csv_row, f'Conc_Net_LE_4_TDR_Short_{sfx}'),
conc_net_long_8 =_df(csv_row, f'Conc_Net_LE_8_TDR_Long_{sfx}'),
conc_net_short_8 =_df(csv_row, f'Conc_Net_LE_8_TDR_Short_{sfx}'),
)
def _disagg_csv_row_to_block(csv_row: dict) -> Optional[DisaggBlock]:
full_name = csv_row.get('Market_and_Exchange_Names', '').strip()
report_date = csv_row.get('Report_Date_as_YYYY-MM-DD', '').strip()
cftc_code = csv_row.get('CFTC_Contract_Market_Code', '').strip()
contract_unit= csv_row.get('Contract_Units', '').strip()
if not full_name or not report_date or not cftc_code:
return None
if ' - ' in full_name:
name, exchange = full_name.split(' - ', 1)
else:
name, exchange = full_name, ''
name = name.strip()
exchange = exchange.strip()
exchange_abbr = EXCHANGE_ABBR.get(exchange.upper(),
exchange[:6].upper().replace(' ', ''))
rows = [_parse_disagg_position_row(csv_row, sfx) for sfx in ('All', 'Old', 'Other')]
concentration = {sfx: _parse_disagg_concentration(csv_row, sfx)
for sfx in ('All', 'Old', 'Other')}
return DisaggBlock(
cftc_code=cftc_code,
name=name,
exchange=exchange,
exchange_abbr=exchange_abbr,
contract_unit=contract_unit,
report_date=report_date,
rows=rows,
concentration=concentration,
)
def parse_disagg_csv_text(text: str) -> Iterator[DisaggBlock]:
"""Parse a CFTC Disaggregated COT CSV file (c_year.txt format)."""
import csv as _csv
reader = _csv.DictReader(text.splitlines())
for row in reader:
block = _disagg_csv_row_to_block(row)
if block:
yield block
def parse_disagg_zip_file(zip_path: str) -> Iterator[DisaggBlock]:
"""Parse a CFTC Disaggregated COT ZIP archive (com_disagg_txt_YYYY.zip)."""
with zipfile.ZipFile(zip_path) as zf:
txt_files = [n for n in zf.namelist() if n.lower().endswith('.txt')]
for fname in txt_files:
with zf.open(fname) as f:
text = f.read().decode('latin-1')
yield from parse_disagg_csv_text(text)

View File

@ -10,6 +10,7 @@ services:
- DB_PATH=/data/cot.db - DB_PATH=/data/cot.db
command: > command: >
sh -c "python3 -m app.ingestion.cli init-db && sh -c "python3 -m app.ingestion.cli init-db &&
python3 -m app.ingestion.cli import-disagg-history &&
uvicorn app.api.main:app --host 0.0.0.0 --port 8000" uvicorn app.api.main:app --host 0.0.0.0 --port 8000"
restart: unless-stopped restart: unless-stopped
healthcheck: healthcheck:

View File

@ -142,6 +142,141 @@ CREATE TABLE IF NOT EXISTS import_log (
error_message TEXT error_message TEXT
); );
-- ----------------------------------------------------------------
-- disagg_reports: one row per (commodity x report_date), disaggregated COT
-- ----------------------------------------------------------------
CREATE TABLE IF NOT EXISTS disagg_reports (
id INTEGER PRIMARY KEY,
commodity_id INTEGER NOT NULL REFERENCES commodities(id),
report_date TEXT NOT NULL,
source_file TEXT,
imported_at TEXT DEFAULT (datetime('now')),
UNIQUE (commodity_id, report_date)
);
CREATE INDEX IF NOT EXISTS idx_disagg_reports_date ON disagg_reports(report_date);
CREATE INDEX IF NOT EXISTS idx_disagg_reports_commodity ON disagg_reports(commodity_id);
-- ----------------------------------------------------------------
-- disagg_positions: disaggregated position data per (report x row_type)
-- row_type: 'All', 'Old', 'Other'
-- Includes positions, week-over-week changes, % of OI, and trader counts.
-- ----------------------------------------------------------------
CREATE TABLE IF NOT EXISTS disagg_positions (
id INTEGER PRIMARY KEY,
report_id INTEGER NOT NULL REFERENCES disagg_reports(id),
row_type TEXT NOT NULL CHECK (row_type IN ('All', 'Old', 'Other')),
-- Open interest
open_interest INTEGER,
-- Producer/Merchant/Processor/User
prod_merc_long INTEGER,
prod_merc_short INTEGER,
-- Swap Dealers
swap_long INTEGER,
swap_short INTEGER,
swap_spread INTEGER,
-- Managed Money
m_money_long INTEGER,
m_money_short INTEGER,
m_money_spread INTEGER,
-- Other Reportable
other_rept_long INTEGER,
other_rept_short INTEGER,
other_rept_spread INTEGER,
-- Total Reportable
tot_rept_long INTEGER,
tot_rept_short INTEGER,
-- Non-Reportable
nonrept_long INTEGER,
nonrept_short INTEGER,
-- Week-over-week changes (stored on All rows only)
chg_open_interest INTEGER,
chg_prod_merc_long INTEGER,
chg_prod_merc_short INTEGER,
chg_swap_long INTEGER,
chg_swap_short INTEGER,
chg_swap_spread INTEGER,
chg_m_money_long INTEGER,
chg_m_money_short INTEGER,
chg_m_money_spread INTEGER,
chg_other_rept_long INTEGER,
chg_other_rept_short INTEGER,
chg_other_rept_spread INTEGER,
chg_tot_rept_long INTEGER,
chg_tot_rept_short INTEGER,
chg_nonrept_long INTEGER,
chg_nonrept_short INTEGER,
-- Percent of open interest
pct_open_interest REAL,
pct_prod_merc_long REAL,
pct_prod_merc_short REAL,
pct_swap_long REAL,
pct_swap_short REAL,
pct_swap_spread REAL,
pct_m_money_long REAL,
pct_m_money_short REAL,
pct_m_money_spread REAL,
pct_other_rept_long REAL,
pct_other_rept_short REAL,
pct_other_rept_spread REAL,
pct_tot_rept_long REAL,
pct_tot_rept_short REAL,
pct_nonrept_long REAL,
pct_nonrept_short REAL,
-- Number of traders
traders_total INTEGER,
traders_prod_merc_long INTEGER,
traders_prod_merc_short INTEGER,
traders_swap_long INTEGER,
traders_swap_short INTEGER,
traders_swap_spread INTEGER,
traders_m_money_long INTEGER,
traders_m_money_short INTEGER,
traders_m_money_spread INTEGER,
traders_other_rept_long INTEGER,
traders_other_rept_short INTEGER,
traders_other_rept_spread INTEGER,
traders_tot_rept_long INTEGER,
traders_tot_rept_short INTEGER,
UNIQUE (report_id, row_type)
);
CREATE INDEX IF NOT EXISTS idx_disagg_positions_report ON disagg_positions(report_id);
-- ----------------------------------------------------------------
-- disagg_concentration: largest-trader concentration for disaggregated reports
-- row_type: 'All', 'Old', 'Other'
-- ----------------------------------------------------------------
CREATE TABLE IF NOT EXISTS disagg_concentration (
id INTEGER PRIMARY KEY,
report_id INTEGER NOT NULL REFERENCES disagg_reports(id),
row_type TEXT NOT NULL CHECK (row_type IN ('All', 'Old', 'Other')),
conc_gross_long_4 REAL,
conc_gross_short_4 REAL,
conc_gross_long_8 REAL,
conc_gross_short_8 REAL,
conc_net_long_4 REAL,
conc_net_short_4 REAL,
conc_net_long_8 REAL,
conc_net_short_8 REAL,
UNIQUE (report_id, row_type)
);
CREATE INDEX IF NOT EXISTS idx_disagg_concentration_report ON disagg_concentration(report_id);
-- ---------------------------------------------------------------- -- ----------------------------------------------------------------
-- v_net_positions: convenience view for common analytical queries -- v_net_positions: convenience view for common analytical queries
-- ---------------------------------------------------------------- -- ----------------------------------------------------------------