COTexplorer/schema.sql
Greg 2c28ac3b0a Add Disaggregated COT data support (2019–2026)
Integrates the CFTC Disaggregated Commitments of Traders reports
(com_disagg_txt_YYYY.zip) which break positions down by Producer/Merchant,
Swap Dealers, Managed Money, and Other Reportables — a different report
type from the existing legacy COT data.

- schema.sql: add disagg_reports, disagg_positions, disagg_concentration tables
- parser.py: add DisaggPositionRow/DisaggBlock dataclasses and
  parse_disagg_csv_text()/parse_disagg_zip_file() for c_year.txt format
- importer.py: add import_disagg_block(), import_disagg_zip_file(),
  run_disagg_historical_import() for 2019–2026 yearly ZIPs
- cli.py: add import-disagg-history subcommand
- docker-compose.yaml: run import-disagg-history on startup (idempotent
  via import_log, so re-deploys skip already-imported years)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-22 18:11:59 +01:00

319 lines
11 KiB
SQL

PRAGMA journal_mode = WAL;
PRAGMA foreign_keys = ON;
-- ----------------------------------------------------------------
-- commodities: one row per unique market (stable reference table)
-- ----------------------------------------------------------------
CREATE TABLE IF NOT EXISTS commodities (
id INTEGER PRIMARY KEY,
cftc_code TEXT NOT NULL UNIQUE,
name TEXT NOT NULL,
exchange TEXT NOT NULL,
exchange_abbr TEXT NOT NULL,
contract_unit TEXT,
created_at TEXT DEFAULT (datetime('now'))
);
CREATE INDEX IF NOT EXISTS idx_commodities_name ON commodities(name);
CREATE INDEX IF NOT EXISTS idx_commodities_exchange ON commodities(exchange_abbr);
-- ----------------------------------------------------------------
-- reports: one row per (commodity x report_date)
-- ----------------------------------------------------------------
CREATE TABLE IF NOT EXISTS reports (
id INTEGER PRIMARY KEY,
commodity_id INTEGER NOT NULL REFERENCES commodities(id),
report_date TEXT NOT NULL,
prev_report_date TEXT,
source_file TEXT,
imported_at TEXT DEFAULT (datetime('now')),
UNIQUE (commodity_id, report_date)
);
CREATE INDEX IF NOT EXISTS idx_reports_date ON reports(report_date);
CREATE INDEX IF NOT EXISTS idx_reports_commodity ON reports(commodity_id);
-- ----------------------------------------------------------------
-- positions: core position data, one row per (report x row_type)
-- row_type: 'All', 'Old', 'Other'
-- ----------------------------------------------------------------
CREATE TABLE IF NOT EXISTS positions (
id INTEGER PRIMARY KEY,
report_id INTEGER NOT NULL REFERENCES reports(id),
row_type TEXT NOT NULL CHECK (row_type IN ('All', 'Old', 'Other')),
-- Open interest
open_interest INTEGER,
-- Non-commercial
noncomm_long INTEGER,
noncomm_short INTEGER,
noncomm_spreading INTEGER,
-- Commercial
comm_long INTEGER,
comm_short INTEGER,
-- Total reportable
total_long INTEGER,
total_short INTEGER,
-- Nonreportable (small traders)
nonrept_long INTEGER,
nonrept_short INTEGER,
-- Week-over-week changes (stored on All rows only)
chg_open_interest INTEGER,
chg_noncomm_long INTEGER,
chg_noncomm_short INTEGER,
chg_noncomm_spreading INTEGER,
chg_comm_long INTEGER,
chg_comm_short INTEGER,
chg_total_long INTEGER,
chg_total_short INTEGER,
chg_nonrept_long INTEGER,
chg_nonrept_short INTEGER,
-- Percent of open interest
pct_open_interest REAL,
pct_noncomm_long REAL,
pct_noncomm_short REAL,
pct_noncomm_spreading REAL,
pct_comm_long REAL,
pct_comm_short REAL,
pct_total_long REAL,
pct_total_short REAL,
pct_nonrept_long REAL,
pct_nonrept_short REAL,
-- Number of traders
traders_total INTEGER,
traders_noncomm_long INTEGER,
traders_noncomm_short INTEGER,
traders_noncomm_spread INTEGER,
traders_comm_long INTEGER,
traders_comm_short INTEGER,
traders_total_long INTEGER,
traders_total_short INTEGER,
UNIQUE (report_id, row_type)
);
CREATE INDEX IF NOT EXISTS idx_positions_report ON positions(report_id);
-- ----------------------------------------------------------------
-- concentration: largest-trader data (separate -- less-queried)
-- row_type: 'All', 'Old', 'Other'
-- ----------------------------------------------------------------
CREATE TABLE IF NOT EXISTS concentration (
id INTEGER PRIMARY KEY,
report_id INTEGER NOT NULL REFERENCES reports(id),
row_type TEXT NOT NULL CHECK (row_type IN ('All', 'Old', 'Other')),
-- By Gross Position
conc_gross_long_4 REAL,
conc_gross_short_4 REAL,
conc_gross_long_8 REAL,
conc_gross_short_8 REAL,
-- By Net Position
conc_net_long_4 REAL,
conc_net_short_4 REAL,
conc_net_long_8 REAL,
conc_net_short_8 REAL,
UNIQUE (report_id, row_type)
);
CREATE INDEX IF NOT EXISTS idx_concentration_report ON concentration(report_id);
-- ----------------------------------------------------------------
-- import_log: tracks which source files have been processed
-- ----------------------------------------------------------------
CREATE TABLE IF NOT EXISTS import_log (
id INTEGER PRIMARY KEY,
source TEXT NOT NULL UNIQUE,
source_type TEXT NOT NULL,
rows_inserted INTEGER DEFAULT 0,
rows_skipped INTEGER DEFAULT 0,
started_at TEXT,
completed_at TEXT,
status TEXT DEFAULT 'pending',
error_message TEXT
);
-- ----------------------------------------------------------------
-- disagg_reports: one row per (commodity x report_date), disaggregated COT
-- ----------------------------------------------------------------
CREATE TABLE IF NOT EXISTS disagg_reports (
id INTEGER PRIMARY KEY,
commodity_id INTEGER NOT NULL REFERENCES commodities(id),
report_date TEXT NOT NULL,
source_file TEXT,
imported_at TEXT DEFAULT (datetime('now')),
UNIQUE (commodity_id, report_date)
);
CREATE INDEX IF NOT EXISTS idx_disagg_reports_date ON disagg_reports(report_date);
CREATE INDEX IF NOT EXISTS idx_disagg_reports_commodity ON disagg_reports(commodity_id);
-- ----------------------------------------------------------------
-- disagg_positions: disaggregated position data per (report x row_type)
-- row_type: 'All', 'Old', 'Other'
-- Includes positions, week-over-week changes, % of OI, and trader counts.
-- ----------------------------------------------------------------
CREATE TABLE IF NOT EXISTS disagg_positions (
id INTEGER PRIMARY KEY,
report_id INTEGER NOT NULL REFERENCES disagg_reports(id),
row_type TEXT NOT NULL CHECK (row_type IN ('All', 'Old', 'Other')),
-- Open interest
open_interest INTEGER,
-- Producer/Merchant/Processor/User
prod_merc_long INTEGER,
prod_merc_short INTEGER,
-- Swap Dealers
swap_long INTEGER,
swap_short INTEGER,
swap_spread INTEGER,
-- Managed Money
m_money_long INTEGER,
m_money_short INTEGER,
m_money_spread INTEGER,
-- Other Reportable
other_rept_long INTEGER,
other_rept_short INTEGER,
other_rept_spread INTEGER,
-- Total Reportable
tot_rept_long INTEGER,
tot_rept_short INTEGER,
-- Non-Reportable
nonrept_long INTEGER,
nonrept_short INTEGER,
-- Week-over-week changes (stored on All rows only)
chg_open_interest INTEGER,
chg_prod_merc_long INTEGER,
chg_prod_merc_short INTEGER,
chg_swap_long INTEGER,
chg_swap_short INTEGER,
chg_swap_spread INTEGER,
chg_m_money_long INTEGER,
chg_m_money_short INTEGER,
chg_m_money_spread INTEGER,
chg_other_rept_long INTEGER,
chg_other_rept_short INTEGER,
chg_other_rept_spread INTEGER,
chg_tot_rept_long INTEGER,
chg_tot_rept_short INTEGER,
chg_nonrept_long INTEGER,
chg_nonrept_short INTEGER,
-- Percent of open interest
pct_open_interest REAL,
pct_prod_merc_long REAL,
pct_prod_merc_short REAL,
pct_swap_long REAL,
pct_swap_short REAL,
pct_swap_spread REAL,
pct_m_money_long REAL,
pct_m_money_short REAL,
pct_m_money_spread REAL,
pct_other_rept_long REAL,
pct_other_rept_short REAL,
pct_other_rept_spread REAL,
pct_tot_rept_long REAL,
pct_tot_rept_short REAL,
pct_nonrept_long REAL,
pct_nonrept_short REAL,
-- Number of traders
traders_total INTEGER,
traders_prod_merc_long INTEGER,
traders_prod_merc_short INTEGER,
traders_swap_long INTEGER,
traders_swap_short INTEGER,
traders_swap_spread INTEGER,
traders_m_money_long INTEGER,
traders_m_money_short INTEGER,
traders_m_money_spread INTEGER,
traders_other_rept_long INTEGER,
traders_other_rept_short INTEGER,
traders_other_rept_spread INTEGER,
traders_tot_rept_long INTEGER,
traders_tot_rept_short INTEGER,
UNIQUE (report_id, row_type)
);
CREATE INDEX IF NOT EXISTS idx_disagg_positions_report ON disagg_positions(report_id);
-- ----------------------------------------------------------------
-- disagg_concentration: largest-trader concentration for disaggregated reports
-- row_type: 'All', 'Old', 'Other'
-- ----------------------------------------------------------------
CREATE TABLE IF NOT EXISTS disagg_concentration (
id INTEGER PRIMARY KEY,
report_id INTEGER NOT NULL REFERENCES disagg_reports(id),
row_type TEXT NOT NULL CHECK (row_type IN ('All', 'Old', 'Other')),
conc_gross_long_4 REAL,
conc_gross_short_4 REAL,
conc_gross_long_8 REAL,
conc_gross_short_8 REAL,
conc_net_long_4 REAL,
conc_net_short_4 REAL,
conc_net_long_8 REAL,
conc_net_short_8 REAL,
UNIQUE (report_id, row_type)
);
CREATE INDEX IF NOT EXISTS idx_disagg_concentration_report ON disagg_concentration(report_id);
-- ----------------------------------------------------------------
-- v_net_positions: convenience view for common analytical queries
-- ----------------------------------------------------------------
CREATE VIEW IF NOT EXISTS v_net_positions AS
SELECT
c.cftc_code,
c.name AS commodity,
c.exchange_abbr AS exchange,
r.report_date,
r.prev_report_date,
p.row_type,
p.open_interest,
p.noncomm_long,
p.noncomm_short,
p.noncomm_spreading,
(p.noncomm_long - p.noncomm_short) AS noncomm_net,
p.comm_long,
p.comm_short,
(p.comm_long - p.comm_short) AS comm_net,
p.nonrept_long,
p.nonrept_short,
(p.nonrept_long - p.nonrept_short) AS nonrept_net,
p.chg_open_interest,
p.chg_noncomm_long,
p.chg_noncomm_short,
p.chg_comm_long,
p.chg_comm_short,
p.pct_noncomm_long,
p.pct_noncomm_short,
p.pct_comm_long,
p.pct_comm_short,
p.traders_total,
p.traders_noncomm_long,
p.traders_noncomm_short,
p.traders_comm_long,
p.traders_comm_short
FROM positions p
JOIN reports r ON r.id = p.report_id
JOIN commodities c ON c.id = r.commodity_id;