# FastAPI application that ingests CFTC Commitments of Traders data into SQLite
# and exposes it via a REST API with analytics endpoints (screener, percentile
# rank, concentration). Includes a CLI for historical and weekly data ingestion,
# a Docker setup, and a frontend.
#
# Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
#
# File size: 619 lines, 22 KiB, Python
"""
|
||
CFTC COT Report Parser

Parses the fixed-width text format used by CFTC for Commitments of Traders
Long Reports. The format uses ':' as group separators within each data line.

Handles both:
- Weekly HTML files (text wrapped in a <pre> tag)
- Historical ZIP files (.txt members, either the same fixed-width format
  without the HTML wrapper or the comma-separated annual layout)
|
||
"""
|
||
|
||
import re
|
||
import zipfile
|
||
from dataclasses import dataclass, field
|
||
from datetime import date
|
||
from pathlib import Path
|
||
from typing import Iterator, Optional
|
||
|
||
# Regex patterns

# Commodity header line, e.g.
#   'WHEAT-SRW - CHICAGO BOARD OF TRADE          Code-001602'
# group(1) is the 'NAME - EXCHANGE' text, group(2) the contract market code.
# The code is captured as any run of non-space characters rather than digits
# only, so that codes containing a letter or '+' are recognised as headers
# instead of being folded into the preceding block.
COMMODITY_HEADER_RE = re.compile(r'^(\S.+?)\s{2,}Code-(\S+)\s*$')

# English long-form date such as 'February 17, 2026':
# group(1) month name, group(2) day, group(3) four-digit year.
DATE_RE = re.compile(
    r'(January|February|March|April|May|June|July|August|September|October|November|December)'
    r'\s+(\d{1,2}),\s+(\d{4})'
)

# Upper-cased exchange name -> short abbreviation. Names are listed both with
# and without a trailing period where both spellings are handled.
EXCHANGE_ABBR = {
    'CHICAGO BOARD OF TRADE': 'CBT',
    'CHICAGO MERCANTILE EXCHANGE': 'CME',
    'NEW YORK MERCANTILE EXCHANGE': 'NYMEX',
    'COMMODITY EXCHANGE INC': 'COMEX',
    'COMMODITY EXCHANGE INC.': 'COMEX',
    'ICE FUTURES U.S.': 'ICE',
    'ICE FUTURES U.S': 'ICE',
    'ICE FUTURES EUROPE': 'ICE-EU',
    'KANSAS CITY BOARD OF TRADE': 'KCBT',
    'MINNEAPOLIS GRAIN EXCHANGE': 'MGE',
}
|
||
|
||
|
||
def _parse_date(text: str) -> Optional[str]:
    """Extract an ISO date string from text like 'February 17, 2026'.

    Args:
        text: Any text; the first date matched by ``DATE_RE`` is used.

    Returns:
        The date as 'YYYY-MM-DD', or None when no date is found or the
        matched day/month/year do not form a valid calendar date.
    """
    months = ('January', 'February', 'March', 'April', 'May', 'June',
              'July', 'August', 'September', 'October', 'November',
              'December')
    m = DATE_RE.search(text)
    if not m:
        return None
    month, day, year = m.groups()
    try:
        # Both tuple.index() and date() report bad input with ValueError.
        return date(int(year), months.index(month) + 1, int(day)).isoformat()
    except ValueError:
        return None
|
||
|
||
|
||
def _nums(s: str, as_float: bool = False) -> list:
|
||
"""Parse whitespace-separated numbers from a string, stripping commas."""
|
||
clean = s.replace(',', '').strip()
|
||
if not clean:
|
||
return []
|
||
result = []
|
||
for tok in clean.split():
|
||
try:
|
||
result.append(float(tok) if as_float else int(float(tok)))
|
||
except ValueError:
|
||
pass
|
||
return result
|
||
|
||
|
||
def _parse_position_line(line: str, as_float: bool = False) -> tuple[str, list]:
    """Split a ':'-delimited data line into its label and a flat value list.

    Example input:
        'All  :  544,127:  117,677  175,249  205,702  184,989  124,796  508,367  505,746:  35,760  38,381'

    The text before the first ':' is the row label; the numbers found in all
    remaining ':'-separated groups are concatenated in order. For a complete
    row that is 10 values:
        [open_interest, noncomm_long, noncomm_short, spreading,
         comm_long, comm_short, total_long, total_short,
         nonrept_long, nonrept_short]

    Returns:
        (row_label, values), with values parsed as floats when ``as_float``
        is true and as ints otherwise.
    """
    label, *groups = line.split(':')
    values = [
        number
        for group in groups
        for number in _nums(group, as_float=as_float)
    ]
    return label.strip(), values
|
||
|
||
|
||
def _parse_trader_line(line: str) -> tuple[str, list]:
    """Split a trader-count line into its label and a flat list of ints.

    Example input:
        'All  :  375:  122  119  146  105  104  309  296:'

    Returns:
        (label, [total, noncomm_long, noncomm_short, spread,
                 comm_long, comm_short, total_long, total_short])
    """
    head, _, tail = line.partition(':')
    counts = []
    # Every ':'-separated group after the label contributes its integers.
    for chunk in tail.split(':'):
        counts += _nums(chunk)
    return head.strip(), counts
|
||
|
||
|
||
def _parse_concentration_line(line: str) -> tuple[str, list]:
    """Split a concentration line into its label and a list of floats.

    Example input:
        'All  :  12.5  11.6  21.6  20.1  9.2  7.2  15.3  12.5'

    Returns:
        (label, [gross_long_4, gross_short_4, gross_long_8, gross_short_8,
                 net_long_4, net_short_4, net_long_8, net_short_8])

    Raises:
        ValueError: If ``line`` contains no ':'.
    """
    # Only the first ':' separates the label; anything after it (including
    # stray colons from header artefacts) is handed to the number parser.
    label, remainder = line.split(':', 1)
    return label.strip(), _nums(remainder, as_float=True)
|
||
|
||
|
||
@dataclass
class PositionRow:
    """One row of position counts; every field defaults to None."""

    open_interest: Optional[int] = None

    # Non-commercial positions
    noncomm_long: Optional[int] = None
    noncomm_short: Optional[int] = None
    noncomm_spreading: Optional[int] = None

    # Commercial positions
    comm_long: Optional[int] = None
    comm_short: Optional[int] = None

    # Totals
    total_long: Optional[int] = None
    total_short: Optional[int] = None

    # Non-reportable positions
    nonrept_long: Optional[int] = None
    nonrept_short: Optional[int] = None
|
||
|
||
|
||
@dataclass
class ChangesRow:
    """One row of changes in position counts; every field defaults to None."""

    chg_open_interest: Optional[int] = None

    # Non-commercial positions
    chg_noncomm_long: Optional[int] = None
    chg_noncomm_short: Optional[int] = None
    chg_noncomm_spreading: Optional[int] = None

    # Commercial positions
    chg_comm_long: Optional[int] = None
    chg_comm_short: Optional[int] = None

    # Totals
    chg_total_long: Optional[int] = None
    chg_total_short: Optional[int] = None

    # Non-reportable positions
    chg_nonrept_long: Optional[int] = None
    chg_nonrept_short: Optional[int] = None
|
||
|
||
|
||
@dataclass
class PctRow:
    """One row of percent-of-open-interest figures; fields default to None."""

    pct_open_interest: Optional[float] = None

    # Non-commercial positions
    pct_noncomm_long: Optional[float] = None
    pct_noncomm_short: Optional[float] = None
    pct_noncomm_spreading: Optional[float] = None

    # Commercial positions
    pct_comm_long: Optional[float] = None
    pct_comm_short: Optional[float] = None

    # Totals
    pct_total_long: Optional[float] = None
    pct_total_short: Optional[float] = None

    # Non-reportable positions
    pct_nonrept_long: Optional[float] = None
    pct_nonrept_short: Optional[float] = None
|
||
|
||
|
||
@dataclass
class TraderRow:
    """One row of trader counts; every field defaults to None."""

    traders_total: Optional[int] = None

    # Non-commercial traders
    traders_noncomm_long: Optional[int] = None
    traders_noncomm_short: Optional[int] = None
    traders_noncomm_spread: Optional[int] = None

    # Commercial traders
    traders_comm_long: Optional[int] = None
    traders_comm_short: Optional[int] = None

    # Totals
    traders_total_long: Optional[int] = None
    traders_total_short: Optional[int] = None
|
||
|
||
|
||
@dataclass
class ConcentrationRow:
    """One row of concentration figures; every field defaults to None.

    The ``_4`` / ``_8`` suffixes distinguish the two trader-count groupings
    reported for each of the gross and net bases.
    """

    # Gross basis
    conc_gross_long_4: Optional[float] = None
    conc_gross_short_4: Optional[float] = None
    conc_gross_long_8: Optional[float] = None
    conc_gross_short_8: Optional[float] = None

    # Net basis
    conc_net_long_4: Optional[float] = None
    conc_net_short_4: Optional[float] = None
    conc_net_long_8: Optional[float] = None
    conc_net_short_8: Optional[float] = None
|
||
|
||
|
||
@dataclass
class CommodityBlock:
    """All parsed data for one commodity on one report date."""

    # Identification
    cftc_code: str
    name: str
    exchange: str
    exchange_abbr: str
    contract_unit: str

    # Dates as ISO 'YYYY-MM-DD' strings
    report_date: str
    prev_report_date: Optional[str]

    # Parsed sections; each instance gets its own fresh dicts
    positions: dict = field(default_factory=dict)      # row_type -> PositionRow
    changes: Optional[ChangesRow] = None
    percentages: dict = field(default_factory=dict)    # row_type -> PctRow
    traders: dict = field(default_factory=dict)        # row_type -> TraderRow
    concentration: dict = field(default_factory=dict)  # row_type -> ConcentrationRow
|
||
|
||
|
||
def _assign_position_values(values: list, as_float: bool = False) -> dict:
|
||
"""Map a 10-value list to position field names."""
|
||
keys = ['open_interest', 'noncomm_long', 'noncomm_short', 'noncomm_spreading',
|
||
'comm_long', 'comm_short', 'total_long', 'total_short',
|
||
'nonrept_long', 'nonrept_short']
|
||
return {k: values[i] if i < len(values) else None for i, k in enumerate(keys)}
|
||
|
||
|
||
def _parse_block(lines: list[str]) -> Optional[CommodityBlock]:
    """Parse a single commodity block into a CommodityBlock.

    Args:
        lines: The block's text lines. ``lines[0]`` must be the commodity
            header ('NAME - EXCHANGE ... Code-XXXXXX') and ``lines[1]`` must
            contain the report date.

    Returns:
        The parsed block, or None when ``lines`` is empty, the header does
        not match, no date can be read from the second line, or no position
        row ('All' / 'Old' / 'Other') was found.
    """
    if not lines:
        return None

    # --- Header line (line 0): NAME - EXCHANGE ... Code-XXXXXX ---
    m = COMMODITY_HEADER_RE.match(lines[0].rstrip())
    if not m:
        return None
    full_name = m.group(1).strip()
    cftc_code = m.group(2)

    # Split "NAME - EXCHANGE" on the first " - "; no separator means the
    # whole text is the name and the exchange is empty.
    name, _, exchange = full_name.partition(' - ')
    name = name.strip()
    exchange = exchange.strip()
    exchange_abbr = EXCHANGE_ABBR.get(exchange.upper(), exchange[:6].upper().replace(' ', ''))

    # --- Report date line (line 1) ---
    report_date = _parse_date(lines[1]) if len(lines) > 1 else None
    if not report_date:
        return None

    row_labels = ('All', 'Old', 'Other')
    pct_keys = ('pct_open_interest', 'pct_noncomm_long', 'pct_noncomm_short',
                'pct_noncomm_spreading', 'pct_comm_long', 'pct_comm_short',
                'pct_total_long', 'pct_total_short', 'pct_nonrept_long', 'pct_nonrept_short')
    chg_keys = ('chg_open_interest', 'chg_noncomm_long', 'chg_noncomm_short',
                'chg_noncomm_spreading', 'chg_comm_long', 'chg_comm_short',
                'chg_total_long', 'chg_total_short', 'chg_nonrept_long', 'chg_nonrept_short')
    trader_keys = ('traders_total', 'traders_noncomm_long', 'traders_noncomm_short',
                   'traders_noncomm_spread', 'traders_comm_long', 'traders_comm_short',
                   'traders_total_long', 'traders_total_short')
    conc_keys = ('conc_gross_long_4', 'conc_gross_short_4', 'conc_gross_long_8',
                 'conc_gross_short_8', 'conc_net_long_4', 'conc_net_short_4',
                 'conc_net_long_8', 'conc_net_short_8')

    def _row(cls, keys, vals):
        """Build ``cls`` from ``vals`` in key order; missing values become None."""
        return cls(**{k: (vals[i] if i < len(vals) else None) for i, k in enumerate(keys)})

    contract_unit = ''
    prev_report_date = None
    positions: dict = {}
    changes: Optional[ChangesRow] = None
    percentages: dict = {}
    traders: dict = {}
    concentration: dict = {}

    # State machine: ``section`` names the table the following data rows
    # belong to and is advanced by the section heading lines.
    section = 'POSITIONS'

    for line in lines[2:]:
        stripped = line.strip()

        # Skip pure separator / empty lines
        if not stripped or stripped.startswith('---') or stripped == ':':
            continue

        # Contract unit
        if '(CONTRACTS OF' in line:
            m2 = re.search(r'\(CONTRACTS OF[^)]+\)', line)
            if m2:
                contract_unit = m2.group(0)
            continue

        # Section triggers
        if 'Changes in Commitments from' in line:
            prev_report_date = _parse_date(line)
            section = 'CHANGES'
            continue
        if 'Percent of Open Interest Represented' in line:
            section = 'PERCENT'
            continue
        if '# Traders' in line or 'Number of Traders in Each Category' in line:
            section = 'TRADERS'
            continue
        if 'Percent of Open Interest Held by' in line:
            section = 'CONCENTRATION'
            continue

        # Skip other header/label-only lines
        if ':' not in line:
            continue

        label_part = line.split(':')[0].strip()

        if section == 'CHANGES':
            # The changes row is the first blank-label line that carries
            # numbers; once it is stored, later lines are ignored until the
            # next section heading.
            if label_part == '':
                _, vals = _parse_position_line(line, as_float=False)
                if vals:
                    changes = _row(ChangesRow, chg_keys, vals)
                    section = 'CHANGES_DONE'
            continue

        # Every other table only carries data on All / Old / Other rows.
        if label_part not in row_labels:
            continue

        if section == 'POSITIONS':
            _, vals = _parse_position_line(line, as_float=False)
            if vals:
                positions[label_part] = PositionRow(**_assign_position_values(vals))
        elif section == 'PERCENT':
            _, vals = _parse_position_line(line, as_float=True)
            if vals:
                percentages[label_part] = _row(PctRow, pct_keys, vals)
        elif section == 'TRADERS':
            _, vals = _parse_trader_line(line)
            if vals:
                traders[label_part] = _row(TraderRow, trader_keys, vals)
        elif section == 'CONCENTRATION':
            _, vals = _parse_concentration_line(line)
            # Unlike the other tables, a partial concentration row is dropped.
            if len(vals) >= 8:
                concentration[label_part] = _row(ConcentrationRow, conc_keys, vals)

    if not positions:
        return None

    return CommodityBlock(
        cftc_code=cftc_code,
        name=name,
        exchange=exchange,
        exchange_abbr=exchange_abbr,
        contract_unit=contract_unit,
        report_date=report_date,
        prev_report_date=prev_report_date,
        positions=positions,
        changes=changes,
        percentages=percentages,
        traders=traders,
        concentration=concentration,
    )
|
||
|
||
|
||
def parse_text_blocks(text: str) -> Iterator[CommodityBlock]:
    """Yield a CommodityBlock for every parseable commodity block in ``text``.

    A new block begins at each line matching the commodity header pattern and
    runs up to the next such line. Chunks that ``_parse_block`` rejects,
    including any text before the first header, are silently dropped.
    """
    pending: list[str] = []

    for raw in text.splitlines():
        if COMMODITY_HEADER_RE.match(raw.rstrip()) is None:
            pending.append(raw)
            continue
        # A header closes the chunk collected so far and opens the next one.
        parsed = _parse_block(pending) if pending else None
        if parsed:
            yield parsed
        pending = [raw]

    # Flush whatever followed the last header.
    parsed = _parse_block(pending) if pending else None
    if parsed:
        yield parsed
|
||
|
||
|
||
def extract_text_from_html(html: str) -> str:
    """Return the text of the first <pre> element of a CFTC HTML page.

    When the document has no <pre> element, the input is returned with
    everything that looks like an HTML tag removed.
    """
    from bs4 import BeautifulSoup

    pre_tag = BeautifulSoup(html, 'html.parser').find('pre')
    if not pre_tag:
        # Fallback: strip HTML tags
        return re.sub(r'<[^>]+>', '', html)
    return pre_tag.get_text()
|
||
|
||
|
||
def parse_html_file(path: str) -> Iterator[CommodityBlock]:
    """Yield the commodity blocks of a weekly HTML file downloaded from CFTC.

    The file is read as latin-1 and its report text is extracted before being
    handed to the fixed-width parser.
    """
    raw_html = Path(path).read_text(encoding='latin-1')
    yield from parse_text_blocks(extract_text_from_html(raw_html))
|
||
|
||
|
||
def parse_zip_file(zip_path: str) -> Iterator[CommodityBlock]:
    """Yield the commodity blocks of every .txt member of a CFTC ZIP archive.

    Each member is decoded as latin-1. A member whose first non-blank text is
    a quoted header starting with '"Market' is parsed as CSV (the layout of
    the annual archives, e.g. 'annualof.txt'); anything else is parsed as
    fixed-width report text.
    """
    with zipfile.ZipFile(zip_path) as archive:
        for member in archive.namelist():
            if not member.lower().endswith('.txt'):
                continue
            with archive.open(member) as handle:
                text = handle.read().decode('latin-1')
            # Detect CSV by checking for quoted header on first line
            looks_like_csv = text.lstrip().startswith('"Market')
            parser = parse_csv_text if looks_like_csv else parse_text_blocks
            yield from parser(text)
|
||
|
||
|
||
# ── CSV format (historical annual ZIPs) ────────────────────────────────────
|
||
|
||
# Map CSV column name suffixes to our field names
|
||
_POS_FIELDS = {
|
||
'Open Interest': 'open_interest',
|
||
'Noncommercial Positions-Long': 'noncomm_long',
|
||
'Noncommercial Positions-Short': 'noncomm_short',
|
||
'Noncommercial Positions-Spreading': 'noncomm_spreading',
|
||
'Commercial Positions-Long': 'comm_long',
|
||
'Commercial Positions-Short': 'comm_short',
|
||
'Total Reportable Positions-Long': 'total_long',
|
||
'Total Reportable Positions-Short': 'total_short',
|
||
'Nonreportable Positions-Long': 'nonrept_long',
|
||
'Nonreportable Positions-Short': 'nonrept_short',
|
||
}
|
||
_CHG_FIELDS = {
|
||
'Change in Open Interest': 'chg_open_interest',
|
||
'Change in Noncommercial-Long': 'chg_noncomm_long',
|
||
'Change in Noncommercial-Short': 'chg_noncomm_short',
|
||
'Change in Noncommercial-Spreading': 'chg_noncomm_spreading',
|
||
'Change in Commercial-Long': 'chg_comm_long',
|
||
'Change in Commercial-Short': 'chg_comm_short',
|
||
'Change in Total Reportable-Long': 'chg_total_long',
|
||
'Change in Total Reportable-Short': 'chg_total_short',
|
||
'Change in Nonreportable-Long': 'chg_nonrept_long',
|
||
'Change in Nonreportable-Short': 'chg_nonrept_short',
|
||
}
|
||
_PCT_FIELDS = {
|
||
'% of Open Interest (OI)': 'pct_open_interest',
|
||
'% of OI-Noncommercial-Long': 'pct_noncomm_long',
|
||
'% of OI-Noncommercial-Short': 'pct_noncomm_short',
|
||
'% of OI-Noncommercial-Spreading': 'pct_noncomm_spreading',
|
||
'% of OI-Commercial-Long': 'pct_comm_long',
|
||
'% of OI-Commercial-Short': 'pct_comm_short',
|
||
'% of OI-Total Reportable-Long': 'pct_total_long',
|
||
'% of OI-Total Reportable-Short': 'pct_total_short',
|
||
'% of OI-Nonreportable-Long': 'pct_nonrept_long',
|
||
'% of OI-Nonreportable-Short': 'pct_nonrept_short',
|
||
}
|
||
_TRD_FIELDS = {
|
||
'Traders-Total': 'traders_total',
|
||
'Traders-Noncommercial-Long': 'traders_noncomm_long',
|
||
'Traders-Noncommercial-Short':'traders_noncomm_short',
|
||
'Traders-Noncommercial-Spreading': 'traders_noncomm_spread',
|
||
'Traders-Commercial-Long': 'traders_comm_long',
|
||
'Traders-Commercial-Short': 'traders_comm_short',
|
||
'Traders-Total Reportable-Long': 'traders_total_long',
|
||
'Traders-Total Reportable-Short': 'traders_total_short',
|
||
}
|
||
_CONC_FIELDS = {} # populated dynamically — column names are inconsistent
|
||
|
||
|
||
def _csv_val(row: dict, key: str, as_float: bool = False):
|
||
"""Get a value from a CSV row by key prefix match, stripping whitespace."""
|
||
# Try exact key first, then strip leading/trailing spaces from all keys
|
||
for k, v in row.items():
|
||
if k.strip() == key.strip():
|
||
v = v.strip()
|
||
if not v:
|
||
return None
|
||
try:
|
||
return float(v) if as_float else int(float(v))
|
||
except ValueError:
|
||
return None
|
||
return None
|
||
|
||
|
||
def _build_position_row_from_csv(row: dict, suffix: str) -> PositionRow:
    """Build a PositionRow from the '<prefix> (<suffix>)' columns of a CSV row.

    ``suffix`` selects the row type, e.g. 'All'. Columns that are missing or
    not numeric leave the corresponding field as None.
    """
    return PositionRow(**{
        attr: _csv_val(row, f'{column} ({suffix})')
        for column, attr in _POS_FIELDS.items()
    })
|
||
|
||
|
||
def _build_changes_from_csv(row: dict) -> ChangesRow:
    """Build a ChangesRow from the '<prefix> (All)' change columns of a CSV row.

    Columns that are missing or not numeric leave the field as None.
    """
    return ChangesRow(**{
        attr: _csv_val(row, f'{column} (All)')
        for column, attr in _CHG_FIELDS.items()
    })
|
||
|
||
|
||
def _build_pct_row_from_csv(row: dict, suffix: str) -> PctRow:
    """Build a PctRow from the percent-of-open-interest columns of a CSV row.

    Args:
        row: One CSV row mapping column name -> raw text.
        suffix: Row type appended to each column prefix, e.g. 'All'.

    Returns:
        A PctRow whose fields are floats, or None where no usable column
        was found.
    """
    kwargs = {}
    for prefix, attr in _PCT_FIELDS.items():
        # Percent columns are not named consistently between All and
        # Old/Other: try the usual '<prefix> (<suffix>)' spelling first, then
        # the variant without a space before the suffix.
        val = _csv_val(row, f'{prefix} ({suffix})', as_float=True)
        if val is None:
            val = _csv_val(row, f'{prefix}({suffix})', as_float=True)
        kwargs[attr] = val
    return PctRow(**kwargs)
|
||
|
||
|
||
def _build_trader_row_from_csv(row: dict, suffix: str) -> TraderRow:
    """Build a TraderRow from the '<prefix> (<suffix>)' columns of a CSV row.

    ``suffix`` selects the row type, e.g. 'All'. Columns that are missing or
    not numeric leave the corresponding field as None.
    """
    return TraderRow(**{
        attr: _csv_val(row, f'{column} ({suffix})')
        for column, attr in _TRD_FIELDS.items()
    })
|
||
|
||
|
||
def _build_concentration_from_csv(row: dict, suffix: str) -> ConcentrationRow:
    """Build a ConcentrationRow from the concentration columns of a CSV row.

    Concentration columns have inconsistent spacing in CFTC CSVs, e.g.:
        'Concentration-Gross LT = 4 TDR-Long (All)'
        'Concentration-Gross LT =4 TDR-Short (All)'
    so columns are matched after removing all whitespace and lower-casing.

    Args:
        row: One CSV row mapping column name -> raw text.
        suffix: Row type appended to each column name, e.g. 'All'.

    Returns:
        A ConcentrationRow whose fields are floats, or None where the column
        is absent, blank or not numeric.
    """
    def _norm(s: str) -> str:
        return re.sub(r'\s+', '', s).lower()

    # Normalized lookup for this row. Non-string keys (csv.DictReader stores
    # surplus fields under None) cannot name a column and are left out.
    norm_row = {_norm(k): v for k, v in row.items() if isinstance(k, str)}

    def _get(pattern: str):
        raw = norm_row.get(_norm(f'{pattern}({suffix})'))
        # Absent column, or a short row that left the value as None.
        if raw is None:
            return None
        text = raw.strip()
        if not text:
            return None
        try:
            return float(text)
        except ValueError:
            return None

    return ConcentrationRow(
        conc_gross_long_4=_get('Concentration-Gross LT =4 TDR-Long '),
        conc_gross_short_4=_get('Concentration-Gross LT =4 TDR-Short '),
        conc_gross_long_8=_get('Concentration-Gross LT =8 TDR-Long '),
        conc_gross_short_8=_get('Concentration-Gross LT =8 TDR-Short '),
        conc_net_long_4=_get('Concentration-Net LT =4 TDR-Long '),
        conc_net_short_4=_get('Concentration-Net LT =4 TDR-Short '),
        conc_net_long_8=_get('Concentration-Net LT =8 TDR-Long '),
        conc_net_short_8=_get('Concentration-Net LT =8 TDR-Short '),
    )
|
||
|
||
|
||
def _csv_row_to_block(row: dict) -> Optional[CommodityBlock]:
    """Convert one CSV row (= one commodity × one date) to a CommodityBlock.

    Args:
        row: One CSV row mapping column name -> raw text.

    Returns:
        The block, or None when the market name, report date or contract
        market code is missing or blank. ``contract_unit`` is always '' and
        ``prev_report_date`` always None for CSV rows.
    """
    def _text(column: str) -> str:
        # ``or ''`` also covers a column present with a None value, which
        # csv.DictReader produces for rows shorter than the header.
        return (row.get(column) or '').strip()

    full_name = _text('Market and Exchange Names')
    report_date = _text('As of Date in Form YYYY-MM-DD')
    cftc_code = _text('CFTC Contract Market Code')

    if not full_name or not report_date or not cftc_code:
        return None

    # Split "NAME - EXCHANGE" on the first " - "; no separator means the
    # whole text is the name and the exchange is empty.
    name, _, exchange = full_name.partition(' - ')
    name = name.strip()
    exchange = exchange.strip()
    exchange_abbr = EXCHANGE_ABBR.get(exchange.upper(),
                                      exchange[:6].upper().replace(' ', ''))

    positions = {}
    percentages = {}
    traders = {}
    concentration = {}

    # The column suffix doubles as the row-type key of each dict.
    for label in ('All', 'Old', 'Other'):
        positions[label] = _build_position_row_from_csv(row, label)
        percentages[label] = _build_pct_row_from_csv(row, label)
        traders[label] = _build_trader_row_from_csv(row, label)
        concentration[label] = _build_concentration_from_csv(row, label)

    return CommodityBlock(
        cftc_code=cftc_code,
        name=name,
        exchange=exchange,
        exchange_abbr=exchange_abbr,
        contract_unit='',
        report_date=report_date,
        prev_report_date=None,
        positions=positions,
        changes=_build_changes_from_csv(row),
        percentages=percentages,
        traders=traders,
        concentration=concentration,
    )
|
||
|
||
|
||
def parse_csv_text(text: str) -> Iterator[CommodityBlock]:
    """Yield a CommodityBlock per usable row of a CFTC historical CSV file.

    ``text`` is the full file content (annualof.txt format) with the header
    on its first line. Rows that ``_csv_row_to_block`` rejects are skipped.
    """
    import csv as _csv

    rows = _csv.DictReader(text.splitlines())
    yield from (blk for blk in map(_csv_row_to_block, rows) if blk)
|