COTexplorer/cftc_downloader.py
Greg 37f8eac932 Initial commit: CFTC COT Explorer
FastAPI application that ingests CFTC Commitments of Traders data into SQLite
and exposes it via a REST API with analytics endpoints (screener, percentile rank,
concentration). Includes CLI for historical and weekly data ingestion, Docker setup,
and a frontend.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-22 11:23:00 +01:00

154 lines
4.2 KiB
Python

#!/usr/bin/env python3
"""
CFTC COT Report Downloader

Downloads the CFTC Commitments of Traders CBT Long Form Combined report
and stores historical copies locally with dated filenames. When the `app`
package is importable, each newly saved report is also imported into the
database.

Usage:
    python cftc_downloader.py

The report is typically published every Friday around 3:30 PM ET.
"""
import os
import re
import sys
from datetime import datetime
from pathlib import Path
import requests
from bs4 import BeautifulSoup
# Configuration: where the report is fetched from and where copies are kept.
URL = "https://www.cftc.gov/dea/options/deacbtlof.htm"
DATA_DIR = Path(__file__).parent.joinpath("data")
LOG_FILE = DATA_DIR.joinpath("download_log.txt")

# Browser-like request headers; sent to avoid 403 responses.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
}
def extract_report_date(html_content: str) -> str | None:
"""
Extract the report date from the HTML content.
Looks for patterns like "February 17, 2026" in the report.
"""
# Pattern matches dates like "February 17, 2026" or "January 5, 2026"
pattern = r"(January|February|March|April|May|June|July|August|September|October|November|December)\s+(\d{1,2}),\s+(\d{4})"
match = re.search(pattern, html_content)
if match:
month_name, day, year = match.groups()
# Convert to date object for formatting
date_str = f"{month_name} {day}, {year}"
try:
date_obj = datetime.strptime(date_str, "%B %d, %Y")
return date_obj.strftime("%Y-%m-%d")
except ValueError:
return None
return None
def download_report() -> tuple[bool, str]:
    """
    Download the CFTC report and save it with a dated filename.

    Fetches URL, derives the report date from the page text, writes the page
    into DATA_DIR as "<report_date>_deacbtlof.htm", appends an entry to the
    download log, and then attempts a best-effort import of the saved file
    into the database via app.ingestion.importer.import_html_file.

    Returns:
        tuple: (success: bool, message: str). success is False when the HTTP
        request fails, no report date can be extracted, a file for that report
        date already exists, or the data directory / report file cannot be
        written.
    """
    # Ensure data directory exists
    try:
        DATA_DIR.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        return False, f"Could not create data directory {DATA_DIR}: {e}"

    # Download the page
    print(f"Downloading from {URL}...")
    try:
        response = requests.get(URL, headers=HEADERS, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        return False, f"Download failed: {e}"
    html_content = response.text

    # Extract the report date
    report_date = extract_report_date(html_content)
    if not report_date:
        return False, "Could not extract report date from content"
    print(f"Report date: {report_date}")

    # Create filename
    filename = f"{report_date}_deacbtlof.htm"
    filepath = DATA_DIR / filename

    # Check if already downloaded
    if filepath.exists():
        return False, f"Report for {report_date} already exists: {filepath}"

    # Save the file; report filesystem failures through the return value
    # like every other failure mode of this function.
    try:
        filepath.write_text(html_content, encoding="utf-8")
    except OSError as e:
        return False, f"Could not save report to {filepath}: {e}"
    print(f"Saved to: {filepath}")

    # Log the download
    log_download(report_date, filepath)

    # Import into database (best effort: the download itself already succeeded)
    try:
        from app.ingestion.importer import import_html_file
        result = import_html_file(str(filepath))
        if result.error:
            print(f"DB import warning: {result.error}")
        else:
            print(f"Imported to DB: {result.rows_inserted} rows inserted")
    except ImportError as e:
        # app package not available: skip the DB import, but say so instead
        # of hiding a possibly broken installation.
        print(f"DB import skipped: {e}")

    return True, f"Successfully downloaded report for {report_date}"
def log_download(report_date: str, filepath: Path) -> None:
    """Append one timestamped line describing a saved report to LOG_FILE.

    Args:
        report_date: Report date string recorded in the entry.
        filepath: Path of the saved report; only its file name is logged.
    """
    now = datetime.now()
    line = (
        f"{now:%Y-%m-%d %H:%M:%S}"
        f" | Report date: {report_date}"
        f" | File: {filepath.name}\n"
    )
    with LOG_FILE.open("a", encoding="utf-8") as log:
        log.write(line)
def list_downloads() -> None:
    """Print the name and size of every saved report found in DATA_DIR.

    Prints a short notice instead when DATA_DIR does not exist or contains no
    files matching "*_deacbtlof.htm".
    """
    if not DATA_DIR.exists():
        print("No downloads yet.")
        return

    reports = list(DATA_DIR.glob("*_deacbtlof.htm"))
    reports.sort()
    if not reports:
        print("No downloaded reports found.")
        return

    print(f"\nDownloaded reports ({len(reports)} files):")
    print("-" * 40)
    for report in reports:
        kilobytes = report.stat().st_size / 1024
        print(f" {report.name} ({kilobytes:.1f} KB)")
def main():
    """Run one download attempt, print its outcome, and list saved reports.

    Returns:
        0 when download_report reports success, otherwise 1 (used as the
        process exit status by the script entry point).
    """
    print("CFTC COT Report Downloader")
    print("=" * 40)

    ok, outcome = download_report()
    print(f"\n{outcome}")

    # Show current downloads
    list_downloads()

    if ok:
        return 0
    return 1
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())