FastAPI application that ingests CFTC Commitments of Traders data into SQLite and exposes it via a REST API with analytics endpoints (screener, percentile rank, concentration). Includes CLI for historical and weekly data ingestion, Docker setup, and a frontend. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
154 lines
4.2 KiB
Python
154 lines
4.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
CFTC COT Report Downloader
|
|
|
|
Downloads the CFTC Commitments of Traders CBT Long Form Combined report
|
|
and stores historical copies locally with dated filenames.
|
|
|
|
Usage:
|
|
python cftc_downloader.py
|
|
|
|
The report is typically published every Friday around 3:30 PM ET.
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import sys
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
|
|
# Configuration
# Page that serves the report this script archives.
URL = "https://www.cftc.gov/dea/options/deacbtlof.htm"
# Saved reports live in a "data" folder located next to this script.
DATA_DIR = Path(__file__).parent.joinpath("data")
# Plain-text log of completed downloads, kept inside DATA_DIR.
LOG_FILE = DATA_DIR.joinpath("download_log.txt")

# Browser-like request headers, sent to avoid 403 errors.
HEADERS = dict(
    [
        (
            "User-Agent",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        ),
    ]
)
|
|
|
|
|
|
def extract_report_date(html_content: str) -> str | None:
|
|
"""
|
|
Extract the report date from the HTML content.
|
|
Looks for patterns like "February 17, 2026" in the report.
|
|
"""
|
|
# Pattern matches dates like "February 17, 2026" or "January 5, 2026"
|
|
pattern = r"(January|February|March|April|May|June|July|August|September|October|November|December)\s+(\d{1,2}),\s+(\d{4})"
|
|
match = re.search(pattern, html_content)
|
|
|
|
if match:
|
|
month_name, day, year = match.groups()
|
|
# Convert to date object for formatting
|
|
date_str = f"{month_name} {day}, {year}"
|
|
try:
|
|
date_obj = datetime.strptime(date_str, "%B %d, %Y")
|
|
return date_obj.strftime("%Y-%m-%d")
|
|
except ValueError:
|
|
return None
|
|
return None
|
|
|
|
|
|
def download_report() -> tuple[bool, str]:
    """
    Download the CFTC report and save it with a dated filename.

    Fetches URL, derives the report date from the page text, writes the page
    to DATA_DIR as "<report date>_deacbtlof.htm", appends a line to the
    download log, and then attempts to import the saved file into the
    database.

    Returns:
        tuple: (success: bool, message: str). success is False when the
        request fails, when no report date can be extracted, or when a file
        for that report date already exists.
    """
    # Ensure data directory exists; parents=True also covers a missing
    # parent directory instead of raising FileNotFoundError.
    DATA_DIR.mkdir(parents=True, exist_ok=True)

    # Download the page
    print(f"Downloading from {URL}...")
    try:
        response = requests.get(URL, headers=HEADERS, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        return False, f"Download failed: {e}"

    html_content = response.text

    # Extract the report date
    report_date = extract_report_date(html_content)
    if not report_date:
        return False, "Could not extract report date from content"

    print(f"Report date: {report_date}")

    # Create filename
    filename = f"{report_date}_deacbtlof.htm"
    filepath = DATA_DIR / filename

    # Check if already downloaded
    if filepath.exists():
        return False, f"Report for {report_date} already exists: {filepath}"

    # Save the file
    filepath.write_text(html_content, encoding="utf-8")
    print(f"Saved to: {filepath}")

    # Log the download
    log_download(report_date, filepath)

    # Import into database. This step is best effort: the download itself
    # has already succeeded, so a missing app package must not fail it.
    try:
        from app.ingestion.importer import import_html_file

        result = import_html_file(str(filepath))
        if result.error:
            print(f"DB import warning: {result.error}")
        else:
            print(f"Imported to DB: {result.rows_inserted} rows inserted")
    except ImportError as exc:
        # Report the reason instead of skipping silently, so a broken
        # importer dependency is distinguishable from an absent app package.
        print(f"DB import skipped: {exc}")

    return True, f"Successfully downloaded report for {report_date}"
|
|
|
|
|
|
def log_download(report_date: str, filepath: Path) -> None:
    """
    Append one line describing a completed download to LOG_FILE.

    Args:
        report_date: Report date string recorded in the entry.
        filepath: Path of the saved report; only its file name is logged.
    """
    # The entry is stamped with the local wall-clock time of the call.
    timestamp = format(datetime.now(), "%Y-%m-%d %H:%M:%S")
    log_entry = f"{timestamp} | Report date: {report_date} | File: {filepath.name}\n"

    # Append mode keeps earlier entries and creates the file on first use.
    with LOG_FILE.open(mode="a", encoding="utf-8") as log:
        log.write(log_entry)
|
|
|
|
|
|
def list_downloads() -> None:
    """
    Print the saved report files found in DATA_DIR.

    Prints a short notice when DATA_DIR does not exist or holds no
    "*_deacbtlof.htm" files; otherwise prints a header followed by each
    file name, in sorted order, with its size in kilobytes.
    """
    if DATA_DIR.exists():
        files = list(DATA_DIR.glob("*_deacbtlof.htm"))
        files.sort()

        if files:
            print(f"\nDownloaded reports ({len(files)} files):")
            print("-" * 40)
            for f in files:
                # st_size is in bytes; report it in KB with one decimal.
                size_kb = f.stat().st_size / 1024
                print(f" {f.name} ({size_kb:.1f} KB)")
        else:
            print("No downloaded reports found.")
    else:
        print("No downloads yet.")
|
|
|
|
|
|
def main():
    """
    Main entry point.

    Prints a banner, runs one download attempt, prints its result message,
    and lists the reports currently on disk.

    Returns:
        0 when the download succeeded, 1 otherwise (used as the exit status).
    """
    for banner_line in ("CFTC COT Report Downloader", "=" * 40):
        print(banner_line)

    success, message = download_report()
    print(f"\n{message}")

    # Show current downloads regardless of the download outcome
    list_downloads()

    if success:
        return 0
    return 1
|
|
|
|
|
|
# Run the downloader only when executed as a script; the value returned by
# main() becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|