feat: add pre-backtest data validation to detect missing price data
All checks were successful
Deploy to Production / deploy (push) Successful in 1m13s

Validates trading day count, benchmark coverage, per-date ticker
density, and date gaps before running simulation. Logs warnings for
holdings with missing prices during execution.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
zephyrdark 2026-02-18 22:55:50 +09:00
parent 5422383fd8
commit a33457ee6c
2 changed files with 139 additions and 1 deletions

View File

@ -1,10 +1,11 @@
from app.services.backtest.engine import BacktestEngine
from app.services.backtest.engine import BacktestEngine, DataValidationResult
from app.services.backtest.portfolio import VirtualPortfolio, Transaction, HoldingInfo
from app.services.backtest.metrics import MetricsCalculator, BacktestMetrics
from app.services.backtest.worker import submit_backtest, get_executor_status
__all__ = [
"BacktestEngine",
"DataValidationResult",
"VirtualPortfolio",
"Transaction",
"HoldingInfo",

View File

@ -1,11 +1,14 @@
"""
Main backtest engine.
"""
import logging
from dataclasses import dataclass, field
from datetime import date, timedelta
from decimal import Decimal
from typing import List, Dict, Optional
from dateutil.relativedelta import relativedelta
from sqlalchemy import func
from sqlalchemy.orm import Session
from app.models.backtest import (
@ -18,6 +21,16 @@ from app.services.backtest.metrics import MetricsCalculator
from app.services.strategy import MultiFactorStrategy, QualityStrategy, ValueMomentumStrategy
from app.schemas.strategy import UniverseFilter, FactorWeights
logger = logging.getLogger(__name__)
@dataclass
class DataValidationResult:
    """
    Outcome of the data validation performed before a backtest runs.

    A fresh instance starts out valid with no messages; checks flip
    ``is_valid`` to False and append to ``errors`` when they fail, and
    append to ``warnings`` for non-blocking findings.
    """

    # False once at least one blocking problem has been recorded.
    is_valid: bool = field(default=True)
    # Blocking problems, one human-readable message per entry.
    errors: List[str] = field(default_factory=list)
    # Non-blocking findings, one human-readable message per entry.
    warnings: List[str] = field(default_factory=list)
class BacktestEngine:
"""
@ -59,6 +72,23 @@ class BacktestEngine:
backtest.end_date,
)
# Pre-backtest data validation
validation = self._validate_data(
trading_days=trading_days,
benchmark_prices=benchmark_prices,
benchmark=backtest.benchmark,
start_date=backtest.start_date,
end_date=backtest.end_date,
)
for warning in validation.warnings:
logger.warning(f"Backtest {backtest_id}: {warning}")
if not validation.is_valid:
raise ValueError(
"데이터 검증 실패:\n" + "\n".join(validation.errors)
)
# Create strategy instance
strategy = self._create_strategy(
backtest.strategy_type,
@ -80,6 +110,17 @@ class BacktestEngine:
prices = self._get_prices_for_date(trading_date)
names = self._get_stock_names()
# Warn about holdings with missing prices
missing = [
t for t in portfolio.holdings
if portfolio.holdings[t] > 0 and t not in prices
]
if missing:
logger.warning(
f"{trading_date}: 보유 종목 가격 누락 {missing} "
f"(0원으로 처리됨)"
)
# Rebalance if needed
if trading_date in rebalance_dates:
# Run strategy to get target stocks
@ -143,6 +184,102 @@ class BacktestEngine:
transactions=all_transactions,
)
def _validate_data(
    self,
    trading_days: List[date],
    benchmark_prices: Dict[date, Decimal],
    benchmark: str,
    start_date: date,
    end_date: date,
) -> DataValidationResult:
    """
    Validate price data completeness before running a backtest.

    Checks:
    1. Minimum trading days requirement
    2. Benchmark data coverage
    3. Price data density (tickers per day) on sampled trading days
    4. Large date gaps between consecutive trading days

    Args:
        trading_days: Trading dates of the backtest period. The sampling
            and gap checks index and diff neighbours, so this is assumed
            to be sorted ascending -- TODO confirm against the caller.
        benchmark_prices: Benchmark price keyed by date.
        benchmark: Benchmark name. Kept for interface compatibility;
            every benchmark currently maps to the same ticker.
        start_date: First calendar day of the backtest period.
        end_date: Last calendar day of the backtest period.

    Returns:
        A DataValidationResult. ``is_valid`` is False when any error was
        recorded; warnings never invalidate the result.
    """
    result = DataValidationResult()
    num_trading_days = len(trading_days)
    calendar_days = (end_date - start_date).days

    # Nothing below can be evaluated without at least one trading day
    # (the sample check indexes into the list), so report it as a
    # validation error instead of failing with an IndexError.
    if not trading_days:
        result.errors.append(
            f"거래일 데이터 없음 ({start_date} ~ {end_date})"
        )
        result.is_valid = False
        return result

    # 1. Minimum trading days check (only for periods over 30 days)
    if calendar_days > 30:
        # Require at least 50% of calendar days to be trading days.
        # Roughly 69% is expected (weekends ~28%, holidays ~3%), so the
        # 50% floor only trips on substantially incomplete data.
        expected_min = int(calendar_days * 0.5)
        if num_trading_days < expected_min:
            result.errors.append(
                f"거래일 수 부족: {num_trading_days}"
                f"(기간 {calendar_days}일 중 최소 {expected_min}일 필요)"
            )
            result.is_valid = False

    # 2. Benchmark data coverage
    # Every benchmark name resolves to this single ticker.
    benchmark_ticker = "069500"
    benchmark_coverage = sum(
        1 for day in trading_days if day in benchmark_prices
    )
    benchmark_pct = benchmark_coverage / num_trading_days * 100
    if benchmark_coverage == 0:
        result.errors.append(
            f"벤치마크({benchmark_ticker}) 가격 데이터 없음"
        )
        result.is_valid = False
    elif benchmark_pct < 90:
        result.warnings.append(
            f"벤치마크({benchmark_ticker}) 데이터 커버리지 낮음: "
            f"{benchmark_coverage}/{num_trading_days}일 ({benchmark_pct:.1f}%)"
        )

    # 3. Price data density per trading day (sample check)
    # Sample the first, middle and last trading days. dict.fromkeys
    # drops repeats while keeping order, so a one- or two-day period
    # does not query and report the same date more than once.
    sample_dates = list(dict.fromkeys((
        trading_days[0],
        trading_days[num_trading_days // 2],
        trading_days[-1],
    )))
    for sample_date in sample_dates:
        ticker_count = (
            self.db.query(func.count(Price.ticker))
            .filter(Price.date == sample_date)
            .scalar()
        )
        if ticker_count == 0:
            result.errors.append(
                f"{sample_date} 가격 데이터 없음 (종목 0개)"
            )
            result.is_valid = False
        elif ticker_count < 100:
            result.warnings.append(
                f"{sample_date} 종목 수 적음: {ticker_count}"
            )

    # 4. Large gaps between consecutive trading days (> 7 calendar days)
    for previous_day, current_day in zip(trading_days, trading_days[1:]):
        gap = (current_day - previous_day).days
        if gap > 7:
            result.warnings.append(
                f"거래일 갭 발견: {previous_day} ~ {current_day} "
                f"({gap}일)"
            )

    # Only a completely clean run is logged here.
    if result.is_valid and not result.warnings:
        logger.info(
            f"데이터 검증 통과: 거래일 {num_trading_days}일, "
            f"벤치마크 커버리지 {benchmark_pct:.1f}%"
        )
    return result
def _generate_rebalance_dates(
self,
start_date: date,