feat: add pre-backtest data validation to detect missing price data
All checks were successful
Deploy to Production / deploy (push) Successful in 1m13s
All checks were successful
Deploy to Production / deploy (push) Successful in 1m13s
Validates trading day count, benchmark coverage, per-date ticker density, and date gaps before running simulation. Logs warnings for holdings with missing prices during execution. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
5422383fd8
commit
a33457ee6c
@ -1,10 +1,11 @@
|
||||
from app.services.backtest.engine import BacktestEngine
|
||||
from app.services.backtest.engine import BacktestEngine, DataValidationResult
|
||||
from app.services.backtest.portfolio import VirtualPortfolio, Transaction, HoldingInfo
|
||||
from app.services.backtest.metrics import MetricsCalculator, BacktestMetrics
|
||||
from app.services.backtest.worker import submit_backtest, get_executor_status
|
||||
|
||||
__all__ = [
|
||||
"BacktestEngine",
|
||||
"DataValidationResult",
|
||||
"VirtualPortfolio",
|
||||
"Transaction",
|
||||
"HoldingInfo",
|
||||
|
||||
@ -1,11 +1,14 @@
|
||||
"""
|
||||
Main backtest engine.
|
||||
"""
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import date, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import List, Dict, Optional
|
||||
from dateutil.relativedelta import relativedelta
|
||||
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.models.backtest import (
|
||||
@ -18,6 +21,16 @@ from app.services.backtest.metrics import MetricsCalculator
|
||||
from app.services.strategy import MultiFactorStrategy, QualityStrategy, ValueMomentumStrategy
|
||||
from app.schemas.strategy import UniverseFilter, FactorWeights
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class DataValidationResult:
    """Outcome of the data checks performed before a backtest runs.

    A fresh instance starts out valid with no messages; the validator
    appends to ``errors`` / ``warnings`` and flips ``is_valid`` itself.
    """

    # False once at least one blocking problem has been recorded.
    is_valid: bool = True
    # Blocking problems; the caller joins these into the raised error message.
    errors: List[str] = field(default_factory=list)
    # Non-blocking findings; the caller only logs these.
    warnings: List[str] = field(default_factory=list)
|
||||
|
||||
|
||||
class BacktestEngine:
|
||||
"""
|
||||
@ -59,6 +72,23 @@ class BacktestEngine:
|
||||
backtest.end_date,
|
||||
)
|
||||
|
||||
# Pre-backtest data validation
|
||||
validation = self._validate_data(
|
||||
trading_days=trading_days,
|
||||
benchmark_prices=benchmark_prices,
|
||||
benchmark=backtest.benchmark,
|
||||
start_date=backtest.start_date,
|
||||
end_date=backtest.end_date,
|
||||
)
|
||||
|
||||
for warning in validation.warnings:
|
||||
logger.warning(f"Backtest {backtest_id}: {warning}")
|
||||
|
||||
if not validation.is_valid:
|
||||
raise ValueError(
|
||||
"데이터 검증 실패:\n" + "\n".join(validation.errors)
|
||||
)
|
||||
|
||||
# Create strategy instance
|
||||
strategy = self._create_strategy(
|
||||
backtest.strategy_type,
|
||||
@ -80,6 +110,17 @@ class BacktestEngine:
|
||||
prices = self._get_prices_for_date(trading_date)
|
||||
names = self._get_stock_names()
|
||||
|
||||
# Warn about holdings with missing prices
|
||||
missing = [
|
||||
t for t in portfolio.holdings
|
||||
if portfolio.holdings[t] > 0 and t not in prices
|
||||
]
|
||||
if missing:
|
||||
logger.warning(
|
||||
f"{trading_date}: 보유 종목 가격 누락 {missing} "
|
||||
f"(0원으로 처리됨)"
|
||||
)
|
||||
|
||||
# Rebalance if needed
|
||||
if trading_date in rebalance_dates:
|
||||
# Run strategy to get target stocks
|
||||
@ -143,6 +184,102 @@ class BacktestEngine:
|
||||
transactions=all_transactions,
|
||||
)
|
||||
|
||||
def _validate_data(
    self,
    trading_days: List[date],
    benchmark_prices: Dict[date, Decimal],
    benchmark: str,
    start_date: date,
    end_date: date,
) -> DataValidationResult:
    """
    Validate price data completeness before running a backtest.

    Checks:
    1. Minimum trading days requirement
    2. Benchmark data coverage
    3. Price data density on sampled trading days (tickers per day)
    4. Large date gaps between consecutive trading days

    Args:
        trading_days: Trading dates of the backtest period. The gap check
            compares neighbouring entries, so the list is assumed to be
            sorted ascending -- TODO confirm against the caller.
        benchmark_prices: Benchmark price keyed by date; only key
            membership is inspected here.
        benchmark: Benchmark name. NOTE(review): it currently has no
            effect, every benchmark is reported under ticker "069500".
        start_date: First calendar day of the backtest period.
        end_date: Last calendar day of the backtest period.

    Returns:
        A DataValidationResult. ``is_valid`` is False when any error was
        recorded; warnings never invalidate the result.
    """
    result = DataValidationResult()

    num_trading_days = len(trading_days)
    calendar_days = (end_date - start_date).days

    # Without any trading day the sample and gap checks below have nothing
    # to index into; report a validation error instead of an IndexError.
    if not trading_days:
        result.errors.append(
            f"거래일 데이터 없음: {start_date} ~ {end_date}"
        )
        result.is_valid = False
        return result

    # 1. Minimum trading days check (only for periods longer than 30 days)
    if calendar_days > 30:
        # Roughly 69% of calendar days are expected to be trading days
        # (weekends ~28%, holidays ~3%); 50% is the enforced floor.
        expected_min = int(calendar_days * 0.5)
        if num_trading_days < expected_min:
            result.errors.append(
                f"거래일 수 부족: {num_trading_days}일 "
                f"(기간 {calendar_days}일 중 최소 {expected_min}일 필요)"
            )
            result.is_valid = False

    # 2. Benchmark data coverage
    # The original expression returned "069500" for KOSPI and for every
    # other benchmark alike, so the ticker is effectively a constant.
    benchmark_ticker = "069500"
    benchmark_coverage = sum(1 for d in trading_days if d in benchmark_prices)
    benchmark_pct = benchmark_coverage / num_trading_days * 100

    if benchmark_coverage == 0:
        result.errors.append(
            f"벤치마크({benchmark_ticker}) 가격 데이터 없음"
        )
        result.is_valid = False
    elif benchmark_pct < 90:
        result.warnings.append(
            f"벤치마크({benchmark_ticker}) 데이터 커버리지 낮음: "
            f"{benchmark_coverage}/{num_trading_days}일 ({benchmark_pct:.1f}%)"
        )

    # 3. Price data density (sample check on first, middle, last trading day)
    # dict.fromkeys drops repeated dates while keeping order, so a period of
    # one or two trading days is not queried and reported several times.
    sample_dates = list(
        dict.fromkeys(
            [
                trading_days[0],
                trading_days[num_trading_days // 2],
                trading_days[-1],
            ]
        )
    )
    for sample_date in sample_dates:
        # Treat a missing scalar result as zero rows so the comparisons
        # below never see None.
        ticker_count = (
            self.db.query(func.count(Price.ticker))
            .filter(Price.date == sample_date)
            .scalar()
        ) or 0
        if ticker_count == 0:
            result.errors.append(
                f"{sample_date} 가격 데이터 없음 (종목 0개)"
            )
            result.is_valid = False
        elif ticker_count < 100:
            result.warnings.append(
                f"{sample_date} 종목 수 적음: {ticker_count}개"
            )

    # 4. Gaps of more than 7 calendar days between consecutive trading days
    for previous_day, current_day in zip(trading_days, trading_days[1:]):
        gap = (current_day - previous_day).days
        if gap > 7:
            result.warnings.append(
                f"거래일 갭 발견: {previous_day} ~ {current_day} "
                f"({gap}일)"
            )

    # Only a completely clean run is logged here; the caller logs warnings.
    if result.is_valid and not result.warnings:
        logger.info(
            "데이터 검증 통과: 거래일 %d일, 벤치마크 커버리지 %.1f%%",
            num_trading_days,
            benchmark_pct,
        )

    return result
|
||||
|
||||
def _generate_rebalance_dates(
|
||||
self,
|
||||
start_date: date,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user