@dataclass
class DataValidationResult:
    """
    Outcome of the data checks performed before a backtest is run.

    A fresh instance starts out valid with no messages; the validator
    appends messages and flips ``is_valid`` itself when it records an error.
    """

    # True until the validator records a blocking problem.
    is_valid: bool = True
    # Blocking problems; each instance gets its own list.
    errors: List[str] = field(default_factory=list)
    # Non-blocking findings; each instance gets its own list.
    warnings: List[str] = field(default_factory=list)
def _validate_data(
    self,
    trading_days: List[date],
    benchmark_prices: Dict[date, Decimal],
    benchmark: str,
    start_date: date,
    end_date: date,
) -> DataValidationResult:
    """
    Validate price data completeness before running a backtest.

    Checks:
    1. Minimum number of trading days for the requested period
    2. Benchmark price coverage over the trading days
    3. Price data density (ticker count) on sampled trading days
    4. Large date gaps between consecutive trading days

    Args:
        trading_days: Trading dates of the backtest period. The gap check
            compares neighbouring entries, so ascending order is assumed
            (it is not verified here).
        benchmark_prices: Benchmark price keyed by date.
        benchmark: Benchmark name. Every benchmark is currently reported
            under the same proxy ticker, so the value does not change the
            outcome.
        start_date: First calendar day of the backtest period.
        end_date: Last calendar day of the backtest period.

    Returns:
        A DataValidationResult. Recording an error sets ``is_valid`` to
        False; warnings leave it untouched.
    """
    result = DataValidationResult()

    def fail(message: str) -> None:
        # Errors and the validity flag must always change together.
        result.errors.append(message)
        result.is_valid = False

    # Without any trading day the sampling below would index an empty
    # list; report it as a validation error instead of crashing.
    if not trading_days:
        fail(f"거래일 데이터 없음: {start_date} ~ {end_date}")
        return result

    num_trading_days = len(trading_days)
    calendar_days = (end_date - start_date).days

    # 1. Minimum trading days check (skipped for short periods, where a
    #    few holidays would distort the ratio).
    if calendar_days > 30:
        # About 69% of calendar days are expected to be trading days
        # (weekends ~28%, holidays ~3%); require at least 50% to leave
        # headroom.
        expected_min = int(calendar_days * 0.5)
        if num_trading_days < expected_min:
            fail(
                f"거래일 수 부족: {num_trading_days}일 "
                f"(기간 {calendar_days}일 중 최소 {expected_min}일 필요)"
            )

    # 2. Benchmark data coverage
    # The previous conditional returned "069500" for KOSPI and for every
    # other benchmark alike, so it is a plain constant here.
    # NOTE(review): confirm whether non-KOSPI benchmarks need another ticker.
    benchmark_ticker = "069500"
    benchmark_coverage = sum(1 for d in trading_days if d in benchmark_prices)
    benchmark_pct = benchmark_coverage / num_trading_days * 100

    if benchmark_coverage == 0:
        fail(f"벤치마크({benchmark_ticker}) 가격 데이터 없음")
    elif benchmark_pct < 90:
        result.warnings.append(
            f"벤치마크({benchmark_ticker}) 데이터 커버리지 낮음: "
            f"{benchmark_coverage}/{num_trading_days}일 ({benchmark_pct:.1f}%)"
        )

    # 3. Price data density per trading day (sample check)
    # Sample the first, middle and last trading day. With one or two
    # trading days these coincide, so de-duplicate (order preserved) to
    # avoid repeated queries and repeated messages.
    sample_dates = list(
        dict.fromkeys(
            (
                trading_days[0],
                trading_days[num_trading_days // 2],
                trading_days[-1],
            )
        )
    )
    for sample_date in sample_dates:
        ticker_count = (
            self.db.query(func.count(Price.ticker))
            .filter(Price.date == sample_date)
            .scalar()
        )
        if ticker_count == 0:
            fail(f"{sample_date} 가격 데이터 없음 (종목 0개)")
        elif ticker_count < 100:
            result.warnings.append(
                f"{sample_date} 종목 수 적음: {ticker_count}개"
            )

    # 4. Large gaps between consecutive trading days (> 7 calendar days,
    #    i.e. more than a normal weekend plus a few holidays)
    for previous_day, current_day in zip(trading_days, trading_days[1:]):
        gap = (current_day - previous_day).days
        if gap > 7:
            result.warnings.append(
                f"거래일 갭 발견: {previous_day} ~ {current_day} "
                f"({gap}일)"
            )

    # Only log success when there is nothing for the caller to report.
    if result.is_valid and not result.warnings:
        logger.info(
            f"데이터 검증 통과: 거래일 {num_trading_days}일, "
            f"벤치마크 커버리지 {benchmark_pct:.1f}%"
        )

    return result