feat: add pre-backtest data validation to detect missing price data
All checks were successful
Deploy to Production / deploy (push) Successful in 1m13s
All checks were successful
Deploy to Production / deploy (push) Successful in 1m13s
Validates trading day count, benchmark coverage, per-date ticker density, and date gaps before running simulation. Logs warnings for holdings with missing prices during execution. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
5422383fd8
commit
a33457ee6c
@ -1,10 +1,11 @@
|
|||||||
from app.services.backtest.engine import BacktestEngine
|
from app.services.backtest.engine import BacktestEngine, DataValidationResult
|
||||||
from app.services.backtest.portfolio import VirtualPortfolio, Transaction, HoldingInfo
|
from app.services.backtest.portfolio import VirtualPortfolio, Transaction, HoldingInfo
|
||||||
from app.services.backtest.metrics import MetricsCalculator, BacktestMetrics
|
from app.services.backtest.metrics import MetricsCalculator, BacktestMetrics
|
||||||
from app.services.backtest.worker import submit_backtest, get_executor_status
|
from app.services.backtest.worker import submit_backtest, get_executor_status
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"BacktestEngine",
|
"BacktestEngine",
|
||||||
|
"DataValidationResult",
|
||||||
"VirtualPortfolio",
|
"VirtualPortfolio",
|
||||||
"Transaction",
|
"Transaction",
|
||||||
"HoldingInfo",
|
"HoldingInfo",
|
||||||
|
|||||||
@ -1,11 +1,14 @@
|
|||||||
"""
|
"""
|
||||||
Main backtest engine.
|
Main backtest engine.
|
||||||
"""
|
"""
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass, field
|
||||||
from datetime import date, timedelta
|
from datetime import date, timedelta
|
||||||
from decimal import Decimal
|
from decimal import Decimal
|
||||||
from typing import List, Dict, Optional
|
from typing import List, Dict, Optional
|
||||||
from dateutil.relativedelta import relativedelta
|
from dateutil.relativedelta import relativedelta
|
||||||
|
|
||||||
|
from sqlalchemy import func
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from app.models.backtest import (
|
from app.models.backtest import (
|
||||||
@ -18,6 +21,16 @@ from app.services.backtest.metrics import MetricsCalculator
|
|||||||
from app.services.strategy import MultiFactorStrategy, QualityStrategy, ValueMomentumStrategy
|
from app.services.strategy import MultiFactorStrategy, QualityStrategy, ValueMomentumStrategy
|
||||||
from app.schemas.strategy import UniverseFilter, FactorWeights
|
from app.schemas.strategy import UniverseFilter, FactorWeights
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class DataValidationResult:
    """
    Outcome of the data checks performed before a backtest is simulated.

    A freshly created instance represents a passing validation with no
    findings; each instance owns its own ``errors`` and ``warnings`` lists.
    """

    # Overall verdict; True until a check marks the data as unusable.
    is_valid: bool = field(default=True)
    # Human-readable descriptions of blocking problems.
    errors: List[str] = field(default_factory=list)
    # Human-readable descriptions of non-blocking findings.
    warnings: List[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
class BacktestEngine:
|
class BacktestEngine:
|
||||||
"""
|
"""
|
||||||
@ -59,6 +72,23 @@ class BacktestEngine:
|
|||||||
backtest.end_date,
|
backtest.end_date,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Pre-backtest data validation
|
||||||
|
validation = self._validate_data(
|
||||||
|
trading_days=trading_days,
|
||||||
|
benchmark_prices=benchmark_prices,
|
||||||
|
benchmark=backtest.benchmark,
|
||||||
|
start_date=backtest.start_date,
|
||||||
|
end_date=backtest.end_date,
|
||||||
|
)
|
||||||
|
|
||||||
|
for warning in validation.warnings:
|
||||||
|
logger.warning(f"Backtest {backtest_id}: {warning}")
|
||||||
|
|
||||||
|
if not validation.is_valid:
|
||||||
|
raise ValueError(
|
||||||
|
"데이터 검증 실패:\n" + "\n".join(validation.errors)
|
||||||
|
)
|
||||||
|
|
||||||
# Create strategy instance
|
# Create strategy instance
|
||||||
strategy = self._create_strategy(
|
strategy = self._create_strategy(
|
||||||
backtest.strategy_type,
|
backtest.strategy_type,
|
||||||
@ -80,6 +110,17 @@ class BacktestEngine:
|
|||||||
prices = self._get_prices_for_date(trading_date)
|
prices = self._get_prices_for_date(trading_date)
|
||||||
names = self._get_stock_names()
|
names = self._get_stock_names()
|
||||||
|
|
||||||
|
# Warn about holdings with missing prices
|
||||||
|
missing = [
|
||||||
|
t for t in portfolio.holdings
|
||||||
|
if portfolio.holdings[t] > 0 and t not in prices
|
||||||
|
]
|
||||||
|
if missing:
|
||||||
|
logger.warning(
|
||||||
|
f"{trading_date}: 보유 종목 가격 누락 {missing} "
|
||||||
|
f"(0원으로 처리됨)"
|
||||||
|
)
|
||||||
|
|
||||||
# Rebalance if needed
|
# Rebalance if needed
|
||||||
if trading_date in rebalance_dates:
|
if trading_date in rebalance_dates:
|
||||||
# Run strategy to get target stocks
|
# Run strategy to get target stocks
|
||||||
@ -143,6 +184,102 @@ class BacktestEngine:
|
|||||||
transactions=all_transactions,
|
transactions=all_transactions,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _validate_data(
    self,
    trading_days: List[date],
    benchmark_prices: Dict[date, Decimal],
    benchmark: str,
    start_date: date,
    end_date: date,
) -> DataValidationResult:
    """
    Validate price data completeness before running a backtest.

    Checks:
    1. Minimum trading days requirement (periods longer than 30 calendar days)
    2. Benchmark data coverage across the trading days
    3. Price data density (ticker count on the first, middle and last trading day)
    4. Large date gaps between consecutive trading days

    Args:
        trading_days: Trading dates in the backtest period. Presumably sorted
            ascending (the sample and gap checks rely on order) - confirm
            against the caller.
        benchmark_prices: Benchmark price keyed by date.
        benchmark: Benchmark name. Currently does not influence the ticker
            shown in messages (see note below).
        start_date: First calendar day of the backtest period.
        end_date: Last calendar day of the backtest period.

    Returns:
        A DataValidationResult. ``is_valid`` is False whenever ``errors`` is
        non-empty; ``warnings`` never affect ``is_valid``.
    """
    # Thresholds used by the individual checks.
    min_trading_day_ratio = 0.5    # share of calendar days that must be trading days
    min_benchmark_coverage_pct = 90
    min_tickers_per_day = 100
    max_gap_days = 7

    result = DataValidationResult()

    num_trading_days = len(trading_days)
    calendar_days = (end_date - start_date).days

    # 1. Minimum trading days check
    if calendar_days > 30:
        # Weekends (~28%) and holidays (~3%) leave ~69% of calendar days as
        # trading days; require at least 50% to leave headroom.
        expected_min = int(calendar_days * min_trading_day_ratio)
        if num_trading_days < expected_min:
            result.errors.append(
                f"거래일 수 부족: {num_trading_days}일 "
                f"(기간 {calendar_days}일 중 최소 {expected_min}일 필요)"
            )
            result.is_valid = False

    # With no trading days none of the remaining checks can run (the sample
    # check would index into an empty list), so fail explicitly and stop.
    if not trading_days:
        result.errors.append(
            f"거래일 데이터 없음: {start_date} ~ {end_date}"
        )
        result.is_valid = False
        return result

    # 2. Benchmark data coverage
    # NOTE(review): the original conditional returned "069500" for every
    # benchmark value; other benchmarks presumably need their own ticker -
    # confirm the intended mapping.
    benchmark_ticker = "069500"
    benchmark_coverage = sum(1 for d in trading_days if d in benchmark_prices)
    benchmark_pct = benchmark_coverage / num_trading_days * 100

    if benchmark_coverage == 0:
        result.errors.append(
            f"벤치마크({benchmark_ticker}) 가격 데이터 없음"
        )
        result.is_valid = False
    elif benchmark_pct < min_benchmark_coverage_pct:
        result.warnings.append(
            f"벤치마크({benchmark_ticker}) 데이터 커버리지 낮음: "
            f"{benchmark_coverage}/{num_trading_days}일 ({benchmark_pct:.1f}%)"
        )

    # 3. Price data density per trading day (sample check)
    # Sample the first, middle and last trading days; dict.fromkeys drops
    # duplicates (short periods) while keeping order, so each date is queried
    # and reported only once.
    sample_dates = list(
        dict.fromkeys(
            [
                trading_days[0],
                trading_days[num_trading_days // 2],
                trading_days[-1],
            ]
        )
    )
    for sample_date in sample_dates:
        ticker_count = (
            self.db.query(func.count(Price.ticker))
            .filter(Price.date == sample_date)
            .scalar()
        ) or 0
        if ticker_count == 0:
            result.errors.append(
                f"{sample_date} 가격 데이터 없음 (종목 0개)"
            )
            result.is_valid = False
        elif ticker_count < min_tickers_per_day:
            result.warnings.append(
                f"{sample_date} 종목 수 적음: {ticker_count}개"
            )

    # 4. Large gaps between consecutive trading days (> 7 calendar days)
    for previous_day, current_day in zip(trading_days, trading_days[1:]):
        gap = (current_day - previous_day).days
        if gap > max_gap_days:
            result.warnings.append(
                f"거래일 갭 발견: {previous_day} ~ {current_day} "
                f"({gap}일)"
            )

    if result.is_valid and not result.warnings:
        logger.info(
            "데이터 검증 통과: 거래일 %d일, 벤치마크 커버리지 %.1f%%",
            num_trading_days,
            benchmark_pct,
        )

    return result
|
||||||
|
|
||||||
def _generate_rebalance_dates(
|
def _generate_rebalance_dates(
|
||||||
self,
|
self,
|
||||||
start_date: date,
|
start_date: date,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user