galaxis-po/backend/app/services/collectors/valuation_collector.py
머니페니 072b6059d4
Some checks are pending
Deploy to Production / deploy (push) Waiting to run
fix: KRX data collection + TIGER 200 ticker fix + trade history seed
- Upgrade pykrx 1.2.3 → 1.2.6 (KRX login session support)
- Add KRX_ID/KRX_PW env vars for KRX authentication
- Enhance error handling in all pykrx-dependent collectors
  - ETFCollector: raise KRXDataError with login hint
  - ValuationCollector: raise RuntimeError with login hint
  - StockCollector/PriceCollector/ETFPriceCollector: JSONDecodeError handling
- Fix TIGER 200 ticker: 069500 → 102110 in seed data
- Rebuild seed_data.py from actual 33 trade records
- Add trade_history_raw.csv as source data
- Fix pension_allocation recommendation: KODEX 200 → TIGER 200
- Add ticker dropdown to transaction add modal (frontend)
- Update .env.example with KRX credentials
- All 276 tests passing
2026-04-15 22:16:42 +09:00

153 lines
5.7 KiB
Python

"""
Valuation data collector using pykrx.
"""
import logging
from datetime import datetime, timedelta
from json import JSONDecodeError
import pandas as pd
from pykrx import stock as pykrx_stock
from sqlalchemy.orm import Session
from sqlalchemy.dialects.postgresql import insert
from app.services.collectors.base import BaseCollector
from app.models.stock import Valuation
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Columns a fundamentals DataFrame must contain to be accepted by the collector;
# a frame missing any of them is treated as a sign that KRX may require login.
REQUIRED_FUNDAMENTAL_COLS = {"BPS", "PER", "PBR", "EPS", "DIV", "DPS"}
class ValuationCollector(BaseCollector):
    """Collect valuation metrics (PER, PBR, dividend yield) using pykrx.

    The collector fetches the market-wide fundamentals table for a business
    day (falling back to earlier days when nothing usable is returned) and
    upserts one ``Valuation`` row per ticker.
    """

    def __init__(self, db: Session, biz_day: str | None = None):
        """Create a collector bound to a database session.

        Args:
            db: SQLAlchemy session used for the upsert.
            biz_day: Target day as ``YYYYMMDD``. When omitted (or empty),
                ``self._get_latest_biz_day()`` is used instead; that helper
                is not defined in this class (presumably inherited from
                ``BaseCollector``).

        Raises:
            ValueError: If the resulting ``biz_day`` is not a valid
                ``YYYYMMDD`` date.
        """
        super().__init__(db)
        self.biz_day = biz_day or self._get_latest_biz_day()
        self._validate_biz_day()

    def _validate_biz_day(self) -> None:
        """Validate that ``self.biz_day`` parses as a ``YYYYMMDD`` date.

        Raises:
            ValueError: If parsing fails. The original parsing error is
                chained as the cause.
        """
        try:
            datetime.strptime(self.biz_day, "%Y%m%d")
        except ValueError as exc:
            raise ValueError(
                f"Invalid biz_day format. Expected YYYYMMDD, got: {self.biz_day}"
            ) from exc

    def _safe_float(self, value) -> float | None:
        """Convert ``value`` to ``float``, mapping anything unusable to ``None``.

        Returns ``None`` for missing values (``None``, NaN, ``pd.NA``, ...),
        for values that cannot be converted, and for values that convert to
        NaN (for example the string ``"nan"``).
        """
        try:
            if pd.isna(value):
                return None
            result = float(value)
        except (ValueError, TypeError):
            # ValueError also covers array-like input, for which the truth
            # value of pd.isna(...) is ambiguous.
            return None
        # float() happily parses "nan"; never leak NaN to callers.
        return None if pd.isna(result) else result

    def _fetch_fundamental_data(self) -> tuple[pd.DataFrame, str]:
        """Fetch the fundamentals table, falling back to earlier days.

        Tries ``self.biz_day`` first and then up to three preceding
        *calendar* days, returning the first non-empty frame that contains
        every column in ``REQUIRED_FUNDAMENTAL_COLS``.

        Returns:
            A ``(frame, day)`` tuple where ``day`` is the ``YYYYMMDD`` string
            the data was actually fetched for. When every attempt fails
            without any sign of an authentication problem, an empty frame
            and ``self.biz_day`` are returned.

        Raises:
            RuntimeError: When no attempt succeeded and at least one failure
                looked like KRX requiring login (missing expected columns,
                a ``KeyError`` mentioning BPS/PER, or an undecodable JSON
                response).
        """
        target_date = datetime.strptime(self.biz_day, "%Y%m%d")
        krx_auth_error = False

        for day_offset in range(4):  # target day + 3 days back
            try_date_str = (target_date - timedelta(days=day_offset)).strftime("%Y%m%d")

            # Keep the try body limited to the call that can actually fail.
            try:
                df = pykrx_stock.get_market_fundamental_by_ticker(try_date_str, market="ALL")
            except KeyError as e:
                if "BPS" in str(e) or "PER" in str(e):
                    logger.warning(
                        "Fundamental fetch for %s: column mismatch (%s). "
                        "KRX may require login — set KRX_ID/KRX_PW env vars.",
                        try_date_str,
                        e,
                    )
                    krx_auth_error = True
                else:
                    logger.warning("Fundamental fetch failed for %s: %s", try_date_str, e)
                continue
            except (JSONDecodeError, OSError, ValueError) as e:
                # OSError covers the builtin ConnectionError as well as the
                # network exceptions raised by `requests`, which derive from
                # IOError rather than from the builtin ConnectionError.
                if isinstance(e, JSONDecodeError):
                    # A non-JSON body is treated as a symptom of a missing login.
                    krx_auth_error = True
                logger.warning("Fundamental fetch failed for %s: %s", try_date_str, e)
                continue

            if df is None or df.empty:
                logger.debug("No fundamental rows for %s; trying an earlier day", try_date_str)
                continue

            # Validate expected columns exist before handing the frame back.
            missing = REQUIRED_FUNDAMENTAL_COLS - set(df.columns)
            if missing:
                logger.warning(
                    "Fundamental data for %s missing columns: %s. KRX may require login.",
                    try_date_str,
                    missing,
                )
                krx_auth_error = True
                continue

            if day_offset > 0:
                logger.info("Fell back to %s (offset -%dd)", try_date_str, day_offset)
            return df, try_date_str

        if krx_auth_error:
            raise RuntimeError(
                f"KRX fundamental data unavailable for {self.biz_day}: "
                "KRX requires login — set KRX_ID and KRX_PW environment variables. "
                "Register at https://data.krx.co.kr"
            )
        logger.error("Fundamental fetch failed for %s and 3 previous days", self.biz_day)
        return pd.DataFrame(), self.biz_day

    def collect(self) -> int:
        """Collect valuation data and upsert it into the ``Valuation`` table.

        Rows are keyed on ``(ticker, base_date)``; an existing row is
        overwritten with the freshly fetched values. Tickers whose PER, PBR
        and dividend yield are all unavailable are skipped. ``psr`` and
        ``pcr`` are always written as ``None``.

        Returns:
            The number of records written (``0`` when no data was fetched).

        Raises:
            RuntimeError: If fetching indicates a KRX login problem, or if
                the database upsert fails (the session is rolled back and
                the original error is chained as the cause).
        """
        fund_df, effective_biz_day = self._fetch_fundamental_data()
        if fund_df.empty:
            logger.warning("No fundamental data returned for %s", self.biz_day)
            return 0

        # Use the day the data was actually fetched for, not the requested one.
        base_date = datetime.strptime(effective_biz_day, "%Y%m%d").date()
        logger.info("Processing %d valuation records for %s", len(fund_df), effective_biz_day)

        records = []
        # Walk the three needed columns in lockstep instead of doing a
        # label lookup per cell.
        rows = zip(fund_df.index, fund_df["PER"], fund_df["PBR"], fund_df["DIV"])
        for ticker, raw_per, raw_pbr, raw_div in rows:
            per = self._safe_float(raw_per)
            pbr = self._safe_float(raw_pbr)
            div_yield = self._safe_float(raw_div)
            # Skip records where all metrics are None
            if per is None and pbr is None and div_yield is None:
                continue
            records.append({
                "ticker": ticker,
                "base_date": base_date,
                "per": per,
                "pbr": pbr,
                "psr": None,
                "pcr": None,
                "dividend_yield": div_yield,
            })

        if records:
            try:
                stmt = insert(Valuation).values(records)
                stmt = stmt.on_conflict_do_update(
                    index_elements=["ticker", "base_date"],
                    set_={
                        "per": stmt.excluded.per,
                        "pbr": stmt.excluded.pbr,
                        "psr": stmt.excluded.psr,
                        "pcr": stmt.excluded.pcr,
                        "dividend_yield": stmt.excluded.dividend_yield,
                    },
                )
                self.db.execute(stmt)
                self.db.commit()
            except Exception as e:
                self.db.rollback()
                raise RuntimeError(f"Failed to insert valuation data: {e}") from e

        logger.info("Collected %d valuation records", len(records))
        return len(records)