Some checks are pending
Deploy to Production / deploy (push) Waiting to run
- Upgrade pykrx 1.2.3 → 1.2.6 (KRX login session support) - Add KRX_ID/KRX_PW env vars for KRX authentication - Enhance error handling in all pykrx-dependent collectors - ETFCollector: raise KRXDataError with login hint - ValuationCollector: raise RuntimeError with login hint - StockCollector/PriceCollector/ETFPriceCollector: JSONDecodeError handling - Fix TIGER 200 ticker: 069500 → 102110 in seed data - Rebuild seed_data.py from actual 33 trade records - Add trade_history_raw.csv as source data - Fix pension_allocation recommendation: KODEX 200 → TIGER 200 - Add ticker dropdown to transaction add modal (frontend) - Update .env.example with KRX credentials - All 276 tests passing
153 lines
5.7 KiB
Python
153 lines
5.7 KiB
Python
"""
Valuation data collector using pykrx.
"""
|
|
import logging
|
|
from datetime import datetime, timedelta
|
|
from json import JSONDecodeError
|
|
|
|
import pandas as pd
|
|
from pykrx import stock as pykrx_stock
|
|
|
|
from sqlalchemy.orm import Session
|
|
from sqlalchemy.dialects.postgresql import insert
|
|
|
|
from app.services.collectors.base import BaseCollector
|
|
from app.models.stock import Valuation
|
|
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Columns a fundamentals DataFrame must contain to be treated as usable.
# ValuationCollector checks fetched data against this set and treats a
# response lacking any of them as a hint that KRX requires login.
REQUIRED_FUNDAMENTAL_COLS = {"BPS", "PER", "PBR", "EPS", "DIV", "DPS"}
|
|
|
|
|
|
class ValuationCollector(BaseCollector):
    """Collects valuation metrics (PER, PBR, etc.) using pykrx.

    The collector fetches the market-wide fundamentals table for a business
    day (falling back to earlier calendar days when nothing usable comes
    back) and upserts one ``Valuation`` row per ticker keyed on
    ``(ticker, base_date)``.
    """

    # How many calendar days before ``biz_day`` are tried when the requested
    # day yields no usable data (so MAX_FALLBACK_DAYS + 1 attempts in total).
    MAX_FALLBACK_DAYS = 3

    def __init__(self, db: Session, biz_day: str | None = None):
        """Create a collector bound to a DB session.

        Args:
            db: SQLAlchemy session used for the upsert; it is also passed to
                the base class constructor.
            biz_day: Target business day as ``YYYYMMDD``. When omitted (or
                empty), ``self._get_latest_biz_day()`` supplies it; that
                method is not defined in this class (presumably inherited
                from ``BaseCollector`` - verify).

        Raises:
            ValueError: If the resulting ``biz_day`` is not a ``YYYYMMDD`` string.
        """
        super().__init__(db)
        self.biz_day = biz_day or self._get_latest_biz_day()
        self._validate_biz_day()

    def _validate_biz_day(self) -> None:
        """Validate business day format.

        Raises:
            ValueError: If ``self.biz_day`` cannot be parsed as ``YYYYMMDD``.
                A non-string value is reported the same way instead of
                leaking ``strptime``'s ``TypeError``.
        """
        try:
            datetime.strptime(self.biz_day, "%Y%m%d")
        except (ValueError, TypeError) as exc:
            raise ValueError(
                f"Invalid biz_day format. Expected YYYYMMDD, got: {self.biz_day}"
            ) from exc

    def _safe_float(self, value) -> float | None:
        """Safely convert value to float.

        Returns:
            The value as ``float``, or ``None`` when it is missing
            (``pd.isna``), cannot be converted, or converts to NaN.
        """
        try:
            # pd.isna() on an array-like returns an array whose truth value
            # may raise ValueError; keeping it inside the try makes such
            # input yield None instead of propagating.
            if pd.isna(value):
                return None
            result = float(value)
        except (ValueError, TypeError):
            return None
        # Inputs such as the string "nan" convert successfully but are still
        # "missing"; treat them like any other NaN.
        return None if pd.isna(result) else result

    def _fetch_fundamental_data(self) -> tuple[pd.DataFrame, str]:
        """Fetch fundamental data with fallback to previous days.

        Starting at ``self.biz_day``, up to ``MAX_FALLBACK_DAYS`` earlier
        calendar days are tried until pykrx returns a non-empty DataFrame
        containing every column in ``REQUIRED_FUNDAMENTAL_COLS``.

        Returns:
            ``(df, date_str)`` where ``date_str`` (``YYYYMMDD``) is the day
            the data was actually fetched for. If every attempt fails for
            reasons not attributed to authentication, an empty DataFrame and
            ``self.biz_day`` are returned.

        Raises:
            RuntimeError: When no attempt succeeded and at least one failure
                looked like a KRX login problem (required columns missing,
                a ``KeyError`` mentioning BPS/PER, or an undecodable JSON
                response).
        """
        target_date = datetime.strptime(self.biz_day, "%Y%m%d")
        krx_auth_error = False

        for day_offset in range(self.MAX_FALLBACK_DAYS + 1):
            try_date_str = (target_date - timedelta(days=day_offset)).strftime("%Y%m%d")

            # Only the remote call is guarded, so the handlers below cannot
            # mask errors raised by our own post-processing.
            try:
                df = pykrx_stock.get_market_fundamental_by_ticker(try_date_str, market="ALL")
            except KeyError as e:
                if "BPS" in str(e) or "PER" in str(e):
                    logger.warning(
                        "Fundamental fetch for %s: column mismatch (%s). "
                        "KRX may require login — set KRX_ID/KRX_PW env vars.",
                        try_date_str,
                        e,
                    )
                    krx_auth_error = True
                else:
                    logger.warning("Fundamental fetch failed for %s: %s", try_date_str, e)
                continue
            except (JSONDecodeError, OSError, ValueError) as e:
                # OSError (not just the builtin ConnectionError) is caught
                # because the exceptions raised by the `requests` library
                # derive from IOError/OSError, not from builtin ConnectionError.
                if isinstance(e, JSONDecodeError):
                    # A non-JSON body is treated as a login-required response.
                    krx_auth_error = True
                logger.warning("Fundamental fetch failed for %s: %s", try_date_str, e)
                continue

            if df is None or df.empty:
                # No data for this day; try the previous one.
                logger.debug("No fundamental data for %s", try_date_str)
                continue

            # Validate expected columns exist
            missing = REQUIRED_FUNDAMENTAL_COLS - set(df.columns)
            if missing:
                logger.warning(
                    "Fundamental data for %s missing columns: %s. "
                    "KRX may require login.",
                    try_date_str,
                    missing,
                )
                krx_auth_error = True
                continue

            if day_offset > 0:
                logger.info("Fell back to %s (offset -%dd)", try_date_str, day_offset)
            return df, try_date_str

        if krx_auth_error:
            raise RuntimeError(
                f"KRX fundamental data unavailable for {self.biz_day}: "
                "KRX requires login — set KRX_ID and KRX_PW environment variables. "
                "Register at https://data.krx.co.kr"
            )

        logger.error(
            "Fundamental fetch failed for %s and %d previous days",
            self.biz_day,
            self.MAX_FALLBACK_DAYS,
        )
        return pd.DataFrame(), self.biz_day

    def _build_records(self, fund_df: pd.DataFrame, base_date) -> list[dict]:
        """Turn the fundamentals DataFrame into row dicts for the upsert.

        Args:
            fund_df: DataFrame indexed by ticker with PER, PBR and DIV columns.
            base_date: ``date`` object stored in every record.

        Returns:
            One dict per ticker that has at least one non-missing metric.
        """
        records = []
        # Iterating the columns in lockstep avoids a label lookup per cell
        # and still yields scalars if the index contains a duplicate ticker.
        for ticker, raw_per, raw_pbr, raw_div in zip(
            fund_df.index, fund_df["PER"], fund_df["PBR"], fund_df["DIV"]
        ):
            per = self._safe_float(raw_per)
            pbr = self._safe_float(raw_pbr)
            div_yield = self._safe_float(raw_div)

            # Skip records where all metrics are None
            if per is None and pbr is None and div_yield is None:
                continue

            records.append({
                "ticker": ticker,
                "base_date": base_date,
                "per": per,
                "pbr": pbr,
                "psr": None,
                "pcr": None,
                "dividend_yield": div_yield,
            })
        return records

    def _upsert_records(self, records: list[dict]) -> None:
        """Insert the records, updating rows that already exist.

        Conflicts on ``(ticker, base_date)`` overwrite the metric columns
        with the new values. The session is committed on success and rolled
        back on failure.

        Raises:
            RuntimeError: If the statement or commit fails; the original
                exception is attached as ``__cause__``.
        """
        try:
            stmt = insert(Valuation).values(records)
            # NOTE(review): psr/pcr are always None in `records`, so an
            # existing row's psr/pcr are overwritten with NULL on conflict.
            # Confirm no other writer populates those columns.
            stmt = stmt.on_conflict_do_update(
                index_elements=["ticker", "base_date"],
                set_={
                    "per": stmt.excluded.per,
                    "pbr": stmt.excluded.pbr,
                    "psr": stmt.excluded.psr,
                    "pcr": stmt.excluded.pcr,
                    "dividend_yield": stmt.excluded.dividend_yield,
                },
            )
            self.db.execute(stmt)
            self.db.commit()
        except Exception as e:
            self.db.rollback()
            raise RuntimeError(f"Failed to insert valuation data: {e}") from e

    def collect(self) -> int:
        """Collect valuation data.

        Returns:
            Number of valuation records written (0 when no fundamental data
            was available or every row lacked all metrics).

        Raises:
            RuntimeError: Propagated from ``_fetch_fundamental_data`` when
                KRX appears to require login, or raised when the database
                upsert fails.
        """
        fund_df, effective_biz_day = self._fetch_fundamental_data()

        if fund_df.empty:
            logger.warning("No fundamental data returned for %s", self.biz_day)
            return 0

        # Records are dated with the day the data was actually fetched for,
        # which may be earlier than the requested biz_day after a fallback.
        base_date = datetime.strptime(effective_biz_day, "%Y%m%d").date()

        logger.info(
            "Processing %d valuation records for %s", len(fund_df), effective_biz_day
        )

        records = self._build_records(fund_df, base_date)
        if records:
            self._upsert_records(records)

        logger.info("Collected %d valuation records", len(records))
        return len(records)
|