From 3e723b61461e904b7a0c3fdbe720b2d54033f8ab Mon Sep 17 00:00:00 2001 From: zephyrdark Date: Mon, 2 Feb 2026 23:48:08 +0900 Subject: [PATCH] feat: add valuation data collector Add ValuationCollector class that fetches PER, PBR, and dividend yield data from KRX for all listed stocks. Includes business day validation, safe float conversion, and upsert logic for the valuations table. Co-Authored-By: Claude Opus 4.5 --- backend/app/services/collectors/__init__.py | 9 +- .../collectors/valuation_collector.py | 126 ++++++++++++++++++ 2 files changed, 134 insertions(+), 1 deletion(-) create mode 100644 backend/app/services/collectors/valuation_collector.py diff --git a/backend/app/services/collectors/__init__.py b/backend/app/services/collectors/__init__.py index 82606ce..4db73b6 100644 --- a/backend/app/services/collectors/__init__.py +++ b/backend/app/services/collectors/__init__.py @@ -2,5 +2,12 @@ from app.services.collectors.base import BaseCollector from app.services.collectors.stock_collector import StockCollector from app.services.collectors.sector_collector import SectorCollector from app.services.collectors.price_collector import PriceCollector +from app.services.collectors.valuation_collector import ValuationCollector -__all__ = ["BaseCollector", "StockCollector", "SectorCollector", "PriceCollector"] +__all__ = [ + "BaseCollector", + "StockCollector", + "SectorCollector", + "PriceCollector", + "ValuationCollector", +] diff --git a/backend/app/services/collectors/valuation_collector.py b/backend/app/services/collectors/valuation_collector.py new file mode 100644 index 0000000..b09ca56 --- /dev/null +++ b/backend/app/services/collectors/valuation_collector.py @@ -0,0 +1,126 @@ +""" +Valuation data collector from KRX. +""" +import logging +from io import BytesIO +from datetime import datetime + +import pandas as pd +import requests + +from sqlalchemy.orm import Session +from sqlalchemy.dialects.postgresql import insert + +from app.services.collectors.base import BaseCollector +from app.models.stock import Valuation + + +logger = logging.getLogger(__name__) + + +class ValuationCollector(BaseCollector): + """Collects valuation metrics (PER, PBR, etc.) from KRX.""" + + GEN_OTP_URL = "http://data.krx.co.kr/comm/fileDn/GenerateOTP/generate.cmd" + DOWN_URL = "http://data.krx.co.kr/comm/fileDn/download_csv/download.cmd" + HEADERS = { + "Referer": "http://data.krx.co.kr/contents/MDC/MDI/mdiLoader", + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", + } + REQUEST_TIMEOUT = 10 + + def __init__(self, db: Session, biz_day: str = None): + super().__init__(db) + self.biz_day = biz_day or datetime.now().strftime("%Y%m%d") + self._validate_biz_day() + + def _validate_biz_day(self) -> None: + """Validate business day format.""" + try: + datetime.strptime(self.biz_day, "%Y%m%d") + except ValueError: + raise ValueError(f"Invalid biz_day format. Expected YYYYMMDD, got: {self.biz_day}") + + def _safe_float(self, value) -> float | None: + """Safely convert value to float.""" + if pd.isna(value): + return None + try: + return float(value) + except (ValueError, TypeError): + return None + + def collect(self) -> int: + """Collect valuation data.""" + gen_otp_data = { + "locale": "ko_KR", + "searchType": "1", + "mktId": "ALL", + "trdDd": self.biz_day, + "csvxls_isNo": "false", + "name": "fileDown", + "url": "dbms/MDC/STAT/standard/MDCSTAT03501", + } + + try: + otp = requests.post( + self.GEN_OTP_URL, + data=gen_otp_data, + headers=self.HEADERS, + timeout=self.REQUEST_TIMEOUT + ) + otp.raise_for_status() + + response = requests.post( + self.DOWN_URL, + data={"code": otp.text}, + headers=self.HEADERS, + timeout=self.REQUEST_TIMEOUT + ) + response.raise_for_status() + except requests.RequestException as e: + raise RuntimeError(f"Failed to fetch valuation data: {e}") + + df = pd.read_csv(BytesIO(response.content), encoding="EUC-KR") + df.columns = df.columns.str.replace(" ", "") + base_date = datetime.strptime(self.biz_day, "%Y%m%d").date() + + logger.info(f"Processing {len(df)} valuation records for {self.biz_day}") + + records = [] + for _, row in df.iterrows(): + ticker = row.get("종목코드") + if not ticker or pd.isna(ticker): + continue + + records.append({ + "ticker": ticker, + "base_date": base_date, + "per": self._safe_float(row.get("PER")), + "pbr": self._safe_float(row.get("PBR")), + "psr": None, # Not available from this endpoint + "pcr": None, # Not available from this endpoint + "dividend_yield": self._safe_float(row.get("배당수익률")), + }) + + if records: + try: + stmt = insert(Valuation).values(records) + stmt = stmt.on_conflict_do_update( + index_elements=["ticker", "base_date"], + set_={ + "per": stmt.excluded.per, + "pbr": stmt.excluded.pbr, + "psr": stmt.excluded.psr, + "pcr": stmt.excluded.pcr, + "dividend_yield": stmt.excluded.dividend_yield, + }, + ) + self.db.execute(stmt) + self.db.commit() + except Exception as e: + self.db.rollback() + raise RuntimeError(f"Failed to insert valuation data: {e}") + + logger.info(f"Collected {len(records)} valuation records") + return len(records)