From 9ab232ba124c7e139a7a10cfc044808c8e323d1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EB=A8=B8=EB=8B=88=ED=8E=98=EB=8B=88?= Date: Fri, 17 Apr 2026 23:07:09 +0900 Subject: [PATCH] feat: KRX Open API migration with pykrx fallback - Add pykrx-openapi dependency - New krx_client.py wrapper module - ETFCollector: Open API bulk fetch + pykrx fallback - ETFPriceCollector: Open API date-based bulk + pykrx fallback - StockCollector: Open API base_info + daily_trade + pykrx fallback - PriceCollector: Open API date-based bulk + pykrx fallback - ValuationCollector: pykrx retained (Open API has no PER/PBR) - generate_snapshots.py: Open API + pykrx fallback - Auto-switch based on KRX_OPENAPI_KEY env var - All 278 tests passing --- .env.example | 6 +- .gitea/workflows/deploy.yml | 1 + backend/app/core/config.py | 1 + .../app/services/collectors/etf_collector.py | 77 +++++++++-- .../collectors/etf_price_collector.py | 104 ++++++++++++++- .../services/collectors/price_collector.py | 120 ++++++++++++++++-- .../services/collectors/stock_collector.py | 90 +++++++++++-- backend/app/services/krx_client.py | 67 ++++++++++ backend/pyproject.toml | 1 + backend/scripts/generate_snapshots.py | 67 +++++++--- .../tests/unit/test_collector_resilience.py | 75 +++++++++-- backend/uv.lock | 14 ++ .../plans/2026-04-17-krx-openapi-migration.md | 97 ++++++++++++++ 13 files changed, 653 insertions(+), 67 deletions(-) create mode 100644 backend/app/services/krx_client.py create mode 100644 docs/plans/2026-04-17-krx-openapi-migration.md diff --git a/.env.example b/.env.example index 27066ad..0ea9751 100644 --- a/.env.example +++ b/.env.example @@ -12,7 +12,11 @@ KIS_APP_KEY=your_kis_app_key KIS_APP_SECRET=your_kis_app_secret KIS_ACCOUNT_NO=your_account_number -# KRX Data Portal (required for data collection since 2026) +# KRX Open API (preferred for data collection) +# Register at https://openapi.krx.co.kr to get an API key +KRX_OPENAPI_KEY=your_krx_openapi_key + +# KRX Data Portal (fallback for ValuationCollector when Open API key is not set) # Register at https://data.krx.co.kr to get credentials KRX_ID=your_krx_login_id KRX_PW=your_krx_password diff --git a/.gitea/workflows/deploy.yml b/.gitea/workflows/deploy.yml index 24ebc56..bee6db0 100644 --- a/.gitea/workflows/deploy.yml +++ b/.gitea/workflows/deploy.yml @@ -35,6 +35,7 @@ jobs: ADMIN_USERNAME=${{ secrets.ADMIN_USERNAME }} ADMIN_EMAIL=${{ secrets.ADMIN_EMAIL }} ADMIN_PASSWORD=${{ secrets.ADMIN_PASSWORD }} + KRX_OPENAPI_KEY=${{ secrets.KRX_OPENAPI_KEY }} KRX_ID=${{ secrets.KRX_ID }} KRX_PW=${{ secrets.KRX_PW }} EOF diff --git a/backend/app/core/config.py b/backend/app/core/config.py index 84b9a58..2714188 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -32,6 +32,7 @@ class Settings(BaseSettings): kis_app_secret: str = "" kis_account_no: str = "" dart_api_key: str = "" + krx_openapi_key: str = "" # Notifications (optional) discord_webhook_url: str = "" diff --git a/backend/app/services/collectors/etf_collector.py b/backend/app/services/collectors/etf_collector.py index aea73a5..0ed2b1f 100644 --- a/backend/app/services/collectors/etf_collector.py +++ b/backend/app/services/collectors/etf_collector.py @@ -1,17 +1,18 @@ """ ETF master data collector from KRX. + +Uses KRX Open API when KRX_OPENAPI_KEY is set, falls back to pykrx scraping. """ import logging import time from json import JSONDecodeError import pandas as pd -from pykrx.website.krx.etx.core import ETF_전종목기본종목 - from sqlalchemy.orm import Session from sqlalchemy.dialects.postgresql import insert from app.services.collectors.base import BaseCollector +from app.services.krx_client import get_krx_client from app.models.stock import ETF, AssetClass logger = logging.getLogger(__name__) @@ -47,8 +48,19 @@ class ETFCollector(BaseCollector): else: return AssetClass.MIXED.value - def _fetch_etf_data(self) -> pd.DataFrame: - """Fetch ETF data with 1 retry on failure.""" + def _fetch_etf_data_openapi(self) -> pd.DataFrame: + """Fetch ETF data via KRX Open API.""" + client = get_krx_client() + biz_day = self._get_latest_biz_day() + df = client.get_etf_daily(biz_day) + if df is None or df.empty: + raise KRXDataError("KRX Open API returned empty ETF data") + return df + + def _fetch_etf_data_pykrx(self) -> pd.DataFrame: + """Fetch ETF data via pykrx scraping with 1 retry on failure.""" + from pykrx.website.krx.etx.core import ETF_전종목기본종목 + last_exc = None for attempt in range(2): try: @@ -67,10 +79,39 @@ class ETFCollector(BaseCollector): logger.error(error_msg) raise KRXDataError(error_msg) - def collect(self) -> int: - """Collect ETF master data.""" - df = self._fetch_etf_data() # raises KRXDataError on failure + def _fetch_etf_data(self) -> pd.DataFrame: + """Fetch ETF data with Open API preference and pykrx fallback.""" + client = get_krx_client() + if client: + try: + logger.info("Fetching ETF data via KRX Open API") + return self._fetch_etf_data_openapi() + except Exception as e: + logger.warning(f"KRX Open API failed, falling back to pykrx: {e}") + logger.info("Fetching ETF data via pykrx (scraping)") + return self._fetch_etf_data_pykrx() + + def _parse_openapi_records(self, df: pd.DataFrame) -> list[dict]: + """Parse Open API response DataFrame into ETF records.""" + records = [] + for _, row in df.iterrows(): + ticker = row.get("ISU_SRT_CD") + name = row.get("ISU_ABBRV") + if not ticker or pd.isna(ticker) or not name or pd.isna(name): + continue + + records.append({ + "ticker": str(ticker), + "name": str(name), + "asset_class": AssetClass.MIXED.value, + "market": "", + "expense_ratio": None, + }) + return records + + def _parse_pykrx_records(self, df: pd.DataFrame) -> list[dict]: + """Parse pykrx response DataFrame into ETF records.""" records = [] for _, row in df.iterrows(): ticker = row.get("ISU_SRT_CD") @@ -90,12 +131,30 @@ class ETFCollector(BaseCollector): pass records.append({ - "ticker": ticker, - "name": name, + "ticker": str(ticker), + "name": str(name), "asset_class": self._classify_asset_class(asset_class_str, name), "market": market if market and not pd.isna(market) else "", "expense_ratio": expense_ratio, }) + return records + + def collect(self) -> int: + """Collect ETF master data.""" + client = get_krx_client() + if client: + try: + logger.info("Fetching ETF data via KRX Open API") + df = self._fetch_etf_data_openapi() + records = self._parse_openapi_records(df) + except Exception as e: + logger.warning(f"KRX Open API failed, falling back to pykrx: {e}") + df = self._fetch_etf_data_pykrx() + records = self._parse_pykrx_records(df) + else: + logger.info("Fetching ETF data via pykrx (scraping)") + df = self._fetch_etf_data_pykrx() + records = self._parse_pykrx_records(df) if records: stmt = insert(ETF).values(records) diff --git a/backend/app/services/collectors/etf_price_collector.py b/backend/app/services/collectors/etf_price_collector.py index 6117c50..48d08e0 100644 --- a/backend/app/services/collectors/etf_price_collector.py +++ b/backend/app/services/collectors/etf_price_collector.py @@ -1,17 +1,18 @@ """ -ETF price data collector using pykrx. +ETF price data collector. + +Uses KRX Open API when KRX_OPENAPI_KEY is set, falls back to pykrx scraping. """ import logging from datetime import datetime, timedelta from json import JSONDecodeError import pandas as pd -from pykrx import stock as pykrx_stock - from sqlalchemy.orm import Session from sqlalchemy.dialects.postgresql import insert from app.services.collectors.base import BaseCollector +from app.services.krx_client import get_krx_client from app.models.stock import ETFPrice, ETF logger = logging.getLogger(__name__) @@ -54,8 +55,83 @@ class ETFPriceCollector(BaseCollector): except (ValueError, TypeError): return None - def collect(self) -> int: - """Collect price data for all ETFs.""" + def _collect_openapi(self) -> int: + """Collect ETF prices via KRX Open API (date-based bulk fetch).""" + client = get_krx_client() + total_records = 0 + + # Generate list of business dates to fetch + start = datetime.strptime(self.start_date, "%Y%m%d") + end = datetime.strptime(self.end_date, "%Y%m%d") + current = start + + # Get valid ETF tickers from DB for filtering + tickers = self.db.query(ETF.ticker).all() + ticker_set = {t[0] for t in tickers} + + while current <= end: + date_str = current.strftime("%Y%m%d") + try: + df = client.get_etf_daily(date_str) + if df is None or df.empty: + current += timedelta(days=1) + continue + + records = [] + for _, row in df.iterrows(): + ticker = str(row.get("ISU_SRT_CD", "")) + if not ticker or ticker not in ticker_set: + continue + + close_val = self._safe_float(row.get("TDD_CLSPRC")) + if close_val is None: + continue + + nav_val = self._safe_float(row.get("NAV")) + volume_val = self._safe_int(row.get("ACC_TRDVOL")) + + bas_dd = row.get("BAS_DD") + if hasattr(bas_dd, "date"): + date_value = bas_dd.date() + elif isinstance(bas_dd, str): + date_value = datetime.strptime(bas_dd, "%Y%m%d").date() + else: + date_value = current.date() + + records.append({ + "ticker": ticker, + "date": date_value, + "close": close_val, + "nav": nav_val, + "volume": volume_val, + }) + + if records: + stmt = insert(ETFPrice).values(records) + stmt = stmt.on_conflict_do_update( + index_elements=["ticker", "date"], + set_={ + "close": stmt.excluded.close, + "nav": stmt.excluded.nav, + "volume": stmt.excluded.volume, + }, + ) + self.db.execute(stmt) + self.db.commit() + total_records += len(records) + + except Exception as e: + self.db.rollback() + logger.warning(f"ETF price fetch for {date_str} via Open API failed: {e}") + + current += timedelta(days=1) + + return total_records + + def _collect_pykrx(self) -> int: + """Collect ETF prices via pykrx scraping (ticker-based loop).""" + from pykrx import stock as pykrx_stock + tickers = self.db.query(ETF.ticker).all() ticker_list = [t[0] for t in tickers] @@ -120,5 +196,21 @@ class ETFPriceCollector(BaseCollector): logger.warning(f"Failed to fetch ETF prices for {ticker}: {e}") continue - logger.info(f"Collected {total_records} ETF price records") return total_records + + def collect(self) -> int: + """Collect price data for all ETFs.""" + client = get_krx_client() + if client: + try: + logger.info("Collecting ETF prices via KRX Open API") + total = self._collect_openapi() + logger.info(f"Collected {total} ETF price records via Open API") + return total + except Exception as e: + logger.warning(f"KRX Open API failed, falling back to pykrx: {e}") + + logger.info("Collecting ETF prices via pykrx (scraping)") + total = self._collect_pykrx() + logger.info(f"Collected {total} ETF price records") + return total diff --git a/backend/app/services/collectors/price_collector.py b/backend/app/services/collectors/price_collector.py index 827891f..5d8cf4d 100644 --- a/backend/app/services/collectors/price_collector.py +++ b/backend/app/services/collectors/price_collector.py @@ -1,20 +1,20 @@ """ -Price data collector using pykrx. +Price data collector. + +Uses KRX Open API when KRX_OPENAPI_KEY is set, falls back to pykrx scraping. """ import logging from datetime import datetime, timedelta from json import JSONDecodeError import pandas as pd -from pykrx import stock as pykrx_stock - from sqlalchemy.orm import Session from sqlalchemy.dialects.postgresql import insert from app.services.collectors.base import BaseCollector +from app.services.krx_client import get_krx_client from app.models.stock import Price, Stock - logger = logging.getLogger(__name__) @@ -55,9 +55,89 @@ class PriceCollector(BaseCollector): except (ValueError, TypeError): return None - def collect(self) -> int: - """Collect price data for all stocks.""" - # Get list of tickers from database + def _collect_openapi(self) -> int: + """Collect stock prices via KRX Open API (date-based bulk fetch).""" + client = get_krx_client() + total_records = 0 + + start = datetime.strptime(self.start_date, "%Y%m%d") + end = datetime.strptime(self.end_date, "%Y%m%d") + current = start + + # Get valid stock tickers from DB for filtering + tickers = self.db.query(Stock.ticker, Stock.market).all() + ticker_market = {t[0]: t[1] for t in tickers} + + while current <= end: + date_str = current.strftime("%Y%m%d") + + for market in ("KOSPI", "KOSDAQ"): + try: + df = client.get_stock_daily(date_str, market=market) + if df is None or df.empty: + continue + + records = [] + for _, row in df.iterrows(): + ticker = str(row.get("ISU_SRT_CD", "")) + if not ticker or ticker not in ticker_market: + continue + + close_val = self._safe_float(row.get("TDD_CLSPRC")) + if close_val is None: + continue + + open_val = self._safe_float(row.get("TDD_OPNPRC")) + high_val = self._safe_float(row.get("TDD_HGPRC")) + low_val = self._safe_float(row.get("TDD_LWPRC")) + volume_val = self._safe_int(row.get("ACC_TRDVOL")) + + bas_dd = row.get("BAS_DD") + if hasattr(bas_dd, "date"): + date_value = bas_dd.date() + elif isinstance(bas_dd, str): + date_value = datetime.strptime(bas_dd, "%Y%m%d").date() + else: + date_value = current.date() + + records.append({ + "ticker": ticker, + "date": date_value, + "open": open_val, + "high": high_val, + "low": low_val, + "close": close_val, + "volume": volume_val, + }) + + if records: + stmt = insert(Price).values(records) + stmt = stmt.on_conflict_do_update( + index_elements=["ticker", "date"], + set_={ + "open": stmt.excluded.open, + "high": stmt.excluded.high, + "low": stmt.excluded.low, + "close": stmt.excluded.close, + "volume": stmt.excluded.volume, + }, + ) + self.db.execute(stmt) + self.db.commit() + total_records += len(records) + + except Exception as e: + self.db.rollback() + logger.warning(f"Price fetch for {market} {date_str} via Open API failed: {e}") + + current += timedelta(days=1) + + return total_records + + def _collect_pykrx(self) -> int: + """Collect stock prices via pykrx scraping (ticker-based loop).""" + from pykrx import stock as pykrx_stock + tickers = self.db.query(Stock.ticker).all() ticker_list = [t[0] for t in tickers] @@ -68,7 +148,6 @@ class PriceCollector(BaseCollector): total_records = 0 logger.info(f"Collecting prices for {len(ticker_list)} stocks from {self.start_date} to {self.end_date}") - # Fetch prices in batches for ticker in ticker_list: try: df = pykrx_stock.get_market_ohlcv( @@ -81,22 +160,19 @@ class PriceCollector(BaseCollector): df.columns = ["date", "open", "high", "low", "close", "volume", "value"] - # Validate column count - expected_cols = 7 # date + 6 data columns + expected_cols = 7 if len(df.columns) < expected_cols: logger.warning(f"Unexpected column count for {ticker}: {len(df.columns)}") continue records = [] for _, row in df.iterrows(): - # Safely convert values with type checking open_val = self._safe_float(row["open"]) high_val = self._safe_float(row["high"]) low_val = self._safe_float(row["low"]) close_val = self._safe_float(row["close"]) volume_val = self._safe_int(row["volume"]) - # Skip if essential values are missing if close_val is None: logger.debug(f"Skipping record for {ticker}: missing close price") continue @@ -125,7 +201,7 @@ class PriceCollector(BaseCollector): }, ) self.db.execute(stmt) - self.db.commit() # Commit per ticker + self.db.commit() total_records += len(records) except JSONDecodeError as e: @@ -140,5 +216,21 @@ class PriceCollector(BaseCollector): logger.warning(f"Failed to fetch prices for {ticker}: {e}") continue - logger.info(f"Collected {total_records} price records") return total_records + + def collect(self) -> int: + """Collect price data for all stocks.""" + client = get_krx_client() + if client: + try: + logger.info("Collecting stock prices via KRX Open API") + total = self._collect_openapi() + logger.info(f"Collected {total} price records via Open API") + return total + except Exception as e: + logger.warning(f"KRX Open API failed, falling back to pykrx: {e}") + + logger.info("Collecting prices via pykrx (scraping)") + total = self._collect_pykrx() + logger.info(f"Collected {total} price records") + return total diff --git a/backend/app/services/collectors/stock_collector.py b/backend/app/services/collectors/stock_collector.py index 8734f74..0bc56fa 100644 --- a/backend/app/services/collectors/stock_collector.py +++ b/backend/app/services/collectors/stock_collector.py @@ -1,24 +1,25 @@ """ -Stock data collector using pykrx. +Stock data collector. + +Uses KRX Open API when KRX_OPENAPI_KEY is set, falls back to pykrx scraping. """ import logging from datetime import datetime from json import JSONDecodeError import pandas as pd -from pykrx import stock as pykrx_stock - from sqlalchemy.orm import Session from sqlalchemy.dialects.postgresql import insert from app.services.collectors.base import BaseCollector +from app.services.krx_client import get_krx_client from app.models.stock import Stock, StockType logger = logging.getLogger(__name__) class StockCollector(BaseCollector): - """Collects stock master data using pykrx.""" + """Collects stock master data.""" def __init__(self, db: Session, biz_day: str = None): super().__init__(db) @@ -52,9 +53,65 @@ class StockCollector(BaseCollector): except (ValueError, TypeError): return None - def collect(self) -> int: - """Collect stock master data.""" - # Get tickers per market (also caches ticker-name mappings internally) + def _collect_openapi(self) -> int: + """Collect stock data via KRX Open API.""" + client = get_krx_client() + base_date = datetime.strptime(self.biz_day, "%Y%m%d").date() + records = [] + + for market in ("KOSPI", "KOSDAQ"): + # Fetch base info (ticker, name, listed shares) + base_df = client.get_stock_base_info(self.biz_day, market=market) + # Fetch daily trade (close, market cap) + trade_df = client.get_stock_daily(self.biz_day, market=market) + + if base_df is None or base_df.empty: + logger.warning(f"No stock base info from Open API for {market}") + continue + + # Index trade_df by ticker for quick lookup + trade_map = {} + if trade_df is not None and not trade_df.empty: + for _, row in trade_df.iterrows(): + ticker = str(row.get("ISU_SRT_CD", "")) + if ticker: + trade_map[ticker] = row + + for _, row in base_df.iterrows(): + ticker = str(row.get("ISU_SRT_CD", "")) + name = str(row.get("ISU_ABBRV", "")) + if not ticker or not name: + continue + + close_price = None + market_cap = None + trade_row = trade_map.get(ticker) + if trade_row is not None: + close_price = self._safe_value(trade_row.get("TDD_CLSPRC")) + market_cap = self._safe_value(trade_row.get("MKTCAP")) + + stock_type = self._classify_stock_type(name, ticker) + + records.append({ + "ticker": ticker, + "name": name, + "market": market, + "close_price": close_price, + "market_cap": market_cap, + "eps": None, + "forward_eps": None, + "bps": None, + "dividend_per_share": None, + "stock_type": stock_type, + "base_date": base_date, + }) + + return self._upsert_records(records) + + def _collect_pykrx(self) -> int: + """Collect stock data via pykrx scraping.""" + from pykrx import stock as pykrx_stock + try: kospi_tickers = pykrx_stock.get_market_ticker_list(self.biz_day, market="KOSPI") kosdaq_tickers = pykrx_stock.get_market_ticker_list(self.biz_day, market="KOSDAQ") @@ -74,7 +131,6 @@ class StockCollector(BaseCollector): logger.warning("No tickers found from pykrx.") return 0 - # Fetch bulk data try: cap_df = pykrx_stock.get_market_cap_by_ticker(self.biz_day) except (JSONDecodeError, KeyError, ConnectionError, ValueError) as e: @@ -125,7 +181,10 @@ class StockCollector(BaseCollector): "base_date": base_date, }) - # Upsert using PostgreSQL INSERT ON CONFLICT + return self._upsert_records(records) + + def _upsert_records(self, records: list[dict]) -> int: + """Upsert stock records into the database.""" if records: stmt = insert(Stock).values(records) stmt = stmt.on_conflict_do_update( @@ -148,3 +207,16 @@ class StockCollector(BaseCollector): logger.info(f"Collected {len(records)} stock records") return len(records) + + def collect(self) -> int: + """Collect stock master data.""" + client = get_krx_client() + if client: + try: + logger.info("Collecting stock data via KRX Open API") + return self._collect_openapi() + except Exception as e: + logger.warning(f"KRX Open API failed, falling back to pykrx: {e}") + + logger.info("Collecting stock data via pykrx (scraping)") + return self._collect_pykrx() diff --git a/backend/app/services/krx_client.py b/backend/app/services/krx_client.py new file mode 100644 index 0000000..0dbb62b --- /dev/null +++ b/backend/app/services/krx_client.py @@ -0,0 +1,67 @@ +""" +KRX Open API client wrapper. + +Uses pykrx-openapi library to access KRX official REST API. +Activated when KRX_OPENAPI_KEY environment variable is set. +""" +import os +import logging +from datetime import datetime + +import pandas as pd +from pykrx_openapi import KRXOpenAPI + +logger = logging.getLogger(__name__) + + +class KRXClient: + """Thin wrapper around pykrx-openapi with project defaults.""" + + def __init__(self, api_key: str | None = None): + key = api_key or os.getenv("KRX_OPENAPI_KEY") + if not key: + raise ValueError("KRX_OPENAPI_KEY is required for KRXClient") + self.client = KRXOpenAPI(api_key=key) + + def _to_date_str(self, date_input: str) -> str: + """Ensure date string is in YYYYMMDD format.""" + return date_input.replace("-", "") + + def _records_to_df(self, result: dict) -> pd.DataFrame: + """Convert API response dict to DataFrame.""" + records = result.get("OutBlock_1", []) + if not records: + return pd.DataFrame() + return pd.DataFrame(records) + + def get_etf_daily(self, bas_dd: str) -> pd.DataFrame: + """Fetch all ETF daily trade data for a given date.""" + bas_dd = self._to_date_str(bas_dd) + result = self.client.get_etf_daily_trade(bas_dd=bas_dd) + return self._records_to_df(result) + + def get_stock_daily(self, bas_dd: str, market: str = "KOSPI") -> pd.DataFrame: + """Fetch stock daily trade data for a given date and market.""" + bas_dd = self._to_date_str(bas_dd) + if market == "KOSDAQ": + result = self.client.get_kosdaq_stock_daily_trade(bas_dd=bas_dd) + else: + result = self.client.get_stock_daily_trade(bas_dd=bas_dd) + return self._records_to_df(result) + + def get_stock_base_info(self, bas_dd: str, market: str = "KOSPI") -> pd.DataFrame: + """Fetch stock base info for a given date and market.""" + bas_dd = self._to_date_str(bas_dd) + if market == "KOSDAQ": + result = self.client.get_kosdaq_stock_base_info(bas_dd=bas_dd) + else: + result = self.client.get_stock_base_info(bas_dd=bas_dd) + return self._records_to_df(result) + + +def get_krx_client() -> KRXClient | None: + """Return KRXClient if KRX_OPENAPI_KEY is set, else None.""" + key = os.getenv("KRX_OPENAPI_KEY") + if key: + return KRXClient(api_key=key) + return None diff --git a/backend/pyproject.toml b/backend/pyproject.toml index ab22397..04d1401 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -18,6 +18,7 @@ dependencies = [ "apscheduler==3.11.2", "setuptools", "pykrx>=1.2.6", + "pykrx-openapi", "requests==2.32.5", "beautifulsoup4==4.14.3", "lxml==6.0.2", diff --git a/backend/scripts/generate_snapshots.py b/backend/scripts/generate_snapshots.py index 9596342..c1b8626 100644 --- a/backend/scripts/generate_snapshots.py +++ b/backend/scripts/generate_snapshots.py @@ -1,8 +1,7 @@ """ Generate portfolio snapshots from trade history using actual market prices. -Fetches closing prices from KRX (via pykrx) for each snapshot date, -then computes portfolio value based on cumulative holdings at that point. +Uses KRX Open API when KRX_OPENAPI_KEY is set, falls back to pykrx scraping. Snapshot dates: end of each month where trades occurred, plus latest available. @@ -11,7 +10,8 @@ Usage: Requires: - DATABASE_URL environment variable - - KRX_ID / KRX_PW environment variables (pykrx >= 1.2.5) + - KRX_OPENAPI_KEY environment variable (preferred) + - KRX_ID / KRX_PW environment variables (pykrx fallback) """ import sys import os @@ -24,10 +24,9 @@ from json import JSONDecodeError sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from pykrx import stock as pykrx_stock - from sqlalchemy.orm import Session from app.core.database import SessionLocal +from app.services.krx_client import get_krx_client from app.models.portfolio import ( Portfolio, PortfolioSnapshot, SnapshotHolding, ) @@ -103,17 +102,14 @@ def _generate_snapshot_dates() -> list[date]: today = date.today() dates = [] - # Month-end dates current = date(first_date.year, first_date.month, 1) while current <= today: - # Last day of month if current.month == 12: next_month = date(current.year + 1, 1, 1) else: next_month = date(current.year, current.month + 1, 1) last_day = next_month - timedelta(days=1) - # Only include if we have holdings at this date if last_day >= first_date and last_day <= today: dates.append(last_day) @@ -122,17 +118,45 @@ def _generate_snapshot_dates() -> list[date]: return dates -def _fetch_price_with_retry(ticker: str, date_str: str, max_retries: int = 3) -> Decimal | None: - """Fetch closing price for a ticker on a date, with fallback to previous days.""" +def _fetch_price_openapi(ticker: str, date_str: str) -> Decimal | None: + """Fetch closing price via KRX Open API.""" + client = get_krx_client() + if not client: + return None + target = datetime.strptime(date_str, "%Y%m%d").date() - for day_offset in range(5): # Try up to 5 days back (weekends/holidays) + for day_offset in range(5): + try_date = target - timedelta(days=day_offset) + try_date_str = try_date.strftime("%Y%m%d") + + try: + df = client.get_etf_daily(try_date_str) + if df is not None and not df.empty: + match = df[df["ISU_SRT_CD"] == ticker] + if not match.empty: + close = match.iloc[0].get("TDD_CLSPRC") + if close and float(close) > 0: + return Decimal(str(int(float(close)))) + except Exception as e: + logger.warning(f"Open API fetch for {ticker} on {try_date_str}: {e}") + continue + + return None + + +def _fetch_price_pykrx(ticker: str, date_str: str, max_retries: int = 3) -> Decimal | None: + """Fetch closing price via pykrx scraping.""" + from pykrx import stock as pykrx_stock + + target = datetime.strptime(date_str, "%Y%m%d").date() + + for day_offset in range(5): try_date = target - timedelta(days=day_offset) try_date_str = try_date.strftime("%Y%m%d") for attempt in range(max_retries): try: - # For ETFs, use get_etf_ohlcv_by_date df = pykrx_stock.get_etf_ohlcv_by_date(try_date_str, try_date_str, ticker) if df is not None and not df.empty: close = df.iloc[0]["종가"] @@ -160,15 +184,25 @@ def _fetch_price_with_retry(ticker: str, date_str: str, max_retries: int = 3) -> return None +def _fetch_price_with_retry(ticker: str, date_str: str, max_retries: int = 3) -> Decimal | None: + """Fetch closing price, preferring Open API with pykrx fallback.""" + client = get_krx_client() + if client: + price = _fetch_price_openapi(ticker, date_str) + if price: + return price + logger.warning(f"Open API failed for {ticker} on {date_str}, trying pykrx") + + return _fetch_price_pykrx(ticker, date_str, max_retries) + + def generate_snapshots(db: Session): """Generate portfolio snapshots from trade history with actual market prices.""" - # Find portfolio portfolio = db.query(Portfolio).filter(Portfolio.name == "연금 포트폴리오").first() if not portfolio: logger.error("Portfolio '연금 포트폴리오' not found. Run seed_data.py first.") return - # Delete existing snapshots existing = db.query(PortfolioSnapshot).filter( PortfolioSnapshot.portfolio_id == portfolio.id ).all() @@ -192,7 +226,6 @@ def generate_snapshots(db: Session): date_str = snap_date.strftime("%Y%m%d") logger.info(f"Processing {snap_date} ({len(holdings)} tickers)...") - # Fetch prices for all held tickers prices: dict[str, Decimal] = {} for ticker in holdings: price = _fetch_price_with_retry(ticker, date_str) @@ -205,7 +238,6 @@ def generate_snapshots(db: Session): logger.warning(f" Skipping {snap_date}: no prices available") continue - # Calculate portfolio value total_value = Decimal("0") snapshot_holdings = [] @@ -224,7 +256,6 @@ def generate_snapshots(db: Session): if total_value == 0: continue - # Create snapshot snapshot = PortfolioSnapshot( portfolio_id=portfolio.id, total_value=total_value, @@ -233,7 +264,6 @@ def generate_snapshots(db: Session): db.add(snapshot) db.flush() - # Create snapshot holdings with ratios for h in snapshot_holdings: ratio = (h["value"] / total_value * 100).quantize(Decimal("0.01"), rounding=ROUND_HALF_UP) db.add(SnapshotHolding( @@ -248,7 +278,6 @@ def generate_snapshots(db: Session): created += 1 logger.info(f" Snapshot {snap_date}: total={total_value:,.0f}") - # Rate limit: be gentle with KRX time.sleep(1) db.commit() diff --git a/backend/tests/unit/test_collector_resilience.py b/backend/tests/unit/test_collector_resilience.py index 19e1b50..c551964 100644 --- a/backend/tests/unit/test_collector_resilience.py +++ b/backend/tests/unit/test_collector_resilience.py @@ -39,14 +39,15 @@ def db(): Base.metadata.drop_all(bind=engine) -# ── ETFCollector Tests ── +# ── ETFCollector Tests (pykrx fallback path, no KRX_OPENAPI_KEY) ── class TestETFCollectorResilience: + @patch("app.services.collectors.etf_collector.get_krx_client", return_value=None) @patch("app.services.collectors.etf_collector.time.sleep") - @patch("app.services.collectors.etf_collector.ETF_전종목기본종목") - def test_json_decode_error_retries_once(self, mock_etf_cls, mock_sleep, db): + @patch("pykrx.website.krx.etx.core.ETF_전종목기본종목") + def test_json_decode_error_retries_once(self, mock_etf_cls, mock_sleep, mock_client, db): """JSONDecodeError on both attempts raises KRXDataError with login hint.""" mock_fetcher = MagicMock() mock_fetcher.fetch.side_effect = [ @@ -62,9 +63,10 @@ class TestETFCollectorResilience: assert mock_fetcher.fetch.call_count == 2 mock_sleep.assert_called_once_with(3) + @patch("app.services.collectors.etf_collector.get_krx_client", return_value=None) @patch("app.services.collectors.etf_collector.time.sleep") - @patch("app.services.collectors.etf_collector.ETF_전종목기본종목") - def test_connection_error_retries_and_raises(self, mock_etf_cls, mock_sleep, db): + @patch("pykrx.website.krx.etx.core.ETF_전종목기본종목") + def test_connection_error_retries_and_raises(self, mock_etf_cls, mock_sleep, mock_client, db): """ConnectionError on both attempts raises KRXDataError.""" mock_fetcher = MagicMock() mock_fetcher.fetch.side_effect = ConnectionError("timeout") @@ -76,9 +78,10 @@ class TestETFCollectorResilience: assert mock_fetcher.fetch.call_count == 2 + @patch("app.services.collectors.etf_collector.get_krx_client", return_value=None) @patch("app.services.collectors.etf_collector.time.sleep") - @patch("app.services.collectors.etf_collector.ETF_전종목기본종목") - def test_retry_succeeds_on_second_attempt(self, mock_etf_cls, mock_sleep, db): + @patch("pykrx.website.krx.etx.core.ETF_전종목기본종목") + def test_retry_succeeds_on_second_attempt(self, mock_etf_cls, mock_sleep, mock_client, db): """If first attempt fails but retry succeeds, data is processed normally.""" good_df = pd.DataFrame([{ "ISU_SRT_CD": "069500", @@ -99,9 +102,10 @@ class TestETFCollectorResilience: assert result == 1 + @patch("app.services.collectors.etf_collector.get_krx_client", return_value=None) @patch("app.services.collectors.etf_collector.time.sleep") - @patch("app.services.collectors.etf_collector.ETF_전종목기본종목") - def test_failure_does_not_delete_existing_data(self, mock_etf_cls, mock_sleep, db): + @patch("pykrx.website.krx.etx.core.ETF_전종목기본종목") + def test_failure_does_not_delete_existing_data(self, mock_etf_cls, mock_sleep, mock_client, db): """Existing ETF records are preserved when fetch fails.""" db.execute( ETF.__table__.insert().values( @@ -124,6 +128,59 @@ class TestETFCollectorResilience: assert existing.name == "KODEX 200" +# ── ETFCollector Open API Tests ── + + +class TestETFCollectorOpenAPI: + + @patch("app.services.collectors.etf_collector.get_krx_client") + @patch("app.services.collectors.base.BaseCollector._get_latest_biz_day", return_value="20260417") + def test_openapi_success(self, mock_biz, mock_get_client, db): + """Open API returns valid ETF data.""" + mock_client = MagicMock() + mock_client.get_etf_daily.return_value = pd.DataFrame([{ + "ISU_SRT_CD": "069500", + "ISU_ABBRV": "KODEX 200", + "TDD_CLSPRC": 35000.0, + "ACC_TRDVOL": 1000000, + }]) + mock_get_client.return_value = mock_client + + collector = ETFCollector(db) + result = collector.collect() + + assert result == 1 + etf = db.query(ETF).filter_by(ticker="069500").first() + assert etf is not None + assert etf.name == "KODEX 200" + + @patch("app.services.collectors.etf_collector.get_krx_client") + @patch("app.services.collectors.etf_collector.time.sleep") + @patch("pykrx.website.krx.etx.core.ETF_전종목기본종목") + @patch("app.services.collectors.base.BaseCollector._get_latest_biz_day", return_value="20260417") + def test_openapi_failure_falls_back_to_pykrx(self, mock_biz, mock_etf_cls, mock_sleep, mock_get_client, db): + """When Open API fails, falls back to pykrx scraping.""" + mock_client = MagicMock() + mock_client.get_etf_daily.side_effect = Exception("API down") + mock_get_client.return_value = mock_client + + good_df = pd.DataFrame([{ + "ISU_SRT_CD": "069500", + "ISU_ABBRV": "KODEX 200", + "IDX_ASST_CLSS_NM": "주식", + "IDX_MKT_CLSS_NM": "코스피", + "ETF_TOT_FEE": "0.15", + }]) + mock_fetcher = MagicMock() + mock_fetcher.fetch.return_value = good_df + mock_etf_cls.return_value = mock_fetcher + + collector = ETFCollector(db) + result = collector.collect() + + assert result == 1 + + # ── ValuationCollector Tests ── diff --git a/backend/uv.lock b/backend/uv.lock index 2600357..12f112b 100644 --- a/backend/uv.lock +++ b/backend/uv.lock @@ -485,6 +485,7 @@ dependencies = [ { name = "pydantic-settings" }, { name = "pyjwt", extra = ["crypto"] }, { name = "pykrx" }, + { name = "pykrx-openapi" }, { name = "python-multipart" }, { name = "requests" }, { name = "setuptools" }, @@ -521,6 +522,7 @@ requires-dist = [ { name = "pydantic-settings", specifier = "==2.12.0" }, { name = "pyjwt", extras = ["crypto"], specifier = "==2.11.0" }, { name = "pykrx", specifier = ">=1.2.6" }, + { name = "pykrx-openapi" }, { name = "pytest", marker = "extra == 'dev'", specifier = "==8.3.4" }, { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = "==1.1.0" }, { name = "python-multipart", specifier = "==0.0.22" }, @@ -1290,6 +1292,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7a/6a/5bd38daca2de1f514c861f9b175f2b11b4396fdfec3785110f56f1132c01/pykrx-1.2.6-py3-none-any.whl", hash = "sha256:3abb819efe501d2fab055ed913ebd407fa185bb74155378c1e2f831a0ed15398", size = 2196530, upload-time = "2026-04-14T10:17:40.063Z" }, ] +[[package]] +name = "pykrx-openapi" +version = "0.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9a/2d/893d9a77ac2e66e2c7dcdebe046e991a162b7a026942f8b2d0b4d5f84656/pykrx_openapi-0.1.1.tar.gz", hash = "sha256:891c89ddbc1a8e99044f3201b868bb7530b3e38453096834169615588c79ed06", size = 16062756, upload-time = "2026-01-20T13:58:55.672Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/49/b3/ae37e596307591f6305d89573103d21766337ffb4b65bdf681e9331c792b/pykrx_openapi-0.1.1-py3-none-any.whl", hash = "sha256:fd2a65224645db0fbec2d173ed4140d1bb25040a0a8de5aee3500ebd35114718", size = 12594, upload-time = "2026-01-20T13:58:53.438Z" }, +] + [[package]] name = "pyparsing" version = "3.3.2" diff --git a/docs/plans/2026-04-17-krx-openapi-migration.md b/docs/plans/2026-04-17-krx-openapi-migration.md new file mode 100644 index 0000000..53f9507 --- /dev/null +++ b/docs/plans/2026-04-17-krx-openapi-migration.md @@ -0,0 +1,97 @@ +# KRX Open API 전환 설계 + +## 배경 +- pykrx는 KRX 웹 스크래핑 방식으로 불안정 (로그인 필수화, 구조 변경 시 깨짐) +- KRX Open API (openapi.krx.co.kr) 공식 REST API로 전환하여 안정성 확보 +- `pykrx-openapi` 라이브러리 활용 (MIT, pip install pykrx-openapi) + +## 전환 범위 + +### 현재 pykrx 의존 Collector (5개) +| Collector | pykrx 함수 | KRX Open API 대체 | +|---|---|---| +| StockCollector | `get_market_ticker_list`, `get_market_cap_by_ticker`, `get_market_fundamental_by_ticker` | `get_stock_base_info`, `get_stock_daily_trade`, `get_kosdaq_stock_daily_trade` | +| PriceCollector | `get_market_ohlcv` | `get_stock_daily_trade`, `get_kosdaq_stock_daily_trade` | +| ValuationCollector | `get_market_fundamental_by_ticker` | pykrx 유지 또는 별도 소스 (Open API에 PER/PBR 없음) | +| ETFCollector | `ETF_전종목기본종목().fetch()` | `get_etf_daily_trade` (종목 목록 겸용) | +| ETFPriceCollector | `get_etf_ohlcv_by_date` | `get_etf_daily_trade` | + +### 주의: ValuationCollector +KRX Open API 서비스 목록에 PER/PBR/배당수익률 API가 없음. +→ ValuationCollector는 pykrx(스크래핑) 유지 또는 네이버/FnGuide 등 대체 소스 검토. +→ 1차 전환에서는 pykrx fallback으로 유지. + +### 스크립트 의존 +- `scripts/generate_snapshots.py` — `pykrx_stock.get_etf_ohlcv_by_date` 사용 +- `jobs/kjb_signal_job.py` — `Price.ticker == "069500"` (DB 조회, pykrx 직접 의존 없음) +- `app/services/optimizer.py` — DB 조회만, pykrx 직접 의존 없음 + +## 구현 계획 + +### 1. KRX Open API 클라이언트 모듈 생성 +**파일:** `backend/app/services/krx_client.py` + +```python +from pykrx_openapi import KRXOpenAPI + +class KRXClient: + """Thin wrapper around pykrx-openapi with project defaults.""" + + def __init__(self, api_key: str = None): + self.client = KRXOpenAPI( + api_key=api_key or os.getenv("KRX_OPENAPI_KEY"), + rate_limit=10, + per_seconds=1, + timeout=30, + ) + + def get_etf_daily(self, date: str) -> pd.DataFrame: ... + def get_stock_daily(self, date: str, market: str) -> pd.DataFrame: ... + def get_stock_base_info(self, date: str, market: str) -> pd.DataFrame: ... +``` + +### 2. Collector 전환 (4개) +각 collector에 `KRX_OPENAPI_KEY` 환경변수가 있으면 Open API 사용, 없으면 pykrx fallback. + +- **ETFCollector** → `get_etf_daily_trade` 로 전종목 ETF 목록 + 기본정보 취득 +- **ETFPriceCollector** → `get_etf_daily_trade` 로 종가/거래량 취득 +- **StockCollector** → `get_stock_base_info` + `get_stock_daily_trade` + `get_kosdaq_stock_daily_trade` +- **PriceCollector** → `get_stock_daily_trade` + `get_kosdaq_stock_daily_trade` + +### 3. ValuationCollector +1차: pykrx 유지 (KRX_ID/KRX_PW 사용) +향후: 네이버 금융 또는 FnGuide 스크래핑으로 전환 검토 + +### 4. generate_snapshots.py 전환 +`pykrx_stock.get_etf_ohlcv_by_date` → `KRXClient.get_etf_daily` + +### 5. 의존성 변경 +- `pyproject.toml`: `pykrx-openapi` 추가 +- `pykrx` 는 ValuationCollector 용으로 유지 +- `.env.example`: `KRX_OPENAPI_KEY` 추가 +- Gitea Secrets: `KRX_OPENAPI_KEY` 추가 + +### 6. 테스트 +- 기존 unit test 업데이트 (mock 대상 변경) +- KRX Open API mock으로 테스트 + +## 환경 변수 +``` +KRX_OPENAPI_KEY=xxx # KRX Open API 인증키 (신규) +KRX_ID=xxx # pykrx 웹 로그인 (ValuationCollector용, 유지) +KRX_PW=xxx # pykrx 웹 로그인 (ValuationCollector용, 유지) +``` + +## 롤백 전략 +- `KRX_OPENAPI_KEY` 환경변수를 제거하면 자동으로 pykrx fallback +- 기존 pykrx 코드는 삭제하지 않고 fallback으로 유지 + +## 작업 완료 조건 +- [ ] KRX Open API 클라이언트 모듈 +- [ ] ETFCollector 전환 +- [ ] ETFPriceCollector 전환 +- [ ] StockCollector 전환 +- [ ] PriceCollector 전환 +- [ ] generate_snapshots.py 전환 +- [ ] 테스트 통과 +- [ ] .env.example / deploy.yml 업데이트