feat: KRX Open API migration with pykrx fallback

- Add pykrx-openapi dependency
- New krx_client.py wrapper module
- ETFCollector: Open API bulk fetch + pykrx fallback
- ETFPriceCollector: Open API date-based bulk + pykrx fallback
- StockCollector: Open API base_info + daily_trade + pykrx fallback
- PriceCollector: Open API date-based bulk + pykrx fallback
- ValuationCollector: pykrx retained (Open API has no PER/PBR)
- generate_snapshots.py: Open API + pykrx fallback
- Auto-switch based on KRX_OPENAPI_KEY env var
- All 278 tests passing
This commit is contained in:
머니페니 2026-04-17 23:07:09 +09:00
parent 5009168246
commit 9ab232ba12
13 changed files with 653 additions and 67 deletions

View File

@ -12,7 +12,11 @@ KIS_APP_KEY=your_kis_app_key
KIS_APP_SECRET=your_kis_app_secret
KIS_ACCOUNT_NO=your_account_number
# KRX Data Portal (required for data collection since 2026)
# KRX Open API (preferred for data collection)
# Register at https://openapi.krx.co.kr to get an API key
KRX_OPENAPI_KEY=your_krx_openapi_key
# KRX Data Portal (fallback for ValuationCollector when Open API key is not set)
# Register at https://data.krx.co.kr to get credentials
KRX_ID=your_krx_login_id
KRX_PW=your_krx_password

View File

@ -35,6 +35,7 @@ jobs:
ADMIN_USERNAME=${{ secrets.ADMIN_USERNAME }}
ADMIN_EMAIL=${{ secrets.ADMIN_EMAIL }}
ADMIN_PASSWORD=${{ secrets.ADMIN_PASSWORD }}
KRX_OPENAPI_KEY=${{ secrets.KRX_OPENAPI_KEY }}
KRX_ID=${{ secrets.KRX_ID }}
KRX_PW=${{ secrets.KRX_PW }}
EOF

View File

@ -32,6 +32,7 @@ class Settings(BaseSettings):
kis_app_secret: str = ""
kis_account_no: str = ""
dart_api_key: str = ""
krx_openapi_key: str = ""
# Notifications (optional)
discord_webhook_url: str = ""

View File

@ -1,17 +1,18 @@
"""
ETF master data collector from KRX.
Uses KRX Open API when KRX_OPENAPI_KEY is set, falls back to pykrx scraping.
"""
import logging
import time
from json import JSONDecodeError
import pandas as pd
from pykrx.website.krx.etx.core import ETF_전종목기본종목
from sqlalchemy.orm import Session
from sqlalchemy.dialects.postgresql import insert
from app.services.collectors.base import BaseCollector
from app.services.krx_client import get_krx_client
from app.models.stock import ETF, AssetClass
logger = logging.getLogger(__name__)
@ -47,8 +48,19 @@ class ETFCollector(BaseCollector):
else:
return AssetClass.MIXED.value
def _fetch_etf_data(self) -> pd.DataFrame:
"""Fetch ETF data with 1 retry on failure."""
def _fetch_etf_data_openapi(self) -> pd.DataFrame:
"""Fetch ETF data via KRX Open API."""
client = get_krx_client()
biz_day = self._get_latest_biz_day()
df = client.get_etf_daily(biz_day)
if df is None or df.empty:
raise KRXDataError("KRX Open API returned empty ETF data")
return df
def _fetch_etf_data_pykrx(self) -> pd.DataFrame:
"""Fetch ETF data via pykrx scraping with 1 retry on failure."""
from pykrx.website.krx.etx.core import ETF_전종목기본종목
last_exc = None
for attempt in range(2):
try:
@ -67,10 +79,39 @@ class ETFCollector(BaseCollector):
logger.error(error_msg)
raise KRXDataError(error_msg)
def collect(self) -> int:
"""Collect ETF master data."""
df = self._fetch_etf_data() # raises KRXDataError on failure
def _fetch_etf_data(self) -> pd.DataFrame:
"""Fetch ETF data with Open API preference and pykrx fallback."""
client = get_krx_client()
if client:
try:
logger.info("Fetching ETF data via KRX Open API")
return self._fetch_etf_data_openapi()
except Exception as e:
logger.warning(f"KRX Open API failed, falling back to pykrx: {e}")
logger.info("Fetching ETF data via pykrx (scraping)")
return self._fetch_etf_data_pykrx()
def _parse_openapi_records(self, df: pd.DataFrame) -> list[dict]:
"""Parse Open API response DataFrame into ETF records."""
records = []
for _, row in df.iterrows():
ticker = row.get("ISU_SRT_CD")
name = row.get("ISU_ABBRV")
if not ticker or pd.isna(ticker) or not name or pd.isna(name):
continue
records.append({
"ticker": str(ticker),
"name": str(name),
"asset_class": AssetClass.MIXED.value,
"market": "",
"expense_ratio": None,
})
return records
def _parse_pykrx_records(self, df: pd.DataFrame) -> list[dict]:
"""Parse pykrx response DataFrame into ETF records."""
records = []
for _, row in df.iterrows():
ticker = row.get("ISU_SRT_CD")
@ -90,12 +131,30 @@ class ETFCollector(BaseCollector):
pass
records.append({
"ticker": ticker,
"name": name,
"ticker": str(ticker),
"name": str(name),
"asset_class": self._classify_asset_class(asset_class_str, name),
"market": market if market and not pd.isna(market) else "",
"expense_ratio": expense_ratio,
})
return records
def collect(self) -> int:
"""Collect ETF master data."""
client = get_krx_client()
if client:
try:
logger.info("Fetching ETF data via KRX Open API")
df = self._fetch_etf_data_openapi()
records = self._parse_openapi_records(df)
except Exception as e:
logger.warning(f"KRX Open API failed, falling back to pykrx: {e}")
df = self._fetch_etf_data_pykrx()
records = self._parse_pykrx_records(df)
else:
logger.info("Fetching ETF data via pykrx (scraping)")
df = self._fetch_etf_data_pykrx()
records = self._parse_pykrx_records(df)
if records:
stmt = insert(ETF).values(records)

View File

@ -1,17 +1,18 @@
"""
ETF price data collector using pykrx.
ETF price data collector.
Uses KRX Open API when KRX_OPENAPI_KEY is set, falls back to pykrx scraping.
"""
import logging
from datetime import datetime, timedelta
from json import JSONDecodeError
import pandas as pd
from pykrx import stock as pykrx_stock
from sqlalchemy.orm import Session
from sqlalchemy.dialects.postgresql import insert
from app.services.collectors.base import BaseCollector
from app.services.krx_client import get_krx_client
from app.models.stock import ETFPrice, ETF
logger = logging.getLogger(__name__)
@ -54,8 +55,83 @@ class ETFPriceCollector(BaseCollector):
except (ValueError, TypeError):
return None
def collect(self) -> int:
"""Collect price data for all ETFs."""
def _collect_openapi(self) -> int:
"""Collect ETF prices via KRX Open API (date-based bulk fetch)."""
client = get_krx_client()
total_records = 0
# Generate list of business dates to fetch
start = datetime.strptime(self.start_date, "%Y%m%d")
end = datetime.strptime(self.end_date, "%Y%m%d")
current = start
# Get valid ETF tickers from DB for filtering
tickers = self.db.query(ETF.ticker).all()
ticker_set = {t[0] for t in tickers}
while current <= end:
date_str = current.strftime("%Y%m%d")
try:
df = client.get_etf_daily(date_str)
if df is None or df.empty:
current += timedelta(days=1)
continue
records = []
for _, row in df.iterrows():
ticker = str(row.get("ISU_SRT_CD", ""))
if not ticker or ticker not in ticker_set:
continue
close_val = self._safe_float(row.get("TDD_CLSPRC"))
if close_val is None:
continue
nav_val = self._safe_float(row.get("NAV"))
volume_val = self._safe_int(row.get("ACC_TRDVOL"))
bas_dd = row.get("BAS_DD")
if hasattr(bas_dd, "date"):
date_value = bas_dd.date()
elif isinstance(bas_dd, str):
date_value = datetime.strptime(bas_dd, "%Y%m%d").date()
else:
date_value = current.date()
records.append({
"ticker": ticker,
"date": date_value,
"close": close_val,
"nav": nav_val,
"volume": volume_val,
})
if records:
stmt = insert(ETFPrice).values(records)
stmt = stmt.on_conflict_do_update(
index_elements=["ticker", "date"],
set_={
"close": stmt.excluded.close,
"nav": stmt.excluded.nav,
"volume": stmt.excluded.volume,
},
)
self.db.execute(stmt)
self.db.commit()
total_records += len(records)
except Exception as e:
self.db.rollback()
logger.warning(f"ETF price fetch for {date_str} via Open API failed: {e}")
current += timedelta(days=1)
return total_records
def _collect_pykrx(self) -> int:
"""Collect ETF prices via pykrx scraping (ticker-based loop)."""
from pykrx import stock as pykrx_stock
tickers = self.db.query(ETF.ticker).all()
ticker_list = [t[0] for t in tickers]
@ -120,5 +196,21 @@ class ETFPriceCollector(BaseCollector):
logger.warning(f"Failed to fetch ETF prices for {ticker}: {e}")
continue
logger.info(f"Collected {total_records} ETF price records")
return total_records
def collect(self) -> int:
"""Collect price data for all ETFs."""
client = get_krx_client()
if client:
try:
logger.info("Collecting ETF prices via KRX Open API")
total = self._collect_openapi()
logger.info(f"Collected {total} ETF price records via Open API")
return total
except Exception as e:
logger.warning(f"KRX Open API failed, falling back to pykrx: {e}")
logger.info("Collecting ETF prices via pykrx (scraping)")
total = self._collect_pykrx()
logger.info(f"Collected {total} ETF price records")
return total

View File

@ -1,20 +1,20 @@
"""
Price data collector using pykrx.
Price data collector.
Uses KRX Open API when KRX_OPENAPI_KEY is set, falls back to pykrx scraping.
"""
import logging
from datetime import datetime, timedelta
from json import JSONDecodeError
import pandas as pd
from pykrx import stock as pykrx_stock
from sqlalchemy.orm import Session
from sqlalchemy.dialects.postgresql import insert
from app.services.collectors.base import BaseCollector
from app.services.krx_client import get_krx_client
from app.models.stock import Price, Stock
logger = logging.getLogger(__name__)
@ -55,9 +55,89 @@ class PriceCollector(BaseCollector):
except (ValueError, TypeError):
return None
def collect(self) -> int:
"""Collect price data for all stocks."""
# Get list of tickers from database
def _collect_openapi(self) -> int:
"""Collect stock prices via KRX Open API (date-based bulk fetch)."""
client = get_krx_client()
total_records = 0
start = datetime.strptime(self.start_date, "%Y%m%d")
end = datetime.strptime(self.end_date, "%Y%m%d")
current = start
# Get valid stock tickers from DB for filtering
tickers = self.db.query(Stock.ticker, Stock.market).all()
ticker_market = {t[0]: t[1] for t in tickers}
while current <= end:
date_str = current.strftime("%Y%m%d")
for market in ("KOSPI", "KOSDAQ"):
try:
df = client.get_stock_daily(date_str, market=market)
if df is None or df.empty:
continue
records = []
for _, row in df.iterrows():
ticker = str(row.get("ISU_SRT_CD", ""))
if not ticker or ticker not in ticker_market:
continue
close_val = self._safe_float(row.get("TDD_CLSPRC"))
if close_val is None:
continue
open_val = self._safe_float(row.get("TDD_OPNPRC"))
high_val = self._safe_float(row.get("TDD_HGPRC"))
low_val = self._safe_float(row.get("TDD_LWPRC"))
volume_val = self._safe_int(row.get("ACC_TRDVOL"))
bas_dd = row.get("BAS_DD")
if hasattr(bas_dd, "date"):
date_value = bas_dd.date()
elif isinstance(bas_dd, str):
date_value = datetime.strptime(bas_dd, "%Y%m%d").date()
else:
date_value = current.date()
records.append({
"ticker": ticker,
"date": date_value,
"open": open_val,
"high": high_val,
"low": low_val,
"close": close_val,
"volume": volume_val,
})
if records:
stmt = insert(Price).values(records)
stmt = stmt.on_conflict_do_update(
index_elements=["ticker", "date"],
set_={
"open": stmt.excluded.open,
"high": stmt.excluded.high,
"low": stmt.excluded.low,
"close": stmt.excluded.close,
"volume": stmt.excluded.volume,
},
)
self.db.execute(stmt)
self.db.commit()
total_records += len(records)
except Exception as e:
self.db.rollback()
logger.warning(f"Price fetch for {market} {date_str} via Open API failed: {e}")
current += timedelta(days=1)
return total_records
def _collect_pykrx(self) -> int:
"""Collect stock prices via pykrx scraping (ticker-based loop)."""
from pykrx import stock as pykrx_stock
tickers = self.db.query(Stock.ticker).all()
ticker_list = [t[0] for t in tickers]
@ -68,7 +148,6 @@ class PriceCollector(BaseCollector):
total_records = 0
logger.info(f"Collecting prices for {len(ticker_list)} stocks from {self.start_date} to {self.end_date}")
# Fetch prices in batches
for ticker in ticker_list:
try:
df = pykrx_stock.get_market_ohlcv(
@ -81,22 +160,19 @@ class PriceCollector(BaseCollector):
df.columns = ["date", "open", "high", "low", "close", "volume",
"value"]
# Validate column count
expected_cols = 7 # date + 6 data columns
expected_cols = 7
if len(df.columns) < expected_cols:
logger.warning(f"Unexpected column count for {ticker}: {len(df.columns)}")
continue
records = []
for _, row in df.iterrows():
# Safely convert values with type checking
open_val = self._safe_float(row["open"])
high_val = self._safe_float(row["high"])
low_val = self._safe_float(row["low"])
close_val = self._safe_float(row["close"])
volume_val = self._safe_int(row["volume"])
# Skip if essential values are missing
if close_val is None:
logger.debug(f"Skipping record for {ticker}: missing close price")
continue
@ -125,7 +201,7 @@ class PriceCollector(BaseCollector):
},
)
self.db.execute(stmt)
self.db.commit() # Commit per ticker
self.db.commit()
total_records += len(records)
except JSONDecodeError as e:
@ -140,5 +216,21 @@ class PriceCollector(BaseCollector):
logger.warning(f"Failed to fetch prices for {ticker}: {e}")
continue
logger.info(f"Collected {total_records} price records")
return total_records
def collect(self) -> int:
"""Collect price data for all stocks."""
client = get_krx_client()
if client:
try:
logger.info("Collecting stock prices via KRX Open API")
total = self._collect_openapi()
logger.info(f"Collected {total} price records via Open API")
return total
except Exception as e:
logger.warning(f"KRX Open API failed, falling back to pykrx: {e}")
logger.info("Collecting prices via pykrx (scraping)")
total = self._collect_pykrx()
logger.info(f"Collected {total} price records")
return total

View File

@ -1,24 +1,25 @@
"""
Stock data collector using pykrx.
Stock data collector.
Uses KRX Open API when KRX_OPENAPI_KEY is set, falls back to pykrx scraping.
"""
import logging
from datetime import datetime
from json import JSONDecodeError
import pandas as pd
from pykrx import stock as pykrx_stock
from sqlalchemy.orm import Session
from sqlalchemy.dialects.postgresql import insert
from app.services.collectors.base import BaseCollector
from app.services.krx_client import get_krx_client
from app.models.stock import Stock, StockType
logger = logging.getLogger(__name__)
class StockCollector(BaseCollector):
"""Collects stock master data using pykrx."""
"""Collects stock master data."""
def __init__(self, db: Session, biz_day: str = None):
super().__init__(db)
@ -52,9 +53,65 @@ class StockCollector(BaseCollector):
except (ValueError, TypeError):
return None
def collect(self) -> int:
"""Collect stock master data."""
# Get tickers per market (also caches ticker-name mappings internally)
def _collect_openapi(self) -> int:
"""Collect stock data via KRX Open API."""
client = get_krx_client()
base_date = datetime.strptime(self.biz_day, "%Y%m%d").date()
records = []
for market in ("KOSPI", "KOSDAQ"):
# Fetch base info (ticker, name, listed shares)
base_df = client.get_stock_base_info(self.biz_day, market=market)
# Fetch daily trade (close, market cap)
trade_df = client.get_stock_daily(self.biz_day, market=market)
if base_df is None or base_df.empty:
logger.warning(f"No stock base info from Open API for {market}")
continue
# Index trade_df by ticker for quick lookup
trade_map = {}
if trade_df is not None and not trade_df.empty:
for _, row in trade_df.iterrows():
ticker = str(row.get("ISU_SRT_CD", ""))
if ticker:
trade_map[ticker] = row
for _, row in base_df.iterrows():
ticker = str(row.get("ISU_SRT_CD", ""))
name = str(row.get("ISU_ABBRV", ""))
if not ticker or not name:
continue
close_price = None
market_cap = None
trade_row = trade_map.get(ticker)
if trade_row is not None:
close_price = self._safe_value(trade_row.get("TDD_CLSPRC"))
market_cap = self._safe_value(trade_row.get("MKTCAP"))
stock_type = self._classify_stock_type(name, ticker)
records.append({
"ticker": ticker,
"name": name,
"market": market,
"close_price": close_price,
"market_cap": market_cap,
"eps": None,
"forward_eps": None,
"bps": None,
"dividend_per_share": None,
"stock_type": stock_type,
"base_date": base_date,
})
return self._upsert_records(records)
def _collect_pykrx(self) -> int:
"""Collect stock data via pykrx scraping."""
from pykrx import stock as pykrx_stock
try:
kospi_tickers = pykrx_stock.get_market_ticker_list(self.biz_day, market="KOSPI")
kosdaq_tickers = pykrx_stock.get_market_ticker_list(self.biz_day, market="KOSDAQ")
@ -74,7 +131,6 @@ class StockCollector(BaseCollector):
logger.warning("No tickers found from pykrx.")
return 0
# Fetch bulk data
try:
cap_df = pykrx_stock.get_market_cap_by_ticker(self.biz_day)
except (JSONDecodeError, KeyError, ConnectionError, ValueError) as e:
@ -125,7 +181,10 @@ class StockCollector(BaseCollector):
"base_date": base_date,
})
# Upsert using PostgreSQL INSERT ON CONFLICT
return self._upsert_records(records)
def _upsert_records(self, records: list[dict]) -> int:
"""Upsert stock records into the database."""
if records:
stmt = insert(Stock).values(records)
stmt = stmt.on_conflict_do_update(
@ -148,3 +207,16 @@ class StockCollector(BaseCollector):
logger.info(f"Collected {len(records)} stock records")
return len(records)
def collect(self) -> int:
"""Collect stock master data."""
client = get_krx_client()
if client:
try:
logger.info("Collecting stock data via KRX Open API")
return self._collect_openapi()
except Exception as e:
logger.warning(f"KRX Open API failed, falling back to pykrx: {e}")
logger.info("Collecting stock data via pykrx (scraping)")
return self._collect_pykrx()

View File

@ -0,0 +1,67 @@
"""
KRX Open API client wrapper.
Uses pykrx-openapi library to access KRX official REST API.
Activated when KRX_OPENAPI_KEY environment variable is set.
"""
import os
import logging
from datetime import datetime
import pandas as pd
from pykrx_openapi import KRXOpenAPI
logger = logging.getLogger(__name__)
class KRXClient:
"""Thin wrapper around pykrx-openapi with project defaults."""
def __init__(self, api_key: str | None = None):
key = api_key or os.getenv("KRX_OPENAPI_KEY")
if not key:
raise ValueError("KRX_OPENAPI_KEY is required for KRXClient")
self.client = KRXOpenAPI(api_key=key)
def _to_date_str(self, date_input: str) -> str:
"""Ensure date string is in YYYYMMDD format."""
return date_input.replace("-", "")
def _records_to_df(self, result: dict) -> pd.DataFrame:
"""Convert API response dict to DataFrame."""
records = result.get("OutBlock_1", [])
if not records:
return pd.DataFrame()
return pd.DataFrame(records)
def get_etf_daily(self, bas_dd: str) -> pd.DataFrame:
"""Fetch all ETF daily trade data for a given date."""
bas_dd = self._to_date_str(bas_dd)
result = self.client.get_etf_daily_trade(bas_dd=bas_dd)
return self._records_to_df(result)
def get_stock_daily(self, bas_dd: str, market: str = "KOSPI") -> pd.DataFrame:
"""Fetch stock daily trade data for a given date and market."""
bas_dd = self._to_date_str(bas_dd)
if market == "KOSDAQ":
result = self.client.get_kosdaq_stock_daily_trade(bas_dd=bas_dd)
else:
result = self.client.get_stock_daily_trade(bas_dd=bas_dd)
return self._records_to_df(result)
def get_stock_base_info(self, bas_dd: str, market: str = "KOSPI") -> pd.DataFrame:
"""Fetch stock base info for a given date and market."""
bas_dd = self._to_date_str(bas_dd)
if market == "KOSDAQ":
result = self.client.get_kosdaq_stock_base_info(bas_dd=bas_dd)
else:
result = self.client.get_stock_base_info(bas_dd=bas_dd)
return self._records_to_df(result)
def get_krx_client() -> KRXClient | None:
"""Return KRXClient if KRX_OPENAPI_KEY is set, else None."""
key = os.getenv("KRX_OPENAPI_KEY")
if key:
return KRXClient(api_key=key)
return None

View File

@ -18,6 +18,7 @@ dependencies = [
"apscheduler==3.11.2",
"setuptools",
"pykrx>=1.2.6",
"pykrx-openapi",
"requests==2.32.5",
"beautifulsoup4==4.14.3",
"lxml==6.0.2",

View File

@ -1,8 +1,7 @@
"""
Generate portfolio snapshots from trade history using actual market prices.
Fetches closing prices from KRX (via pykrx) for each snapshot date,
then computes portfolio value based on cumulative holdings at that point.
Uses KRX Open API when KRX_OPENAPI_KEY is set, falls back to pykrx scraping.
Snapshot dates: end of each month where trades occurred, plus latest available.
@ -11,7 +10,8 @@ Usage:
Requires:
- DATABASE_URL environment variable
- KRX_ID / KRX_PW environment variables (pykrx >= 1.2.5)
- KRX_OPENAPI_KEY environment variable (preferred)
- KRX_ID / KRX_PW environment variables (pykrx fallback)
"""
import sys
import os
@ -24,10 +24,9 @@ from json import JSONDecodeError
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from pykrx import stock as pykrx_stock
from sqlalchemy.orm import Session
from app.core.database import SessionLocal
from app.services.krx_client import get_krx_client
from app.models.portfolio import (
Portfolio, PortfolioSnapshot, SnapshotHolding,
)
@ -103,17 +102,14 @@ def _generate_snapshot_dates() -> list[date]:
today = date.today()
dates = []
# Month-end dates
current = date(first_date.year, first_date.month, 1)
while current <= today:
# Last day of month
if current.month == 12:
next_month = date(current.year + 1, 1, 1)
else:
next_month = date(current.year, current.month + 1, 1)
last_day = next_month - timedelta(days=1)
# Only include if we have holdings at this date
if last_day >= first_date and last_day <= today:
dates.append(last_day)
@ -122,17 +118,45 @@ def _generate_snapshot_dates() -> list[date]:
return dates
def _fetch_price_with_retry(ticker: str, date_str: str, max_retries: int = 3) -> Decimal | None:
"""Fetch closing price for a ticker on a date, with fallback to previous days."""
def _fetch_price_openapi(ticker: str, date_str: str) -> Decimal | None:
"""Fetch closing price via KRX Open API."""
client = get_krx_client()
if not client:
return None
target = datetime.strptime(date_str, "%Y%m%d").date()
for day_offset in range(5): # Try up to 5 days back (weekends/holidays)
for day_offset in range(5):
try_date = target - timedelta(days=day_offset)
try_date_str = try_date.strftime("%Y%m%d")
try:
df = client.get_etf_daily(try_date_str)
if df is not None and not df.empty:
match = df[df["ISU_SRT_CD"] == ticker]
if not match.empty:
close = match.iloc[0].get("TDD_CLSPRC")
if close and float(close) > 0:
return Decimal(str(int(float(close))))
except Exception as e:
logger.warning(f"Open API fetch for {ticker} on {try_date_str}: {e}")
continue
return None
def _fetch_price_pykrx(ticker: str, date_str: str, max_retries: int = 3) -> Decimal | None:
"""Fetch closing price via pykrx scraping."""
from pykrx import stock as pykrx_stock
target = datetime.strptime(date_str, "%Y%m%d").date()
for day_offset in range(5):
try_date = target - timedelta(days=day_offset)
try_date_str = try_date.strftime("%Y%m%d")
for attempt in range(max_retries):
try:
# For ETFs, use get_etf_ohlcv_by_date
df = pykrx_stock.get_etf_ohlcv_by_date(try_date_str, try_date_str, ticker)
if df is not None and not df.empty:
close = df.iloc[0]["종가"]
@ -160,15 +184,25 @@ def _fetch_price_with_retry(ticker: str, date_str: str, max_retries: int = 3) ->
return None
def _fetch_price_with_retry(ticker: str, date_str: str, max_retries: int = 3) -> Decimal | None:
"""Fetch closing price, preferring Open API with pykrx fallback."""
client = get_krx_client()
if client:
price = _fetch_price_openapi(ticker, date_str)
if price:
return price
logger.warning(f"Open API failed for {ticker} on {date_str}, trying pykrx")
return _fetch_price_pykrx(ticker, date_str, max_retries)
def generate_snapshots(db: Session):
"""Generate portfolio snapshots from trade history with actual market prices."""
# Find portfolio
portfolio = db.query(Portfolio).filter(Portfolio.name == "연금 포트폴리오").first()
if not portfolio:
logger.error("Portfolio '연금 포트폴리오' not found. Run seed_data.py first.")
return
# Delete existing snapshots
existing = db.query(PortfolioSnapshot).filter(
PortfolioSnapshot.portfolio_id == portfolio.id
).all()
@ -192,7 +226,6 @@ def generate_snapshots(db: Session):
date_str = snap_date.strftime("%Y%m%d")
logger.info(f"Processing {snap_date} ({len(holdings)} tickers)...")
# Fetch prices for all held tickers
prices: dict[str, Decimal] = {}
for ticker in holdings:
price = _fetch_price_with_retry(ticker, date_str)
@ -205,7 +238,6 @@ def generate_snapshots(db: Session):
logger.warning(f" Skipping {snap_date}: no prices available")
continue
# Calculate portfolio value
total_value = Decimal("0")
snapshot_holdings = []
@ -224,7 +256,6 @@ def generate_snapshots(db: Session):
if total_value == 0:
continue
# Create snapshot
snapshot = PortfolioSnapshot(
portfolio_id=portfolio.id,
total_value=total_value,
@ -233,7 +264,6 @@ def generate_snapshots(db: Session):
db.add(snapshot)
db.flush()
# Create snapshot holdings with ratios
for h in snapshot_holdings:
ratio = (h["value"] / total_value * 100).quantize(Decimal("0.01"), rounding=ROUND_HALF_UP)
db.add(SnapshotHolding(
@ -248,7 +278,6 @@ def generate_snapshots(db: Session):
created += 1
logger.info(f" Snapshot {snap_date}: total={total_value:,.0f}")
# Rate limit: be gentle with KRX
time.sleep(1)
db.commit()

View File

@ -39,14 +39,15 @@ def db():
Base.metadata.drop_all(bind=engine)
# ── ETFCollector Tests ──
# ── ETFCollector Tests (pykrx fallback path, no KRX_OPENAPI_KEY) ──
class TestETFCollectorResilience:
@patch("app.services.collectors.etf_collector.get_krx_client", return_value=None)
@patch("app.services.collectors.etf_collector.time.sleep")
@patch("app.services.collectors.etf_collector.ETF_전종목기본종목")
def test_json_decode_error_retries_once(self, mock_etf_cls, mock_sleep, db):
@patch("pykrx.website.krx.etx.core.ETF_전종목기본종목")
def test_json_decode_error_retries_once(self, mock_etf_cls, mock_sleep, mock_client, db):
"""JSONDecodeError on both attempts raises KRXDataError with login hint."""
mock_fetcher = MagicMock()
mock_fetcher.fetch.side_effect = [
@ -62,9 +63,10 @@ class TestETFCollectorResilience:
assert mock_fetcher.fetch.call_count == 2
mock_sleep.assert_called_once_with(3)
@patch("app.services.collectors.etf_collector.get_krx_client", return_value=None)
@patch("app.services.collectors.etf_collector.time.sleep")
@patch("app.services.collectors.etf_collector.ETF_전종목기본종목")
def test_connection_error_retries_and_raises(self, mock_etf_cls, mock_sleep, db):
@patch("pykrx.website.krx.etx.core.ETF_전종목기본종목")
def test_connection_error_retries_and_raises(self, mock_etf_cls, mock_sleep, mock_client, db):
"""ConnectionError on both attempts raises KRXDataError."""
mock_fetcher = MagicMock()
mock_fetcher.fetch.side_effect = ConnectionError("timeout")
@ -76,9 +78,10 @@ class TestETFCollectorResilience:
assert mock_fetcher.fetch.call_count == 2
@patch("app.services.collectors.etf_collector.get_krx_client", return_value=None)
@patch("app.services.collectors.etf_collector.time.sleep")
@patch("app.services.collectors.etf_collector.ETF_전종목기본종목")
def test_retry_succeeds_on_second_attempt(self, mock_etf_cls, mock_sleep, db):
@patch("pykrx.website.krx.etx.core.ETF_전종목기본종목")
def test_retry_succeeds_on_second_attempt(self, mock_etf_cls, mock_sleep, mock_client, db):
"""If first attempt fails but retry succeeds, data is processed normally."""
good_df = pd.DataFrame([{
"ISU_SRT_CD": "069500",
@ -99,9 +102,10 @@ class TestETFCollectorResilience:
assert result == 1
@patch("app.services.collectors.etf_collector.get_krx_client", return_value=None)
@patch("app.services.collectors.etf_collector.time.sleep")
@patch("app.services.collectors.etf_collector.ETF_전종목기본종목")
def test_failure_does_not_delete_existing_data(self, mock_etf_cls, mock_sleep, db):
@patch("pykrx.website.krx.etx.core.ETF_전종목기본종목")
def test_failure_does_not_delete_existing_data(self, mock_etf_cls, mock_sleep, mock_client, db):
"""Existing ETF records are preserved when fetch fails."""
db.execute(
ETF.__table__.insert().values(
@ -124,6 +128,59 @@ class TestETFCollectorResilience:
assert existing.name == "KODEX 200"
# ── ETFCollector Open API Tests ──
class TestETFCollectorOpenAPI:
@patch("app.services.collectors.etf_collector.get_krx_client")
@patch("app.services.collectors.base.BaseCollector._get_latest_biz_day", return_value="20260417")
def test_openapi_success(self, mock_biz, mock_get_client, db):
"""Open API returns valid ETF data."""
mock_client = MagicMock()
mock_client.get_etf_daily.return_value = pd.DataFrame([{
"ISU_SRT_CD": "069500",
"ISU_ABBRV": "KODEX 200",
"TDD_CLSPRC": 35000.0,
"ACC_TRDVOL": 1000000,
}])
mock_get_client.return_value = mock_client
collector = ETFCollector(db)
result = collector.collect()
assert result == 1
etf = db.query(ETF).filter_by(ticker="069500").first()
assert etf is not None
assert etf.name == "KODEX 200"
@patch("app.services.collectors.etf_collector.get_krx_client")
@patch("app.services.collectors.etf_collector.time.sleep")
@patch("pykrx.website.krx.etx.core.ETF_전종목기본종목")
@patch("app.services.collectors.base.BaseCollector._get_latest_biz_day", return_value="20260417")
def test_openapi_failure_falls_back_to_pykrx(self, mock_biz, mock_etf_cls, mock_sleep, mock_get_client, db):
"""When Open API fails, falls back to pykrx scraping."""
mock_client = MagicMock()
mock_client.get_etf_daily.side_effect = Exception("API down")
mock_get_client.return_value = mock_client
good_df = pd.DataFrame([{
"ISU_SRT_CD": "069500",
"ISU_ABBRV": "KODEX 200",
"IDX_ASST_CLSS_NM": "주식",
"IDX_MKT_CLSS_NM": "코스피",
"ETF_TOT_FEE": "0.15",
}])
mock_fetcher = MagicMock()
mock_fetcher.fetch.return_value = good_df
mock_etf_cls.return_value = mock_fetcher
collector = ETFCollector(db)
result = collector.collect()
assert result == 1
# ── ValuationCollector Tests ──

14
backend/uv.lock generated
View File

@ -485,6 +485,7 @@ dependencies = [
{ name = "pydantic-settings" },
{ name = "pyjwt", extra = ["crypto"] },
{ name = "pykrx" },
{ name = "pykrx-openapi" },
{ name = "python-multipart" },
{ name = "requests" },
{ name = "setuptools" },
@ -521,6 +522,7 @@ requires-dist = [
{ name = "pydantic-settings", specifier = "==2.12.0" },
{ name = "pyjwt", extras = ["crypto"], specifier = "==2.11.0" },
{ name = "pykrx", specifier = ">=1.2.6" },
{ name = "pykrx-openapi" },
{ name = "pytest", marker = "extra == 'dev'", specifier = "==8.3.4" },
{ name = "pytest-asyncio", marker = "extra == 'dev'", specifier = "==1.1.0" },
{ name = "python-multipart", specifier = "==0.0.22" },
@ -1290,6 +1292,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/7a/6a/5bd38daca2de1f514c861f9b175f2b11b4396fdfec3785110f56f1132c01/pykrx-1.2.6-py3-none-any.whl", hash = "sha256:3abb819efe501d2fab055ed913ebd407fa185bb74155378c1e2f831a0ed15398", size = 2196530, upload-time = "2026-04-14T10:17:40.063Z" },
]
[[package]]
name = "pykrx-openapi"
version = "0.1.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "requests" },
]
sdist = { url = "https://files.pythonhosted.org/packages/9a/2d/893d9a77ac2e66e2c7dcdebe046e991a162b7a026942f8b2d0b4d5f84656/pykrx_openapi-0.1.1.tar.gz", hash = "sha256:891c89ddbc1a8e99044f3201b868bb7530b3e38453096834169615588c79ed06", size = 16062756, upload-time = "2026-01-20T13:58:55.672Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/49/b3/ae37e596307591f6305d89573103d21766337ffb4b65bdf681e9331c792b/pykrx_openapi-0.1.1-py3-none-any.whl", hash = "sha256:fd2a65224645db0fbec2d173ed4140d1bb25040a0a8de5aee3500ebd35114718", size = 12594, upload-time = "2026-01-20T13:58:53.438Z" },
]
[[package]]
name = "pyparsing"
version = "3.3.2"

View File

@ -0,0 +1,97 @@
# KRX Open API 전환 설계
## 배경
- pykrx는 KRX 웹 스크래핑 방식으로 불안정 (로그인 필수화, 구조 변경 시 깨짐)
- KRX Open API (openapi.krx.co.kr) 공식 REST API로 전환하여 안정성 확보
- `pykrx-openapi` 라이브러리 활용 (MIT, pip install pykrx-openapi)
## 전환 범위
### 현재 pykrx 의존 Collector (5개)
| Collector | pykrx 함수 | KRX Open API 대체 |
|---|---|---|
| StockCollector | `get_market_ticker_list`, `get_market_cap_by_ticker`, `get_market_fundamental_by_ticker` | `get_stock_base_info`, `get_stock_daily_trade`, `get_kosdaq_stock_daily_trade` |
| PriceCollector | `get_market_ohlcv` | `get_stock_daily_trade`, `get_kosdaq_stock_daily_trade` |
| ValuationCollector | `get_market_fundamental_by_ticker` | pykrx 유지 또는 별도 소스 (Open API에 PER/PBR 없음) |
| ETFCollector | `ETF_전종목기본종목().fetch()` | `get_etf_daily_trade` (종목 목록 겸용) |
| ETFPriceCollector | `get_etf_ohlcv_by_date` | `get_etf_daily_trade` |
### 주의: ValuationCollector
KRX Open API 서비스 목록에 PER/PBR/배당수익률 API가 없음.
→ ValuationCollector는 pykrx(스크래핑) 유지 또는 네이버/FnGuide 등 대체 소스 검토.
→ 1차 전환에서는 pykrx fallback으로 유지.
### 스크립트 의존
- `scripts/generate_snapshots.py``pykrx_stock.get_etf_ohlcv_by_date` 사용
- `jobs/kjb_signal_job.py``Price.ticker == "069500"` (DB 조회, pykrx 직접 의존 없음)
- `app/services/optimizer.py` — DB 조회만, pykrx 직접 의존 없음
## 구현 계획
### 1. KRX Open API 클라이언트 모듈 생성
**파일:** `backend/app/services/krx_client.py`
```python
from pykrx_openapi import KRXOpenAPI
class KRXClient:
"""Thin wrapper around pykrx-openapi with project defaults."""
def __init__(self, api_key: str = None):
self.client = KRXOpenAPI(
api_key=api_key or os.getenv("KRX_OPENAPI_KEY"),
rate_limit=10,
per_seconds=1,
timeout=30,
)
def get_etf_daily(self, date: str) -> pd.DataFrame: ...
def get_stock_daily(self, date: str, market: str) -> pd.DataFrame: ...
def get_stock_base_info(self, date: str, market: str) -> pd.DataFrame: ...
```
### 2. Collector 전환 (4개)
각 collector에 `KRX_OPENAPI_KEY` 환경변수가 있으면 Open API 사용, 없으면 pykrx fallback.
- **ETFCollector**`get_etf_daily_trade` 로 전종목 ETF 목록 + 기본정보 취득
- **ETFPriceCollector**`get_etf_daily_trade` 로 종가/거래량 취득
- **StockCollector**`get_stock_base_info` + `get_stock_daily_trade` + `get_kosdaq_stock_daily_trade`
- **PriceCollector**`get_stock_daily_trade` + `get_kosdaq_stock_daily_trade`
### 3. ValuationCollector
1차: pykrx 유지 (KRX_ID/KRX_PW 사용)
향후: 네이버 금융 또는 FnGuide 스크래핑으로 전환 검토
### 4. generate_snapshots.py 전환
`pykrx_stock.get_etf_ohlcv_by_date``KRXClient.get_etf_daily`
### 5. 의존성 변경
- `pyproject.toml`: `pykrx-openapi` 추가
- `pykrx` 는 ValuationCollector 용으로 유지
- `.env.example`: `KRX_OPENAPI_KEY` 추가
- Gitea Secrets: `KRX_OPENAPI_KEY` 추가
### 6. 테스트
- 기존 unit test 업데이트 (mock 대상 변경)
- KRX Open API mock으로 테스트
## 환경 변수
```
KRX_OPENAPI_KEY=xxx # KRX Open API 인증키 (신규)
KRX_ID=xxx # pykrx 웹 로그인 (ValuationCollector용, 유지)
KRX_PW=xxx # pykrx 웹 로그인 (ValuationCollector용, 유지)
```
## 롤백 전략
- `KRX_OPENAPI_KEY` 환경변수를 제거하면 자동으로 pykrx fallback
- 기존 pykrx 코드는 삭제하지 않고 fallback으로 유지
## 작업 완료 조건
- [ ] KRX Open API 클라이언트 모듈
- [ ] ETFCollector 전환
- [ ] ETFPriceCollector 전환
- [ ] StockCollector 전환
- [ ] PriceCollector 전환
- [ ] generate_snapshots.py 전환
- [ ] 테스트 통과
- [ ] .env.example / deploy.yml 업데이트