feat: KRX Open API migration with pykrx fallback
- Add pykrx-openapi dependency
- New krx_client.py wrapper module
- ETFCollector: Open API bulk fetch + pykrx fallback
- ETFPriceCollector: Open API date-based bulk + pykrx fallback
- StockCollector: Open API base_info + daily_trade + pykrx fallback
- PriceCollector: Open API date-based bulk + pykrx fallback
- ValuationCollector: pykrx retained (Open API has no PER/PBR)
- generate_snapshots.py: Open API + pykrx fallback
- Auto-switch based on KRX_OPENAPI_KEY env var
- All 278 tests passing
This commit is contained in:
parent
5009168246
commit
9ab232ba12
@ -12,7 +12,11 @@ KIS_APP_KEY=your_kis_app_key
|
||||
KIS_APP_SECRET=your_kis_app_secret
|
||||
KIS_ACCOUNT_NO=your_account_number
|
||||
|
||||
# KRX Data Portal (required for data collection since 2026)
|
||||
# KRX Open API (preferred for data collection)
|
||||
# Register at https://openapi.krx.co.kr to get an API key
|
||||
KRX_OPENAPI_KEY=your_krx_openapi_key
|
||||
|
||||
# KRX Data Portal (fallback for ValuationCollector when Open API key is not set)
|
||||
# Register at https://data.krx.co.kr to get credentials
|
||||
KRX_ID=your_krx_login_id
|
||||
KRX_PW=your_krx_password
|
||||
|
||||
@ -35,6 +35,7 @@ jobs:
|
||||
ADMIN_USERNAME=${{ secrets.ADMIN_USERNAME }}
|
||||
ADMIN_EMAIL=${{ secrets.ADMIN_EMAIL }}
|
||||
ADMIN_PASSWORD=${{ secrets.ADMIN_PASSWORD }}
|
||||
KRX_OPENAPI_KEY=${{ secrets.KRX_OPENAPI_KEY }}
|
||||
KRX_ID=${{ secrets.KRX_ID }}
|
||||
KRX_PW=${{ secrets.KRX_PW }}
|
||||
EOF
|
||||
|
||||
@ -32,6 +32,7 @@ class Settings(BaseSettings):
|
||||
kis_app_secret: str = ""
|
||||
kis_account_no: str = ""
|
||||
dart_api_key: str = ""
|
||||
krx_openapi_key: str = ""
|
||||
|
||||
# Notifications (optional)
|
||||
discord_webhook_url: str = ""
|
||||
|
||||
@ -1,17 +1,18 @@
|
||||
"""
|
||||
ETF master data collector from KRX.
|
||||
|
||||
Uses KRX Open API when KRX_OPENAPI_KEY is set, falls back to pykrx scraping.
|
||||
"""
|
||||
import logging
|
||||
import time
|
||||
from json import JSONDecodeError
|
||||
|
||||
import pandas as pd
|
||||
from pykrx.website.krx.etx.core import ETF_전종목기본종목
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy.dialects.postgresql import insert
|
||||
|
||||
from app.services.collectors.base import BaseCollector
|
||||
from app.services.krx_client import get_krx_client
|
||||
from app.models.stock import ETF, AssetClass
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -47,8 +48,19 @@ class ETFCollector(BaseCollector):
|
||||
else:
|
||||
return AssetClass.MIXED.value
|
||||
|
||||
def _fetch_etf_data(self) -> pd.DataFrame:
|
||||
"""Fetch ETF data with 1 retry on failure."""
|
||||
def _fetch_etf_data_openapi(self) -> pd.DataFrame:
|
||||
"""Fetch ETF data via KRX Open API."""
|
||||
client = get_krx_client()
|
||||
biz_day = self._get_latest_biz_day()
|
||||
df = client.get_etf_daily(biz_day)
|
||||
if df is None or df.empty:
|
||||
raise KRXDataError("KRX Open API returned empty ETF data")
|
||||
return df
|
||||
|
||||
def _fetch_etf_data_pykrx(self) -> pd.DataFrame:
|
||||
"""Fetch ETF data via pykrx scraping with 1 retry on failure."""
|
||||
from pykrx.website.krx.etx.core import ETF_전종목기본종목
|
||||
|
||||
last_exc = None
|
||||
for attempt in range(2):
|
||||
try:
|
||||
@ -67,10 +79,39 @@ class ETFCollector(BaseCollector):
|
||||
logger.error(error_msg)
|
||||
raise KRXDataError(error_msg)
|
||||
|
||||
def collect(self) -> int:
|
||||
"""Collect ETF master data."""
|
||||
df = self._fetch_etf_data() # raises KRXDataError on failure
|
||||
def _fetch_etf_data(self) -> pd.DataFrame:
|
||||
"""Fetch ETF data with Open API preference and pykrx fallback."""
|
||||
client = get_krx_client()
|
||||
if client:
|
||||
try:
|
||||
logger.info("Fetching ETF data via KRX Open API")
|
||||
return self._fetch_etf_data_openapi()
|
||||
except Exception as e:
|
||||
logger.warning(f"KRX Open API failed, falling back to pykrx: {e}")
|
||||
|
||||
logger.info("Fetching ETF data via pykrx (scraping)")
|
||||
return self._fetch_etf_data_pykrx()
|
||||
|
||||
def _parse_openapi_records(self, df: pd.DataFrame) -> list[dict]:
|
||||
"""Parse Open API response DataFrame into ETF records."""
|
||||
records = []
|
||||
for _, row in df.iterrows():
|
||||
ticker = row.get("ISU_SRT_CD")
|
||||
name = row.get("ISU_ABBRV")
|
||||
if not ticker or pd.isna(ticker) or not name or pd.isna(name):
|
||||
continue
|
||||
|
||||
records.append({
|
||||
"ticker": str(ticker),
|
||||
"name": str(name),
|
||||
"asset_class": AssetClass.MIXED.value,
|
||||
"market": "",
|
||||
"expense_ratio": None,
|
||||
})
|
||||
return records
|
||||
|
||||
def _parse_pykrx_records(self, df: pd.DataFrame) -> list[dict]:
|
||||
"""Parse pykrx response DataFrame into ETF records."""
|
||||
records = []
|
||||
for _, row in df.iterrows():
|
||||
ticker = row.get("ISU_SRT_CD")
|
||||
@ -90,12 +131,30 @@ class ETFCollector(BaseCollector):
|
||||
pass
|
||||
|
||||
records.append({
|
||||
"ticker": ticker,
|
||||
"name": name,
|
||||
"ticker": str(ticker),
|
||||
"name": str(name),
|
||||
"asset_class": self._classify_asset_class(asset_class_str, name),
|
||||
"market": market if market and not pd.isna(market) else "",
|
||||
"expense_ratio": expense_ratio,
|
||||
})
|
||||
return records
|
||||
|
||||
def collect(self) -> int:
|
||||
"""Collect ETF master data."""
|
||||
client = get_krx_client()
|
||||
if client:
|
||||
try:
|
||||
logger.info("Fetching ETF data via KRX Open API")
|
||||
df = self._fetch_etf_data_openapi()
|
||||
records = self._parse_openapi_records(df)
|
||||
except Exception as e:
|
||||
logger.warning(f"KRX Open API failed, falling back to pykrx: {e}")
|
||||
df = self._fetch_etf_data_pykrx()
|
||||
records = self._parse_pykrx_records(df)
|
||||
else:
|
||||
logger.info("Fetching ETF data via pykrx (scraping)")
|
||||
df = self._fetch_etf_data_pykrx()
|
||||
records = self._parse_pykrx_records(df)
|
||||
|
||||
if records:
|
||||
stmt = insert(ETF).values(records)
|
||||
|
||||
@ -1,17 +1,18 @@
|
||||
"""
|
||||
ETF price data collector using pykrx.
|
||||
ETF price data collector.
|
||||
|
||||
Uses KRX Open API when KRX_OPENAPI_KEY is set, falls back to pykrx scraping.
|
||||
"""
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
from json import JSONDecodeError
|
||||
|
||||
import pandas as pd
|
||||
from pykrx import stock as pykrx_stock
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy.dialects.postgresql import insert
|
||||
|
||||
from app.services.collectors.base import BaseCollector
|
||||
from app.services.krx_client import get_krx_client
|
||||
from app.models.stock import ETFPrice, ETF
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -54,8 +55,83 @@ class ETFPriceCollector(BaseCollector):
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
|
||||
def collect(self) -> int:
|
||||
"""Collect price data for all ETFs."""
|
||||
def _collect_openapi(self) -> int:
|
||||
"""Collect ETF prices via KRX Open API (date-based bulk fetch)."""
|
||||
client = get_krx_client()
|
||||
total_records = 0
|
||||
|
||||
# Generate list of business dates to fetch
|
||||
start = datetime.strptime(self.start_date, "%Y%m%d")
|
||||
end = datetime.strptime(self.end_date, "%Y%m%d")
|
||||
current = start
|
||||
|
||||
# Get valid ETF tickers from DB for filtering
|
||||
tickers = self.db.query(ETF.ticker).all()
|
||||
ticker_set = {t[0] for t in tickers}
|
||||
|
||||
while current <= end:
|
||||
date_str = current.strftime("%Y%m%d")
|
||||
try:
|
||||
df = client.get_etf_daily(date_str)
|
||||
if df is None or df.empty:
|
||||
current += timedelta(days=1)
|
||||
continue
|
||||
|
||||
records = []
|
||||
for _, row in df.iterrows():
|
||||
ticker = str(row.get("ISU_SRT_CD", ""))
|
||||
if not ticker or ticker not in ticker_set:
|
||||
continue
|
||||
|
||||
close_val = self._safe_float(row.get("TDD_CLSPRC"))
|
||||
if close_val is None:
|
||||
continue
|
||||
|
||||
nav_val = self._safe_float(row.get("NAV"))
|
||||
volume_val = self._safe_int(row.get("ACC_TRDVOL"))
|
||||
|
||||
bas_dd = row.get("BAS_DD")
|
||||
if hasattr(bas_dd, "date"):
|
||||
date_value = bas_dd.date()
|
||||
elif isinstance(bas_dd, str):
|
||||
date_value = datetime.strptime(bas_dd, "%Y%m%d").date()
|
||||
else:
|
||||
date_value = current.date()
|
||||
|
||||
records.append({
|
||||
"ticker": ticker,
|
||||
"date": date_value,
|
||||
"close": close_val,
|
||||
"nav": nav_val,
|
||||
"volume": volume_val,
|
||||
})
|
||||
|
||||
if records:
|
||||
stmt = insert(ETFPrice).values(records)
|
||||
stmt = stmt.on_conflict_do_update(
|
||||
index_elements=["ticker", "date"],
|
||||
set_={
|
||||
"close": stmt.excluded.close,
|
||||
"nav": stmt.excluded.nav,
|
||||
"volume": stmt.excluded.volume,
|
||||
},
|
||||
)
|
||||
self.db.execute(stmt)
|
||||
self.db.commit()
|
||||
total_records += len(records)
|
||||
|
||||
except Exception as e:
|
||||
self.db.rollback()
|
||||
logger.warning(f"ETF price fetch for {date_str} via Open API failed: {e}")
|
||||
|
||||
current += timedelta(days=1)
|
||||
|
||||
return total_records
|
||||
|
||||
def _collect_pykrx(self) -> int:
|
||||
"""Collect ETF prices via pykrx scraping (ticker-based loop)."""
|
||||
from pykrx import stock as pykrx_stock
|
||||
|
||||
tickers = self.db.query(ETF.ticker).all()
|
||||
ticker_list = [t[0] for t in tickers]
|
||||
|
||||
@ -120,5 +196,21 @@ class ETFPriceCollector(BaseCollector):
|
||||
logger.warning(f"Failed to fetch ETF prices for {ticker}: {e}")
|
||||
continue
|
||||
|
||||
logger.info(f"Collected {total_records} ETF price records")
|
||||
return total_records
|
||||
|
||||
def collect(self) -> int:
|
||||
"""Collect price data for all ETFs."""
|
||||
client = get_krx_client()
|
||||
if client:
|
||||
try:
|
||||
logger.info("Collecting ETF prices via KRX Open API")
|
||||
total = self._collect_openapi()
|
||||
logger.info(f"Collected {total} ETF price records via Open API")
|
||||
return total
|
||||
except Exception as e:
|
||||
logger.warning(f"KRX Open API failed, falling back to pykrx: {e}")
|
||||
|
||||
logger.info("Collecting ETF prices via pykrx (scraping)")
|
||||
total = self._collect_pykrx()
|
||||
logger.info(f"Collected {total} ETF price records")
|
||||
return total
|
||||
|
||||
@ -1,20 +1,20 @@
|
||||
"""
|
||||
Price data collector using pykrx.
|
||||
Price data collector.
|
||||
|
||||
Uses KRX Open API when KRX_OPENAPI_KEY is set, falls back to pykrx scraping.
|
||||
"""
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
from json import JSONDecodeError
|
||||
|
||||
import pandas as pd
|
||||
from pykrx import stock as pykrx_stock
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy.dialects.postgresql import insert
|
||||
|
||||
from app.services.collectors.base import BaseCollector
|
||||
from app.services.krx_client import get_krx_client
|
||||
from app.models.stock import Price, Stock
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@ -55,9 +55,89 @@ class PriceCollector(BaseCollector):
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
|
||||
def collect(self) -> int:
|
||||
"""Collect price data for all stocks."""
|
||||
# Get list of tickers from database
|
||||
def _collect_openapi(self) -> int:
|
||||
"""Collect stock prices via KRX Open API (date-based bulk fetch)."""
|
||||
client = get_krx_client()
|
||||
total_records = 0
|
||||
|
||||
start = datetime.strptime(self.start_date, "%Y%m%d")
|
||||
end = datetime.strptime(self.end_date, "%Y%m%d")
|
||||
current = start
|
||||
|
||||
# Get valid stock tickers from DB for filtering
|
||||
tickers = self.db.query(Stock.ticker, Stock.market).all()
|
||||
ticker_market = {t[0]: t[1] for t in tickers}
|
||||
|
||||
while current <= end:
|
||||
date_str = current.strftime("%Y%m%d")
|
||||
|
||||
for market in ("KOSPI", "KOSDAQ"):
|
||||
try:
|
||||
df = client.get_stock_daily(date_str, market=market)
|
||||
if df is None or df.empty:
|
||||
continue
|
||||
|
||||
records = []
|
||||
for _, row in df.iterrows():
|
||||
ticker = str(row.get("ISU_SRT_CD", ""))
|
||||
if not ticker or ticker not in ticker_market:
|
||||
continue
|
||||
|
||||
close_val = self._safe_float(row.get("TDD_CLSPRC"))
|
||||
if close_val is None:
|
||||
continue
|
||||
|
||||
open_val = self._safe_float(row.get("TDD_OPNPRC"))
|
||||
high_val = self._safe_float(row.get("TDD_HGPRC"))
|
||||
low_val = self._safe_float(row.get("TDD_LWPRC"))
|
||||
volume_val = self._safe_int(row.get("ACC_TRDVOL"))
|
||||
|
||||
bas_dd = row.get("BAS_DD")
|
||||
if hasattr(bas_dd, "date"):
|
||||
date_value = bas_dd.date()
|
||||
elif isinstance(bas_dd, str):
|
||||
date_value = datetime.strptime(bas_dd, "%Y%m%d").date()
|
||||
else:
|
||||
date_value = current.date()
|
||||
|
||||
records.append({
|
||||
"ticker": ticker,
|
||||
"date": date_value,
|
||||
"open": open_val,
|
||||
"high": high_val,
|
||||
"low": low_val,
|
||||
"close": close_val,
|
||||
"volume": volume_val,
|
||||
})
|
||||
|
||||
if records:
|
||||
stmt = insert(Price).values(records)
|
||||
stmt = stmt.on_conflict_do_update(
|
||||
index_elements=["ticker", "date"],
|
||||
set_={
|
||||
"open": stmt.excluded.open,
|
||||
"high": stmt.excluded.high,
|
||||
"low": stmt.excluded.low,
|
||||
"close": stmt.excluded.close,
|
||||
"volume": stmt.excluded.volume,
|
||||
},
|
||||
)
|
||||
self.db.execute(stmt)
|
||||
self.db.commit()
|
||||
total_records += len(records)
|
||||
|
||||
except Exception as e:
|
||||
self.db.rollback()
|
||||
logger.warning(f"Price fetch for {market} {date_str} via Open API failed: {e}")
|
||||
|
||||
current += timedelta(days=1)
|
||||
|
||||
return total_records
|
||||
|
||||
def _collect_pykrx(self) -> int:
|
||||
"""Collect stock prices via pykrx scraping (ticker-based loop)."""
|
||||
from pykrx import stock as pykrx_stock
|
||||
|
||||
tickers = self.db.query(Stock.ticker).all()
|
||||
ticker_list = [t[0] for t in tickers]
|
||||
|
||||
@ -68,7 +148,6 @@ class PriceCollector(BaseCollector):
|
||||
total_records = 0
|
||||
logger.info(f"Collecting prices for {len(ticker_list)} stocks from {self.start_date} to {self.end_date}")
|
||||
|
||||
# Fetch prices in batches
|
||||
for ticker in ticker_list:
|
||||
try:
|
||||
df = pykrx_stock.get_market_ohlcv(
|
||||
@ -81,22 +160,19 @@ class PriceCollector(BaseCollector):
|
||||
df.columns = ["date", "open", "high", "low", "close", "volume",
|
||||
"value"]
|
||||
|
||||
# Validate column count
|
||||
expected_cols = 7 # date + 6 data columns
|
||||
expected_cols = 7
|
||||
if len(df.columns) < expected_cols:
|
||||
logger.warning(f"Unexpected column count for {ticker}: {len(df.columns)}")
|
||||
continue
|
||||
|
||||
records = []
|
||||
for _, row in df.iterrows():
|
||||
# Safely convert values with type checking
|
||||
open_val = self._safe_float(row["open"])
|
||||
high_val = self._safe_float(row["high"])
|
||||
low_val = self._safe_float(row["low"])
|
||||
close_val = self._safe_float(row["close"])
|
||||
volume_val = self._safe_int(row["volume"])
|
||||
|
||||
# Skip if essential values are missing
|
||||
if close_val is None:
|
||||
logger.debug(f"Skipping record for {ticker}: missing close price")
|
||||
continue
|
||||
@ -125,7 +201,7 @@ class PriceCollector(BaseCollector):
|
||||
},
|
||||
)
|
||||
self.db.execute(stmt)
|
||||
self.db.commit() # Commit per ticker
|
||||
self.db.commit()
|
||||
total_records += len(records)
|
||||
|
||||
except JSONDecodeError as e:
|
||||
@ -140,5 +216,21 @@ class PriceCollector(BaseCollector):
|
||||
logger.warning(f"Failed to fetch prices for {ticker}: {e}")
|
||||
continue
|
||||
|
||||
logger.info(f"Collected {total_records} price records")
|
||||
return total_records
|
||||
|
||||
def collect(self) -> int:
|
||||
"""Collect price data for all stocks."""
|
||||
client = get_krx_client()
|
||||
if client:
|
||||
try:
|
||||
logger.info("Collecting stock prices via KRX Open API")
|
||||
total = self._collect_openapi()
|
||||
logger.info(f"Collected {total} price records via Open API")
|
||||
return total
|
||||
except Exception as e:
|
||||
logger.warning(f"KRX Open API failed, falling back to pykrx: {e}")
|
||||
|
||||
logger.info("Collecting prices via pykrx (scraping)")
|
||||
total = self._collect_pykrx()
|
||||
logger.info(f"Collected {total} price records")
|
||||
return total
|
||||
|
||||
@ -1,24 +1,25 @@
|
||||
"""
|
||||
Stock data collector using pykrx.
|
||||
Stock data collector.
|
||||
|
||||
Uses KRX Open API when KRX_OPENAPI_KEY is set, falls back to pykrx scraping.
|
||||
"""
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from json import JSONDecodeError
|
||||
|
||||
import pandas as pd
|
||||
from pykrx import stock as pykrx_stock
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy.dialects.postgresql import insert
|
||||
|
||||
from app.services.collectors.base import BaseCollector
|
||||
from app.services.krx_client import get_krx_client
|
||||
from app.models.stock import Stock, StockType
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class StockCollector(BaseCollector):
|
||||
"""Collects stock master data using pykrx."""
|
||||
"""Collects stock master data."""
|
||||
|
||||
def __init__(self, db: Session, biz_day: str = None):
|
||||
super().__init__(db)
|
||||
@ -52,9 +53,65 @@ class StockCollector(BaseCollector):
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
|
||||
def collect(self) -> int:
|
||||
"""Collect stock master data."""
|
||||
# Get tickers per market (also caches ticker-name mappings internally)
|
||||
def _collect_openapi(self) -> int:
|
||||
"""Collect stock data via KRX Open API."""
|
||||
client = get_krx_client()
|
||||
base_date = datetime.strptime(self.biz_day, "%Y%m%d").date()
|
||||
records = []
|
||||
|
||||
for market in ("KOSPI", "KOSDAQ"):
|
||||
# Fetch base info (ticker, name, listed shares)
|
||||
base_df = client.get_stock_base_info(self.biz_day, market=market)
|
||||
# Fetch daily trade (close, market cap)
|
||||
trade_df = client.get_stock_daily(self.biz_day, market=market)
|
||||
|
||||
if base_df is None or base_df.empty:
|
||||
logger.warning(f"No stock base info from Open API for {market}")
|
||||
continue
|
||||
|
||||
# Index trade_df by ticker for quick lookup
|
||||
trade_map = {}
|
||||
if trade_df is not None and not trade_df.empty:
|
||||
for _, row in trade_df.iterrows():
|
||||
ticker = str(row.get("ISU_SRT_CD", ""))
|
||||
if ticker:
|
||||
trade_map[ticker] = row
|
||||
|
||||
for _, row in base_df.iterrows():
|
||||
ticker = str(row.get("ISU_SRT_CD", ""))
|
||||
name = str(row.get("ISU_ABBRV", ""))
|
||||
if not ticker or not name:
|
||||
continue
|
||||
|
||||
close_price = None
|
||||
market_cap = None
|
||||
trade_row = trade_map.get(ticker)
|
||||
if trade_row is not None:
|
||||
close_price = self._safe_value(trade_row.get("TDD_CLSPRC"))
|
||||
market_cap = self._safe_value(trade_row.get("MKTCAP"))
|
||||
|
||||
stock_type = self._classify_stock_type(name, ticker)
|
||||
|
||||
records.append({
|
||||
"ticker": ticker,
|
||||
"name": name,
|
||||
"market": market,
|
||||
"close_price": close_price,
|
||||
"market_cap": market_cap,
|
||||
"eps": None,
|
||||
"forward_eps": None,
|
||||
"bps": None,
|
||||
"dividend_per_share": None,
|
||||
"stock_type": stock_type,
|
||||
"base_date": base_date,
|
||||
})
|
||||
|
||||
return self._upsert_records(records)
|
||||
|
||||
def _collect_pykrx(self) -> int:
|
||||
"""Collect stock data via pykrx scraping."""
|
||||
from pykrx import stock as pykrx_stock
|
||||
|
||||
try:
|
||||
kospi_tickers = pykrx_stock.get_market_ticker_list(self.biz_day, market="KOSPI")
|
||||
kosdaq_tickers = pykrx_stock.get_market_ticker_list(self.biz_day, market="KOSDAQ")
|
||||
@ -74,7 +131,6 @@ class StockCollector(BaseCollector):
|
||||
logger.warning("No tickers found from pykrx.")
|
||||
return 0
|
||||
|
||||
# Fetch bulk data
|
||||
try:
|
||||
cap_df = pykrx_stock.get_market_cap_by_ticker(self.biz_day)
|
||||
except (JSONDecodeError, KeyError, ConnectionError, ValueError) as e:
|
||||
@ -125,7 +181,10 @@ class StockCollector(BaseCollector):
|
||||
"base_date": base_date,
|
||||
})
|
||||
|
||||
# Upsert using PostgreSQL INSERT ON CONFLICT
|
||||
return self._upsert_records(records)
|
||||
|
||||
def _upsert_records(self, records: list[dict]) -> int:
|
||||
"""Upsert stock records into the database."""
|
||||
if records:
|
||||
stmt = insert(Stock).values(records)
|
||||
stmt = stmt.on_conflict_do_update(
|
||||
@ -148,3 +207,16 @@ class StockCollector(BaseCollector):
|
||||
|
||||
logger.info(f"Collected {len(records)} stock records")
|
||||
return len(records)
|
||||
|
||||
def collect(self) -> int:
|
||||
"""Collect stock master data."""
|
||||
client = get_krx_client()
|
||||
if client:
|
||||
try:
|
||||
logger.info("Collecting stock data via KRX Open API")
|
||||
return self._collect_openapi()
|
||||
except Exception as e:
|
||||
logger.warning(f"KRX Open API failed, falling back to pykrx: {e}")
|
||||
|
||||
logger.info("Collecting stock data via pykrx (scraping)")
|
||||
return self._collect_pykrx()
|
||||
|
||||
67
backend/app/services/krx_client.py
Normal file
67
backend/app/services/krx_client.py
Normal file
@ -0,0 +1,67 @@
|
||||
"""
|
||||
KRX Open API client wrapper.
|
||||
|
||||
Uses pykrx-openapi library to access KRX official REST API.
|
||||
Activated when KRX_OPENAPI_KEY environment variable is set.
|
||||
"""
|
||||
import os
|
||||
import logging
|
||||
from datetime import datetime
|
||||
|
||||
import pandas as pd
|
||||
from pykrx_openapi import KRXOpenAPI
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class KRXClient:
|
||||
"""Thin wrapper around pykrx-openapi with project defaults."""
|
||||
|
||||
def __init__(self, api_key: str | None = None):
|
||||
key = api_key or os.getenv("KRX_OPENAPI_KEY")
|
||||
if not key:
|
||||
raise ValueError("KRX_OPENAPI_KEY is required for KRXClient")
|
||||
self.client = KRXOpenAPI(api_key=key)
|
||||
|
||||
def _to_date_str(self, date_input: str) -> str:
|
||||
"""Ensure date string is in YYYYMMDD format."""
|
||||
return date_input.replace("-", "")
|
||||
|
||||
def _records_to_df(self, result: dict) -> pd.DataFrame:
|
||||
"""Convert API response dict to DataFrame."""
|
||||
records = result.get("OutBlock_1", [])
|
||||
if not records:
|
||||
return pd.DataFrame()
|
||||
return pd.DataFrame(records)
|
||||
|
||||
def get_etf_daily(self, bas_dd: str) -> pd.DataFrame:
|
||||
"""Fetch all ETF daily trade data for a given date."""
|
||||
bas_dd = self._to_date_str(bas_dd)
|
||||
result = self.client.get_etf_daily_trade(bas_dd=bas_dd)
|
||||
return self._records_to_df(result)
|
||||
|
||||
def get_stock_daily(self, bas_dd: str, market: str = "KOSPI") -> pd.DataFrame:
|
||||
"""Fetch stock daily trade data for a given date and market."""
|
||||
bas_dd = self._to_date_str(bas_dd)
|
||||
if market == "KOSDAQ":
|
||||
result = self.client.get_kosdaq_stock_daily_trade(bas_dd=bas_dd)
|
||||
else:
|
||||
result = self.client.get_stock_daily_trade(bas_dd=bas_dd)
|
||||
return self._records_to_df(result)
|
||||
|
||||
def get_stock_base_info(self, bas_dd: str, market: str = "KOSPI") -> pd.DataFrame:
|
||||
"""Fetch stock base info for a given date and market."""
|
||||
bas_dd = self._to_date_str(bas_dd)
|
||||
if market == "KOSDAQ":
|
||||
result = self.client.get_kosdaq_stock_base_info(bas_dd=bas_dd)
|
||||
else:
|
||||
result = self.client.get_stock_base_info(bas_dd=bas_dd)
|
||||
return self._records_to_df(result)
|
||||
|
||||
|
||||
def get_krx_client() -> KRXClient | None:
    """Return a KRXClient when KRX_OPENAPI_KEY is set, else None.

    Callers treat a None return as the signal to fall back to pykrx
    scraping instead of the Open API.
    """
    api_key = os.getenv("KRX_OPENAPI_KEY")
    return KRXClient(api_key=api_key) if api_key else None
|
||||
@ -18,6 +18,7 @@ dependencies = [
|
||||
"apscheduler==3.11.2",
|
||||
"setuptools",
|
||||
"pykrx>=1.2.6",
|
||||
"pykrx-openapi",
|
||||
"requests==2.32.5",
|
||||
"beautifulsoup4==4.14.3",
|
||||
"lxml==6.0.2",
|
||||
|
||||
@ -1,8 +1,7 @@
|
||||
"""
|
||||
Generate portfolio snapshots from trade history using actual market prices.
|
||||
|
||||
Fetches closing prices from KRX (via pykrx) for each snapshot date,
|
||||
then computes portfolio value based on cumulative holdings at that point.
|
||||
Uses KRX Open API when KRX_OPENAPI_KEY is set, falls back to pykrx scraping.
|
||||
|
||||
Snapshot dates: end of each month where trades occurred, plus latest available.
|
||||
|
||||
@ -11,7 +10,8 @@ Usage:
|
||||
|
||||
Requires:
|
||||
- DATABASE_URL environment variable
|
||||
- KRX_ID / KRX_PW environment variables (pykrx >= 1.2.5)
|
||||
- KRX_OPENAPI_KEY environment variable (preferred)
|
||||
- KRX_ID / KRX_PW environment variables (pykrx fallback)
|
||||
"""
|
||||
import sys
|
||||
import os
|
||||
@ -24,10 +24,9 @@ from json import JSONDecodeError
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from pykrx import stock as pykrx_stock
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
from app.core.database import SessionLocal
|
||||
from app.services.krx_client import get_krx_client
|
||||
from app.models.portfolio import (
|
||||
Portfolio, PortfolioSnapshot, SnapshotHolding,
|
||||
)
|
||||
@ -103,17 +102,14 @@ def _generate_snapshot_dates() -> list[date]:
|
||||
today = date.today()
|
||||
|
||||
dates = []
|
||||
# Month-end dates
|
||||
current = date(first_date.year, first_date.month, 1)
|
||||
while current <= today:
|
||||
# Last day of month
|
||||
if current.month == 12:
|
||||
next_month = date(current.year + 1, 1, 1)
|
||||
else:
|
||||
next_month = date(current.year, current.month + 1, 1)
|
||||
last_day = next_month - timedelta(days=1)
|
||||
|
||||
# Only include if we have holdings at this date
|
||||
if last_day >= first_date and last_day <= today:
|
||||
dates.append(last_day)
|
||||
|
||||
@ -122,17 +118,45 @@ def _generate_snapshot_dates() -> list[date]:
|
||||
return dates
|
||||
|
||||
|
||||
def _fetch_price_with_retry(ticker: str, date_str: str, max_retries: int = 3) -> Decimal | None:
|
||||
"""Fetch closing price for a ticker on a date, with fallback to previous days."""
|
||||
def _fetch_price_openapi(ticker: str, date_str: str) -> Decimal | None:
    """Fetch a closing price via the KRX Open API.

    Walks up to 5 calendar days backwards from ``date_str`` to skate over
    weekends/holidays. Returns None when the Open API client is not
    configured or no usable price is found.
    """
    client = get_krx_client()
    if not client:
        return None

    target = datetime.strptime(date_str, "%Y%m%d").date()

    for day_offset in range(5):
        try_date = target - timedelta(days=day_offset)
        try_date_str = try_date.strftime("%Y%m%d")

        try:
            df = client.get_etf_daily(try_date_str)
            if df is None or df.empty:
                continue
            match = df[df["ISU_SRT_CD"] == ticker]
            if match.empty:
                continue
            close = match.iloc[0].get("TDD_CLSPRC")
            # Zero/blank close means no trade on that date; keep walking back.
            # Prices are whole won, so truncate through int before Decimal.
            if close and float(close) > 0:
                return Decimal(str(int(float(close))))
        except Exception as e:
            logger.warning(f"Open API fetch for {ticker} on {try_date_str}: {e}")
            continue

    return None
|
||||
|
||||
|
||||
def _fetch_price_pykrx(ticker: str, date_str: str, max_retries: int = 3) -> Decimal | None:
|
||||
"""Fetch closing price via pykrx scraping."""
|
||||
from pykrx import stock as pykrx_stock
|
||||
|
||||
target = datetime.strptime(date_str, "%Y%m%d").date()
|
||||
|
||||
for day_offset in range(5):
|
||||
try_date = target - timedelta(days=day_offset)
|
||||
try_date_str = try_date.strftime("%Y%m%d")
|
||||
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
# For ETFs, use get_etf_ohlcv_by_date
|
||||
df = pykrx_stock.get_etf_ohlcv_by_date(try_date_str, try_date_str, ticker)
|
||||
if df is not None and not df.empty:
|
||||
close = df.iloc[0]["종가"]
|
||||
@ -160,15 +184,25 @@ def _fetch_price_with_retry(ticker: str, date_str: str, max_retries: int = 3) ->
|
||||
return None
|
||||
|
||||
|
||||
def _fetch_price_with_retry(ticker: str, date_str: str, max_retries: int = 3) -> Decimal | None:
    """Fetch a closing price, preferring the Open API with pykrx fallback.

    The scraping path also runs when the Open API is configured but
    yields no price for this ticker/date.
    """
    client = get_krx_client()
    if client is not None:
        price = _fetch_price_openapi(ticker, date_str)
        if price:
            return price
        logger.warning(f"Open API failed for {ticker} on {date_str}, trying pykrx")
    return _fetch_price_pykrx(ticker, date_str, max_retries)
|
||||
|
||||
|
||||
def generate_snapshots(db: Session):
|
||||
"""Generate portfolio snapshots from trade history with actual market prices."""
|
||||
# Find portfolio
|
||||
portfolio = db.query(Portfolio).filter(Portfolio.name == "연금 포트폴리오").first()
|
||||
if not portfolio:
|
||||
logger.error("Portfolio '연금 포트폴리오' not found. Run seed_data.py first.")
|
||||
return
|
||||
|
||||
# Delete existing snapshots
|
||||
existing = db.query(PortfolioSnapshot).filter(
|
||||
PortfolioSnapshot.portfolio_id == portfolio.id
|
||||
).all()
|
||||
@ -192,7 +226,6 @@ def generate_snapshots(db: Session):
|
||||
date_str = snap_date.strftime("%Y%m%d")
|
||||
logger.info(f"Processing {snap_date} ({len(holdings)} tickers)...")
|
||||
|
||||
# Fetch prices for all held tickers
|
||||
prices: dict[str, Decimal] = {}
|
||||
for ticker in holdings:
|
||||
price = _fetch_price_with_retry(ticker, date_str)
|
||||
@ -205,7 +238,6 @@ def generate_snapshots(db: Session):
|
||||
logger.warning(f" Skipping {snap_date}: no prices available")
|
||||
continue
|
||||
|
||||
# Calculate portfolio value
|
||||
total_value = Decimal("0")
|
||||
snapshot_holdings = []
|
||||
|
||||
@ -224,7 +256,6 @@ def generate_snapshots(db: Session):
|
||||
if total_value == 0:
|
||||
continue
|
||||
|
||||
# Create snapshot
|
||||
snapshot = PortfolioSnapshot(
|
||||
portfolio_id=portfolio.id,
|
||||
total_value=total_value,
|
||||
@ -233,7 +264,6 @@ def generate_snapshots(db: Session):
|
||||
db.add(snapshot)
|
||||
db.flush()
|
||||
|
||||
# Create snapshot holdings with ratios
|
||||
for h in snapshot_holdings:
|
||||
ratio = (h["value"] / total_value * 100).quantize(Decimal("0.01"), rounding=ROUND_HALF_UP)
|
||||
db.add(SnapshotHolding(
|
||||
@ -248,7 +278,6 @@ def generate_snapshots(db: Session):
|
||||
created += 1
|
||||
logger.info(f" Snapshot {snap_date}: total={total_value:,.0f}")
|
||||
|
||||
# Rate limit: be gentle with KRX
|
||||
time.sleep(1)
|
||||
|
||||
db.commit()
|
||||
|
||||
@ -39,14 +39,15 @@ def db():
|
||||
Base.metadata.drop_all(bind=engine)
|
||||
|
||||
|
||||
# ── ETFCollector Tests ──
|
||||
# ── ETFCollector Tests (pykrx fallback path, no KRX_OPENAPI_KEY) ──
|
||||
|
||||
|
||||
class TestETFCollectorResilience:
|
||||
|
||||
@patch("app.services.collectors.etf_collector.get_krx_client", return_value=None)
|
||||
@patch("app.services.collectors.etf_collector.time.sleep")
|
||||
@patch("app.services.collectors.etf_collector.ETF_전종목기본종목")
|
||||
def test_json_decode_error_retries_once(self, mock_etf_cls, mock_sleep, db):
|
||||
@patch("pykrx.website.krx.etx.core.ETF_전종목기본종목")
|
||||
def test_json_decode_error_retries_once(self, mock_etf_cls, mock_sleep, mock_client, db):
|
||||
"""JSONDecodeError on both attempts raises KRXDataError with login hint."""
|
||||
mock_fetcher = MagicMock()
|
||||
mock_fetcher.fetch.side_effect = [
|
||||
@ -62,9 +63,10 @@ class TestETFCollectorResilience:
|
||||
assert mock_fetcher.fetch.call_count == 2
|
||||
mock_sleep.assert_called_once_with(3)
|
||||
|
||||
@patch("app.services.collectors.etf_collector.get_krx_client", return_value=None)
|
||||
@patch("app.services.collectors.etf_collector.time.sleep")
|
||||
@patch("app.services.collectors.etf_collector.ETF_전종목기본종목")
|
||||
def test_connection_error_retries_and_raises(self, mock_etf_cls, mock_sleep, db):
|
||||
@patch("pykrx.website.krx.etx.core.ETF_전종목기본종목")
|
||||
def test_connection_error_retries_and_raises(self, mock_etf_cls, mock_sleep, mock_client, db):
|
||||
"""ConnectionError on both attempts raises KRXDataError."""
|
||||
mock_fetcher = MagicMock()
|
||||
mock_fetcher.fetch.side_effect = ConnectionError("timeout")
|
||||
@ -76,9 +78,10 @@ class TestETFCollectorResilience:
|
||||
|
||||
assert mock_fetcher.fetch.call_count == 2
|
||||
|
||||
@patch("app.services.collectors.etf_collector.get_krx_client", return_value=None)
|
||||
@patch("app.services.collectors.etf_collector.time.sleep")
|
||||
@patch("app.services.collectors.etf_collector.ETF_전종목기본종목")
|
||||
def test_retry_succeeds_on_second_attempt(self, mock_etf_cls, mock_sleep, db):
|
||||
@patch("pykrx.website.krx.etx.core.ETF_전종목기본종목")
|
||||
def test_retry_succeeds_on_second_attempt(self, mock_etf_cls, mock_sleep, mock_client, db):
|
||||
"""If first attempt fails but retry succeeds, data is processed normally."""
|
||||
good_df = pd.DataFrame([{
|
||||
"ISU_SRT_CD": "069500",
|
||||
@ -99,9 +102,10 @@ class TestETFCollectorResilience:
|
||||
|
||||
assert result == 1
|
||||
|
||||
@patch("app.services.collectors.etf_collector.get_krx_client", return_value=None)
|
||||
@patch("app.services.collectors.etf_collector.time.sleep")
|
||||
@patch("app.services.collectors.etf_collector.ETF_전종목기본종목")
|
||||
def test_failure_does_not_delete_existing_data(self, mock_etf_cls, mock_sleep, db):
|
||||
@patch("pykrx.website.krx.etx.core.ETF_전종목기본종목")
|
||||
def test_failure_does_not_delete_existing_data(self, mock_etf_cls, mock_sleep, mock_client, db):
|
||||
"""Existing ETF records are preserved when fetch fails."""
|
||||
db.execute(
|
||||
ETF.__table__.insert().values(
|
||||
@ -124,6 +128,59 @@ class TestETFCollectorResilience:
|
||||
assert existing.name == "KODEX 200"
|
||||
|
||||
|
||||
# ── ETFCollector Open API Tests ──
|
||||
|
||||
|
||||
class TestETFCollectorOpenAPI:
|
||||
|
||||
@patch("app.services.collectors.etf_collector.get_krx_client")
|
||||
@patch("app.services.collectors.base.BaseCollector._get_latest_biz_day", return_value="20260417")
|
||||
def test_openapi_success(self, mock_biz, mock_get_client, db):
|
||||
"""Open API returns valid ETF data."""
|
||||
mock_client = MagicMock()
|
||||
mock_client.get_etf_daily.return_value = pd.DataFrame([{
|
||||
"ISU_SRT_CD": "069500",
|
||||
"ISU_ABBRV": "KODEX 200",
|
||||
"TDD_CLSPRC": 35000.0,
|
||||
"ACC_TRDVOL": 1000000,
|
||||
}])
|
||||
mock_get_client.return_value = mock_client
|
||||
|
||||
collector = ETFCollector(db)
|
||||
result = collector.collect()
|
||||
|
||||
assert result == 1
|
||||
etf = db.query(ETF).filter_by(ticker="069500").first()
|
||||
assert etf is not None
|
||||
assert etf.name == "KODEX 200"
|
||||
|
||||
@patch("app.services.collectors.etf_collector.get_krx_client")
|
||||
@patch("app.services.collectors.etf_collector.time.sleep")
|
||||
@patch("pykrx.website.krx.etx.core.ETF_전종목기본종목")
|
||||
@patch("app.services.collectors.base.BaseCollector._get_latest_biz_day", return_value="20260417")
|
||||
def test_openapi_failure_falls_back_to_pykrx(self, mock_biz, mock_etf_cls, mock_sleep, mock_get_client, db):
|
||||
"""When Open API fails, falls back to pykrx scraping."""
|
||||
mock_client = MagicMock()
|
||||
mock_client.get_etf_daily.side_effect = Exception("API down")
|
||||
mock_get_client.return_value = mock_client
|
||||
|
||||
good_df = pd.DataFrame([{
|
||||
"ISU_SRT_CD": "069500",
|
||||
"ISU_ABBRV": "KODEX 200",
|
||||
"IDX_ASST_CLSS_NM": "주식",
|
||||
"IDX_MKT_CLSS_NM": "코스피",
|
||||
"ETF_TOT_FEE": "0.15",
|
||||
}])
|
||||
mock_fetcher = MagicMock()
|
||||
mock_fetcher.fetch.return_value = good_df
|
||||
mock_etf_cls.return_value = mock_fetcher
|
||||
|
||||
collector = ETFCollector(db)
|
||||
result = collector.collect()
|
||||
|
||||
assert result == 1
|
||||
|
||||
|
||||
# ── ValuationCollector Tests ──
|
||||
|
||||
|
||||
|
||||
14
backend/uv.lock
generated
14
backend/uv.lock
generated
@ -485,6 +485,7 @@ dependencies = [
|
||||
{ name = "pydantic-settings" },
|
||||
{ name = "pyjwt", extra = ["crypto"] },
|
||||
{ name = "pykrx" },
|
||||
{ name = "pykrx-openapi" },
|
||||
{ name = "python-multipart" },
|
||||
{ name = "requests" },
|
||||
{ name = "setuptools" },
|
||||
@ -521,6 +522,7 @@ requires-dist = [
|
||||
{ name = "pydantic-settings", specifier = "==2.12.0" },
|
||||
{ name = "pyjwt", extras = ["crypto"], specifier = "==2.11.0" },
|
||||
{ name = "pykrx", specifier = ">=1.2.6" },
|
||||
{ name = "pykrx-openapi" },
|
||||
{ name = "pytest", marker = "extra == 'dev'", specifier = "==8.3.4" },
|
||||
{ name = "pytest-asyncio", marker = "extra == 'dev'", specifier = "==1.1.0" },
|
||||
{ name = "python-multipart", specifier = "==0.0.22" },
|
||||
@ -1290,6 +1292,18 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/7a/6a/5bd38daca2de1f514c861f9b175f2b11b4396fdfec3785110f56f1132c01/pykrx-1.2.6-py3-none-any.whl", hash = "sha256:3abb819efe501d2fab055ed913ebd407fa185bb74155378c1e2f831a0ed15398", size = 2196530, upload-time = "2026-04-14T10:17:40.063Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pykrx-openapi"
|
||||
version = "0.1.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "requests" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/9a/2d/893d9a77ac2e66e2c7dcdebe046e991a162b7a026942f8b2d0b4d5f84656/pykrx_openapi-0.1.1.tar.gz", hash = "sha256:891c89ddbc1a8e99044f3201b868bb7530b3e38453096834169615588c79ed06", size = 16062756, upload-time = "2026-01-20T13:58:55.672Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/49/b3/ae37e596307591f6305d89573103d21766337ffb4b65bdf681e9331c792b/pykrx_openapi-0.1.1-py3-none-any.whl", hash = "sha256:fd2a65224645db0fbec2d173ed4140d1bb25040a0a8de5aee3500ebd35114718", size = 12594, upload-time = "2026-01-20T13:58:53.438Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyparsing"
|
||||
version = "3.3.2"
|
||||
|
||||
97
docs/plans/2026-04-17-krx-openapi-migration.md
Normal file
97
docs/plans/2026-04-17-krx-openapi-migration.md
Normal file
@ -0,0 +1,97 @@
|
||||
# KRX Open API 전환 설계
|
||||
|
||||
## 배경
|
||||
- pykrx는 KRX 웹 스크래핑 방식으로 불안정 (로그인 필수화, 구조 변경 시 깨짐)
|
||||
- KRX Open API (openapi.krx.co.kr) 공식 REST API로 전환하여 안정성 확보
|
||||
- `pykrx-openapi` 라이브러리 활용 (MIT, pip install pykrx-openapi)
|
||||
|
||||
## 전환 범위
|
||||
|
||||
### 현재 pykrx 의존 Collector (5개)
|
||||
| Collector | pykrx 함수 | KRX Open API 대체 |
|
||||
|---|---|---|
|
||||
| StockCollector | `get_market_ticker_list`, `get_market_cap_by_ticker`, `get_market_fundamental_by_ticker` | `get_stock_base_info`, `get_stock_daily_trade`, `get_kosdaq_stock_daily_trade` |
|
||||
| PriceCollector | `get_market_ohlcv` | `get_stock_daily_trade`, `get_kosdaq_stock_daily_trade` |
|
||||
| ValuationCollector | `get_market_fundamental_by_ticker` | pykrx 유지 또는 별도 소스 (Open API에 PER/PBR 없음) |
|
||||
| ETFCollector | `ETF_전종목기본종목().fetch()` | `get_etf_daily_trade` (종목 목록 겸용) |
|
||||
| ETFPriceCollector | `get_etf_ohlcv_by_date` | `get_etf_daily_trade` |
|
||||
|
||||
### 주의: ValuationCollector
|
||||
KRX Open API 서비스 목록에 PER/PBR/배당수익률 API가 없음.
|
||||
→ ValuationCollector는 pykrx(스크래핑) 유지 또는 네이버/FnGuide 등 대체 소스 검토.
|
||||
→ 1차 전환에서는 pykrx fallback으로 유지.
|
||||
|
||||
### 스크립트 의존
|
||||
- `scripts/generate_snapshots.py` — `pykrx_stock.get_etf_ohlcv_by_date` 사용
|
||||
- `jobs/kjb_signal_job.py` — `Price.ticker == "069500"` (DB 조회, pykrx 직접 의존 없음)
|
||||
- `app/services/optimizer.py` — DB 조회만, pykrx 직접 의존 없음
|
||||
|
||||
## 구현 계획
|
||||
|
||||
### 1. KRX Open API 클라이언트 모듈 생성
|
||||
**파일:** `backend/app/services/krx_client.py`
|
||||
|
||||
```python
|
||||
from pykrx_openapi import KRXOpenAPI
|
||||
|
||||
class KRXClient:
|
||||
"""Thin wrapper around pykrx-openapi with project defaults."""
|
||||
|
||||
def __init__(self, api_key: str = None):
|
||||
self.client = KRXOpenAPI(
|
||||
api_key=api_key or os.getenv("KRX_OPENAPI_KEY"),
|
||||
rate_limit=10,
|
||||
per_seconds=1,
|
||||
timeout=30,
|
||||
)
|
||||
|
||||
def get_etf_daily(self, date: str) -> pd.DataFrame: ...
|
||||
def get_stock_daily(self, date: str, market: str) -> pd.DataFrame: ...
|
||||
def get_stock_base_info(self, date: str, market: str) -> pd.DataFrame: ...
|
||||
```
|
||||
|
||||
### 2. Collector 전환 (4개)
|
||||
각 collector에 `KRX_OPENAPI_KEY` 환경변수가 있으면 Open API 사용, 없으면 pykrx fallback.
|
||||
|
||||
- **ETFCollector** → `get_etf_daily_trade` 로 전종목 ETF 목록 + 기본정보 취득
|
||||
- **ETFPriceCollector** → `get_etf_daily_trade` 로 종가/거래량 취득
|
||||
- **StockCollector** → `get_stock_base_info` + `get_stock_daily_trade` + `get_kosdaq_stock_daily_trade`
|
||||
- **PriceCollector** → `get_stock_daily_trade` + `get_kosdaq_stock_daily_trade`
|
||||
|
||||
### 3. ValuationCollector
|
||||
1차: pykrx 유지 (KRX_ID/KRX_PW 사용)
|
||||
향후: 네이버 금융 또는 FnGuide 스크래핑으로 전환 검토
|
||||
|
||||
### 4. generate_snapshots.py 전환
|
||||
`pykrx_stock.get_etf_ohlcv_by_date` → `KRXClient.get_etf_daily`
|
||||
|
||||
### 5. 의존성 변경
|
||||
- `pyproject.toml`: `pykrx-openapi` 추가
|
||||
- `pykrx` 는 ValuationCollector 용으로 유지
|
||||
- `.env.example`: `KRX_OPENAPI_KEY` 추가
|
||||
- Gitea Secrets: `KRX_OPENAPI_KEY` 추가
|
||||
|
||||
### 6. 테스트
|
||||
- 기존 unit test 업데이트 (mock 대상 변경)
|
||||
- KRX Open API mock으로 테스트
|
||||
|
||||
## 환경 변수
|
||||
```
|
||||
KRX_OPENAPI_KEY=xxx # KRX Open API 인증키 (신규)
|
||||
KRX_ID=xxx # pykrx 웹 로그인 (ValuationCollector용, 유지)
|
||||
KRX_PW=xxx # pykrx 웹 로그인 (ValuationCollector용, 유지)
|
||||
```
|
||||
|
||||
## 롤백 전략
|
||||
- `KRX_OPENAPI_KEY` 환경변수를 제거하면 자동으로 pykrx fallback
|
||||
- 기존 pykrx 코드는 삭제하지 않고 fallback으로 유지
|
||||
|
||||
## 작업 완료 조건
|
||||
- [ ] KRX Open API 클라이언트 모듈
|
||||
- [ ] ETFCollector 전환
|
||||
- [ ] ETFPriceCollector 전환
|
||||
- [ ] StockCollector 전환
|
||||
- [ ] PriceCollector 전환
|
||||
- [ ] generate_snapshots.py 전환
|
||||
- [ ] 테스트 통과
|
||||
- [ ] .env.example / deploy.yml 업데이트
|
||||
Loading…
x
Reference in New Issue
Block a user