From ecb3dca571e7040d2af60f2bce012c27e0b8b9f2 Mon Sep 17 00:00:00 2001 From: zephyrdark Date: Sun, 8 Feb 2026 23:00:27 +0900 Subject: [PATCH] feat: add ETF data collectors and admin API endpoints Add ETFCollector (KRX master data) and ETFPriceCollector (pykrx OHLCV) with corresponding admin API endpoints and frontend collection UI buttons. Co-Authored-By: Claude Opus 4.6 --- backend/app/api/admin.py | 38 ++++++ backend/app/services/collectors/__init__.py | 4 + .../app/services/collectors/etf_collector.py | 92 ++++++++++++++ .../collectors/etf_price_collector.py | 116 ++++++++++++++++++ frontend/src/app/admin/data/page.tsx | 4 +- 5 files changed, 253 insertions(+), 1 deletion(-) create mode 100644 backend/app/services/collectors/etf_collector.py create mode 100644 backend/app/services/collectors/etf_price_collector.py diff --git a/backend/app/api/admin.py b/backend/app/api/admin.py index f36d3d8..c6e4916 100644 --- a/backend/app/api/admin.py +++ b/backend/app/api/admin.py @@ -16,6 +16,8 @@ from app.services.collectors import ( SectorCollector, PriceCollector, ValuationCollector, + ETFCollector, + ETFPriceCollector, ) router = APIRouter(prefix="/api/admin", tags=["admin"]) @@ -112,6 +114,42 @@ async def collect_valuations( raise HTTPException(status_code=500, detail=str(e)) +@router.post("/collect/etfs", response_model=CollectResponse) +async def collect_etfs( + current_user: CurrentUser, + db: Session = Depends(get_db), +): + """Collect ETF master data from KRX.""" + try: + collector = ETFCollector(db) + job_log = collector.run() + return CollectResponse( + message=f"ETF collection completed: {job_log.records_count} records", + job_id=job_log.id, + ) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@router.post("/collect/etf-prices", response_model=CollectResponse) +async def collect_etf_prices( + current_user: CurrentUser, + db: Session = Depends(get_db), + start_date: str = Query(None, pattern=r"^\d{8}$", description="Start date in YYYYMMDD format"), + end_date: str = Query(None, pattern=r"^\d{8}$", description="End date in YYYYMMDD format"), +): + """Collect ETF price data using pykrx.""" + try: + collector = ETFPriceCollector(db, start_date=start_date, end_date=end_date) + job_log = collector.run() + return CollectResponse( + message=f"ETF price collection completed: {job_log.records_count} records", + job_id=job_log.id, + ) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + @router.get("/collect/status", response_model=List[JobLogResponse]) async def get_collection_status( current_user: CurrentUser, diff --git a/backend/app/services/collectors/__init__.py b/backend/app/services/collectors/__init__.py index 4db73b6..bdbb596 100644 --- a/backend/app/services/collectors/__init__.py +++ b/backend/app/services/collectors/__init__.py @@ -3,6 +3,8 @@ from app.services.collectors.stock_collector import StockCollector from app.services.collectors.sector_collector import SectorCollector from app.services.collectors.price_collector import PriceCollector from app.services.collectors.valuation_collector import ValuationCollector +from app.services.collectors.etf_collector import ETFCollector +from app.services.collectors.etf_price_collector import ETFPriceCollector __all__ = [ "BaseCollector", @@ -10,4 +12,6 @@ __all__ = [ "SectorCollector", "PriceCollector", "ValuationCollector", + "ETFCollector", + "ETFPriceCollector", ] diff --git a/backend/app/services/collectors/etf_collector.py b/backend/app/services/collectors/etf_collector.py new file mode 100644 index 0000000..acc1807 --- /dev/null +++ b/backend/app/services/collectors/etf_collector.py @@ -0,0 +1,92 @@ +""" +ETF master data collector from KRX. +""" +import logging + +import pandas as pd +from pykrx.website.krx.etf.core import ETF_전종목기본종목 + +from sqlalchemy.orm import Session +from sqlalchemy.dialects.postgresql import insert + +from app.services.collectors.base import BaseCollector +from app.models.stock import ETF, AssetClass + +logger = logging.getLogger(__name__) + + +class ETFCollector(BaseCollector): + """Collects ETF master data from KRX.""" + + def __init__(self, db: Session): + super().__init__(db) + + def _classify_asset_class(self, asset_class_str: str, name: str) -> str: + """Classify ETF asset class.""" + if not asset_class_str or pd.isna(asset_class_str): + return AssetClass.MIXED.value + + if "주식" in asset_class_str: + return AssetClass.EQUITY.value + elif "채권" in asset_class_str: + return AssetClass.BOND.value + elif "부동산" in asset_class_str: + return AssetClass.EQUITY.value + elif "원자재" in asset_class_str: + name_lower = name if name else "" + if "금" in name_lower or "골드" in name_lower: + return AssetClass.GOLD.value + return AssetClass.MIXED.value + else: + return AssetClass.MIXED.value + + def collect(self) -> int: + """Collect ETF master data.""" + df = ETF_전종목기본종목().read() + + if df.empty: + logger.warning("No ETF data returned from KRX.") + return 0 + + records = [] + for _, row in df.iterrows(): + ticker = row.get("ISU_SRT_CD") + name = row.get("ISU_ABBRV") + if not ticker or pd.isna(ticker) or not name or pd.isna(name): + continue + + asset_class_str = row.get("IDX_ASST_CLSS_NM", "") + market = row.get("IDX_MKT_CLSS_NM", "") + expense_ratio_raw = row.get("ETF_TOT_FEE") + + expense_ratio = None + if expense_ratio_raw and not pd.isna(expense_ratio_raw): + try: + expense_ratio = float(expense_ratio_raw) + except (ValueError, TypeError): + pass + + records.append({ + "ticker": ticker, + "name": name, + "asset_class": self._classify_asset_class(asset_class_str, name), + "market": market if market and not pd.isna(market) else "", + "expense_ratio": expense_ratio, + }) + + if records: + stmt = insert(ETF).values(records) + stmt = stmt.on_conflict_do_update( + index_elements=["ticker"], + set_={ + "name": stmt.excluded.name, + "asset_class": stmt.excluded.asset_class, + "market": stmt.excluded.market, + "expense_ratio": stmt.excluded.expense_ratio, + }, + ) + self.db.execute(stmt) + self.db.commit() + + logger.info(f"Collected {len(records)} ETF records") + return len(records) diff --git a/backend/app/services/collectors/etf_price_collector.py b/backend/app/services/collectors/etf_price_collector.py new file mode 100644 index 0000000..22d8c78 --- /dev/null +++ b/backend/app/services/collectors/etf_price_collector.py @@ -0,0 +1,116 @@ +""" +ETF price data collector using pykrx. +""" +import logging +from datetime import datetime, timedelta + +import pandas as pd +from pykrx import stock as pykrx_stock + +from sqlalchemy.orm import Session +from sqlalchemy.dialects.postgresql import insert + +from app.services.collectors.base import BaseCollector +from app.models.stock import ETFPrice, ETF + +logger = logging.getLogger(__name__) + + +class ETFPriceCollector(BaseCollector): + """Collects daily ETF price data.""" + + def __init__(self, db: Session, start_date: str = None, end_date: str = None): + super().__init__(db) + self.end_date = end_date or datetime.now().strftime("%Y%m%d") + self.start_date = start_date or ( + datetime.now() - timedelta(days=7) + ).strftime("%Y%m%d") + self._validate_dates() + + def _validate_dates(self) -> None: + """Validate date formats.""" + for date_str, name in [(self.start_date, "start_date"), (self.end_date, "end_date")]: + try: + datetime.strptime(date_str, "%Y%m%d") + except ValueError: + raise ValueError(f"Invalid {name} format. Expected YYYYMMDD, got: {date_str}") + + def _safe_float(self, value) -> float | None: + """Safely convert value to float.""" + if pd.isna(value): + return None + try: + return float(value) + except (ValueError, TypeError): + return None + + def _safe_int(self, value) -> int | None: + """Safely convert value to int.""" + if pd.isna(value): + return None + try: + return int(float(value)) + except (ValueError, TypeError): + return None + + def collect(self) -> int: + """Collect price data for all ETFs.""" + tickers = self.db.query(ETF.ticker).all() + ticker_list = [t[0] for t in tickers] + + if not ticker_list: + logger.warning("No ETFs found in database. Run ETFCollector first.") + return 0 + + total_records = 0 + logger.info(f"Collecting ETF prices for {len(ticker_list)} ETFs from {self.start_date} to {self.end_date}") + + for ticker in ticker_list: + try: + df = pykrx_stock.get_etf_ohlcv_by_date( + self.start_date, self.end_date, ticker + ) + if df.empty: + continue + + df = df.reset_index() + + records = [] + for _, row in df.iterrows(): + close_val = self._safe_float(row.get("종가")) + if close_val is None: + continue + + nav_val = self._safe_float(row.get("NAV")) + volume_val = self._safe_int(row.get("거래량")) + date_value = row["날짜"].date() if hasattr(row["날짜"], "date") else row["날짜"] + + records.append({ + "ticker": ticker, + "date": date_value, + "close": close_val, + "nav": nav_val, + "volume": volume_val, + }) + + if records: + stmt = insert(ETFPrice).values(records) + stmt = stmt.on_conflict_do_update( + index_elements=["ticker", "date"], + set_={ + "close": stmt.excluded.close, + "nav": stmt.excluded.nav, + "volume": stmt.excluded.volume, + }, + ) + self.db.execute(stmt) + self.db.commit() + total_records += len(records) + + except Exception as e: + self.db.rollback() + logger.warning(f"Failed to fetch ETF prices for {ticker}: {e}") + continue + + logger.info(f"Collected {total_records} ETF price records") + return total_records diff --git a/frontend/src/app/admin/data/page.tsx b/frontend/src/app/admin/data/page.tsx index 339bf54..2977cc7 100644 --- a/frontend/src/app/admin/data/page.tsx +++ b/frontend/src/app/admin/data/page.tsx @@ -22,6 +22,8 @@ const collectors = [ { key: 'sectors', label: '섹터 정보', description: 'WISEindex에서 섹터 분류 수집' }, { key: 'prices', label: '가격 데이터', description: 'pykrx로 OHLCV 데이터 수집' }, { key: 'valuations', label: '밸류 지표', description: 'KRX에서 PER/PBR 등 수집' }, + { key: 'etfs', label: 'ETF 마스터', description: 'KRX에서 ETF 종목 정보 수집' }, + { key: 'etf-prices', label: 'ETF 가격', description: 'pykrx로 ETF 가격 데이터 수집' }, ]; export default function DataManagementPage() { @@ -110,7 +112,7 @@ export default function DataManagementPage() { 수집 작업 -
+
{collectors.map((col) => (