feat: add ETF data collectors and admin API endpoints
Add ETFCollector (KRX master data) and ETFPriceCollector (pykrx OHLCV) with corresponding admin API endpoints and frontend collection UI buttons. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
72c72994b2
commit
ecb3dca571
@ -16,6 +16,8 @@ from app.services.collectors import (
|
|||||||
SectorCollector,
|
SectorCollector,
|
||||||
PriceCollector,
|
PriceCollector,
|
||||||
ValuationCollector,
|
ValuationCollector,
|
||||||
|
ETFCollector,
|
||||||
|
ETFPriceCollector,
|
||||||
)
|
)
|
||||||
|
|
||||||
router = APIRouter(prefix="/api/admin", tags=["admin"])
|
router = APIRouter(prefix="/api/admin", tags=["admin"])
|
||||||
@ -112,6 +114,42 @@ async def collect_valuations(
|
|||||||
raise HTTPException(status_code=500, detail=str(e))
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/collect/etfs", response_model=CollectResponse)
|
||||||
|
async def collect_etfs(
|
||||||
|
current_user: CurrentUser,
|
||||||
|
db: Session = Depends(get_db),
|
||||||
|
):
|
||||||
|
"""Collect ETF master data from KRX."""
|
||||||
|
try:
|
||||||
|
collector = ETFCollector(db)
|
||||||
|
job_log = collector.run()
|
||||||
|
return CollectResponse(
|
||||||
|
message=f"ETF collection completed: {job_log.records_count} records",
|
||||||
|
job_id=job_log.id,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/collect/etf-prices", response_model=CollectResponse)
|
||||||
|
async def collect_etf_prices(
|
||||||
|
current_user: CurrentUser,
|
||||||
|
db: Session = Depends(get_db),
|
||||||
|
start_date: str = Query(None, pattern=r"^\d{8}$", description="Start date in YYYYMMDD format"),
|
||||||
|
end_date: str = Query(None, pattern=r"^\d{8}$", description="End date in YYYYMMDD format"),
|
||||||
|
):
|
||||||
|
"""Collect ETF price data using pykrx."""
|
||||||
|
try:
|
||||||
|
collector = ETFPriceCollector(db, start_date=start_date, end_date=end_date)
|
||||||
|
job_log = collector.run()
|
||||||
|
return CollectResponse(
|
||||||
|
message=f"ETF price collection completed: {job_log.records_count} records",
|
||||||
|
job_id=job_log.id,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
@router.get("/collect/status", response_model=List[JobLogResponse])
|
@router.get("/collect/status", response_model=List[JobLogResponse])
|
||||||
async def get_collection_status(
|
async def get_collection_status(
|
||||||
current_user: CurrentUser,
|
current_user: CurrentUser,
|
||||||
|
|||||||
@ -3,6 +3,8 @@ from app.services.collectors.stock_collector import StockCollector
|
|||||||
from app.services.collectors.sector_collector import SectorCollector
|
from app.services.collectors.sector_collector import SectorCollector
|
||||||
from app.services.collectors.price_collector import PriceCollector
|
from app.services.collectors.price_collector import PriceCollector
|
||||||
from app.services.collectors.valuation_collector import ValuationCollector
|
from app.services.collectors.valuation_collector import ValuationCollector
|
||||||
|
from app.services.collectors.etf_collector import ETFCollector
|
||||||
|
from app.services.collectors.etf_price_collector import ETFPriceCollector
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"BaseCollector",
|
"BaseCollector",
|
||||||
@ -10,4 +12,6 @@ __all__ = [
|
|||||||
"SectorCollector",
|
"SectorCollector",
|
||||||
"PriceCollector",
|
"PriceCollector",
|
||||||
"ValuationCollector",
|
"ValuationCollector",
|
||||||
|
"ETFCollector",
|
||||||
|
"ETFPriceCollector",
|
||||||
]
|
]
|
||||||
|
|||||||
92
backend/app/services/collectors/etf_collector.py
Normal file
92
backend/app/services/collectors/etf_collector.py
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
"""
|
||||||
|
ETF master data collector from KRX.
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
from pykrx.website.krx.etf.core import ETF_전종목기본종목
|
||||||
|
|
||||||
|
from sqlalchemy.orm import Session
|
||||||
|
from sqlalchemy.dialects.postgresql import insert
|
||||||
|
|
||||||
|
from app.services.collectors.base import BaseCollector
|
||||||
|
from app.models.stock import ETF, AssetClass
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class ETFCollector(BaseCollector):
|
||||||
|
"""Collects ETF master data from KRX."""
|
||||||
|
|
||||||
|
def __init__(self, db: Session):
|
||||||
|
super().__init__(db)
|
||||||
|
|
||||||
|
def _classify_asset_class(self, asset_class_str: str, name: str) -> str:
|
||||||
|
"""Classify ETF asset class."""
|
||||||
|
if not asset_class_str or pd.isna(asset_class_str):
|
||||||
|
return AssetClass.MIXED.value
|
||||||
|
|
||||||
|
if "주식" in asset_class_str:
|
||||||
|
return AssetClass.EQUITY.value
|
||||||
|
elif "채권" in asset_class_str:
|
||||||
|
return AssetClass.BOND.value
|
||||||
|
elif "부동산" in asset_class_str:
|
||||||
|
return AssetClass.EQUITY.value
|
||||||
|
elif "원자재" in asset_class_str:
|
||||||
|
name_lower = name if name else ""
|
||||||
|
if "금" in name_lower or "골드" in name_lower:
|
||||||
|
return AssetClass.GOLD.value
|
||||||
|
return AssetClass.MIXED.value
|
||||||
|
else:
|
||||||
|
return AssetClass.MIXED.value
|
||||||
|
|
||||||
|
def collect(self) -> int:
|
||||||
|
"""Collect ETF master data."""
|
||||||
|
df = ETF_전종목기본종목().read()
|
||||||
|
|
||||||
|
if df.empty:
|
||||||
|
logger.warning("No ETF data returned from KRX.")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
records = []
|
||||||
|
for _, row in df.iterrows():
|
||||||
|
ticker = row.get("ISU_SRT_CD")
|
||||||
|
name = row.get("ISU_ABBRV")
|
||||||
|
if not ticker or pd.isna(ticker) or not name or pd.isna(name):
|
||||||
|
continue
|
||||||
|
|
||||||
|
asset_class_str = row.get("IDX_ASST_CLSS_NM", "")
|
||||||
|
market = row.get("IDX_MKT_CLSS_NM", "")
|
||||||
|
expense_ratio_raw = row.get("ETF_TOT_FEE")
|
||||||
|
|
||||||
|
expense_ratio = None
|
||||||
|
if expense_ratio_raw and not pd.isna(expense_ratio_raw):
|
||||||
|
try:
|
||||||
|
expense_ratio = float(expense_ratio_raw)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
records.append({
|
||||||
|
"ticker": ticker,
|
||||||
|
"name": name,
|
||||||
|
"asset_class": self._classify_asset_class(asset_class_str, name),
|
||||||
|
"market": market if market and not pd.isna(market) else "",
|
||||||
|
"expense_ratio": expense_ratio,
|
||||||
|
})
|
||||||
|
|
||||||
|
if records:
|
||||||
|
stmt = insert(ETF).values(records)
|
||||||
|
stmt = stmt.on_conflict_do_update(
|
||||||
|
index_elements=["ticker"],
|
||||||
|
set_={
|
||||||
|
"name": stmt.excluded.name,
|
||||||
|
"asset_class": stmt.excluded.asset_class,
|
||||||
|
"market": stmt.excluded.market,
|
||||||
|
"expense_ratio": stmt.excluded.expense_ratio,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
self.db.execute(stmt)
|
||||||
|
self.db.commit()
|
||||||
|
|
||||||
|
logger.info(f"Collected {len(records)} ETF records")
|
||||||
|
return len(records)
|
||||||
116
backend/app/services/collectors/etf_price_collector.py
Normal file
116
backend/app/services/collectors/etf_price_collector.py
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
"""
|
||||||
|
ETF price data collector using pykrx.
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
from pykrx import stock as pykrx_stock
|
||||||
|
|
||||||
|
from sqlalchemy.orm import Session
|
||||||
|
from sqlalchemy.dialects.postgresql import insert
|
||||||
|
|
||||||
|
from app.services.collectors.base import BaseCollector
|
||||||
|
from app.models.stock import ETFPrice, ETF
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class ETFPriceCollector(BaseCollector):
|
||||||
|
"""Collects daily ETF price data."""
|
||||||
|
|
||||||
|
def __init__(self, db: Session, start_date: str = None, end_date: str = None):
|
||||||
|
super().__init__(db)
|
||||||
|
self.end_date = end_date or datetime.now().strftime("%Y%m%d")
|
||||||
|
self.start_date = start_date or (
|
||||||
|
datetime.now() - timedelta(days=7)
|
||||||
|
).strftime("%Y%m%d")
|
||||||
|
self._validate_dates()
|
||||||
|
|
||||||
|
def _validate_dates(self) -> None:
|
||||||
|
"""Validate date formats."""
|
||||||
|
for date_str, name in [(self.start_date, "start_date"), (self.end_date, "end_date")]:
|
||||||
|
try:
|
||||||
|
datetime.strptime(date_str, "%Y%m%d")
|
||||||
|
except ValueError:
|
||||||
|
raise ValueError(f"Invalid {name} format. Expected YYYYMMDD, got: {date_str}")
|
||||||
|
|
||||||
|
def _safe_float(self, value) -> float | None:
|
||||||
|
"""Safely convert value to float."""
|
||||||
|
if pd.isna(value):
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return float(value)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _safe_int(self, value) -> int | None:
|
||||||
|
"""Safely convert value to int."""
|
||||||
|
if pd.isna(value):
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return int(float(value))
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
def collect(self) -> int:
|
||||||
|
"""Collect price data for all ETFs."""
|
||||||
|
tickers = self.db.query(ETF.ticker).all()
|
||||||
|
ticker_list = [t[0] for t in tickers]
|
||||||
|
|
||||||
|
if not ticker_list:
|
||||||
|
logger.warning("No ETFs found in database. Run ETFCollector first.")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
total_records = 0
|
||||||
|
logger.info(f"Collecting ETF prices for {len(ticker_list)} ETFs from {self.start_date} to {self.end_date}")
|
||||||
|
|
||||||
|
for ticker in ticker_list:
|
||||||
|
try:
|
||||||
|
df = pykrx_stock.get_etf_ohlcv_by_date(
|
||||||
|
self.start_date, self.end_date, ticker
|
||||||
|
)
|
||||||
|
if df.empty:
|
||||||
|
continue
|
||||||
|
|
||||||
|
df = df.reset_index()
|
||||||
|
|
||||||
|
records = []
|
||||||
|
for _, row in df.iterrows():
|
||||||
|
close_val = self._safe_float(row.get("종가"))
|
||||||
|
if close_val is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
nav_val = self._safe_float(row.get("NAV"))
|
||||||
|
volume_val = self._safe_int(row.get("거래량"))
|
||||||
|
date_value = row["날짜"].date() if hasattr(row["날짜"], "date") else row["날짜"]
|
||||||
|
|
||||||
|
records.append({
|
||||||
|
"ticker": ticker,
|
||||||
|
"date": date_value,
|
||||||
|
"close": close_val,
|
||||||
|
"nav": nav_val,
|
||||||
|
"volume": volume_val,
|
||||||
|
})
|
||||||
|
|
||||||
|
if records:
|
||||||
|
stmt = insert(ETFPrice).values(records)
|
||||||
|
stmt = stmt.on_conflict_do_update(
|
||||||
|
index_elements=["ticker", "date"],
|
||||||
|
set_={
|
||||||
|
"close": stmt.excluded.close,
|
||||||
|
"nav": stmt.excluded.nav,
|
||||||
|
"volume": stmt.excluded.volume,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
self.db.execute(stmt)
|
||||||
|
self.db.commit()
|
||||||
|
total_records += len(records)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.db.rollback()
|
||||||
|
logger.warning(f"Failed to fetch ETF prices for {ticker}: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
logger.info(f"Collected {total_records} ETF price records")
|
||||||
|
return total_records
|
||||||
@ -22,6 +22,8 @@ const collectors = [
|
|||||||
{ key: 'sectors', label: '섹터 정보', description: 'WISEindex에서 섹터 분류 수집' },
|
{ key: 'sectors', label: '섹터 정보', description: 'WISEindex에서 섹터 분류 수집' },
|
||||||
{ key: 'prices', label: '가격 데이터', description: 'pykrx로 OHLCV 데이터 수집' },
|
{ key: 'prices', label: '가격 데이터', description: 'pykrx로 OHLCV 데이터 수집' },
|
||||||
{ key: 'valuations', label: '밸류 지표', description: 'KRX에서 PER/PBR 등 수집' },
|
{ key: 'valuations', label: '밸류 지표', description: 'KRX에서 PER/PBR 등 수집' },
|
||||||
|
{ key: 'etfs', label: 'ETF 마스터', description: 'KRX에서 ETF 종목 정보 수집' },
|
||||||
|
{ key: 'etf-prices', label: 'ETF 가격', description: 'pykrx로 ETF 가격 데이터 수집' },
|
||||||
];
|
];
|
||||||
|
|
||||||
export default function DataManagementPage() {
|
export default function DataManagementPage() {
|
||||||
@ -110,7 +112,7 @@ export default function DataManagementPage() {
|
|||||||
<CardTitle>수집 작업</CardTitle>
|
<CardTitle>수집 작업</CardTitle>
|
||||||
</CardHeader>
|
</CardHeader>
|
||||||
<CardContent>
|
<CardContent>
|
||||||
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-4">
|
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
|
||||||
{collectors.map((col) => (
|
{collectors.map((col) => (
|
||||||
<div
|
<div
|
||||||
key={col.key}
|
key={col.key}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user