fix: bulk fallback price collection
This commit is contained in:
parent
5a452e4714
commit
0bcf5bbf23
@ -147,88 +147,94 @@ class PriceCollector(BaseCollector):
|
|||||||
return total_records
|
return total_records
|
||||||
|
|
||||||
def _collect_pykrx(self) -> int:
|
def _collect_pykrx(self) -> int:
|
||||||
"""Collect stock prices via pykrx scraping (ticker-based loop)."""
|
"""Collect stock prices via pykrx scraping (date/market bulk fetch)."""
|
||||||
from pykrx import stock as pykrx_stock
|
from pykrx import stock as pykrx_stock
|
||||||
|
|
||||||
tickers = self.db.query(Stock.ticker).all()
|
tickers = self.db.query(Stock.ticker, Stock.market).all()
|
||||||
ticker_list = [t[0] for t in tickers]
|
ticker_market = {ticker: market for ticker, market in tickers}
|
||||||
|
|
||||||
if not ticker_list:
|
if not ticker_market:
|
||||||
logger.warning("No stocks found in database. Run StockCollector first.")
|
logger.warning("No stocks found in database. Run StockCollector first.")
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
total_records = 0
|
total_records = 0
|
||||||
logger.info(f"Collecting prices for {len(ticker_list)} stocks from {self.start_date} to {self.end_date}")
|
logger.info(
|
||||||
|
"Collecting prices for %d stocks from %s to %s",
|
||||||
|
len(ticker_market), self.start_date, self.end_date,
|
||||||
|
)
|
||||||
|
|
||||||
for ticker in ticker_list:
|
start = datetime.strptime(self.start_date, "%Y%m%d")
|
||||||
try:
|
end = datetime.strptime(self.end_date, "%Y%m%d")
|
||||||
df = pykrx_stock.get_market_ohlcv(
|
current = start
|
||||||
self.start_date, self.end_date, ticker
|
|
||||||
)
|
|
||||||
if df.empty:
|
|
||||||
continue
|
|
||||||
|
|
||||||
df = df.reset_index()
|
def get_value(row, *names):
|
||||||
df.columns = ["date", "open", "high", "low", "close", "volume",
|
for name in names:
|
||||||
"value"]
|
if name in row:
|
||||||
|
return row[name]
|
||||||
|
return None
|
||||||
|
|
||||||
expected_cols = 7
|
while current <= end:
|
||||||
if len(df.columns) < expected_cols:
|
date_str = current.strftime("%Y%m%d")
|
||||||
logger.warning(f"Unexpected column count for {ticker}: {len(df.columns)}")
|
date_value = current.date()
|
||||||
continue
|
|
||||||
|
|
||||||
records = []
|
for market in ("KOSPI", "KOSDAQ"):
|
||||||
for _, row in df.iterrows():
|
try:
|
||||||
open_val = self._safe_float(row["open"])
|
df = pykrx_stock.get_market_ohlcv(date_str, market=market)
|
||||||
high_val = self._safe_float(row["high"])
|
if df is None or df.empty:
|
||||||
low_val = self._safe_float(row["low"])
|
|
||||||
close_val = self._safe_float(row["close"])
|
|
||||||
volume_val = self._safe_int(row["volume"])
|
|
||||||
|
|
||||||
if close_val is None:
|
|
||||||
logger.debug(f"Skipping record for {ticker}: missing close price")
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
date_value = row["date"].date() if hasattr(row["date"], "date") else row["date"]
|
records = []
|
||||||
records.append({
|
for ticker, row in df.iterrows():
|
||||||
"ticker": ticker,
|
ticker = str(ticker).zfill(6)
|
||||||
"date": date_value,
|
if ticker_market.get(ticker) != market:
|
||||||
"open": open_val,
|
continue
|
||||||
"high": high_val,
|
|
||||||
"low": low_val,
|
|
||||||
"close": close_val,
|
|
||||||
"volume": volume_val,
|
|
||||||
"trading_value": self._safe_int(row["value"]),
|
|
||||||
})
|
|
||||||
|
|
||||||
if records:
|
close_val = self._safe_float(get_value(row, "종가", "close"))
|
||||||
stmt = insert(Price).values(records)
|
if close_val is None or close_val == 0:
|
||||||
stmt = stmt.on_conflict_do_update(
|
continue
|
||||||
index_elements=["ticker", "date"],
|
|
||||||
set_={
|
records.append({
|
||||||
"open": stmt.excluded.open,
|
"ticker": ticker,
|
||||||
"high": stmt.excluded.high,
|
"date": date_value,
|
||||||
"low": stmt.excluded.low,
|
"open": self._safe_float(get_value(row, "시가", "open")),
|
||||||
"close": stmt.excluded.close,
|
"high": self._safe_float(get_value(row, "고가", "high")),
|
||||||
"volume": stmt.excluded.volume,
|
"low": self._safe_float(get_value(row, "저가", "low")),
|
||||||
"trading_value": stmt.excluded.trading_value,
|
"close": close_val,
|
||||||
},
|
"volume": self._safe_int(get_value(row, "거래량", "volume")),
|
||||||
|
"trading_value": self._safe_int(get_value(row, "거래대금", "value", "trading_value")),
|
||||||
|
})
|
||||||
|
|
||||||
|
if records:
|
||||||
|
stmt = insert(Price).values(records)
|
||||||
|
stmt = stmt.on_conflict_do_update(
|
||||||
|
index_elements=["ticker", "date"],
|
||||||
|
set_={
|
||||||
|
"open": stmt.excluded.open,
|
||||||
|
"high": stmt.excluded.high,
|
||||||
|
"low": stmt.excluded.low,
|
||||||
|
"close": stmt.excluded.close,
|
||||||
|
"volume": stmt.excluded.volume,
|
||||||
|
"trading_value": stmt.excluded.trading_value,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
self.db.execute(stmt)
|
||||||
|
self.db.commit()
|
||||||
|
total_records += len(records)
|
||||||
|
|
||||||
|
except JSONDecodeError as e:
|
||||||
|
self.db.rollback()
|
||||||
|
logger.warning(
|
||||||
|
"Price fetch for %s %s: JSON decode error (%s). "
|
||||||
|
"KRX may require login — set KRX_ID/KRX_PW env vars.",
|
||||||
|
market, date_str, e,
|
||||||
)
|
)
|
||||||
self.db.execute(stmt)
|
continue
|
||||||
self.db.commit()
|
except Exception as e:
|
||||||
total_records += len(records)
|
self.db.rollback()
|
||||||
|
logger.warning("Failed to fetch prices for %s %s: %s", market, date_str, e)
|
||||||
|
continue
|
||||||
|
|
||||||
except JSONDecodeError as e:
|
current += timedelta(days=1)
|
||||||
self.db.rollback()
|
|
||||||
logger.warning(
|
|
||||||
f"Price fetch for {ticker}: JSON decode error ({e}). "
|
|
||||||
"KRX may require login — set KRX_ID/KRX_PW env vars."
|
|
||||||
)
|
|
||||||
continue
|
|
||||||
except Exception as e:
|
|
||||||
self.db.rollback()
|
|
||||||
logger.warning(f"Failed to fetch prices for {ticker}: {e}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
return total_records
|
return total_records
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user