fix: collector error
All checks were successful
Deploy to Production / deploy (push) Successful in 1m8s
All checks were successful
Deploy to Production / deploy (push) Successful in 1m8s
This commit is contained in:
parent
9f756331c4
commit
72c72994b2
@ -1,18 +1,26 @@
|
||||
"""
|
||||
Base collector class for data collection jobs.
|
||||
"""
|
||||
import logging
|
||||
import re
|
||||
from abc import ABC, abstractmethod
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.models.stock import JobLog
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BaseCollector(ABC):
|
||||
"""Base class for all data collectors."""
|
||||
|
||||
REQUEST_TIMEOUT = 10
|
||||
|
||||
def __init__(self, db: Session):
|
||||
if db is None:
|
||||
raise ValueError("Database session cannot be None")
|
||||
@ -20,6 +28,17 @@ class BaseCollector(ABC):
|
||||
self.job_name = self.__class__.__name__
|
||||
self.job_log: Optional[JobLog] = None
|
||||
|
||||
def _get_latest_biz_day(self) -> str:
    """Get the latest business day from Naver Finance.

    Requests the KOSPI index page and reads the text of the
    ``div.ly_realtime > span#time`` element, keeping only its digits.

    Returns:
        The first eight digits of that text (presumably YYYYMMDD, the
        format the collectors validate against).

    Raises:
        requests.RequestException: If the request fails or the server
            answers with an HTTP error status.
        RuntimeError: If the time element is missing or does not contain
            at least eight digits.
    """
    url = "https://finance.naver.com/sise/sise_index.naver?code=KOSPI"
    response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
    # Surface 4xx/5xx responses instead of parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "lxml")
    time_elem = soup.select_one("div.ly_realtime > span#time")
    if time_elem:
        date_str = re.sub(r"[^0-9]", "", time_elem.text)
        # Fewer than eight digits cannot form a full date; treat it as a
        # detection failure rather than returning a truncated value.
        if len(date_str) >= 8:
            return date_str[:8]
    raise RuntimeError("Failed to detect latest business day from Naver Finance")
|
||||
|
||||
def start_job(self) -> JobLog:
|
||||
"""Create a job log entry when starting."""
|
||||
self.job_log = JobLog(
|
||||
|
||||
@ -24,7 +24,7 @@ class SectorCollector(BaseCollector):
|
||||
|
||||
def __init__(self, db: Session, biz_day: str = None):
    """Set up the collector for one business day.

    Args:
        db: Database session, forwarded to ``BaseCollector.__init__``.
        biz_day: Business day string. When omitted or empty, the value is
            looked up through ``self._get_latest_biz_day()``.

    The resolved value is stored on ``self.biz_day`` and then checked by
    ``self._validate_biz_day()``.
    """
    super().__init__(db)
    # Single assignment: the earlier datetime.now() default was a dead store
    # that this lookup overwrote on the very next line.
    self.biz_day = biz_day or self._get_latest_biz_day()
    self._validate_biz_day()
|
||||
|
||||
def _validate_biz_day(self) -> None:
|
||||
@ -43,7 +43,7 @@ class SectorCollector(BaseCollector):
|
||||
try:
|
||||
response = requests.get(url, timeout=10)
|
||||
data = response.json()
|
||||
if "list" in data:
|
||||
if "list" in data and data["list"]:
|
||||
df = pd.json_normalize(data["list"])
|
||||
all_data.append(df)
|
||||
except (requests.RequestException, ValueError, KeyError) as e:
|
||||
|
||||
@ -24,8 +24,8 @@ class StockCollector(BaseCollector):
|
||||
GEN_OTP_URL = "http://data.krx.co.kr/comm/fileDn/GenerateOTP/generate.cmd"
|
||||
DOWN_URL = "http://data.krx.co.kr/comm/fileDn/download_csv/download.cmd"
|
||||
HEADERS = {
|
||||
"Referer": "http://data.krx.co.kr/contents/MDC/MDI/mdiLoader",
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
|
||||
"Referer": "http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201050201",
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
|
||||
}
|
||||
REQUEST_TIMEOUT = 10
|
||||
RATE_LIMIT_DELAY = 1
|
||||
@ -42,20 +42,6 @@ class StockCollector(BaseCollector):
|
||||
except ValueError:
|
||||
raise ValueError(f"Invalid biz_day format. Expected YYYYMMDD, got: {self.biz_day}")
|
||||
|
||||
def _get_latest_biz_day(self) -> str:
    """Return the latest business day as shown on Naver Finance.

    Reads the ``div.ly_realtime > span#time`` element of the KOSPI index
    page and returns the first eight digits found in its text. When the
    element is absent, today's date formatted as ``%Y%m%d`` is returned
    instead.
    """
    import re
    from bs4 import BeautifulSoup

    page = requests.get(
        "https://finance.naver.com/sise/sise_index.naver?code=KOSPI",
        timeout=self.REQUEST_TIMEOUT,
    )
    parsed = BeautifulSoup(page.content, "lxml")
    stamp = parsed.select_one("div.ly_realtime > span#time")
    if not stamp:
        # No timestamp on the page: fall back to the local calendar date.
        return datetime.now().strftime("%Y%m%d")
    digits_only = re.sub(r"[^0-9]", "", stamp.text)
    return digits_only[:8]
|
||||
|
||||
def _fetch_stock_data(self, mkt_id: str) -> pd.DataFrame:
|
||||
"""Fetch stock data for a specific market."""
|
||||
gen_otp_data = {
|
||||
|
||||
@ -25,15 +25,15 @@ class ValuationCollector(BaseCollector):
|
||||
GEN_OTP_URL = "http://data.krx.co.kr/comm/fileDn/GenerateOTP/generate.cmd"
|
||||
DOWN_URL = "http://data.krx.co.kr/comm/fileDn/download_csv/download.cmd"
|
||||
HEADERS = {
|
||||
"Referer": "http://data.krx.co.kr/contents/MDC/MDI/mdiLoader",
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
|
||||
"Referer": "http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201050201",
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
|
||||
}
|
||||
REQUEST_TIMEOUT = 10
|
||||
RATE_LIMIT_DELAY = 1
|
||||
|
||||
def __init__(self, db: Session, biz_day: str = None):
    """Set up the collector for one business day.

    Args:
        db: Database session, forwarded to ``BaseCollector.__init__``.
        biz_day: Business day string. When omitted or empty, the value is
            looked up through ``self._get_latest_biz_day()``.

    The resolved value is stored on ``self.biz_day`` and then checked by
    ``self._validate_biz_day()``.
    """
    super().__init__(db)
    # Single assignment: the earlier datetime.now() default was a dead store
    # that this lookup overwrote on the very next line.
    self.biz_day = biz_day or self._get_latest_biz_day()
    self._validate_biz_day()
|
||||
|
||||
def _validate_biz_day(self) -> None:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user