zephyrdark 72c72994b2
All checks were successful
Deploy to Production / deploy (push) Successful in 1m8s
fix: collector error
2026-02-08 22:48:35 +09:00

98 lines
2.9 KiB
Python

"""
Base collector class for data collection jobs.
"""
import logging
import re
from abc import ABC, abstractmethod
from datetime import datetime
from typing import Optional
import requests
from bs4 import BeautifulSoup
from sqlalchemy.orm import Session
from app.models.stock import JobLog
logger = logging.getLogger(__name__)
class BaseCollector(ABC):
    """Base class for all data collectors.

    Subclasses implement :meth:`collect`; :meth:`run` wraps it with a
    ``JobLog`` row that records start, success or failure of the job.
    """

    # Passed as ``timeout`` to ``requests.get``.
    REQUEST_TIMEOUT = 10

    def __init__(self, db: "Session"):
        """Bind the collector to a database session.

        Raises:
            ValueError: if ``db`` is None.
        """
        if db is None:
            raise ValueError("Database session cannot be None")
        self.db = db
        # The job name recorded in the log is the concrete subclass name.
        self.job_name = self.__class__.__name__
        self.job_log: Optional["JobLog"] = None

    @staticmethod
    def _utcnow() -> datetime:
        """Return the current UTC time as a naive datetime.

        Produces the same value as the deprecated ``datetime.utcnow()``.
        """
        # Local import: the file only imports ``datetime`` from the module.
        from datetime import timezone

        return datetime.now(timezone.utc).replace(tzinfo=None)

    def _get_latest_biz_day(self) -> str:
        """Get the latest business day from Naver Finance.

        Returns:
            The first 8 digits of the page's timestamp text
            (presumably ``YYYYMMDD`` -- confirm against the page format).

        Raises:
            RuntimeError: if the page returns an HTTP error status, the
                timestamp element is missing, or it holds fewer than 8 digits.
        """
        url = "https://finance.naver.com/sise/sise_index.naver?code=KOSPI"
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        try:
            # Fail on 4xx/5xx instead of parsing an error page.
            response.raise_for_status()
        except requests.HTTPError as exc:
            raise RuntimeError(
                "Failed to detect latest business day from Naver Finance"
            ) from exc

        soup = BeautifulSoup(response.content, "lxml")
        time_elem = soup.select_one("div.ly_realtime > span#time")
        if time_elem is not None:
            date_str = re.sub(r"[^0-9]", "", time_elem.text)
            # Never hand back a truncated or empty date.
            if len(date_str) >= 8:
                return date_str[:8]
        raise RuntimeError("Failed to detect latest business day from Naver Finance")

    def start_job(self) -> "JobLog":
        """Create and commit a job log entry with status ``running``.

        On a failed commit the session is rolled back, ``self.job_log`` is
        left untouched and the exception is re-raised.
        """
        job_log = JobLog(
            job_name=self.job_name,
            status="running",
            started_at=self._utcnow(),
        )
        try:
            self.db.add(job_log)
            self.db.commit()
        except Exception:
            # Same rollback-and-re-raise policy as complete_job / fail_job.
            self.db.rollback()
            raise
        self.job_log = job_log
        return job_log

    def _finish_job(self, status: str, **fields) -> None:
        """Set the final status, finish time and extra fields, then commit.

        Does nothing when no job log has been started. On failure the
        session is rolled back and the exception is re-raised.
        """
        if not self.job_log:
            return
        try:
            self.job_log.status = status
            self.job_log.finished_at = self._utcnow()
            for name, value in fields.items():
                setattr(self.job_log, name, value)
            self.db.commit()
        except Exception:
            self.db.rollback()
            raise

    def complete_job(self, records_count: int):
        """Mark job as completed and store the number of records."""
        self._finish_job("success", records_count=records_count)

    def fail_job(self, error_msg: str):
        """Mark job as failed and store the error message."""
        self._finish_job("failed", error_msg=error_msg)

    @abstractmethod
    def collect(self) -> int:
        """
        Perform the data collection.
        Returns the number of records collected.
        """

    def run(self) -> "JobLog":
        """Execute the collection job with logging.

        Starts the job log, runs :meth:`collect` and marks the log as
        success. If anything raises, the session is rolled back, the log is
        marked failed on a best-effort basis, and the original exception is
        re-raised.
        """
        self.start_job()
        try:
            records = self.collect()
            self.complete_job(records)
        except Exception as e:
            try:
                # Discard whatever collect() left pending: a session whose
                # flush failed cannot commit until rolled back, and partial
                # work must not be committed with the "failed" status.
                self.db.rollback()
                self.fail_job(str(e))
            except Exception:
                # Log update failed, but original exception is more important
                logger.exception(
                    "Could not record failure of job %s", self.job_name
                )
            raise
        return self.job_log