98 lines
2.9 KiB
Python
Raw Normal View History

"""
Base collector class for data collection jobs.
"""
2026-02-08 22:48:35 +09:00
import logging
import re
from abc import ABC, abstractmethod
from datetime import datetime
from typing import Optional
2026-02-08 22:48:35 +09:00
import requests
from bs4 import BeautifulSoup
from sqlalchemy.orm import Session
from app.models.stock import JobLog
2026-02-08 22:48:35 +09:00
logger = logging.getLogger(__name__)


class BaseCollector(ABC):
    """Base class for all data collectors.

    Subclasses implement :meth:`collect`.  :meth:`run` wraps that call with
    job-log bookkeeping: a ``JobLog`` row is created with status
    ``"running"`` and later marked ``"success"`` or ``"failed"``.
    """

    # Timeout, in seconds, passed to ``requests.get``.
    REQUEST_TIMEOUT = 10

    def __init__(self, db: "Session"):
        """Bind the collector to a database session.

        Args:
            db: Session used to persist the job log.

        Raises:
            ValueError: If ``db`` is ``None``.
        """
        if db is None:
            raise ValueError("Database session cannot be None")
        self.db = db
        # The concrete subclass name doubles as the job name in the log.
        self.job_name = self.__class__.__name__
        self.job_log: Optional["JobLog"] = None

    @staticmethod
    def _utcnow() -> datetime:
        """Return the current UTC time as a naive ``datetime``.

        Yields the same kind of value as ``datetime.utcnow()`` without
        calling that method, which is deprecated since Python 3.12.
        """
        # Local import: the module header only imports ``datetime`` itself.
        from datetime import timezone

        return datetime.now(timezone.utc).replace(tzinfo=None)

    @staticmethod
    def _extract_biz_day(raw_text: str) -> Optional[str]:
        """Return the leading ``YYYYMMDD`` digits of ``raw_text``, or ``None``.

        Every non-digit character is stripped first.  ``None`` is returned
        when fewer than eight digits remain or when the first eight digits do
        not form a valid calendar date.
        """
        candidate = re.sub(r"[^0-9]", "", raw_text)[:8]
        if len(candidate) < 8:
            return None
        try:
            datetime.strptime(candidate, "%Y%m%d")
        except ValueError:
            return None
        return candidate

    def _get_latest_biz_day(self) -> str:
        """Get the latest business day from Naver Finance.

        Fetches the KOSPI index page and reads the date out of the
        ``div.ly_realtime > span#time`` element.

        Returns:
            The date as an eight-digit ``YYYYMMDD`` string.

        Raises:
            RuntimeError: If the page responds with an HTTP error status, the
                time element is missing, or its text holds no valid date.
        """
        url = "https://finance.naver.com/sise/sise_index.naver?code=KOSPI"
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        if response.ok:
            soup = BeautifulSoup(response.content, "lxml")
            time_elem = soup.select_one("div.ly_realtime > span#time")
            if time_elem is not None:
                biz_day = self._extract_biz_day(time_elem.text)
                if biz_day is not None:
                    return biz_day
        else:
            # An error page cannot contain a trustworthy date; do not parse it.
            logger.warning(
                "Naver Finance responded with HTTP %s", response.status_code
            )
        raise RuntimeError(
            "Failed to detect latest business day from Naver Finance"
        )

    def start_job(self) -> "JobLog":
        """Create and commit a ``"running"`` job log entry.

        Returns:
            The newly created job log, also stored on ``self.job_log``.

        Raises:
            Exception: Whatever the commit raised; the session is rolled back
                first and ``self.job_log`` is left unchanged.
        """
        entry = JobLog(
            job_name=self.job_name,
            status="running",
            started_at=self._utcnow(),
        )
        self.db.add(entry)
        try:
            self.db.commit()
        except Exception:
            self.db.rollback()
            raise
        # Only expose the entry once it has actually been committed.
        self.job_log = entry
        return entry

    def _finish_job(self, status: str, **fields) -> None:
        """Set the final status, finish time and extra fields, then commit.

        Does nothing when no job log has been started.  On any error the
        session is rolled back and the error is re-raised.
        """
        if self.job_log is None:
            return
        try:
            self.job_log.status = status
            self.job_log.finished_at = self._utcnow()
            for attr_name, value in fields.items():
                setattr(self.job_log, attr_name, value)
            self.db.commit()
        except Exception:
            self.db.rollback()
            raise

    def complete_job(self, records_count: int) -> None:
        """Mark the job as successful.

        Args:
            records_count: Number of records the job collected.
        """
        self._finish_job("success", records_count=records_count)

    def fail_job(self, error_msg: str) -> None:
        """Mark the job as failed.

        Args:
            error_msg: Description of the failure stored on the job log.
        """
        self._finish_job("failed", error_msg=error_msg)

    @abstractmethod
    def collect(self) -> int:
        """Perform the data collection.

        Returns:
            The number of records collected.
        """

    def run(self) -> "JobLog":
        """Execute the collection job with job-log bookkeeping.

        Returns:
            The job log entry, marked ``"success"``.

        Raises:
            Exception: Whatever ``collect()`` or ``complete_job()`` raised.
                The job log is marked ``"failed"`` on a best-effort basis
                before the original exception is re-raised.
        """
        self.start_job()
        try:
            records = self.collect()
            self.complete_job(records)
        except Exception as exc:
            try:
                # Drop whatever the failed attempt left pending.  A session
                # whose flush failed rejects further commits until it is
                # rolled back, and partial results must not be committed
                # together with the "failed" status.
                self.db.rollback()
                self.fail_job(str(exc))
            except Exception:
                # The original exception matters more, so record this
                # secondary failure in the application log and carry on.
                logger.exception(
                    "Could not record failure of job %s", self.job_name
                )
            raise
        return self.job_log