""" Base collector class for data collection jobs. """ import logging import re from abc import ABC, abstractmethod from datetime import datetime from typing import Optional import requests from bs4 import BeautifulSoup from sqlalchemy.orm import Session from app.models.stock import JobLog logger = logging.getLogger(__name__) class BaseCollector(ABC): """Base class for all data collectors.""" REQUEST_TIMEOUT = 10 def __init__(self, db: Session): if db is None: raise ValueError("Database session cannot be None") self.db = db self.job_name = self.__class__.__name__ self.job_log: Optional[JobLog] = None def _get_latest_biz_day(self) -> str: """Get the latest business day from Naver Finance.""" url = "https://finance.naver.com/sise/sise_index.naver?code=KOSPI" response = requests.get(url, timeout=self.REQUEST_TIMEOUT) soup = BeautifulSoup(response.content, "lxml") time_elem = soup.select_one("div.ly_realtime > span#time") if time_elem: date_str = re.sub(r"[^0-9]", "", time_elem.text) return date_str[:8] raise RuntimeError("Failed to detect latest business day from Naver Finance") def start_job(self) -> JobLog: """Create a job log entry when starting.""" self.job_log = JobLog( job_name=self.job_name, status="running", started_at=datetime.utcnow(), ) self.db.add(self.job_log) self.db.commit() return self.job_log def complete_job(self, records_count: int): """Mark job as completed.""" if self.job_log: try: self.job_log.status = "success" self.job_log.finished_at = datetime.utcnow() self.job_log.records_count = records_count self.db.commit() except Exception: self.db.rollback() raise def fail_job(self, error_msg: str): """Mark job as failed.""" if self.job_log: try: self.job_log.status = "failed" self.job_log.finished_at = datetime.utcnow() self.job_log.error_msg = error_msg self.db.commit() except Exception: self.db.rollback() raise @abstractmethod def collect(self) -> int: """ Perform the data collection. Returns the number of records collected. """ pass def run(self) -> JobLog: """Execute the collection job with logging.""" self.start_job() try: records = self.collect() self.complete_job(records) except Exception as e: try: self.fail_job(str(e)) except Exception: pass # Log update failed, but original exception is more important raise return self.job_log