All checks were successful
Deploy to Production / deploy (push) Successful in 1m8s
98 lines
2.9 KiB
Python
98 lines
2.9 KiB
Python
"""
|
|
Base collector class for data collection jobs.
|
|
"""
|
|
import logging
|
|
import re
|
|
from abc import ABC, abstractmethod
|
|
from datetime import datetime
|
|
from typing import Optional
|
|
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
from sqlalchemy.orm import Session
|
|
|
|
from app.models.stock import JobLog
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class BaseCollector(ABC):
    """Base class for all data collectors.

    Subclasses implement :meth:`collect`. :meth:`run` wraps that call with a
    ``JobLog`` row that records the start, and then the success or failure,
    of the job.
    """

    # Value passed as ``timeout=`` to ``requests.get``.
    REQUEST_TIMEOUT = 10

    def __init__(self, db: Session):
        """Store the database session and derive the job name.

        Args:
            db: Session used to persist the job log.

        Raises:
            ValueError: If ``db`` is ``None``.
        """
        if db is None:
            raise ValueError("Database session cannot be None")
        self.db = db
        # The concrete subclass name doubles as the job name in the log.
        self.job_name = self.__class__.__name__
        self.job_log: Optional[JobLog] = None

    @staticmethod
    def _parse_biz_day(text: str) -> str:
        """Extract a ``YYYYMMDD`` date from the scraped timestamp text.

        All non-digit characters are dropped and the first eight digits are
        taken as the date.

        Raises:
            RuntimeError: If fewer than eight digits are present or the
                digits do not form a valid calendar date.
        """
        digits = re.sub(r"[^0-9]", "", text)
        biz_day = digits[:8]
        if len(biz_day) < 8:
            raise RuntimeError(
                "Failed to detect latest business day from Naver Finance"
            )
        try:
            # Reject digit runs that are not a real date (e.g. month 13).
            datetime.strptime(biz_day, "%Y%m%d")
        except ValueError as exc:
            raise RuntimeError(
                "Failed to detect latest business day from Naver Finance"
            ) from exc
        return biz_day

    def _get_latest_biz_day(self) -> str:
        """Get the latest business day from Naver Finance.

        Returns:
            The date as an eight-digit ``YYYYMMDD`` string.

        Raises:
            RuntimeError: If the page responds with an error status, the
                timestamp element is missing, or no valid date can be parsed.
        """
        url = "https://finance.naver.com/sise/sise_index.naver?code=KOSPI"
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        if not response.ok:
            # Do not try to parse an error page as if it were the index page.
            raise RuntimeError(
                "Failed to detect latest business day from Naver Finance "
                f"(HTTP {response.status_code})"
            )

        soup = BeautifulSoup(response.content, "lxml")
        time_elem = soup.select_one("div.ly_realtime > span#time")
        if time_elem is None:
            raise RuntimeError(
                "Failed to detect latest business day from Naver Finance"
            )
        return self._parse_biz_day(time_elem.text)

    def start_job(self) -> JobLog:
        """Create and commit a job log entry with status ``running``.

        Returns:
            The newly created ``JobLog``, also stored on ``self.job_log``.

        Raises:
            Exception: Whatever the session raises on add/commit; the session
                is rolled back and ``self.job_log`` is reset first.
        """
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12; it
        # is kept here so the stored value stays a naive UTC datetime.
        self.job_log = JobLog(
            job_name=self.job_name,
            status="running",
            started_at=datetime.utcnow(),
        )
        try:
            self.db.add(self.job_log)
            self.db.commit()
        except Exception:
            # Same rollback-and-re-raise policy as complete_job / fail_job.
            self.db.rollback()
            self.job_log = None
            raise
        return self.job_log

    def complete_job(self, records_count: int):
        """Mark the current job as completed.

        Does nothing when no job log has been started.

        Args:
            records_count: Number of records to store on the job log.

        Raises:
            Exception: Re-raised after rolling back if the commit fails.
        """
        if not self.job_log:
            return
        try:
            self.job_log.status = "success"
            self.job_log.finished_at = datetime.utcnow()
            self.job_log.records_count = records_count
            self.db.commit()
        except Exception:
            self.db.rollback()
            raise

    def fail_job(self, error_msg: str):
        """Mark the current job as failed.

        Does nothing when no job log has been started.

        Args:
            error_msg: Error description to store on the job log.

        Raises:
            Exception: Re-raised after rolling back if the commit fails.
        """
        if not self.job_log:
            return
        try:
            self.job_log.status = "failed"
            self.job_log.finished_at = datetime.utcnow()
            self.job_log.error_msg = error_msg
            self.db.commit()
        except Exception:
            self.db.rollback()
            raise

    @abstractmethod
    def collect(self) -> int:
        """
        Perform the data collection.

        Returns the number of records collected.
        """
        pass

    def run(self) -> JobLog:
        """Execute the collection job with logging.

        Starts a job log, runs :meth:`collect`, and records the outcome.

        Returns:
            The job log for this run.

        Raises:
            Exception: Any error raised by :meth:`collect` or
                :meth:`complete_job`, re-raised after the job is marked as
                failed on a best-effort basis.
        """
        self.start_job()
        try:
            records = self.collect()
            self.complete_job(records)
        except Exception as e:
            try:
                self.fail_job(str(e))
            except Exception:
                # The original exception matters more, so it is still the one
                # re-raised below; record the log-update failure instead of
                # silently dropping it.
                logger.exception(
                    "Failed to record failure of job %s", self.job_name
                )
            raise
        return self.job_log
|