머니페니 120a8546cb
Some checks failed
Deploy to Production / deploy (push) Failing after 2m38s
fix: 테스트 리포트 보완 계획 전체 구현
2026-05-10 16:44:05 +09:00

125 lines
4.0 KiB
Python

"""
Base collector class for data collection jobs.
"""
import logging
import re
from abc import ABC, abstractmethod
from contextlib import contextmanager
from datetime import datetime, timezone
from typing import Optional, Generator
import requests
from bs4 import BeautifulSoup
from sqlalchemy.orm import Session
from app.models.stock import JobLog
# Module-level logger, named after this module so records can be filtered per module.
logger = logging.getLogger(__name__)
# NOTE(review): not referenced by the BaseCollector code below; presumably the
# number of minutes without a heartbeat after which a "running" job is treated
# as stale by a watchdog - confirm against the code that consumes it.
HEARTBEAT_STALE_MINUTES = 30
class BaseCollector(ABC):
    """Base class for all data collectors.

    Wraps a subclass-provided ``collect()`` with ``JobLog`` bookkeeping: a log
    entry is created when the job starts, can be refreshed with
    ``heartbeat()``, and is finalised with status ``success``, ``failed`` or
    ``failed_orphaned``.

    Attributes:
        REQUEST_TIMEOUT: Timeout in seconds passed to ``requests.get``.
    """

    REQUEST_TIMEOUT = 10

    def __init__(self, db: Session):
        """Bind the collector to a database session.

        Args:
            db: Session used for every job-log write.

        Raises:
            ValueError: If ``db`` is ``None``.
        """
        if db is None:
            raise ValueError("Database session cannot be None")
        self.db = db
        # The concrete subclass name identifies the job in its log entry.
        self.job_name = self.__class__.__name__
        self.job_log: Optional[JobLog] = None

    def _get_latest_biz_day(self) -> str:
        """Get the latest business day from Naver Finance.

        Fetches the KOSPI index page and reads the timestamp shown in the
        ``div.ly_realtime > span#time`` element.

        Returns:
            The first eight digits of that timestamp (presumably ``YYYYMMDD``).

        Raises:
            requests.RequestException: On network failure or a non-2xx reply.
            RuntimeError: If the element is missing or holds fewer than
                eight digits.
        """
        url = "https://finance.naver.com/sise/sise_index.naver?code=KOSPI"
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        # Fail fast on HTTP errors instead of parsing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "lxml")
        time_elem = soup.select_one("div.ly_realtime > span#time")
        if time_elem is None:
            raise RuntimeError("Failed to detect latest business day from Naver Finance")
        # Strip everything but digits, then keep the leading date part.
        date_str = re.sub(r"[^0-9]", "", time_elem.text)
        if len(date_str) < 8:
            # Returning a truncated or empty "date" would silently corrupt
            # whatever the caller does with it.
            raise RuntimeError(
                f"Unexpected timestamp format from Naver Finance: {time_elem.text!r}"
            )
        return date_str[:8]

    def start_job(self) -> JobLog:
        """Create a job log entry when starting.

        Returns:
            The newly committed ``JobLog`` with status ``running``.

        Raises:
            Exception: Whatever the session raises on add/commit; the session
                is rolled back before the error is re-raised.
        """
        # One timestamp for both columns so they start out identical.
        now = datetime.now(timezone.utc)
        self.job_log = JobLog(
            job_name=self.job_name,
            status="running",
            started_at=now,
            last_heartbeat=now,
        )
        try:
            self.db.add(self.job_log)
            self.db.commit()
        except Exception:
            # Same rollback-and-re-raise policy as complete_job / fail_job.
            self.db.rollback()
            raise
        return self.job_log

    def heartbeat(self) -> None:
        """Update last_heartbeat so watchdog knows the job is still alive.

        Best effort: a failed update is logged and rolled back, never raised,
        so a heartbeat problem does not abort the collection itself.
        """
        if not self.job_log:
            return
        try:
            self.job_log.last_heartbeat = datetime.now(timezone.utc)
            self.db.commit()
        except Exception:
            logger.warning(
                "Heartbeat update failed for job %s", self.job_name, exc_info=True
            )
            self.db.rollback()

    def complete_job(self, records_count: int):
        """Mark job as completed.

        Args:
            records_count: Number of records stored on the log entry.

        Raises:
            Exception: Re-raised after rollback if the update cannot be
                committed.
        """
        if not self.job_log:
            return
        try:
            self.job_log.status = "success"
            self.job_log.finished_at = datetime.now(timezone.utc)
            self.job_log.records_count = records_count
            self.db.commit()
        except Exception:
            self.db.rollback()
            raise

    def fail_job(self, error_msg: str):
        """Mark job as failed.

        Args:
            error_msg: Failure description stored on the log entry.

        Raises:
            Exception: Re-raised after rollback if the update cannot be
                committed.
        """
        if not self.job_log:
            return
        try:
            self.job_log.status = "failed"
            self.job_log.finished_at = datetime.now(timezone.utc)
            self.job_log.error_msg = error_msg
            self.db.commit()
        except Exception:
            self.db.rollback()
            raise

    def complete_if_running(self) -> None:
        """If the job is still 'running', mark it failed_orphaned.

        Called in finally blocks to handle unexpected termination paths.
        Best effort: failures are logged and rolled back rather than raised,
        so this cannot mask the exception that is already propagating.
        """
        if not self.job_log:
            return
        try:
            # The status read lives inside the try because loading the
            # attribute may itself touch the database and fail.
            if self.job_log.status != "running":
                return
            self.job_log.status = "failed_orphaned"
            self.job_log.finished_at = datetime.now(timezone.utc)
            self.job_log.error_msg = "Job exited without explicit success/fail"
            self.db.commit()
        except Exception:
            logger.warning(
                "Could not mark job %s as failed_orphaned",
                self.job_name,
                exc_info=True,
            )
            self.db.rollback()

    @abstractmethod
    def collect(self) -> int:
        """
        Perform the data collection.

        Returns the number of records collected.
        """
        pass

    def run(self) -> JobLog:
        """Execute the collection job with logging.

        Starts the job log, runs ``collect()`` and records the outcome.
        Exceptions derived from ``Exception`` are recorded via ``fail_job``
        and re-raised. Any other exit that leaves the status ``running``
        (for example a ``BaseException`` such as ``KeyboardInterrupt``) is
        recorded as ``failed_orphaned`` by the ``finally`` clause.

        Returns:
            The job log entry for this run.

        Raises:
            Exception: Whatever ``start_job``, ``collect`` or ``complete_job``
                raised.
        """
        self.start_job()
        try:
            records = self.collect()
            self.complete_job(records)
        except Exception as e:
            try:
                # Some exceptions stringify to "", so fall back to repr() to
                # keep the stored message informative.
                self.fail_job(str(e) or repr(e))
            except Exception:
                # Log update failed, but original exception is more important
                logger.exception(
                    "Failed to record failure for job %s", self.job_name
                )
            raise
        finally:
            self.complete_if_running()
        return self.job_log