fix: add validation and rate limiting to ValuationCollector
- Add time import and RATE_LIMIT_DELAY constant for rate limiting between HTTP requests
- Add 1-second delay after OTP request to respect API rate limits
- Validate OTP response is not empty before using it
- Add CSV column structure validation with required columns check
- Add data quality check to skip records where all metrics are None
- Improve error handling and data integrity

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
3e723b6146
commit
8cc2d3fa41
@ -2,6 +2,7 @@
|
||||
Valuation data collector from KRX.
|
||||
"""
|
||||
import logging
|
||||
import time
|
||||
from io import BytesIO
|
||||
from datetime import datetime
|
||||
|
||||
@ -28,6 +29,7 @@ class ValuationCollector(BaseCollector):
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
|
||||
}
|
||||
REQUEST_TIMEOUT = 10
|
||||
RATE_LIMIT_DELAY = 1
|
||||
|
||||
def __init__(self, db: Session, biz_day: str = None):
|
||||
super().__init__(db)
|
||||
@ -71,9 +73,15 @@ class ValuationCollector(BaseCollector):
|
||||
)
|
||||
otp.raise_for_status()
|
||||
|
||||
otp_code = otp.text.strip()
|
||||
if not otp_code:
|
||||
raise RuntimeError("Received empty OTP from KRX API")
|
||||
|
||||
time.sleep(self.RATE_LIMIT_DELAY)
|
||||
|
||||
response = requests.post(
|
||||
self.DOWN_URL,
|
||||
data={"code": otp.text},
|
||||
data={"code": otp_code},
|
||||
headers=self.HEADERS,
|
||||
timeout=self.REQUEST_TIMEOUT
|
||||
)
|
||||
@ -82,7 +90,18 @@ class ValuationCollector(BaseCollector):
|
||||
raise RuntimeError(f"Failed to fetch valuation data: {e}")
|
||||
|
||||
df = pd.read_csv(BytesIO(response.content), encoding="EUC-KR")
|
||||
|
||||
if df.empty:
|
||||
logger.warning(f"Empty CSV response for {self.biz_day}")
|
||||
return 0
|
||||
|
||||
df.columns = df.columns.str.replace(" ", "")
|
||||
|
||||
required_cols = ["종목코드", "PER", "PBR", "배당수익률"]
|
||||
missing_cols = [col for col in required_cols if col not in df.columns]
|
||||
if missing_cols:
|
||||
raise ValueError(f"Required columns missing from CSV: {missing_cols}")
|
||||
|
||||
base_date = datetime.strptime(self.biz_day, "%Y%m%d").date()
|
||||
|
||||
logger.info(f"Processing {len(df)} valuation records for {self.biz_day}")
|
||||
@ -93,14 +112,22 @@ class ValuationCollector(BaseCollector):
|
||||
if not ticker or pd.isna(ticker):
|
||||
continue
|
||||
|
||||
per = self._safe_float(row.get("PER"))
|
||||
pbr = self._safe_float(row.get("PBR"))
|
||||
dividend_yield = self._safe_float(row.get("배당수익률"))
|
||||
|
||||
# Skip records where all metrics are None
|
||||
if all(v is None for v in [per, pbr, dividend_yield]):
|
||||
continue
|
||||
|
||||
records.append({
|
||||
"ticker": ticker,
|
||||
"base_date": base_date,
|
||||
"per": self._safe_float(row.get("PER")),
|
||||
"pbr": self._safe_float(row.get("PBR")),
|
||||
"per": per,
|
||||
"pbr": pbr,
|
||||
"psr": None, # Not available from this endpoint
|
||||
"pcr": None, # Not available from this endpoint
|
||||
"dividend_yield": self._safe_float(row.get("배당수익률")),
|
||||
"dividend_yield": dividend_yield,
|
||||
})
|
||||
|
||||
if records:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user