fix: add validation and rate limiting to ValuationCollector
- Add time import and RATE_LIMIT_DELAY constant for rate limiting between HTTP requests
- Add 1-second delay after OTP request to respect API rate limits
- Validate OTP response is not empty before using it
- Add CSV column structure validation with required columns check
- Add data quality check to skip records where all metrics are None
- Improve error handling and data integrity

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
3e723b6146
commit
8cc2d3fa41
@@ -2,6 +2,7 @@
|
|||||||
Valuation data collector from KRX.
|
Valuation data collector from KRX.
|
||||||
"""
|
"""
|
||||||
import logging
|
import logging
|
||||||
|
import time
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
@@ -28,6 +29,7 @@ class ValuationCollector(BaseCollector):
|
|||||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
|
||||||
}
|
}
|
||||||
REQUEST_TIMEOUT = 10
|
REQUEST_TIMEOUT = 10
|
||||||
|
RATE_LIMIT_DELAY = 1
|
||||||
|
|
||||||
def __init__(self, db: Session, biz_day: str = None):
|
def __init__(self, db: Session, biz_day: str = None):
|
||||||
super().__init__(db)
|
super().__init__(db)
|
||||||
@@ -71,9 +73,15 @@ class ValuationCollector(BaseCollector):
|
|||||||
)
|
)
|
||||||
otp.raise_for_status()
|
otp.raise_for_status()
|
||||||
|
|
||||||
|
otp_code = otp.text.strip()
|
||||||
|
if not otp_code:
|
||||||
|
raise RuntimeError("Received empty OTP from KRX API")
|
||||||
|
|
||||||
|
time.sleep(self.RATE_LIMIT_DELAY)
|
||||||
|
|
||||||
response = requests.post(
|
response = requests.post(
|
||||||
self.DOWN_URL,
|
self.DOWN_URL,
|
||||||
data={"code": otp.text},
|
data={"code": otp_code},
|
||||||
headers=self.HEADERS,
|
headers=self.HEADERS,
|
||||||
timeout=self.REQUEST_TIMEOUT
|
timeout=self.REQUEST_TIMEOUT
|
||||||
)
|
)
|
||||||
@@ -82,7 +90,18 @@ class ValuationCollector(BaseCollector):
|
|||||||
raise RuntimeError(f"Failed to fetch valuation data: {e}")
|
raise RuntimeError(f"Failed to fetch valuation data: {e}")
|
||||||
|
|
||||||
df = pd.read_csv(BytesIO(response.content), encoding="EUC-KR")
|
df = pd.read_csv(BytesIO(response.content), encoding="EUC-KR")
|
||||||
|
|
||||||
|
if df.empty:
|
||||||
|
logger.warning(f"Empty CSV response for {self.biz_day}")
|
||||||
|
return 0
|
||||||
|
|
||||||
df.columns = df.columns.str.replace(" ", "")
|
df.columns = df.columns.str.replace(" ", "")
|
||||||
|
|
||||||
|
required_cols = ["종목코드", "PER", "PBR", "배당수익률"]
|
||||||
|
missing_cols = [col for col in required_cols if col not in df.columns]
|
||||||
|
if missing_cols:
|
||||||
|
raise ValueError(f"Required columns missing from CSV: {missing_cols}")
|
||||||
|
|
||||||
base_date = datetime.strptime(self.biz_day, "%Y%m%d").date()
|
base_date = datetime.strptime(self.biz_day, "%Y%m%d").date()
|
||||||
|
|
||||||
logger.info(f"Processing {len(df)} valuation records for {self.biz_day}")
|
logger.info(f"Processing {len(df)} valuation records for {self.biz_day}")
|
||||||
@@ -93,14 +112,22 @@ class ValuationCollector(BaseCollector):
|
|||||||
if not ticker or pd.isna(ticker):
|
if not ticker or pd.isna(ticker):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
per = self._safe_float(row.get("PER"))
|
||||||
|
pbr = self._safe_float(row.get("PBR"))
|
||||||
|
dividend_yield = self._safe_float(row.get("배당수익률"))
|
||||||
|
|
||||||
|
# Skip records where all metrics are None
|
||||||
|
if all(v is None for v in [per, pbr, dividend_yield]):
|
||||||
|
continue
|
||||||
|
|
||||||
records.append({
|
records.append({
|
||||||
"ticker": ticker,
|
"ticker": ticker,
|
||||||
"base_date": base_date,
|
"base_date": base_date,
|
||||||
"per": self._safe_float(row.get("PER")),
|
"per": per,
|
||||||
"pbr": self._safe_float(row.get("PBR")),
|
"pbr": pbr,
|
||||||
"psr": None, # Not available from this endpoint
|
"psr": None, # Not available from this endpoint
|
||||||
"pcr": None, # Not available from this endpoint
|
"pcr": None, # Not available from this endpoint
|
||||||
"dividend_yield": self._safe_float(row.get("배당수익률")),
|
"dividend_yield": dividend_yield,
|
||||||
})
|
})
|
||||||
|
|
||||||
if records:
|
if records:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user