Some checks failed
Deploy to Production / deploy (push) Failing after 6m46s
- KOSPIMarketStateDetector: KOSPI MA 기반 시장 상태 판단 (bull/neutral/bear/crash) - VolumeScreener: 거래대금 2000억+ 스크리닝 (상한가 우선, 희소성 체크, 대형주 예외) - SectorPortfolioManager: 섹터 기반 비중 배분 - KJBScreeningSignalGenerator: 눌림목 진입, 5MA 손절, 단계적 익절 - KISTradeExecutor: KIS API 자동 매수/매도 (기본값 모의투자) - ScreeningSignal / AutoOrder DB 모델 추가 - screening API 엔드포인트 추가 - 스케줄러 잡 3종 추가 (08:30/5분/15:35) - Price.trading_value 컬럼 추가 - MarketIndex 테이블 추가 (KOSPI/KOSDAQ 지수 일봉) - IndexCollector 추가 (일일 수집 잡 등록) - intraday_exit_check 시간 필터 추가 (09:05~15:20 KST) - 드라이런 스크립트 추가 (scripts/screening_dryrun.py)
176 lines
5.8 KiB
Python
176 lines
5.8 KiB
Python
"""
|
|
Data collection orchestration jobs.
|
|
"""
|
|
import logging
|
|
from datetime import date, timedelta
|
|
|
|
from sqlalchemy import func
|
|
|
|
from app.core.database import SessionLocal
|
|
from app.models.stock import Price, ETFPrice
|
|
from app.services.collectors import (
|
|
StockCollector,
|
|
SectorCollector,
|
|
PriceCollector,
|
|
ValuationCollector,
|
|
FinancialCollector,
|
|
ETFCollector,
|
|
ETFPriceCollector,
|
|
IndexCollector,
|
|
)
|
|
|
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _get_daily_collectors():
    """
    Build the ordered plan of daily collectors.

    Each entry is a ``(label, collector class, constructor kwargs)`` tuple.
    Entries are listed in dependency order: master data first, then the
    data derived from it.

    The plan is assembled on every call, so the module-level collector names
    are resolved at call time and can be patched in tests.
    """
    ordered = (
        ("StockCollector", StockCollector),
        ("SectorCollector", SectorCollector),
        ("PriceCollector", PriceCollector),
        ("ValuationCollector", ValuationCollector),
        ("ETFCollector", ETFCollector),
        ("ETFPriceCollector", ETFPriceCollector),
        ("IndexCollector", IndexCollector),
    )
    # Every entry gets its own (currently empty) kwargs dict.
    return [(label, collector_cls, {}) for label, collector_cls in ordered]
|
|
|
|
|
|
def run_daily_collection():
    """
    Execute every daily collector, one after another, in dependency order.

    A fresh DB session is opened for each collector and closed when that
    collector finishes, whether it succeeded or raised. A failing collector
    is recorded and logged; the remaining collectors still run.

    Designed to be called by APScheduler at 18:00 Mon-Fri.

    Returns:
        Dict mapping collector name to ``"success"`` or ``"failed: <error>"``.
    """
    logger.info("Starting daily data collection")
    results = {}

    for name, factory, options in _get_daily_collectors():
        session = SessionLocal()
        try:
            factory(session, **options).run()
            results[name] = "success"
            logger.info(f"{name} completed successfully")
        except Exception as e:
            # Best effort: remember the failure and move on to the next one.
            results[name] = f"failed: {e}"
            logger.error(f"{name} failed: {e}")
        finally:
            session.close()

    logger.info(f"Daily collection finished: {results}")
    return results
|
|
|
|
|
|
def run_financial_collection():
    """
    Run the financial statement collector once.

    Financial data updates quarterly, so this job runs weekly (Monday)
    instead of being part of the daily collection; that avoids unnecessary
    FnGuide scraping on every business day.

    Returns:
        ``{"FinancialCollector": "success"}`` on success, otherwise
        ``{"FinancialCollector": "failed: <error>"}``.
    """
    logger.info("Starting weekly financial statement collection")
    session = SessionLocal()
    try:
        FinancialCollector(session).run()
        logger.info("FinancialCollector completed successfully")
        outcome = "success"
    except Exception as e:
        logger.error(f"FinancialCollector failed: {e}")
        outcome = f"failed: {e}"
    finally:
        # The session is released on both the success and the failure path.
        session.close()
    return {"FinancialCollector": outcome}
|
|
|
|
|
|
def _generate_yearly_chunks(start_year: int, end_date: date) -> list[tuple[str, str]]:
    """Generate (start_date, end_date) pairs in YYYYMMDD format, one per year.

    The chunks cover January 1st of ``start_year`` through ``end_date``.
    Both bounds of every pair are inclusive. The final chunk is truncated at
    ``end_date``; when ``end_date`` is itself a January 1st, that day gets
    its own single-day chunk instead of being dropped.

    Args:
        start_year: Calendar year of the first chunk.
        end_date: Last day to cover (inclusive).

    Returns:
        Chronologically ordered ``(start, end)`` string pairs. Empty when
        ``end_date`` falls before January 1st of ``start_year``.
    """
    return [
        (
            date(year, 1, 1).strftime("%Y%m%d"),
            # Clamp the last chunk so it never runs past end_date.
            min(date(year, 12, 31), end_date).strftime("%Y%m%d"),
        )
        for year in range(start_year, end_date.year + 1)
    ]
|
|
|
|
|
|
def run_backfill(start_year: int = 2000):
    """
    Collect historical price data from start_year to today.

    For each price table (Price, ETFPrice) the earliest stored date is
    looked up and only the period before it is collected, split into yearly
    chunks to avoid overloading pykrx. When the table already holds data,
    the gap between the latest stored date and today is collected as well;
    this forward fill runs even when no historical backfill is needed.

    Every chunk runs in its own DB session. A failing chunk is logged and
    the remaining chunks still run.

    Args:
        start_year: First calendar year to backfill (from January 1st).
    """
    logger.info(f"Starting backfill from {start_year}")
    today = date.today()

    db = SessionLocal()
    try:
        # (label, collector class, date column used to find stored range)
        backfill_targets = [
            ("Price", PriceCollector, Price.date),
            ("ETFPrice", ETFPriceCollector, ETFPrice.date),
        ]

        for name, collector_cls, date_col in backfill_targets:
            # Find earliest existing data
            earliest = db.query(func.min(date_col)).scalar()

            if earliest is None:
                # No data at all - collect everything
                backfill_end = today
            else:
                # Data exists - collect from start_year to day before earliest
                backfill_end = earliest - timedelta(days=1)

            if date(start_year, 1, 1) >= backfill_end:
                # Do NOT skip the rest of the iteration here: the forward
                # fill below must still run for this target.
                logger.info(f"{name}: no backfill needed (data exists from {earliest})")
            else:
                chunks = _generate_yearly_chunks(start_year, backfill_end)
                logger.info(f"{name}: backfilling {len(chunks)} yearly chunks from {start_year} to {backfill_end}")

                for start_dt, end_dt in chunks:
                    # Separate session per chunk so one failure cannot
                    # affect the sessions used by the other chunks.
                    chunk_db = SessionLocal()
                    try:
                        collector = collector_cls(chunk_db, start_date=start_dt, end_date=end_dt)
                        collector.run()
                        logger.info(f"{name}: chunk {start_dt}-{end_dt} completed")
                    except Exception as e:
                        logger.error(f"{name}: chunk {start_dt}-{end_dt} failed: {e}")
                    finally:
                        chunk_db.close()

            # Also fill gap between latest data and today (forward fill).
            # Skipped when the table was empty: the backfill above already
            # ran through today in that case.
            if earliest is not None:
                latest = db.query(func.max(date_col)).scalar()
                if latest and latest < today:
                    gap_start = (latest + timedelta(days=1)).strftime("%Y%m%d")
                    gap_end = today.strftime("%Y%m%d")
                    gap_db = SessionLocal()
                    try:
                        collector = collector_cls(gap_db, start_date=gap_start, end_date=gap_end)
                        collector.run()
                        logger.info(f"{name}: forward fill {gap_start}-{gap_end} completed")
                    except Exception as e:
                        logger.error(f"{name}: forward fill failed: {e}")
                    finally:
                        gap_db.close()

    finally:
        db.close()

    logger.info("Backfill completed")
|