galaxis-po/backend/jobs/collection_job.py
머니페니 34d09d9d34
Some checks failed
Deploy to Production / deploy (push) Failing after 6m46s
feat: 김종봉식 KOSPI 종목발굴 전략 구현
- KOSPIMarketStateDetector: KOSPI MA 기반 시장 상태 판단 (bull/neutral/bear/crash)
- VolumeScreener: 거래대금 2000억+ 스크리닝 (상한가 우선, 희소성 체크, 대형주 예외)
- SectorPortfolioManager: 섹터 기반 비중 배분
- KJBScreeningSignalGenerator: 눌림목 진입, 5MA 손절, 단계적 익절
- KISTradeExecutor: KIS API 자동 매수/매도 (기본값 모의투자)
- ScreeningSignal / AutoOrder DB 모델 추가
- screening API 엔드포인트 추가
- 스케줄러 잡 3종 추가 (08:30/5분/15:35)
- Price.trading_value 컬럼 추가
- MarketIndex 테이블 추가 (KOSPI/KOSDAQ 지수 일봉)
- IndexCollector 추가 (일일 수집 잡 등록)
- intraday_exit_check 시간 필터 추가 (09:05~15:20 KST)
- 드라이런 스크립트 추가 (scripts/screening_dryrun.py)
2026-05-05 23:03:53 +09:00

176 lines
5.8 KiB
Python

"""
Data collection orchestration jobs.
"""
import logging
from datetime import date, timedelta
from sqlalchemy import func
from app.core.database import SessionLocal
from app.models.stock import Price, ETFPrice
from app.services.collectors import (
StockCollector,
SectorCollector,
PriceCollector,
ValuationCollector,
FinancialCollector,
ETFCollector,
ETFPriceCollector,
IndexCollector,
)
logger = logging.getLogger(__name__)
def _get_daily_collectors():
    """
    Build the ordered list of collectors used by the daily job.

    Master data collectors come first, followed by the collectors whose
    data is derived from them. The list is assembled on every call (rather
    than stored in a module constant) so that tests can patch the
    module-level collector names before invoking the job.

    Returns:
        A list of ``(label, collector_class, kwargs)`` tuples, where
        ``kwargs`` is a fresh empty dict for each entry.
    """
    ordered = (
        ("StockCollector", StockCollector),
        ("SectorCollector", SectorCollector),
        ("PriceCollector", PriceCollector),
        ("ValuationCollector", ValuationCollector),
        ("ETFCollector", ETFCollector),
        ("ETFPriceCollector", ETFPriceCollector),
        ("IndexCollector", IndexCollector),
    )
    # Labels stay as literals (not cls.__name__) so they are stable even when
    # the classes are replaced by test doubles.
    return [(label, cls, {}) for label, cls in ordered]
def run_daily_collection():
    """
    Execute every daily collector, one after another, in dependency order.

    A dedicated DB session is opened for each collector and always closed
    afterwards. A failing collector is recorded and logged, and the
    remaining collectors still run.
    Intended to be triggered by APScheduler at 18:00 Mon-Fri.

    Returns:
        A dict mapping each collector label to ``"success"`` or to
        ``"failed: <error>"``.
    """
    logger.info("Starting daily data collection")
    outcomes = {}

    for label, factory, options in _get_daily_collectors():
        session = SessionLocal()
        try:
            factory(session, **options).run()
            outcomes[label] = "success"
            logger.info(f"{label} completed successfully")
        except Exception as exc:
            # Best-effort job: record the failure and move on to the next one.
            outcomes[label] = f"failed: {exc}"
            logger.error(f"{label} failed: {exc}")
        finally:
            session.close()

    logger.info(f"Daily collection finished: {outcomes}")
    return outcomes
def run_financial_collection():
    """
    Execute the financial statement collector.

    Financial statements change quarterly, so this job is scheduled weekly
    (Monday) instead of daily; keeping it out of the daily collection avoids
    scraping FnGuide on every business day for no benefit.

    Returns:
        ``{"FinancialCollector": "success"}`` when the collector finishes, or
        ``{"FinancialCollector": "failed: <error>"}`` when it raises.
    """
    logger.info("Starting weekly financial statement collection")
    session = SessionLocal()
    try:
        FinancialCollector(session).run()
        logger.info("FinancialCollector completed successfully")
        status = "success"
    except Exception as exc:
        logger.error(f"FinancialCollector failed: {exc}")
        # Format the message here: `exc` is unbound once the except block ends.
        status = f"failed: {exc}"
    finally:
        session.close()
    return {"FinancialCollector": status}
def _generate_yearly_chunks(start_year: int, end_date: date) -> list[tuple[str, str]]:
    """
    Split the span from Jan 1 of start_year through end_date into yearly chunks.

    Both ends of every chunk are inclusive. Each chunk covers one calendar
    year (Jan 1 - Dec 31), except the last one, which is clamped to end_date.

    Args:
        start_year: Calendar year of the first chunk.
        end_date: Last day to cover (inclusive).

    Returns:
        A list of ``(start, end)`` string pairs in ``YYYYMMDD`` format, in
        chronological order. Empty when start_year is after end_date's year.
    """
    chunks = []
    # Iterating by year (instead of `while start < end_date`) keeps the final
    # chunk even when end_date is exactly Jan 1, i.e. a one-day chunk.
    for year in range(start_year, end_date.year + 1):
        chunk_start = date(year, 1, 1)
        chunk_end = min(date(year, 12, 31), end_date)
        chunks.append((
            chunk_start.strftime("%Y%m%d"),
            chunk_end.strftime("%Y%m%d"),
        ))
    return chunks
def run_backfill(start_year: int = 2000):
    """
    Collect historical price data from start_year to today.

    For each target (Price, ETFPrice) two independent gaps are filled:

    * Backward: from Jan 1 of start_year up to the day before the earliest
      stored row (or up to today when the table is empty). The span is split
      into yearly chunks to avoid overloading pykrx.
    * Forward: from the day after the latest stored row up to today. This
      runs whenever the table already has data, even if no backward backfill
      was needed.

    Every chunk runs in its own DB session. A failing chunk is logged and
    skipped so the remaining chunks and targets are still processed.

    Args:
        start_year: Calendar year whose Jan 1 is the start of the backfill.
    """
    logger.info(f"Starting backfill from {start_year}")
    today = date.today()
    range_start = date(start_year, 1, 1)
    db = SessionLocal()
    try:
        # (label, collector class, date column used to find existing coverage)
        backfill_targets = [
            ("Price", PriceCollector, Price.date),
            ("ETFPrice", ETFPriceCollector, ETFPrice.date),
        ]
        for name, collector_cls, date_col in backfill_targets:
            # Find earliest existing data
            earliest = db.query(func.min(date_col)).scalar()
            if earliest is None:
                # No data at all - collect everything
                backfill_end = today
            else:
                # Data exists - collect from start_year to day before earliest
                backfill_end = earliest - timedelta(days=1)

            if range_start >= backfill_end:
                logger.info(f"{name}: no backfill needed (data exists from {earliest})")
            else:
                chunks = _generate_yearly_chunks(start_year, backfill_end)
                logger.info(f"{name}: backfilling {len(chunks)} yearly chunks from {start_year} to {backfill_end}")
                for start_dt, end_dt in chunks:
                    chunk_db = SessionLocal()
                    try:
                        collector = collector_cls(chunk_db, start_date=start_dt, end_date=end_dt)
                        collector.run()
                        logger.info(f"{name}: chunk {start_dt}-{end_dt} completed")
                    except Exception as e:
                        logger.error(f"{name}: chunk {start_dt}-{end_dt} failed: {e}")
                    finally:
                        chunk_db.close()

            # Forward fill is deliberately NOT skipped when the backward
            # backfill is unnecessary: a table whose history already reaches
            # start_year can still be missing the most recent days.
            if earliest is None:
                # Table was empty: the backward pass above already ran to today.
                continue
            latest = db.query(func.max(date_col)).scalar()
            if latest and latest < today:
                gap_start = (latest + timedelta(days=1)).strftime("%Y%m%d")
                gap_end = today.strftime("%Y%m%d")
                gap_db = SessionLocal()
                try:
                    collector = collector_cls(gap_db, start_date=gap_start, end_date=gap_end)
                    collector.run()
                    logger.info(f"{name}: forward fill {gap_start}-{gap_end} completed")
                except Exception as e:
                    logger.error(f"{name}: forward fill failed: {e}")
                finally:
                    gap_db.close()
    finally:
        db.close()
    logger.info("Backfill completed")