All checks were successful
Deploy to Production / deploy (push) Successful in 1m8s
Port make-quant-py's FnGuide scraping logic into galaxy-po's BaseCollector pattern. Collects annual and quarterly financial statements (revenue, net income, total assets, etc.) and maps Korean account names to English keys for FactorCalculator. Scheduled weekly on Monday 19:00 KST since data updates quarterly. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
274 lines
10 KiB
Python
274 lines
10 KiB
Python
"""
|
|
Unit tests for FinancialCollector.
|
|
|
|
These tests mock HTTP responses to avoid hitting FnGuide in CI.
|
|
"""
|
|
from datetime import date
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
import pandas as pd
|
|
import pytest
|
|
from sqlalchemy import create_engine
|
|
from sqlalchemy.orm import sessionmaker
|
|
from sqlalchemy.pool import StaticPool
|
|
|
|
from app.core.database import Base
|
|
from app.models.stock import Financial, Stock, StockType, ReportType
|
|
from app.services.collectors.financial_collector import FinancialCollector
|
|
|
|
|
|
@pytest.fixture
def db():
    """Yield a session bound to a fresh in-memory SQLite database.

    The engine uses ``StaticPool`` with ``check_same_thread`` disabled, and
    every table registered on ``Base.metadata`` is created before the
    session is handed to the test.

    Teardown closes the session, drops the tables and disposes the engine.
    It runs in a ``finally`` block so cleanup still happens if the generator
    is closed abnormally.
    """
    engine = create_engine(
        "sqlite:///:memory:",
        connect_args={"check_same_thread": False},
        poolclass=StaticPool,
    )
    Base.metadata.create_all(bind=engine)
    Session = sessionmaker(bind=engine)
    session = Session()
    try:
        yield session
    finally:
        try:
            session.close()
            Base.metadata.drop_all(bind=engine)
        finally:
            # Release the pooled connection explicitly instead of waiting
            # for garbage collection; runs even if close/drop_all raised.
            engine.dispose()
|
|
|
|
|
|
@pytest.fixture
def db_with_stocks(db):
    """Return the test session after inserting two sample stock rows.

    Both rows are KOSPI common stocks sharing the same base date; they are
    added in a fixed order (005930 first, then 000660) and committed.
    """
    shared_fields = {
        "market": "KOSPI",
        "stock_type": StockType.COMMON.value,
        "base_date": date(2025, 3, 28),
    }
    per_stock_fields = (
        {
            "ticker": "005930",
            "name": "삼성전자",
            "close_price": 70000,
            "market_cap": 418000000000000,
        },
        {
            "ticker": "000660",
            "name": "SK하이닉스",
            "close_price": 120000,
            "market_cap": 87000000000000,
        },
    )
    for fields in per_stock_fields:
        db.add(Stock(**fields, **shared_fields))
    db.commit()
    return db
|
|
|
|
|
|
# Sample FnGuide HTML tables (6 tables: annual/quarterly x income/balance/cashflow)
|
|
def _make_sample_tables():
|
|
"""Build 6 DataFrames mimicking pd.read_html output from FnGuide."""
|
|
# Annual income statement (index 0)
|
|
annual_income = pd.DataFrame({
|
|
"IFRS(연결)": ["매출액", "매출총이익", "영업이익", "당기순이익"],
|
|
"2022/12": [302231400, 108747000, 43376600, 55654200],
|
|
"2023/12": [258935500, 73024400, 6566500, 15487100],
|
|
"2024/12": [300870000, 100000000, 32726500, 34681300],
|
|
"전년동기(%)": [None, None, None, None],
|
|
})
|
|
# Quarterly income statement (index 1)
|
|
quarterly_income = pd.DataFrame({
|
|
"IFRS(연결)": ["매출액", "매출총이익", "영업이익", "당기순이익"],
|
|
"2024/03": [71922800, 22735000, 6609800, 6745200],
|
|
"2024/06": [74069300, 25558200, 10443900, 9837900],
|
|
})
|
|
# Annual balance sheet (index 2)
|
|
annual_balance = pd.DataFrame({
|
|
"IFRS(연결)": ["자산총계", "부채총계", "자본총계", "유동자산", "유동부채"],
|
|
"2022/12": [448424400, 101153300, 347271100, 218439000, 67766200],
|
|
"2023/12": [455905400, 107064700, 348840700, 213137900, 73291500],
|
|
"2024/12": [480000000, 110000000, 370000000, 220000000, 75000000],
|
|
})
|
|
# Quarterly balance sheet (index 3)
|
|
quarterly_balance = pd.DataFrame({
|
|
"IFRS(연결)": ["자산총계", "부채총계", "자본총계", "유동자산", "유동부채"],
|
|
"2024/03": [460000000, 108000000, 352000000, 215000000, 74000000],
|
|
"2024/06": [465000000, 109000000, 356000000, 218000000, 74500000],
|
|
})
|
|
# Annual cash flow (index 4)
|
|
annual_cashflow = pd.DataFrame({
|
|
"IFRS(연결)": ["영업활동으로인한현금흐름"],
|
|
"2022/12": [49050400],
|
|
"2023/12": [67442000],
|
|
"2024/12": [50000000],
|
|
})
|
|
# Quarterly cash flow (index 5)
|
|
quarterly_cashflow = pd.DataFrame({
|
|
"IFRS(연결)": ["영업활동으로인한현금흐름"],
|
|
"2024/03": [12000000],
|
|
"2024/06": [15000000],
|
|
})
|
|
return [
|
|
annual_income, quarterly_income,
|
|
annual_balance, quarterly_balance,
|
|
annual_cashflow, quarterly_cashflow,
|
|
]
|
|
|
|
|
|
def _make_fiscal_html():
|
|
"""Build HTML snippet with fiscal year end month (12월)."""
|
|
return """
|
|
<html><body>
|
|
<div class="corp_group1">
|
|
<h2>삼성전자</h2>
|
|
<h2>12월 결산</h2>
|
|
</div>
|
|
</body></html>
|
|
"""
|
|
|
|
|
|
class TestAccountMapping:
    """Checks on ``FinancialCollector.ACCOUNT_MAP`` (Korean name -> English key)."""

    def test_known_accounts_are_mapped(self):
        """Spot-check several Korean account names against their English keys."""
        expected_pairs = (
            ("매출액", "revenue"),
            ("당기순이익", "net_income"),
            ("자산총계", "total_assets"),
            ("자본총계", "total_equity"),
            ("영업활동으로인한현금흐름", "operating_cash_flow"),
        )
        for korean_name, english_key in expected_pairs:
            assert FinancialCollector.ACCOUNT_MAP[korean_name] == english_key

    def test_all_factor_calculator_accounts_covered(self):
        """FactorCalculator expects these account keys."""
        required_keys = {
            "revenue", "gross_profit", "operating_income", "net_income",
            "total_assets", "total_liabilities", "total_equity",
            "current_assets", "current_liabilities", "operating_cash_flow",
        }
        available_keys = set(FinancialCollector.ACCOUNT_MAP.values())
        # Every required key must appear among the mapped English names.
        assert required_keys <= available_keys
|
|
|
|
|
|
class TestCleanFinancialData:
    """Exercise ``FinancialCollector._clean_financial_data`` on small hand-built frames."""

    @staticmethod
    def _clean_annual(db, accounts, values):
        """Clean a one-period (2024/12) annual frame for ticker 005930.

        ``accounts`` fills the ``계정`` column and ``values`` the ``2024/12``
        column; the collector's cleaned output is returned unchanged.
        """
        collector = FinancialCollector(db)
        frame = pd.DataFrame({"계정": accounts, "2024/12": values})
        return collector._clean_financial_data(frame, "005930", "annual")

    def test_clean_removes_nan_rows(self, db):
        """A row whose only value is missing must not appear in the output."""
        cleaned = self._clean_annual(db, ["매출액", "빈행"], [100000, None])

        # Only 매출액 should remain (빈행 has NaN value and is not in ACCOUNT_MAP).
        # NOTE(review): because 빈행 is both NaN-valued and unmapped, this does
        # not isolate NaN filtering from unmapped-account filtering; consider
        # a mapped account with a NaN value if NaN handling must be pinned.
        assert len(cleaned) == 1
        assert cleaned[0]["account"] == "revenue"

    def test_clean_maps_account_names(self, db):
        """Korean account names come back as their English keys."""
        cleaned = self._clean_annual(db, ["매출액", "자산총계"], [100000, 500000])

        seen_accounts = {row["account"] for row in cleaned}
        assert "revenue" in seen_accounts
        assert "total_assets" in seen_accounts

    def test_clean_skips_unmapped_accounts(self, db):
        """An account name with no mapping is not passed through verbatim."""
        cleaned = self._clean_annual(db, ["매출액", "알수없는계정"], [100000, 999])

        seen_accounts = {row["account"] for row in cleaned}
        assert "revenue" in seen_accounts
        assert "알수없는계정" not in seen_accounts

    def test_clean_strips_fnguide_suffix(self, db):
        """The expand-accounts suffix appended to the name is removed before mapping."""
        cleaned = self._clean_annual(db, ["매출액계산에 참여한 계정 펼치기"], [100000])

        assert len(cleaned) == 1
        assert cleaned[0]["account"] == "revenue"
|
|
|
|
|
|
class TestCollect:
    """Test full collect flow with mocked HTTP.

    Every test patches ``time.sleep``, ``requests.get`` and ``pd.read_html``
    as seen from the collector module. ``patch`` decorators apply bottom-up,
    so the mock arguments arrive in the order read_html, get, sleep.
    """

    @staticmethod
    def _stub_fnguide_page(mock_get):
        """Make the patched ``requests.get`` return the sample fiscal-year page.

        The response exposes the same HTML through both ``text`` and
        ``content`` (encoded bytes).
        """
        html = _make_fiscal_html()
        response = MagicMock()
        response.text = html
        response.content = html.encode()
        mock_get.return_value = response

    @patch("app.services.collectors.financial_collector.time.sleep")
    @patch("app.services.collectors.financial_collector.requests.get")
    @patch("app.services.collectors.financial_collector.pd.read_html")
    def test_collect_saves_records(self, mock_read_html, mock_get, mock_sleep, db_with_stocks):
        """collect() reports a positive count and leaves Financial rows in the DB."""
        mock_read_html.return_value = _make_sample_tables()
        self._stub_fnguide_page(mock_get)

        collector = FinancialCollector(db_with_stocks)
        count = collector.collect()

        assert count > 0
        records = db_with_stocks.query(Financial).all()
        assert len(records) > 0

    @patch("app.services.collectors.financial_collector.time.sleep")
    @patch("app.services.collectors.financial_collector.requests.get")
    @patch("app.services.collectors.financial_collector.pd.read_html")
    def test_collect_stores_correct_report_types(self, mock_read_html, mock_get, mock_sleep, db_with_stocks):
        """At least one stored row carries the annual report type."""
        mock_read_html.return_value = _make_sample_tables()
        self._stub_fnguide_page(mock_get)

        collector = FinancialCollector(db_with_stocks)
        collector.collect()

        report_types = {r.report_type for r in db_with_stocks.query(Financial).all()}
        # Accept either the enum member or its plain-string form.
        assert ReportType.ANNUAL in report_types or "annual" in report_types

    @patch("app.services.collectors.financial_collector.time.sleep")
    @patch("app.services.collectors.financial_collector.requests.get")
    @patch("app.services.collectors.financial_collector.pd.read_html")
    def test_collect_continues_on_ticker_error(self, mock_read_html, mock_get, mock_sleep, db_with_stocks):
        """If one ticker fails, the collector should continue to the next."""
        call_count = 0

        def side_effect(*args, **kwargs):
            # Fail only the very first read_html call; succeed afterwards.
            nonlocal call_count
            call_count += 1
            if call_count <= 1:
                raise ValueError("Simulated error")
            return _make_sample_tables()

        mock_read_html.side_effect = side_effect
        self._stub_fnguide_page(mock_get)

        collector = FinancialCollector(db_with_stocks)
        count = collector.collect()

        # Should still have records from the second ticker
        assert count > 0

    @patch("app.services.collectors.financial_collector.time.sleep")
    @patch("app.services.collectors.financial_collector.requests.get")
    @patch("app.services.collectors.financial_collector.pd.read_html")
    def test_collect_upserts_on_duplicate(self, mock_read_html, mock_get, mock_sleep, db_with_stocks):
        """Running collect twice should update, not duplicate."""
        mock_read_html.return_value = _make_sample_tables()
        self._stub_fnguide_page(mock_get)

        collector = FinancialCollector(db_with_stocks)
        count1 = collector.collect()
        rows_after_first = db_with_stocks.query(Financial).count()
        count2 = collector.collect()
        rows_after_second = db_with_stocks.query(Financial).count()

        # Both runs should succeed with same count
        assert count1 == count2
        # Equal return counts alone cannot detect duplicated rows, so also
        # require that the table itself did not grow on the second run.
        assert rows_after_first > 0
        assert rows_after_second == rows_after_first
|