# galaxis-po/backend/tests/unit/test_financial_collector.py

"""
Unit tests for FinancialCollector.

These tests mock HTTP responses to avoid hitting FnGuide in CI.
"""
from datetime import date
from unittest.mock import patch, MagicMock

import pandas as pd
import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.pool import StaticPool

from app.core.database import Base
from app.models.stock import Financial, Stock, StockType, ReportType
from app.services.collectors.financial_collector import FinancialCollector


@pytest.fixture
def db():
    """Yield a session bound to a fresh in-memory SQLite database.

    All tables registered on ``Base.metadata`` are created before the test.
    ``StaticPool`` keeps a single shared connection so every session
    operation sees the same in-memory database.

    Yields:
        A SQLAlchemy session bound to the in-memory engine.
    """
    engine = create_engine(
        "sqlite:///:memory:",
        connect_args={"check_same_thread": False},
        poolclass=StaticPool,
    )
    Base.metadata.create_all(bind=engine)
    Session = sessionmaker(bind=engine)
    session = Session()
    try:
        yield session
    finally:
        # Nested so that a failure in one teardown step does not skip the
        # remaining ones; the engine is always disposed last so the pooled
        # SQLite connection is released.
        try:
            session.close()
            Base.metadata.drop_all(bind=engine)
        finally:
            engine.dispose()


@pytest.fixture
def db_with_stocks(db):
    """Seed the test session with two KOSPI common-stock rows and return it.

    The rows are committed before the session is handed to the test, and the
    returned object is the same session provided by the ``db`` fixture.
    """
    snapshot_date = date(2025, 3, 28)
    common = StockType.COMMON.value

    samsung = Stock(
        ticker="005930",
        name="삼성전자",
        market="KOSPI",
        close_price=70000,
        market_cap=418000000000000,
        stock_type=common,
        base_date=snapshot_date,
    )
    hynix = Stock(
        ticker="000660",
        name="SK하이닉스",
        market="KOSPI",
        close_price=120000,
        market_cap=87000000000000,
        stock_type=common,
        base_date=snapshot_date,
    )

    db.add_all([samsung, hynix])
    db.commit()
    return db


# Sample FnGuide HTML tables (6 tables: annual/quarterly x income/balance/cashflow)
def _make_sample_tables():
    """Build 6 DataFrames mimicking pd.read_html output from FnGuide."""
    # Annual income statement (index 0)
    annual_income = pd.DataFrame({
        "IFRS(연결)": ["매출액", "매출총이익", "영업이익", "당기순이익"],
        "2022/12": [302231400, 108747000, 43376600, 55654200],
        "2023/12": [258935500, 73024400, 6566500, 15487100],
        "2024/12": [300870000, 100000000, 32726500, 34681300],
        "전년동기(%)": [None, None, None, None],
    })
    # Quarterly income statement (index 1)
    quarterly_income = pd.DataFrame({
        "IFRS(연결)": ["매출액", "매출총이익", "영업이익", "당기순이익"],
        "2024/03": [71922800, 22735000, 6609800, 6745200],
        "2024/06": [74069300, 25558200, 10443900, 9837900],
    })
    # Annual balance sheet (index 2)
    annual_balance = pd.DataFrame({
        "IFRS(연결)": ["자산총계", "부채총계", "자본총계", "유동자산", "유동부채"],
        "2022/12": [448424400, 101153300, 347271100, 218439000, 67766200],
        "2023/12": [455905400, 107064700, 348840700, 213137900, 73291500],
        "2024/12": [480000000, 110000000, 370000000, 220000000, 75000000],
    })
    # Quarterly balance sheet (index 3)
    quarterly_balance = pd.DataFrame({
        "IFRS(연결)": ["자산총계", "부채총계", "자본총계", "유동자산", "유동부채"],
        "2024/03": [460000000, 108000000, 352000000, 215000000, 74000000],
        "2024/06": [465000000, 109000000, 356000000, 218000000, 74500000],
    })
    # Annual cash flow (index 4)
    annual_cashflow = pd.DataFrame({
        "IFRS(연결)": ["영업활동으로인한현금흐름"],
        "2022/12": [49050400],
        "2023/12": [67442000],
        "2024/12": [50000000],
    })
    # Quarterly cash flow (index 5)
    quarterly_cashflow = pd.DataFrame({
        "IFRS(연결)": ["영업활동으로인한현금흐름"],
        "2024/03": [12000000],
        "2024/06": [15000000],
    })
    return [
        annual_income, quarterly_income,
        annual_balance, quarterly_balance,
        annual_cashflow, quarterly_cashflow,
    ]


def _make_fiscal_html():
    """Build HTML snippet with fiscal year end month (12월)."""
    return """
    <html><body>
    <div class="corp_group1">
        <h2>삼성전자</h2>
        <h2>12월 결산</h2>
    </div>
    </body></html>
    """


class TestAccountMapping:
    """Checks on the Korean-to-English entries of FinancialCollector.ACCOUNT_MAP."""

    def test_known_accounts_are_mapped(self):
        """Spot-check several Korean account labels against their English keys."""
        expected_pairs = (
            ("매출액", "revenue"),
            ("당기순이익", "net_income"),
            ("자산총계", "total_assets"),
            ("자본총계", "total_equity"),
            ("영업활동으로인한현금흐름", "operating_cash_flow"),
        )
        account_map = FinancialCollector.ACCOUNT_MAP
        for korean_label, english_key in expected_pairs:
            assert account_map[korean_label] == english_key

    def test_all_factor_calculator_accounts_covered(self):
        """Every account key FactorCalculator expects must be a mapped value."""
        needed = {
            "revenue",
            "gross_profit",
            "operating_income",
            "net_income",
            "total_assets",
            "total_liabilities",
            "total_equity",
            "current_assets",
            "current_liabilities",
            "operating_cash_flow",
        }
        available = set(FinancialCollector.ACCOUNT_MAP.values())
        # An empty difference means every needed key is produced by the map.
        missing = needed - available
        assert not missing


class TestCleanFinancialData:
    """Checks on FinancialCollector._clean_financial_data."""

    def test_clean_removes_nan_rows(self, db):
        """A row with a missing value and an unmapped label is dropped."""
        collector = FinancialCollector(db)
        frame = pd.DataFrame({
            "계정": ["매출액", "빈행"],
            "2024/12": [100000, None],
        })
        cleaned = collector._clean_financial_data(frame, "005930", "annual")
        # 빈행 carries NaN and is not in ACCOUNT_MAP, so only 매출액 survives.
        assert [entry["account"] for entry in cleaned] == ["revenue"]

    def test_clean_maps_account_names(self, db):
        """Korean labels come back under their English account keys."""
        collector = FinancialCollector(db)
        frame = pd.DataFrame({
            "계정": ["매출액", "자산총계"],
            "2024/12": [100000, 500000],
        })
        cleaned = collector._clean_financial_data(frame, "005930", "annual")
        seen = {entry["account"] for entry in cleaned}
        assert {"revenue", "total_assets"} <= seen

    def test_clean_skips_unmapped_accounts(self, db):
        """Labels absent from the mapping do not appear in the output."""
        collector = FinancialCollector(db)
        frame = pd.DataFrame({
            "계정": ["매출액", "알수없는계정"],
            "2024/12": [100000, 999],
        })
        cleaned = collector._clean_financial_data(frame, "005930", "annual")
        seen = {entry["account"] for entry in cleaned}
        assert "revenue" in seen and "알수없는계정" not in seen

    def test_clean_strips_fnguide_suffix(self, db):
        """The FnGuide expand-rows suffix is removed before the label is mapped."""
        collector = FinancialCollector(db)
        frame = pd.DataFrame({
            "계정": ["매출액계산에 참여한 계정 펼치기"],
            "2024/12": [100000],
        })
        cleaned = collector._clean_financial_data(frame, "005930", "annual")
        assert [entry["account"] for entry in cleaned] == ["revenue"]


class TestCollect:
    """Test the full collect flow with mocked HTTP.

    Every test patches ``time.sleep``, ``requests.get`` and ``pd.read_html``
    inside the collector module, so no request is sent and no delay is
    incurred.
    """

    @staticmethod
    def _stub_http(mock_get):
        """Make the patched ``requests.get`` return the fiscal-year HTML page.

        Both ``text`` and ``content`` are populated because the test cannot
        tell which attribute the collector reads.
        """
        html = _make_fiscal_html()
        response = MagicMock()
        response.text = html
        response.content = html.encode()
        mock_get.return_value = response

    @patch("app.services.collectors.financial_collector.time.sleep")
    @patch("app.services.collectors.financial_collector.requests.get")
    @patch("app.services.collectors.financial_collector.pd.read_html")
    def test_collect_saves_records(self, mock_read_html, mock_get, mock_sleep, db_with_stocks):
        """A successful run reports a positive count and persists rows."""
        mock_read_html.return_value = _make_sample_tables()
        self._stub_http(mock_get)

        collector = FinancialCollector(db_with_stocks)
        count = collector.collect()

        assert count > 0
        records = db_with_stocks.query(Financial).all()
        assert len(records) > 0

    @patch("app.services.collectors.financial_collector.time.sleep")
    @patch("app.services.collectors.financial_collector.requests.get")
    @patch("app.services.collectors.financial_collector.pd.read_html")
    def test_collect_stores_correct_report_types(self, mock_read_html, mock_get, mock_sleep, db_with_stocks):
        """At least one stored row is tagged as an annual report."""
        mock_read_html.return_value = _make_sample_tables()
        self._stub_http(mock_get)

        collector = FinancialCollector(db_with_stocks)
        collector.collect()

        report_types = {r.report_type for r in db_with_stocks.query(Financial).all()}
        # Accept either the enum member or its raw string form.
        assert ReportType.ANNUAL in report_types or "annual" in report_types

    @patch("app.services.collectors.financial_collector.time.sleep")
    @patch("app.services.collectors.financial_collector.requests.get")
    @patch("app.services.collectors.financial_collector.pd.read_html")
    def test_collect_continues_on_ticker_error(self, mock_read_html, mock_get, mock_sleep, db_with_stocks):
        """If one ticker fails, the collector should continue to the next."""
        call_count = 0

        def side_effect(*args, **kwargs):
            # Fail only the very first read_html call; later calls succeed.
            nonlocal call_count
            call_count += 1
            if call_count <= 1:
                raise ValueError("Simulated error")
            return _make_sample_tables()

        mock_read_html.side_effect = side_effect
        self._stub_http(mock_get)

        collector = FinancialCollector(db_with_stocks)
        count = collector.collect()

        # Should still have records from the second ticker
        assert count > 0

    @patch("app.services.collectors.financial_collector.time.sleep")
    @patch("app.services.collectors.financial_collector.requests.get")
    @patch("app.services.collectors.financial_collector.pd.read_html")
    def test_collect_upserts_on_duplicate(self, mock_read_html, mock_get, mock_sleep, db_with_stocks):
        """Running collect twice should update, not duplicate."""
        mock_read_html.return_value = _make_sample_tables()
        self._stub_http(mock_get)

        collector = FinancialCollector(db_with_stocks)
        count1 = collector.collect()
        rows_after_first = db_with_stocks.query(Financial).count()
        count2 = collector.collect()
        rows_after_second = db_with_stocks.query(Financial).count()

        # Both runs should succeed with same count
        assert count1 == count2
        # Equal return counts alone would not reveal duplicated rows, so the
        # table size must also be unchanged after the second run.
        assert rows_after_second == rows_after_first