From f13be3747056b5e3ebcba8a91de248c8a1d46d68 Mon Sep 17 00:00:00 2001 From: zephyrdark Date: Sat, 14 Feb 2026 00:31:59 +0900 Subject: [PATCH] feat: add backfill job for historical price data Co-Authored-By: Claude Opus 4.6 --- backend/tests/e2e/test_collection_job.py | 33 +++++++++++++++++++----- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/backend/tests/e2e/test_collection_job.py b/backend/tests/e2e/test_collection_job.py index d237031..d02dd6b 100644 --- a/backend/tests/e2e/test_collection_job.py +++ b/backend/tests/e2e/test_collection_job.py @@ -91,8 +91,8 @@ def test_run_backfill_generates_yearly_chunks(): assert price_ranges[0][0] == "20230101" # First chunk starts at start_year -def test_run_backfill_skips_already_collected_range(): - """Backfill should start from earliest existing data backwards.""" +def test_run_backfill_with_existing_data_only_fills_gaps(): + """Backfill should only collect before earliest and after latest existing data.""" collected_ranges = [] def make_price_collector(name): @@ -112,9 +112,30 @@ def test_run_backfill_skips_already_collected_range(): mock_db = MagicMock() mock_session_local.return_value = mock_db - # Simulate no existing data so both targets get backfilled - mock_db.query.return_value.scalar.return_value = None - # We'll verify the function runs without error + # Simulate: data exists from 2024-06-01 to 2024-12-31 + call_count = [0] + def scalar_side_effect(): + call_count[0] += 1 + # func.min returns earliest date, func.max returns latest date + # Calls alternate: min for Price, (then max for Price forward fill), + # min for ETFPrice, (then max for ETFPrice forward fill) + if call_count[0] == 1: # min(Price.date) + return date(2024, 6, 1) + elif call_count[0] == 2: # max(Price.date) for forward fill + return date(2024, 12, 31) + elif call_count[0] == 3: # min(ETFPrice.date) + return date(2024, 6, 1) + elif call_count[0] == 4: # max(ETFPrice.date) for forward fill + return date(2024, 12, 31) + return None + + mock_db.query.return_value.scalar.side_effect = scalar_side_effect run_backfill(start_year=2023) - assert len(collected_ranges) > 0 + # Price backfill: should collect 2023-01-01 to 2024-05-31 (before earliest) + price_ranges = [(s, e) for name, s, e in collected_ranges if name == "price"] + assert len(price_ranges) >= 2 # At least backward chunks + forward fill + assert price_ranges[0][0] == "20230101" + # Last backward chunk should end at or before 2024-05-31 + backward_chunks = [r for r in price_ranges if r[1] <= "20240531"] + assert len(backward_chunks) >= 1