galaxis-po/backend/app/services/correlation.py
머니페니 12d235a1f1 feat: add 9 new modules - notification alerts, trading journal, position sizing, pension allocation, drawdown monitoring, benchmark dashboard, tax simulation, correlation analysis, parameter optimizer
Phase 1:
- Real-time signal alerts (Discord/Telegram webhook)
- Trading journal with entry/exit tracking
- Position sizing calculator (Fixed/Kelly/ATR)

Phase 2:
- Pension asset allocation (DC/IRP 70% risk limit)
- Drawdown monitoring with SVG gauge
- Benchmark dashboard (portfolio vs KOSPI vs deposit)

Phase 3:
- Tax benefit simulation (Korean pension tax rules)
- Correlation matrix heatmap
- Parameter optimizer with grid search + overfit detection
2026-03-29 10:03:08 +09:00

189 lines
5.6 KiB
Python

"""
Correlation analysis service.
Calculates inter-stock correlation matrices and portfolio diversification scores.
"""
import logging
from datetime import date, timedelta
from typing import List, Optional
import numpy as np
import pandas as pd
from sqlalchemy.orm import Session
from app.models.stock import Price
from app.models.portfolio import Portfolio, PortfolioSnapshot
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
class CorrelationService:
    """Correlation and diversification analytics built on stored close prices.

    All statistics are computed from simple daily returns derived from the
    ``Price`` rows found in a trailing calendar-day window ending today.
    """

    # Below this squared weighted volatility the holdings are treated as
    # having no measurable risk and the score short-circuits.
    _MIN_VARIANCE = 1e-10

    def __init__(self, db: "Session"):
        """Store the SQLAlchemy session used for every query."""
        self.db = db

    def calculate_correlation_matrix(
        self, stock_codes: List[str], period_days: int = 60
    ) -> dict:
        """Return the pairwise return-correlation matrix for ``stock_codes``.

        Args:
            stock_codes: Tickers to correlate; output rows and columns follow
                this order.
            period_days: Look-back window in calendar days, ending today.

        Returns:
            ``{"stock_codes": [...], "matrix": [[...], ...]}``. Each cell is a
            correlation rounded to 4 decimals, or ``None`` when it cannot be
            computed (no price data, or an undefined correlation). A ticker
            without usable data still gets ``1.0`` on its own diagonal cell.
        """
        if not stock_codes:
            return {"stock_codes": [], "matrix": []}

        returns_df = self._load_returns(stock_codes, period_days)
        return {
            "stock_codes": stock_codes,
            "matrix": self._correlation_rows(returns_df, stock_codes),
        }

    def calculate_portfolio_diversification(
        self, portfolio_id: int, period_days: int = 60
    ) -> float:
        """Score how diversified the latest snapshot of a portfolio is.

        The score is ``1 - portfolio_vol / weighted_avg_vol`` clipped to
        ``[0, 1]`` and rounded to 4 decimals.

        Args:
            portfolio_id: Primary key of the portfolio to analyse.
            period_days: Look-back window in calendar days, ending today.

        Returns:
            ``1.0`` when there is no snapshot, no holdings, or zero total
            value; ``0.0`` for a single holding; ``0.5`` when there is not
            enough price history to compute anything; otherwise the score.

        Raises:
            ValueError: If no portfolio with ``portfolio_id`` exists.
        """
        portfolio = (
            self.db.query(Portfolio)
            .filter(Portfolio.id == portfolio_id)
            .first()
        )
        if not portfolio:
            raise ValueError("Portfolio not found")

        snapshot = (
            self.db.query(PortfolioSnapshot)
            .filter(PortfolioSnapshot.portfolio_id == portfolio_id)
            .order_by(PortfolioSnapshot.snapshot_date.desc())
            .first()
        )
        if not snapshot or not snapshot.holdings:
            return 1.0

        holdings = snapshot.holdings
        if len(holdings) == 1:
            return 0.0

        # Aggregate per ticker so a ticker held in several lots becomes a
        # single weight (and a single column in the returns frame).
        values_by_ticker: dict = {}
        for holding in holdings:
            values_by_ticker[holding.ticker] = (
                values_by_ticker.get(holding.ticker, 0.0) + float(holding.value)
            )
        if sum(values_by_ticker.values()) == 0:
            return 1.0

        returns_df = self._load_returns(list(values_by_ticker), period_days)

        missing = [t for t in values_by_ticker if t not in returns_df.columns]
        if missing:
            logger.warning(
                "No usable price history for %s; excluded from diversification score",
                missing,
            )
        return self._diversification_score(values_by_ticker, returns_df)

    def get_correlation_data(
        self,
        stock_codes: List[str],
        period_days: int = 60,
        threshold: float = 0.7,
    ) -> dict:
        """Return the correlation matrix plus the strongly correlated pairs.

        Args:
            stock_codes: Tickers to correlate.
            period_days: Look-back window in calendar days, ending today.
            threshold: A pair is reported when ``abs(correlation)`` is
                strictly greater than this value.

        Returns:
            The dict from ``calculate_correlation_matrix`` with an extra
            ``"high_correlation_pairs"`` list of
            ``{"stock_a", "stock_b", "correlation"}`` dicts.
        """
        result = self.calculate_correlation_matrix(stock_codes, period_days)
        result["high_correlation_pairs"] = self._high_correlation_pairs(
            result["stock_codes"], result["matrix"], threshold
        )
        return result

    def _load_returns(self, tickers: List[str], period_days: int) -> pd.DataFrame:
        """Query closes for ``tickers`` in the window and convert to returns."""
        end_date = date.today()
        start_date = end_date - timedelta(days=period_days)
        prices = (
            self.db.query(Price)
            .filter(
                Price.ticker.in_(tickers),
                Price.date >= start_date,
                Price.date <= end_date,
            )
            .order_by(Price.date)
            .all()
        )
        return self._prices_to_returns_df(prices, tickers)

    @staticmethod
    def _correlation_rows(returns_df: pd.DataFrame, stock_codes: List[str]) -> list:
        """Build the nested-list correlation matrix in ``stock_codes`` order."""
        if returns_df.empty or len(returns_df) < 2:
            # Fewer than two return rows: no correlation is defined.
            corr, known = None, frozenset()
        else:
            corr = returns_df.corr()
            known = frozenset(corr.columns)

        rows = []
        for code in stock_codes:
            row = []
            for other in stock_codes:
                if code in known and other in known:
                    value = float(corr.at[code, other])
                    row.append(None if np.isnan(value) else round(value, 4))
                else:
                    # No data for at least one side: only self-correlation
                    # is known.
                    row.append(1.0 if code == other else None)
            rows.append(row)
        return rows

    @staticmethod
    def _high_correlation_pairs(
        codes: List[str], matrix: list, threshold: float
    ) -> list:
        """List upper-triangle pairs whose ``abs(correlation)`` exceeds ``threshold``."""
        pairs = []
        for i, code_a in enumerate(codes):
            for j in range(i + 1, len(codes)):
                value = matrix[i][j]
                if value is not None and abs(value) > threshold:
                    pairs.append({
                        "stock_a": code_a,
                        "stock_b": codes[j],
                        "correlation": value,
                    })
        return pairs

    @staticmethod
    def _diversification_score(
        values_by_ticker: dict, returns_df: pd.DataFrame
    ) -> float:
        """Compute the clipped diversification score from values and returns.

        Weights are taken only for tickers that are columns of ``returns_df``
        and renormalised to sum to 1, so the weight vector always matches the
        covariance matrix even when some holdings have no price history.
        """
        if returns_df.empty or len(returns_df) < 2:
            return 0.5

        values = np.array(
            [float(values_by_ticker.get(t, 0.0)) for t in returns_df.columns]
        )
        priced_total = values.sum()
        if priced_total <= 0:
            # Nothing with price history carries any value: not enough data.
            return 0.5
        weights = values / priced_total

        stds = returns_df.std().to_numpy()
        # Volatility the portfolio would have if every pair were perfectly
        # correlated (the "no diversification" case).
        weighted_avg_vol = float(weights @ stds)
        if weighted_avg_vol ** 2 < CorrelationService._MIN_VARIANCE:
            return 1.0

        cov_matrix = returns_df.cov().to_numpy()
        # Clamp: rounding error can push a near-zero variance slightly
        # negative, which would turn the square root into NaN.
        portfolio_variance = max(float(weights @ cov_matrix @ weights), 0.0)
        ratio = 1.0 - np.sqrt(portfolio_variance) / weighted_avg_vol
        return round(float(np.clip(ratio, 0, 1)), 4)

    def _prices_to_returns_df(
        self, prices: list, stock_codes: List[str]
    ) -> pd.DataFrame:
        """Pivot price rows into a date-indexed frame of simple returns.

        Args:
            prices: Objects exposing ``ticker``, ``date`` and ``close``.
            stock_codes: Requested tickers; columns follow this order, with
                duplicates and tickers lacking data dropped.

        Returns:
            One column per ticker, one row per date on which every column has
            a return. Empty when there is no usable input.
        """
        closes: dict = {}
        for price in prices or ():
            if price.close is None:
                continue
            closes.setdefault(price.ticker, {})[price.date] = float(price.close)
        if not closes:
            return pd.DataFrame()

        df = pd.DataFrame(closes)
        df.index = pd.to_datetime(df.index)
        df = df.sort_index()

        # dict.fromkeys de-duplicates while keeping the requested order;
        # duplicate columns would break scalar lookups on the result.
        ordered = [c for c in dict.fromkeys(stock_codes) if c in df.columns]
        if not ordered:
            return pd.DataFrame()

        # fill_method=None: never forward-fill gaps into fabricated 0% returns,
        # and behave the same across pandas versions.
        returns_df = df[ordered].pct_change(fill_method=None)
        # A zero close yields an infinite return; treat it as missing.
        returns_df = returns_df.replace([np.inf, -np.inf], np.nan)
        return returns_df.dropna()