Phase 1: - Real-time signal alerts (Discord/Telegram webhook) - Trading journal with entry/exit tracking - Position sizing calculator (Fixed/Kelly/ATR) Phase 2: - Pension asset allocation (DC/IRP 70% risk limit) - Drawdown monitoring with SVG gauge - Benchmark dashboard (portfolio vs KOSPI vs deposit) Phase 3: - Tax benefit simulation (Korean pension tax rules) - Correlation matrix heatmap - Parameter optimizer with grid search + overfit detection
189 lines
5.6 KiB
Python
189 lines
5.6 KiB
Python
"""
|
|
Correlation analysis service.
|
|
|
|
Calculates inter-stock correlation matrices and portfolio diversification scores.
|
|
"""
|
|
import logging
|
|
from datetime import date, timedelta
|
|
from typing import List, Optional
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
from sqlalchemy.orm import Session
|
|
|
|
from app.models.stock import Price
|
|
from app.models.portfolio import Portfolio, PortfolioSnapshot
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class CorrelationService:
|
|
def __init__(self, db: Session):
|
|
self.db = db
|
|
|
|
def calculate_correlation_matrix(
|
|
self, stock_codes: List[str], period_days: int = 60
|
|
) -> dict:
|
|
if not stock_codes:
|
|
return {"stock_codes": [], "matrix": []}
|
|
|
|
end_date = date.today()
|
|
start_date = end_date - timedelta(days=period_days)
|
|
|
|
prices = (
|
|
self.db.query(Price)
|
|
.filter(
|
|
Price.ticker.in_(stock_codes),
|
|
Price.date >= start_date,
|
|
Price.date <= end_date,
|
|
)
|
|
.order_by(Price.date)
|
|
.all()
|
|
)
|
|
|
|
returns_df = self._prices_to_returns_df(prices, stock_codes)
|
|
|
|
if returns_df.empty or len(returns_df) < 2:
|
|
n = len(stock_codes)
|
|
matrix = [[None if i != j else 1.0 for j in range(n)] for i in range(n)]
|
|
return {"stock_codes": stock_codes, "matrix": matrix}
|
|
|
|
corr_matrix = returns_df.corr()
|
|
|
|
matrix = []
|
|
for code in stock_codes:
|
|
row = []
|
|
for other in stock_codes:
|
|
if code in corr_matrix.columns and other in corr_matrix.columns:
|
|
val = corr_matrix.loc[code, other]
|
|
row.append(round(float(val), 4) if not np.isnan(val) else None)
|
|
else:
|
|
row.append(None if code != other else 1.0)
|
|
matrix.append(row)
|
|
|
|
return {"stock_codes": stock_codes, "matrix": matrix}
|
|
|
|
def calculate_portfolio_diversification(self, portfolio_id: int) -> float:
|
|
portfolio = (
|
|
self.db.query(Portfolio)
|
|
.filter(Portfolio.id == portfolio_id)
|
|
.first()
|
|
)
|
|
if not portfolio:
|
|
raise ValueError("Portfolio not found")
|
|
|
|
snapshot = (
|
|
self.db.query(PortfolioSnapshot)
|
|
.filter(PortfolioSnapshot.portfolio_id == portfolio_id)
|
|
.order_by(PortfolioSnapshot.snapshot_date.desc())
|
|
.first()
|
|
)
|
|
|
|
if not snapshot or not snapshot.holdings:
|
|
return 1.0
|
|
|
|
holdings = snapshot.holdings
|
|
if len(holdings) == 1:
|
|
return 0.0
|
|
|
|
tickers = [h.ticker for h in holdings]
|
|
total_value = sum(float(h.value) for h in holdings)
|
|
if total_value == 0:
|
|
return 1.0
|
|
|
|
weights = np.array([float(h.value) / total_value for h in holdings])
|
|
|
|
end_date = date.today()
|
|
start_date = end_date - timedelta(days=60)
|
|
|
|
prices = (
|
|
self.db.query(Price)
|
|
.filter(
|
|
Price.ticker.in_(tickers),
|
|
Price.date >= start_date,
|
|
Price.date <= end_date,
|
|
)
|
|
.order_by(Price.date)
|
|
.all()
|
|
)
|
|
|
|
returns_df = self._prices_to_returns_df(prices, tickers)
|
|
|
|
if returns_df.empty or len(returns_df) < 2:
|
|
return 0.5
|
|
|
|
cov_matrix = returns_df.cov().values
|
|
stds = returns_df.std().values
|
|
|
|
# Portfolio variance
|
|
portfolio_variance = weights @ cov_matrix @ weights
|
|
|
|
# Weighted average variance (no diversification case)
|
|
weighted_avg_variance = np.sum((weights ** 2) * (stds ** 2)) + \
|
|
2 * np.sum([
|
|
weights[i] * weights[j] * stds[i] * stds[j]
|
|
for i in range(len(weights))
|
|
for j in range(i + 1, len(weights))
|
|
])
|
|
|
|
if weighted_avg_variance < 1e-10:
|
|
return 1.0
|
|
|
|
# Diversification ratio: 1 - (portfolio_vol / weighted_avg_vol)
|
|
portfolio_vol = np.sqrt(portfolio_variance)
|
|
weighted_avg_vol = np.sum(weights * stds)
|
|
|
|
if weighted_avg_vol < 1e-10:
|
|
return 1.0
|
|
|
|
diversification_ratio = 1.0 - (portfolio_vol / weighted_avg_vol)
|
|
return round(float(np.clip(diversification_ratio, 0, 1)), 4)
|
|
|
|
def get_correlation_data(
|
|
self, stock_codes: List[str], period_days: int = 60
|
|
) -> dict:
|
|
result = self.calculate_correlation_matrix(stock_codes, period_days)
|
|
|
|
high_pairs = []
|
|
codes = result["stock_codes"]
|
|
matrix = result["matrix"]
|
|
|
|
for i in range(len(codes)):
|
|
for j in range(i + 1, len(codes)):
|
|
val = matrix[i][j]
|
|
if val is not None and abs(val) > 0.7:
|
|
high_pairs.append({
|
|
"stock_a": codes[i],
|
|
"stock_b": codes[j],
|
|
"correlation": val,
|
|
})
|
|
|
|
result["high_correlation_pairs"] = high_pairs
|
|
return result
|
|
|
|
def _prices_to_returns_df(
|
|
self, prices: list, stock_codes: List[str]
|
|
) -> pd.DataFrame:
|
|
if not prices:
|
|
return pd.DataFrame()
|
|
|
|
data = {}
|
|
for p in prices:
|
|
if p.ticker not in data:
|
|
data[p.ticker] = {}
|
|
data[p.ticker][p.date] = float(p.close)
|
|
|
|
if not data:
|
|
return pd.DataFrame()
|
|
|
|
df = pd.DataFrame(data)
|
|
df.index = pd.to_datetime(df.index)
|
|
df = df.sort_index()
|
|
|
|
# Reorder columns to match requested order
|
|
existing = [c for c in stock_codes if c in df.columns]
|
|
df = df[existing]
|
|
|
|
returns_df = df.pct_change().dropna()
|
|
return returns_df
|