make-quant-py/example/10-stock-data.py

193 lines
6.9 KiB
Python

import re
import time
from io import BytesIO
import numpy as np
import pandas as pd
import requests as rq
from tqdm import tqdm
from bs4 import BeautifulSoup
from dotenv import load_dotenv
import quantcommon
load_dotenv()
GEN_OTP_URL = 'http://data.krx.co.kr/comm/fileDn/GenerateOTP/generate.cmd'
DOWN_URL = 'http://data.krx.co.kr/comm/fileDn/download_csv/download.cmd'
# 최근 영업일을 가져옴
def get_latest_biz_day():
url = 'https://finance.naver.com/sise/sise_deposit.nhn'
data = rq.post(url)
data_html = BeautifulSoup(data.content, 'lxml')
parse_day = data_html.select_one('div.subtop_sise_graph2 > ul.subtop_chart_note > li > span.tah').text
biz_day = re.findall('[0-9]+', parse_day)
biz_day = ''.join(biz_day)
return biz_day
# 업종 분류 현황 가져옴
def get_stock_data(biz_day, mkt_id):
# logging.basicConfig(level=logging.DEBUG)
gen_otp_data = {
'locale': 'ko_KR',
'mktId': mkt_id, # STK: 코스피, KSQ: 코스닥
'trdDd': biz_day,
'money': '1',
'csvxls_isNo': 'false',
'name': 'fileDown',
'url': 'dbms/MDC/STAT/standard/MDCSTAT03901'
}
headers = {
'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201050201',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
}
otp = rq.post(url=GEN_OTP_URL, data=gen_otp_data, headers=headers, verify=False)
# # 요청 디버깅
# print("===== Request Details =====")
# print(f"Method: {otp.request.method}")
# print(f"URL: {otp.request.url}")
# print(f"Headers: {otp.request.headers}")
# print(f"Body: {otp.request.body}")
#
# # 응답 디버깅
# print("===== Response Details =====")
# print(f"Status Code: {otp.status_code}")
# print(f"Headers: {otp.headers}")
# print(f"Body: {otp.text}")
down_sector = rq.post(url=DOWN_URL, data={'code': otp.text}, headers=headers)
return pd.read_csv(BytesIO(down_sector.content), encoding='EUC-KR')
# 개별 지표 조회
def get_ind_stock_data(biz_day):
gen_otp_data = {
'locale': 'ko_KR',
'searchType': '1',
'mktId': 'ALL',
'trdDd': biz_day,
'csvxls_isNo': 'false',
'name': 'fileDown',
'url': 'dbms/MDC/STAT/standard/MDCSTAT03501'
}
headers = {
'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201050201',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
}
otp = rq.post(url=GEN_OTP_URL, data=gen_otp_data, headers=headers, verify=False)
down_ind_sector = rq.post(url=DOWN_URL, data={'code': otp.text}, headers=headers)
return pd.read_csv(BytesIO(down_ind_sector.content), encoding='EUC-KR')
def process_for_total_stock(biz_day):
# 업종 분류 현황(코스피, 코스닥)
sector_stk = get_stock_data(biz_day, 'STK')
sector_ksq = get_stock_data(biz_day, 'KSQ')
# 각각 조회 후 합침
krx_sector = pd.concat([sector_stk, sector_ksq]).reset_index(drop=True)
krx_sector['종목명'] = krx_sector['종목명'].str.strip()
krx_sector['기준일'] = biz_day
# 개별 지표 조회
krx_ind = get_ind_stock_data(biz_day)
krx_ind['종목명'] = krx_ind['종목명'].str.strip()
krx_ind['기준일'] = biz_day
# 데이터 정리
# 종목, 개별 중 한군데만 있는 데이터 삭제(선박펀드, 광물펀드, 해외종목 등)
diff = list(set(krx_sector['종목명']).symmetric_difference(set(krx_ind['종목명'])))
kor_ticker = pd.merge(krx_sector,
krx_ind,
on=krx_sector.columns.intersection(
krx_ind.columns).tolist(),
how='outer')
# 일반적인 종목과 SPAC, 우선주, 리츠, 기타 주식을 구분
kor_ticker['종목구분'] = np.where(kor_ticker['종목명'].str.contains('스팩|제[0-9]+호'), '스팩',
np.where(kor_ticker['종목코드'].str[-1:] != '0', '우선주',
np.where(kor_ticker['종목명'].str.endswith('리츠'), '리츠',
np.where(kor_ticker['종목명'].isin(diff), '기타',
'보통주'))))
kor_ticker = kor_ticker.reset_index(drop=True)
kor_ticker.columns = kor_ticker.columns.str.replace(' ', '')
kor_ticker = kor_ticker[['종목코드', '종목명', '시장구분', '종가',
'시가총액', '기준일', 'EPS', '선행EPS', 'BPS', '주당배당금', '종목구분']]
kor_ticker = kor_ticker.replace({np.nan: None})
kor_ticker['기준일'] = pd.to_datetime(kor_ticker['기준일'])
save_ticker(kor_ticker)
def save_ticker(ticker):
con = quantcommon.QuantCommon().connect()
mycursor = con.cursor()
query = f"""
insert into kor_ticker (종목코드,종목명,시장구분,종가,시가총액,기준일,EPS,선행EPS,BPS,주당배당금,종목구분)
values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) as new
on duplicate key update
종목명=new.종목명,시장구분=new.시장구분,종가=new.종가,시가총액=new.시가총액,EPS=new.EPS,선행EPS=new.선행EPS,
BPS=new.BPS,주당배당금=new.주당배당금,종목구분 = new.종목구분;
"""
args = ticker.values.tolist()
mycursor.executemany(query, args)
con.commit()
con.close()
# WICS 기준 섹터정보 크롤링
def process_for_wics(biz_day):
sector_code = [
'G25', 'G35', 'G50', 'G40', 'G10', 'G20', 'G55', 'G30', 'G15', 'G45'
]
data_sector = []
# 모든 섹터에 대한 데이터 받아서 가공
for i in tqdm(sector_code):
url = f'''http://www.wiseindex.com/Index/GetIndexComponets?ceil_yn=0&dt={biz_day}&sec_cd={i}'''
data = rq.get(url).json()
data_pd = pd.json_normalize(data['list'])
data_sector.append(data_pd)
time.sleep(2)
kor_sector = pd.concat(data_sector, axis=0)
kor_sector = kor_sector[['IDX_CD', 'CMP_CD', 'CMP_KOR', 'SEC_NM_KOR']]
kor_sector['기준일'] = biz_day
kor_sector['기준일'] = pd.to_datetime(kor_sector['기준일'])
save_sector(kor_sector)
def save_sector(sector):
con = quantcommon.QuantCommon().connect()
mycursor = con.cursor()
query = f"""
insert into kor_sector (IDX_CD, CMP_CD, CMP_KOR, SEC_NM_KOR, 기준일)
values (%s,%s,%s,%s,%s) as new
on duplicate key update
IDX_CD = new.IDX_CD, CMP_KOR = new.CMP_KOR, SEC_NM_KOR = new.SEC_NM_KOR
"""
args = sector.values.tolist()
mycursor.executemany(query, args)
con.commit()
con.close()
if __name__ == '__main__':
latest_biz_day = get_latest_biz_day()
process_for_total_stock(latest_biz_day)
process_for_wics(latest_biz_day)