make-quant-py/example/10-stock-data.py

120 lines
4.7 KiB
Python
Raw Normal View History

import logging
import re
from io import BytesIO
import numpy as np
import pandas as pd
import requests as rq
from bs4 import BeautifulSoup
GEN_OTP_URL = 'http://data.krx.co.kr/comm/fileDn/GenerateOTP/generate.cmd'
DOWN_URL = 'http://data.krx.co.kr/comm/fileDn/download_csv/download.cmd'
# 최근 영업일을 가져옴
def get_latest_biz_day():
url = 'https://finance.naver.com/sise/sise_deposit.nhn'
data = rq.post(url)
data_html = BeautifulSoup(data.content, 'lxml')
parse_day = data_html.select_one('div.subtop_sise_graph2 > ul.subtop_chart_note > li > span.tah').text
biz_day = re.findall('[0-9]+', parse_day)
biz_day = ''.join(biz_day)
return biz_day
# 업종 분류 현황 가져옴
def get_stock_data(biz_day, mkt_id):
# logging.basicConfig(level=logging.DEBUG)
gen_otp_data = {
'locale': 'ko_KR',
'mktId': mkt_id, # STK: 코스피, KSQ: 코스닥
'trdDd': biz_day,
'money': '1',
'csvxls_isNo': 'false',
'name': 'fileDown',
'url': 'dbms/MDC/STAT/standard/MDCSTAT03901'
}
headers = {
'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201050201',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
}
otp = rq.post(url=GEN_OTP_URL, data=gen_otp_data, headers=headers, verify=False)
# # 요청 디버깅
# print("===== Request Details =====")
# print(f"Method: {otp.request.method}")
# print(f"URL: {otp.request.url}")
# print(f"Headers: {otp.request.headers}")
# print(f"Body: {otp.request.body}")
#
# # 응답 디버깅
# print("===== Response Details =====")
# print(f"Status Code: {otp.status_code}")
# print(f"Headers: {otp.headers}")
# print(f"Body: {otp.text}")
down_sector = rq.post(url=DOWN_URL, data={'code': otp.text}, headers=headers)
return pd.read_csv(BytesIO(down_sector.content), encoding='EUC-KR')
# 개별 지표 조회
def get_ind_stock_data(biz_day):
gen_otp_data = {
'locale': 'ko_KR',
'searchType': '1',
'mktId': 'ALL',
'trdDd': biz_day,
'csvxls_isNo': 'false',
'name': 'fileDown',
'url': 'dbms/MDC/STAT/standard/MDCSTAT03501'
}
headers = {
'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201050201',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
}
otp = rq.post(url=GEN_OTP_URL, data=gen_otp_data, headers=headers, verify=False)
down_ind_sector = rq.post(url=DOWN_URL, data={'code': otp.text}, headers=headers)
return pd.read_csv(BytesIO(down_ind_sector.content), encoding='EUC-KR')
def get_total_stock_data(biz_day):
# 업종 분류 현황(코스피, 코스닥)
sector_stk = get_stock_data(biz_day, 'stk')
sector_ksq = get_stock_data(biz_day, 'ksq')
# 각각 조회 후 합침
krx_sector = pd.concat([sector_stk, sector_ksq]).reset_index(drop=True)
krx_sector['종목명'] = krx_sector['종목명'].str.strip()
krx_sector['기준일'] = biz_day
# 개별 지표 조회
krx_ind = get_ind_stock_data(biz_day)
krx_ind['종목명'] = krx_ind['종목명'].str.strip()
krx_ind['기준일'] = biz_day
# 데이터 정리
# 종목, 개별 중 한군데만 있는 데이터 삭제(선박펀드, 광물펀드, 해외종목 등)
diff = list(set(krx_sector['종목명']).symmetric_difference(set(krx_ind['종목명'])))
kor_ticket = pd.merge(krx_sector, krx_ind, on=krx_sector.columns.intersection(krx_ind.columns).tolist(), how='outer')
# 일반적인 종목과 SPAC, 우선주, 리츠, 기타 주식을 구분
kor_ticket['종목구분'] = np.where(kor_ticket['종목명'].str.contains('스팩|제[0-9]+호'), '스팩',
np.where(kor_ticket['종목코드'].str[-1:] != '0', '우선주',
np.where(kor_ticket['종목명'].str.endswith('리츠'), '리츠',
np.where(kor_ticket['종목명'].isin(diff), '기타',
'보통주'
))))
kor_ticket = kor_ticket.reset_index(drop=True)
kor_ticket.columns = kor_ticket.columns.str.replace(' ', '')
kor_ticket = kor_ticket[['종목코드', '종목명', '시장구분', '종가',
'시가총액', '기준일', 'EPS', '선행EPS', 'BPS', '주당배당금', '종목구분']]
kor_ticket = kor_ticket.replace({np.nan: None})
kor_ticket['기준일'] = pd.to_datetime(kor_ticket['기준일'])
return kor_ticket
if __name__ == '__main__':
latest_biz_day = get_latest_biz_day()
data = get_total_stock_data(latest_biz_day)
print(data)