"""Fetch Korean listed-stock data from KRX and classify each ticker.

Provenance: this module was added as ``example/10-stock-data.py`` by commit
18016bfdecb44d4612ad1c542dfa439fb19939c1 (Ayuriel, 2025-01-27), subject
"feat: add chapter 10 stock data lookup".
"""
import logging
import re
from io import BytesIO

import numpy as np
import pandas as pd
import requests as rq
from bs4 import BeautifulSoup

GEN_OTP_URL = 'http://data.krx.co.kr/comm/fileDn/GenerateOTP/generate.cmd'
DOWN_URL = 'http://data.krx.co.kr/comm/fileDn/download_csv/download.cmd'

# Seconds to wait for a server response before giving up; without a timeout
# a stalled connection would block the script indefinitely.
REQUEST_TIMEOUT = 30

# Headers sent with every KRX request (both the OTP and the download call).
_KRX_HEADERS = {
    'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201050201',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
}


def get_latest_biz_day() -> str:
    """Return the most recent business day as a string of digits.

    The date is scraped from a Naver Finance page: every run of digits in the
    date label is concatenated.

    Raises:
        requests.HTTPError: if the page request returns an error status.
        ValueError: if the date label is missing or contains no digits.
    """
    url = 'https://finance.naver.com/sise/sise_deposit.nhn'
    # NOTE(review): the page is requested with POST, as in the original code;
    # confirm whether a plain GET was intended.
    response = rq.post(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    page = BeautifulSoup(response.content, 'lxml')
    date_label = page.select_one(
        'div.subtop_sise_graph2 > ul.subtop_chart_note > li > span.tah'
    )
    if date_label is None:
        raise ValueError(f'business-day label not found in {url}')

    biz_day = ''.join(re.findall('[0-9]+', date_label.text))
    if not biz_day:
        raise ValueError(f'no digits in business-day label {date_label.text!r}')
    return biz_day


def _download_krx_csv(gen_otp_data: dict) -> pd.DataFrame:
    """Run the two-step KRX download and return the CSV as a DataFrame.

    The form in ``gen_otp_data`` is posted to ``GEN_OTP_URL``; the body of
    that response is then posted as ``code`` to ``DOWN_URL``, whose response
    is parsed as an EUC-KR encoded CSV.

    Raises:
        requests.HTTPError: if either request returns an error status.
    """
    otp = rq.post(
        url=GEN_OTP_URL,
        data=gen_otp_data,
        headers=_KRX_HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    otp.raise_for_status()

    download = rq.post(
        url=DOWN_URL,
        data={'code': otp.text},
        headers=_KRX_HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    download.raise_for_status()
    return pd.read_csv(BytesIO(download.content), encoding='EUC-KR')


def get_stock_data(biz_day, mkt_id) -> pd.DataFrame:
    """Fetch the sector-classification table for one market.

    Args:
        biz_day: trading day, sent as the ``trdDd`` form field.
        mkt_id: market id, sent as ``mktId``. 'STK' is KOSPI and 'KSQ' is
            KOSDAQ; the value is upper-cased before sending so lower-case
            ids are accepted too.

    Returns:
        The downloaded CSV as a DataFrame.
    """
    gen_otp_data = {
        'locale': 'ko_KR',
        'mktId': str(mkt_id).upper(),  # STK: KOSPI, KSQ: KOSDAQ
        'trdDd': biz_day,
        'money': '1',
        'csvxls_isNo': 'false',
        'name': 'fileDown',
        'url': 'dbms/MDC/STAT/standard/MDCSTAT03901',
    }
    return _download_krx_csv(gen_otp_data)


def get_ind_stock_data(biz_day) -> pd.DataFrame:
    """Fetch the per-stock indicator table for all markets.

    Args:
        biz_day: trading day, sent as the ``trdDd`` form field.

    Returns:
        The downloaded CSV as a DataFrame.
    """
    gen_otp_data = {
        'locale': 'ko_KR',
        'searchType': '1',
        'mktId': 'ALL',
        'trdDd': biz_day,
        'csvxls_isNo': 'false',
        'name': 'fileDown',
        'url': 'dbms/MDC/STAT/standard/MDCSTAT03501',
    }
    return _download_krx_csv(gen_otp_data)


def get_total_stock_data(biz_day) -> pd.DataFrame:
    """Combine sector and indicator data into one classified ticker table.

    Args:
        biz_day: trading day used for every KRX query and stored in the
            '기준일' column.

    Returns:
        A DataFrame with the columns 종목코드, 종목명, 시장구분, 종가, 시가총액,
        기준일, EPS, 선행EPS, BPS, 주당배당금 and 종목구분. NaN values are
        replaced by None and '기준일' is converted with ``pd.to_datetime``.
    """
    # Sector classification, queried per market (KOSPI, KOSDAQ) and stacked.
    sector_stk = get_stock_data(biz_day, 'STK')
    sector_ksq = get_stock_data(biz_day, 'KSQ')
    krx_sector = pd.concat([sector_stk, sector_ksq]).reset_index(drop=True)
    krx_sector['종목명'] = krx_sector['종목명'].str.strip()
    krx_sector['기준일'] = biz_day

    # Per-stock indicators.
    krx_ind = get_ind_stock_data(biz_day)
    krx_ind['종목명'] = krx_ind['종목명'].str.strip()
    krx_ind['기준일'] = biz_day

    # Names present in only one of the two tables (ship funds, mineral funds,
    # foreign listings, ...). The outer merge keeps them; they are tagged
    # '기타' below unless an earlier rule matches first.
    diff = list(set(krx_sector['종목명']).symmetric_difference(set(krx_ind['종목명'])))
    shared_cols = krx_sector.columns.intersection(krx_ind.columns).tolist()
    kor_ticket = pd.merge(krx_sector, krx_ind, on=shared_cols, how='outer')

    # Separate SPACs, preferred shares, REITs and other securities from
    # common stock. The rules are applied in this order; the first match wins.
    kor_ticket['종목구분'] = np.where(
        kor_ticket['종목명'].str.contains('스팩|제[0-9]+호'), '스팩',
        np.where(
            kor_ticket['종목코드'].str[-1:] != '0', '우선주',
            np.where(
                kor_ticket['종목명'].str.endswith('리츠'), '리츠',
                np.where(kor_ticket['종목명'].isin(diff), '기타', '보통주'),
            ),
        ),
    )
    kor_ticket = kor_ticket.reset_index(drop=True)
    # Strip spaces from the column names so that e.g. '선행 EPS' can be
    # selected as '선행EPS'.
    kor_ticket.columns = kor_ticket.columns.str.replace(' ', '')
    kor_ticket = kor_ticket[['종목코드', '종목명', '시장구분', '종가',
                             '시가총액', '기준일', 'EPS', '선행EPS', 'BPS', '주당배당금', '종목구분']]
    kor_ticket = kor_ticket.replace({np.nan: None})
    kor_ticket['기준일'] = pd.to_datetime(kor_ticket['기준일'])
    return kor_ticket


if __name__ == '__main__':
    latest_biz_day = get_latest_biz_day()
    data = get_total_stock_data(latest_biz_day)
    print(data)