"""Download KRX stock listings for the latest business day and upsert them into the ``kor_ticker`` table."""
import logging
import os
import re
from io import BytesIO

import numpy as np
import pandas as pd
import pymysql
import requests as rq
from bs4 import BeautifulSoup
from dotenv import load_dotenv

load_dotenv()

GEN_OTP_URL = 'http://data.krx.co.kr/comm/fileDn/GenerateOTP/generate.cmd'
DOWN_URL = 'http://data.krx.co.kr/comm/fileDn/download_csv/download.cmd'

# Headers sent with every KRX request (OTP generation and CSV download).
KRX_HEADERS = {
    'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201050201',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
}

# Seconds to wait for a server to connect/respond; without a timeout
# ``requests`` waits indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30


def get_latest_biz_day() -> str:
    """Return the latest business day scraped from Naver Finance.

    The date text is read from the ``span.tah`` element of the deposit page
    and every run of digits in it is concatenated into one string.

    Raises:
        requests.HTTPError: if the page responds with an error status.
        AttributeError: if the expected element is not present in the page
            (``select_one`` returns ``None`` in that case).
    """
    url = 'https://finance.naver.com/sise/sise_deposit.nhn'
    response = rq.post(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    data_html = BeautifulSoup(response.content, 'lxml')
    parse_day = data_html.select_one(
        'div.subtop_sise_graph2 > ul.subtop_chart_note > li > span.tah'
    ).text
    return ''.join(re.findall(r'[0-9]+', parse_day))


def _download_krx_csv(gen_otp_data: dict) -> pd.DataFrame:
    """Request a KRX download OTP for *gen_otp_data* and return the resulting CSV as a DataFrame."""
    # NOTE(review): verify=False has no effect on a plain-http URL; it is kept
    # from the original in case the request is redirected to https — confirm.
    otp = rq.post(
        url=GEN_OTP_URL,
        data=gen_otp_data,
        headers=KRX_HEADERS,
        verify=False,
        timeout=REQUEST_TIMEOUT,
    )
    otp.raise_for_status()
    # The OTP text returned above is posted back as the download code.
    download = rq.post(
        url=DOWN_URL,
        data={'code': otp.text},
        headers=KRX_HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    download.raise_for_status()
    return pd.read_csv(BytesIO(download.content), encoding='EUC-KR')


def get_stock_data(biz_day, mkt_id) -> pd.DataFrame:
    """Fetch the sector classification listing for one market.

    Args:
        biz_day: trading day, sent to KRX as ``trdDd``.
        mkt_id: market identifier, sent to KRX as ``mktId``
            (STK: KOSPI, KSQ: KOSDAQ).

    Returns:
        The downloaded CSV parsed as a DataFrame (decoded as EUC-KR).

    Raises:
        requests.HTTPError: if either KRX request responds with an error status.
    """
    gen_otp_data = {
        'locale': 'ko_KR',
        'mktId': mkt_id,  # STK: KOSPI, KSQ: KOSDAQ
        'trdDd': biz_day,
        'money': '1',
        'csvxls_isNo': 'false',
        'name': 'fileDown',
        'url': 'dbms/MDC/STAT/standard/MDCSTAT03901'
    }
    return _download_krx_csv(gen_otp_data)


def get_ind_stock_data(biz_day) -> pd.DataFrame:
    """Fetch the per-stock indicator listing for all markets.

    Args:
        biz_day: trading day, sent to KRX as ``trdDd``.

    Returns:
        The downloaded CSV parsed as a DataFrame (decoded as EUC-KR).

    Raises:
        requests.HTTPError: if either KRX request responds with an error status.
    """
    gen_otp_data = {
        'locale': 'ko_KR',
        'searchType': '1',
        'mktId': 'ALL',
        'trdDd': biz_day,
        'csvxls_isNo': 'false',
        'name': 'fileDown',
        'url': 'dbms/MDC/STAT/standard/MDCSTAT03501'
    }
    return _download_krx_csv(gen_otp_data)


def get_total_stock_data(biz_day) -> pd.DataFrame:
    """Build the combined ticker table for *biz_day*.

    Downloads the sector listings of both markets and the indicator listing,
    outer-merges them on their shared columns, labels each row in a new
    '종목구분' column and keeps only the columns written to the database.

    Args:
        biz_day: trading day string; also stored in the '기준일' column and
            converted with ``pd.to_datetime`` before returning.

    Returns:
        DataFrame with the columns 종목코드, 종목명, 시장구분, 종가, 시가총액,
        기준일, EPS, 선행EPS, BPS, 주당배당금, 종목구분; NaN is replaced by None.
    """
    # Sector classification, queried per market and then concatenated.
    # NOTE(review): the ids are passed in lowercase while the request comment
    # in get_stock_data documents 'STK'/'KSQ' — confirm KRX accepts both.
    sector_stk = get_stock_data(biz_day, 'stk')
    sector_ksq = get_stock_data(biz_day, 'ksq')
    krx_sector = pd.concat([sector_stk, sector_ksq]).reset_index(drop=True)
    krx_sector['종목명'] = krx_sector['종목명'].str.strip()
    krx_sector['기준일'] = biz_day

    # Per-stock indicators.
    krx_ind = get_ind_stock_data(biz_day)
    krx_ind['종목명'] = krx_ind['종목명'].str.strip()
    krx_ind['기준일'] = biz_day

    # Names present in only one of the two listings (per the original note:
    # ship funds, mineral funds, foreign stocks, ...). They are labelled
    # '기타' below rather than removed.
    diff = list(set(krx_sector['종목명']).symmetric_difference(set(krx_ind['종목명'])))

    kor_ticker = pd.merge(
        krx_sector,
        krx_ind,
        on=krx_sector.columns.intersection(krx_ind.columns).tolist(),
        how='outer',
    )

    # Classify rows; the first matching rule wins:
    #   name contains '스팩' or '제<digits>호' -> '스팩'
    #   code does not end in '0'              -> '우선주'
    #   name ends with '리츠'                  -> '리츠'
    #   name is in `diff`                     -> '기타'
    #   otherwise                             -> '보통주'
    kor_ticker['종목구분'] = np.where(
        kor_ticker['종목명'].str.contains(r'스팩|제[0-9]+호'), '스팩',
        np.where(
            kor_ticker['종목코드'].str[-1:] != '0', '우선주',
            np.where(
                kor_ticker['종목명'].str.endswith('리츠'), '리츠',
                np.where(kor_ticker['종목명'].isin(diff), '기타', '보통주')
            )
        )
    )

    kor_ticker = kor_ticker.reset_index(drop=True)
    # Strip spaces from column names so they match the selection below.
    kor_ticker.columns = kor_ticker.columns.str.replace(' ', '')
    kor_ticker = kor_ticker[['종목코드', '종목명', '시장구분', '종가', '시가총액', '기준일',
                             'EPS', '선행EPS', 'BPS', '주당배당금', '종목구분']]
    # None instead of NaN so missing values are sent to the database as NULL.
    kor_ticker = kor_ticker.replace({np.nan: None})
    kor_ticker['기준일'] = pd.to_datetime(kor_ticker['기준일'])

    return kor_ticker


def save_db(ticker) -> None:
    """Upsert every row of *ticker* into the ``kor_ticker`` table.

    Connection settings are read from the environment variables DB_USER,
    DB_PW, DB_HOST, DB_PORT and DB_DB. DB_PORT falls back to 3306 when unset.

    Args:
        ticker: DataFrame whose column order matches the insert column list
            (as produced by ``get_total_stock_data``).
    """
    # Kept character-for-character identical to the original statement; only
    # the redundant f-string prefix was dropped (there were no placeholders).
    query = (
        " insert into kor_ticker"
        " (종목코드,종목명,시장구분,종가,시가총액,기준일,EPS,선행EPS,BPS,주당배당금,종목구분)"
        " values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) as new"
        " on duplicate key update"
        " 종목명=new.종목명,시장구분=new.시장구분,종가=new.종가,시가총액=new.시가총액,EPS=new.EPS,선행EPS=new.선행EPS,"
        " BPS=new.BPS,주당배당금=new.주당배당금,종목구분 = new.종목구분; "
    )
    args = ticker.values.tolist()

    con = pymysql.connect(
        user=os.getenv('DB_USER'),
        passwd=os.getenv('DB_PW'),
        host=os.getenv('DB_HOST'),
        port=int(os.getenv('DB_PORT', '3306')),
        db=os.getenv('DB_DB'),
        charset='utf8',
    )
    try:
        with con.cursor() as cursor:
            cursor.executemany(query, args)
        con.commit()
    finally:
        # Always release the connection, even when the insert or commit fails.
        con.close()


def main() -> None:
    """Fetch the latest business day's ticker data and store it in the database."""
    latest_biz_day = get_latest_biz_day()
    data = get_total_stock_data(latest_biz_day)
    save_db(data)


if __name__ == '__main__':
    main()