| 
									
										
										
										
											2025-01-27 23:57:05 +09:00
										 |  |  | import logging | 
					
						
							| 
									
										
										
										
											2025-01-30 22:45:04 +09:00
										 |  |  | import os | 
					
						
							| 
									
										
										
										
											2025-01-27 23:57:05 +09:00
										 |  |  | import re | 
					
						
							|  |  |  | from io import BytesIO | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import numpy as np | 
					
						
							|  |  |  | import pandas as pd | 
					
						
							|  |  |  | import requests as rq | 
					
						
							|  |  |  | from bs4 import BeautifulSoup | 
					
						
							| 
									
										
										
										
											2025-01-30 22:45:04 +09:00
										 |  |  | import pymysql | 
					
						
							|  |  |  | from dotenv import load_dotenv | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | load_dotenv() | 
					
						
							| 
									
										
										
										
											2025-01-27 23:57:05 +09:00
										 |  |  | 
 | 
					
						
							|  |  |  | GEN_OTP_URL = 'http://data.krx.co.kr/comm/fileDn/GenerateOTP/generate.cmd' | 
					
						
							|  |  |  | DOWN_URL = 'http://data.krx.co.kr/comm/fileDn/download_csv/download.cmd' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # 최근 영업일을 가져옴 | 
					
						
							|  |  |  | def get_latest_biz_day(): | 
					
						
							|  |  |  |     url = 'https://finance.naver.com/sise/sise_deposit.nhn' | 
					
						
							|  |  |  |     data = rq.post(url) | 
					
						
							|  |  |  |     data_html = BeautifulSoup(data.content, 'lxml') | 
					
						
							|  |  |  |     parse_day = data_html.select_one('div.subtop_sise_graph2 > ul.subtop_chart_note > li > span.tah').text | 
					
						
							|  |  |  |     biz_day = re.findall('[0-9]+', parse_day) | 
					
						
							|  |  |  |     biz_day = ''.join(biz_day) | 
					
						
							|  |  |  |     return biz_day | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # 업종 분류 현황 가져옴 | 
					
						
							|  |  |  | def get_stock_data(biz_day, mkt_id): | 
					
						
							|  |  |  |     # logging.basicConfig(level=logging.DEBUG) | 
					
						
							|  |  |  |     gen_otp_data = { | 
					
						
							|  |  |  |         'locale': 'ko_KR', | 
					
						
							|  |  |  |         'mktId': mkt_id, # STK: 코스피, KSQ: 코스닥 | 
					
						
							|  |  |  |         'trdDd': biz_day, | 
					
						
							|  |  |  |         'money': '1', | 
					
						
							|  |  |  |         'csvxls_isNo': 'false', | 
					
						
							|  |  |  |         'name': 'fileDown', | 
					
						
							|  |  |  |         'url': 'dbms/MDC/STAT/standard/MDCSTAT03901' | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     headers = { | 
					
						
							|  |  |  |         'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201050201', | 
					
						
							|  |  |  |         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36' | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     otp = rq.post(url=GEN_OTP_URL, data=gen_otp_data, headers=headers, verify=False) | 
					
						
							|  |  |  |     # # 요청 디버깅 | 
					
						
							|  |  |  |     # print("===== Request Details =====") | 
					
						
							|  |  |  |     # print(f"Method: {otp.request.method}") | 
					
						
							|  |  |  |     # print(f"URL: {otp.request.url}") | 
					
						
							|  |  |  |     # print(f"Headers: {otp.request.headers}") | 
					
						
							|  |  |  |     # print(f"Body: {otp.request.body}") | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # # 응답 디버깅 | 
					
						
							|  |  |  |     # print("===== Response Details =====") | 
					
						
							|  |  |  |     # print(f"Status Code: {otp.status_code}") | 
					
						
							|  |  |  |     # print(f"Headers: {otp.headers}") | 
					
						
							|  |  |  |     # print(f"Body: {otp.text}") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     down_sector = rq.post(url=DOWN_URL, data={'code': otp.text}, headers=headers) | 
					
						
							|  |  |  |     return pd.read_csv(BytesIO(down_sector.content), encoding='EUC-KR') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # 개별 지표 조회 | 
					
						
							|  |  |  | def get_ind_stock_data(biz_day): | 
					
						
							|  |  |  |     gen_otp_data = { | 
					
						
							|  |  |  |         'locale': 'ko_KR', | 
					
						
							|  |  |  |         'searchType': '1', | 
					
						
							|  |  |  |         'mktId': 'ALL', | 
					
						
							|  |  |  |         'trdDd': biz_day, | 
					
						
							|  |  |  |         'csvxls_isNo': 'false', | 
					
						
							|  |  |  |         'name': 'fileDown', | 
					
						
							|  |  |  |         'url': 'dbms/MDC/STAT/standard/MDCSTAT03501' | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     headers = { | 
					
						
							|  |  |  |         'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201050201', | 
					
						
							|  |  |  |         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36' | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     otp = rq.post(url=GEN_OTP_URL, data=gen_otp_data, headers=headers, verify=False) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     down_ind_sector = rq.post(url=DOWN_URL, data={'code': otp.text}, headers=headers) | 
					
						
							|  |  |  |     return pd.read_csv(BytesIO(down_ind_sector.content), encoding='EUC-KR') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_total_stock_data(biz_day): | 
					
						
							|  |  |  |     # 업종 분류 현황(코스피, 코스닥) | 
					
						
							|  |  |  |     sector_stk = get_stock_data(biz_day, 'stk') | 
					
						
							|  |  |  |     sector_ksq = get_stock_data(biz_day, 'ksq') | 
					
						
							|  |  |  |     # 각각 조회 후 합침 | 
					
						
							|  |  |  |     krx_sector = pd.concat([sector_stk, sector_ksq]).reset_index(drop=True) | 
					
						
							|  |  |  |     krx_sector['종목명'] = krx_sector['종목명'].str.strip() | 
					
						
							|  |  |  |     krx_sector['기준일'] = biz_day | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # 개별 지표 조회 | 
					
						
							|  |  |  |     krx_ind = get_ind_stock_data(biz_day) | 
					
						
							|  |  |  |     krx_ind['종목명'] = krx_ind['종목명'].str.strip() | 
					
						
							|  |  |  |     krx_ind['기준일'] = biz_day | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # 데이터 정리 | 
					
						
							|  |  |  |     # 종목, 개별 중 한군데만 있는 데이터 삭제(선박펀드, 광물펀드, 해외종목 등) | 
					
						
							|  |  |  |     diff = list(set(krx_sector['종목명']).symmetric_difference(set(krx_ind['종목명']))) | 
					
						
							| 
									
										
										
										
											2025-01-30 22:45:04 +09:00
										 |  |  |     kor_ticker = pd.merge(krx_sector, krx_ind, on=krx_sector.columns.intersection(krx_ind.columns).tolist(), how='outer') | 
					
						
							| 
									
										
										
										
											2025-01-27 23:57:05 +09:00
										 |  |  |     # 일반적인 종목과 SPAC, 우선주, 리츠, 기타 주식을 구분 | 
					
						
							| 
									
										
										
										
											2025-01-30 22:45:04 +09:00
										 |  |  |     kor_ticker['종목구분'] = np.where(kor_ticker['종목명'].str.contains('스팩|제[0-9]+호'), '스팩', | 
					
						
							|  |  |  |             np.where(kor_ticker['종목코드'].str[-1:] != '0', '우선주', | 
					
						
							|  |  |  |                      np.where(kor_ticker['종목명'].str.endswith('리츠'), '리츠', | 
					
						
							|  |  |  |                               np.where(kor_ticker['종목명'].isin(diff), '기타', | 
					
						
							| 
									
										
										
										
											2025-01-27 23:57:05 +09:00
										 |  |  |                                        '보통주' | 
					
						
							|  |  |  |             )))) | 
					
						
							| 
									
										
										
										
											2025-01-30 22:45:04 +09:00
										 |  |  |     kor_ticker = kor_ticker.reset_index(drop=True) | 
					
						
							|  |  |  |     kor_ticker.columns = kor_ticker.columns.str.replace(' ', '') | 
					
						
							|  |  |  |     kor_ticker = kor_ticker[['종목코드', '종목명', '시장구분', '종가', | 
					
						
							| 
									
										
										
										
											2025-01-27 23:57:05 +09:00
										 |  |  |                              '시가총액', '기준일', 'EPS', '선행EPS', 'BPS', '주당배당금', '종목구분']] | 
					
						
							| 
									
										
										
										
											2025-01-30 22:45:04 +09:00
										 |  |  |     kor_ticker = kor_ticker.replace({np.nan: None}) | 
					
						
							|  |  |  |     kor_ticker['기준일'] = pd.to_datetime(kor_ticker['기준일']) | 
					
						
							|  |  |  |     return kor_ticker | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def save_db(ticker): | 
					
						
							|  |  |  |     con = pymysql.connect(user=os.getenv('DB_USER'), | 
					
						
							|  |  |  |                           passwd=os.getenv('DB_PW'), | 
					
						
							|  |  |  |                           host=os.getenv('DB_HOST'), | 
					
						
							|  |  |  |                           port=int(os.getenv('DB_PORT')), | 
					
						
							|  |  |  |                           db=os.getenv('DB_DB'), | 
					
						
							|  |  |  |                           charset='utf8') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     mycursor = con.cursor() | 
					
						
							|  |  |  |     query = f"""
 | 
					
						
							|  |  |  |         insert into kor_ticker (종목코드,종목명,시장구분,종가,시가총액,기준일,EPS,선행EPS,BPS,주당배당금,종목구분) | 
					
						
							|  |  |  |         values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) as new | 
					
						
							|  |  |  |         on duplicate key update | 
					
						
							|  |  |  |         종목명=new.종목명,시장구분=new.시장구분,종가=new.종가,시가총액=new.시가총액,EPS=new.EPS,선행EPS=new.선행EPS, | 
					
						
							|  |  |  |         BPS=new.BPS,주당배당금=new.주당배당금,종목구분 = new.종목구분; | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     args = ticker.values.tolist() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     mycursor.executemany(query, args) | 
					
						
							|  |  |  |     con.commit() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     con.close() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-01-27 23:57:05 +09:00
										 |  |  | 
 | 
					
						
							|  |  |  | if __name__ == '__main__': | 
					
						
							|  |  |  |     latest_biz_day = get_latest_biz_day() | 
					
						
							|  |  |  |     data = get_total_stock_data(latest_biz_day) | 
					
						
							| 
									
										
										
										
											2025-01-30 22:45:04 +09:00
										 |  |  |     save_db(data) |