193 lines
		
	
	
		
			6.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			193 lines
		
	
	
		
			6.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import re
 | |
| import time
 | |
| from io import BytesIO
 | |
| 
 | |
| import numpy as np
 | |
| import pandas as pd
 | |
| import requests as rq
 | |
| from tqdm import tqdm
 | |
| from bs4 import BeautifulSoup
 | |
| from dotenv import load_dotenv
 | |
| import quantcommon
 | |
| 
 | |
# Load settings from a local .env file into the process environment.
# NOTE(review): presumably these are read by quantcommon for the DB
# connection - confirm against that module.
load_dotenv()

# KRX data portal endpoints, used as a pair: the first issues a one-time
# code for a download request, the second returns the CSV for that code.
GEN_OTP_URL = 'http://data.krx.co.kr/comm/fileDn/GenerateOTP/generate.cmd'
DOWN_URL = 'http://data.krx.co.kr/comm/fileDn/download_csv/download.cmd'
 | |
| 
 | |
def get_latest_biz_day(timeout=10):
    """Return the latest business day as a digits-only string.

    Fetches the Naver Finance deposit page, reads the date label shown in
    the chart note and concatenates every run of digits found in it.

    Args:
        timeout: Seconds to wait for the HTTP response. Without a timeout
            ``requests`` may block indefinitely on a stalled connection.

    Returns:
        The digits of the date label joined together.
        NOTE(review): presumably ``YYYYMMDD`` - confirm against the page.

    Raises:
        requests.RequestException: If the request fails, times out or
            returns an HTTP error status.
        ValueError: If the date label is missing from the page or holds
            no digits (e.g. the page layout changed).
    """
    url = 'https://finance.naver.com/sise/sise_deposit.nhn'
    # The HTTP method (POST) is kept exactly as the original code used it.
    response = rq.post(url, timeout=timeout)
    response.raise_for_status()

    page = BeautifulSoup(response.content, 'lxml')
    date_label = page.select_one(
        'div.subtop_sise_graph2 > ul.subtop_chart_note > li > span.tah'
    )
    if date_label is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            'business-day label not found on the Naver Finance deposit page'
        )

    digit_runs = re.findall(r'[0-9]+', date_label.text)
    if not digit_runs:
        raise ValueError(
            f'no digits found in business-day label: {date_label.text!r}'
        )
    return ''.join(digit_runs)
 | |
| 
 | |
| 
 | |
def get_stock_data(biz_day, mkt_id, timeout=30):
    """Download the KRX sector classification table for one market.

    The download is a two-step exchange: a form is posted to
    ``GEN_OTP_URL`` to obtain a one-time code, then that code is posted to
    ``DOWN_URL`` which answers with the CSV payload.

    Args:
        biz_day: Trading day sent as the ``trdDd`` form field.
        mkt_id: Market identifier sent as ``mktId``
            (``'STK'``: KOSPI, ``'KSQ'``: KOSDAQ).
        timeout: Seconds to wait for each of the two HTTP responses.

    Returns:
        A ``pandas.DataFrame`` parsed from the EUC-KR encoded CSV response.

    Raises:
        requests.RequestException: If either request fails, times out or
            returns an HTTP error status.
    """
    otp_form = {
        'locale': 'ko_KR',
        'mktId': mkt_id,  # STK: KOSPI, KSQ: KOSDAQ
        'trdDd': biz_day,
        'money': '1',
        'csvxls_isNo': 'false',
        'name': 'fileDown',
        'url': 'dbms/MDC/STAT/standard/MDCSTAT03901'
    }
    headers = {
        'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201050201',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
    }

    # NOTE(review): verify=False is kept from the original. The URL is
    # plain http, so it presumably only matters if the server redirects to
    # https - confirm whether it is still needed.
    otp = rq.post(url=GEN_OTP_URL, data=otp_form, headers=headers,
                  verify=False, timeout=timeout)
    # Stop here on an HTTP error rather than posting an error page as the code.
    otp.raise_for_status()

    download = rq.post(url=DOWN_URL, data={'code': otp.text},
                       headers=headers, timeout=timeout)
    download.raise_for_status()
    return pd.read_csv(BytesIO(download.content), encoding='EUC-KR')
 | |
| 
 | |
| 
 | |
def get_ind_stock_data(biz_day, timeout=30):
    """Download the KRX per-stock indicator table for all markets.

    Uses the same two-step exchange as the sector download: a form is
    posted to ``GEN_OTP_URL`` to obtain a one-time code, then that code is
    posted to ``DOWN_URL`` which answers with the CSV payload.

    Args:
        biz_day: Trading day sent as the ``trdDd`` form field.
        timeout: Seconds to wait for each of the two HTTP responses.

    Returns:
        A ``pandas.DataFrame`` parsed from the EUC-KR encoded CSV response.

    Raises:
        requests.RequestException: If either request fails, times out or
            returns an HTTP error status.
    """
    otp_form = {
        'locale': 'ko_KR',
        'searchType': '1',
        'mktId': 'ALL',
        'trdDd': biz_day,
        'csvxls_isNo': 'false',
        'name': 'fileDown',
        'url': 'dbms/MDC/STAT/standard/MDCSTAT03501'
    }
    headers = {
        'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201050201',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
    }

    # NOTE(review): verify=False is kept from the original. The URL is
    # plain http, so it presumably only matters if the server redirects to
    # https - confirm whether it is still needed.
    otp = rq.post(url=GEN_OTP_URL, data=otp_form, headers=headers,
                  verify=False, timeout=timeout)
    # Stop here on an HTTP error rather than posting an error page as the code.
    otp.raise_for_status()

    download = rq.post(url=DOWN_URL, data={'code': otp.text},
                       headers=headers, timeout=timeout)
    download.raise_for_status()
    return pd.read_csv(BytesIO(download.content), encoding='EUC-KR')
 | |
| 
 | |
| 
 | |
def process_for_total_stock(biz_day):
    """Build the combined ticker table for ``biz_day`` and store it.

    Downloads the sector classification for both markets and the per-stock
    indicator table, outer-merges them on their shared columns, labels each
    row with a stock category and hands the selected columns to
    ``save_ticker``.

    Args:
        biz_day: Trading day, forwarded to the download helpers and written
            into the ``기준일`` column.
    """
    # Sector classification, fetched per market (STK: KOSPI, KSQ: KOSDAQ)
    # and stacked into one frame.
    market_frames = [get_stock_data(biz_day, market) for market in ('STK', 'KSQ')]
    krx_sector = pd.concat(market_frames).reset_index(drop=True)
    krx_sector['종목명'] = krx_sector['종목명'].str.strip()
    krx_sector['기준일'] = biz_day

    # Per-stock indicators.
    krx_ind = get_ind_stock_data(biz_day)
    krx_ind['종목명'] = krx_ind['종목명'].str.strip()
    krx_ind['기준일'] = biz_day

    # Names present in only one of the two tables (per the original note:
    # ship funds, mineral funds, foreign listings, etc.). They are not
    # dropped here; they are labelled '기타' below.
    only_in_one = list(set(krx_sector['종목명']) ^ set(krx_ind['종목명']))

    shared_columns = krx_sector.columns.intersection(krx_ind.columns).tolist()
    merged = krx_sector.merge(krx_ind, on=shared_columns, how='outer')

    # Category labels are layered from lowest to highest priority, so each
    # later rule overrides the earlier ones where its condition holds:
    # SPAC > preferred share > REIT > other > common share.
    names = merged['종목명']
    codes = merged['종목코드']
    category = np.where(names.isin(only_in_one), '기타', '보통주')
    category = np.where(names.str.endswith('리츠'), '리츠', category)
    category = np.where(codes.str[-1:] != '0', '우선주', category)
    category = np.where(names.str.contains('스팩|제[0-9]+호'), '스팩', category)
    merged['종목구분'] = category

    merged = merged.reset_index(drop=True)
    # Column headers may contain spaces; remove them before selecting.
    merged.columns = merged.columns.str.replace(' ', '')

    wanted_columns = ['종목코드', '종목명', '시장구분', '종가',
                      '시가총액', '기준일', 'EPS', '선행EPS', 'BPS', '주당배당금', '종목구분']
    result = merged[wanted_columns]
    # NaN becomes None before the rows are passed on for insertion.
    result = result.replace({np.nan: None})
    result['기준일'] = pd.to_datetime(result['기준일'])

    save_ticker(result)
 | |
| 
 | |
| 
 | |
def save_ticker(ticker):
    """Upsert ticker rows into the ``kor_ticker`` table.

    Each row of ``ticker`` is inserted; rows that collide with an existing
    unique key have their non-key columns overwritten
    (``on duplicate key update``).

    Args:
        ticker: DataFrame whose columns are, in order: 종목코드, 종목명,
            시장구분, 종가, 시가총액, 기준일, EPS, 선행EPS, BPS, 주당배당금,
            종목구분 - the order of the placeholders in the statement.

    Raises:
        Whatever the database driver raises on failure. The cursor and
        connection are closed in that case too, and nothing is committed.
    """
    # NOTE(review): the `values (...) as new` row alias looks like
    # MySQL 8.0.19+ syntax - confirm the server version.
    query = """
        insert into kor_ticker (종목코드,종목명,시장구분,종가,시가총액,기준일,EPS,선행EPS,BPS,주당배당금,종목구분)
        values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) as new
        on duplicate key update
        종목명=new.종목명,시장구분=new.시장구분,종가=new.종가,시가총액=new.시가총액,EPS=new.EPS,선행EPS=new.선행EPS,
        BPS=new.BPS,주당배당금=new.주당배당금,종목구분 = new.종목구분;
    """
    rows = ticker.values.tolist()

    # NOTE(review): assumes connect() returns a DB-API style connection
    # (cursor / commit / close) - confirm against quantcommon.
    con = quantcommon.QuantCommon().connect()
    try:
        cursor = con.cursor()
        try:
            cursor.executemany(query, rows)
            con.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the insert fails.
        con.close()
 | |
| 
 | |
| 
 | |
def process_for_wics(biz_day, timeout=30):
    """Fetch WICS sector constituents for ``biz_day`` and store them.

    Requests the constituent list of every sector code from the WiseIndex
    endpoint, stacks the results, keeps the identifying columns, adds the
    reference date and hands the frame to ``save_sector``.

    Args:
        biz_day: Reference date, sent as the ``dt`` query parameter and
            written into the ``기준일`` column.
        timeout: Seconds to wait for each HTTP response.

    Raises:
        requests.RequestException: If a request fails, times out or
            returns an HTTP error status.
    """
    sector_code = [
        'G25', 'G35', 'G50', 'G40', 'G10', 'G20', 'G55', 'G30', 'G15', 'G45'
    ]

    data_sector = []

    # Download and normalise the constituent list of every sector.
    for position, sec_cd in enumerate(tqdm(sector_code)):
        if position:
            # Pause between consecutive requests only; the original also
            # slept after the final one, which served no purpose.
            time.sleep(2)

        # The endpoint spelling ("Componets") is kept as in the original URL.
        url = f'''http://www.wiseindex.com/Index/GetIndexComponets?ceil_yn=0&dt={biz_day}&sec_cd={sec_cd}'''
        response = rq.get(url, timeout=timeout)
        response.raise_for_status()
        data_sector.append(pd.json_normalize(response.json()['list']))

    kor_sector = pd.concat(data_sector, axis=0)
    # .copy() gives an independent frame, so adding a column below does not
    # trigger pandas' SettingWithCopyWarning on the column subset.
    kor_sector = kor_sector[['IDX_CD', 'CMP_CD', 'CMP_KOR', 'SEC_NM_KOR']].copy()
    kor_sector['기준일'] = biz_day
    kor_sector['기준일'] = pd.to_datetime(kor_sector['기준일'])
    save_sector(kor_sector)
 | |
| 
 | |
| 
 | |
def save_sector(sector):
    """Upsert sector rows into the ``kor_sector`` table.

    Each row of ``sector`` is inserted; rows that collide with an existing
    unique key have ``IDX_CD``, ``CMP_KOR`` and ``SEC_NM_KOR`` overwritten
    (``on duplicate key update``).

    Args:
        sector: DataFrame whose columns are, in order: IDX_CD, CMP_CD,
            CMP_KOR, SEC_NM_KOR, 기준일 - the order of the placeholders in
            the statement.

    Raises:
        Whatever the database driver raises on failure. The cursor and
        connection are closed in that case too, and nothing is committed.
    """
    # NOTE(review): the `values (...) as new` row alias looks like
    # MySQL 8.0.19+ syntax - confirm the server version.
    query = """
        insert into kor_sector (IDX_CD, CMP_CD, CMP_KOR, SEC_NM_KOR, 기준일)
        values (%s,%s,%s,%s,%s) as new
        on duplicate key update
        IDX_CD = new.IDX_CD, CMP_KOR = new.CMP_KOR, SEC_NM_KOR = new.SEC_NM_KOR
    """
    rows = sector.values.tolist()

    # NOTE(review): assumes connect() returns a DB-API style connection
    # (cursor / commit / close) - confirm against quantcommon.
    con = quantcommon.QuantCommon().connect()
    try:
        cursor = con.cursor()
        try:
            cursor.executemany(query, rows)
            con.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the insert fails.
        con.close()
 | |
| 
 | |
| 
 | |
if __name__ == '__main__':
    # Resolve the reference date once so both jobs load the same day,
    # then run the ticker job followed by the WICS sector job.
    target_day = get_latest_biz_day()
    for job in (process_for_total_stock, process_for_wics):
        job(target_day)
 |