diff --git a/example/10-stock-data.py b/example/10-stock-data.py
index 4acc0cf..27c114a 100644
--- a/example/10-stock-data.py
+++ b/example/10-stock-data.py
@@ -1,11 +1,13 @@
 import logging
 import os
 import re
+import time
 from io import BytesIO
 
 import numpy as np
 import pandas as pd
 import requests as rq
+from tqdm import tqdm
 from bs4 import BeautifulSoup
 import pymysql
 from dotenv import load_dotenv
@@ -83,7 +85,7 @@ def get_ind_stock_data(biz_day):
     return pd.read_csv(BytesIO(down_ind_sector.content), encoding='EUC-KR')
 
 
-def get_total_stock_data(biz_day):
+def process_for_total_stock(biz_day):
     # 업종 분류 현황(코스피, 코스닥)
     sector_stk = get_stock_data(biz_day, 'stk')
     sector_ksq = get_stock_data(biz_day, 'ksq')
@@ -114,10 +116,11 @@ def get_total_stock_data(biz_day):
                              '시가총액', '기준일', 'EPS', '선행EPS', 'BPS', '주당배당금', '종목구분']]
     kor_ticker = kor_ticker.replace({np.nan: None})
     kor_ticker['기준일'] = pd.to_datetime(kor_ticker['기준일'])
-    return kor_ticker
+
+    save_ticker(kor_ticker)
 
 
-def save_db(ticker):
+def save_ticker(ticker):
     con = pymysql.connect(user=os.getenv('DB_USER'),
                           passwd=os.getenv('DB_PW'),
                           host=os.getenv('DB_HOST'),
@@ -142,7 +145,80 @@ def save_db(ticker):
     con.close()
 
 
+# Crawl sector information based on the WICS classification
+def process_for_wics(biz_day):
+    """Download the WICS sector constituents for biz_day and store them.
+
+    Requests the wiseindex component list once per sector code, keeps the
+    IDX_CD, CMP_CD, CMP_KOR and SEC_NM_KOR columns, stamps every row with
+    biz_day in the '기준일' column and passes the frame to save_sector().
+
+    :param biz_day: business day sent as the ``dt`` query value and parsed
+        with ``pd.to_datetime`` for the '기준일' column.
+    :raises requests.HTTPError: if a sector request returns an error status.
+    """
+    sector_code = [
+        'G25', 'G35', 'G50', 'G40', 'G10', 'G20', 'G55', 'G30', 'G15', 'G45'
+    ]
+
+    data_sector = []
+
+    # Fetch and process the data for every sector
+    for i in tqdm(sector_code):
+        url = f'''http://www.wiseindex.com/Index/GetIndexComponets?ceil_yn=0&dt={biz_day}&sec_cd={i}'''
+        # A timeout keeps one stalled request from hanging the whole run.
+        response = rq.get(url, timeout=30)
+        # Fail with the HTTP status instead of a JSON decoding error.
+        response.raise_for_status()
+        data_pd = pd.json_normalize(response.json()['list'])
+
+        data_sector.append(data_pd)
+
+        time.sleep(2)
+
+    kor_sector = pd.concat(data_sector, axis=0)
+    # copy() so the column assignments below write to an independent frame.
+    kor_sector = kor_sector[['IDX_CD', 'CMP_CD', 'CMP_KOR', 'SEC_NM_KOR']].copy()
+    kor_sector['기준일'] = biz_day
+    kor_sector['기준일'] = pd.to_datetime(kor_sector['기준일'])
+    save_sector(kor_sector)
+
+
+def save_sector(sector):
+    """Upsert the rows of sector into the kor_sector table.
+
+    Connection settings are read from the DB_USER, DB_PW, DB_HOST, DB_PORT
+    and DB_DB environment variables.
+
+    :param sector: DataFrame whose columns are ordered as IDX_CD, CMP_CD,
+        CMP_KOR, SEC_NM_KOR, 기준일, matching the insert placeholders.
+    """
+    con = pymysql.connect(user=os.getenv('DB_USER'),
+                          passwd=os.getenv('DB_PW'),
+                          host=os.getenv('DB_HOST'),
+                          port=int(os.getenv('DB_PORT')),
+                          db=os.getenv('DB_DB'),
+                          charset='utf8')
+
+    query = """
+    insert into kor_sector (IDX_CD, CMP_CD, CMP_KOR, SEC_NM_KOR, 기준일)
+    values (%s,%s,%s,%s,%s) as new
+    on duplicate key update
+    IDX_CD = new.IDX_CD, CMP_KOR = new.CMP_KOR, SEC_NM_KOR = new.SEC_NM_KOR
+    """
+
+    args = sector.values.tolist()
+
+    # Close the connection even when the insert fails.
+    try:
+        with con.cursor() as mycursor:
+            mycursor.executemany(query, args)
+        con.commit()
+    finally:
+        con.close()
+
+
 if __name__ == '__main__':
     latest_biz_day = get_latest_biz_day()
-    data = get_total_stock_data(latest_biz_day)
-    save_db(data)
+    process_for_total_stock(latest_biz_day)
+    process_for_wics(latest_biz_day)