NOTE(review): line breaks were reconstructed from a whitespace-collapsed copy of
this patch. Blank context lines in the example/ and rename hunks are a best
guess and the blob hashes of the two new files are stale; verify before applying.

diff --git a/example/10-financial-statements.py b/example/10-financial-statements.py
index 5b185e0..170aedb 100644
--- a/example/10-financial-statements.py
+++ b/example/10-financial-statements.py
@@ -8,6 +8,7 @@ from tqdm import tqdm
 
 import quantcommon
 
+# Improved version: src/current-financial-statements.py
 # DB 연결
 common = quantcommon.QuantCommon()
 engine = common.create_engine()
diff --git a/example/10-stock-data.py b/example/10-stock-data.py
index 80d9529..398897c 100644
--- a/example/10-stock-data.py
+++ b/example/10-stock-data.py
@@ -10,6 +10,7 @@ from bs4 import BeautifulSoup
 from dotenv import load_dotenv
 
 import quantcommon
+# Improved version: src/current-stock.py
 load_dotenv()
 
 GEN_OTP_URL = 'http://data.krx.co.kr/comm/fileDn/GenerateOTP/generate.cmd'
diff --git a/example/10-stock-price.py b/example/10-stock-price.py
index c5c4d03..050cb74 100644
--- a/example/10-stock-price.py
+++ b/example/10-stock-price.py
@@ -11,6 +11,8 @@ from tqdm import tqdm
 
 import quantcommon
 
+# Improved version: src/current-price.py
+
 # DB 연결
 common = quantcommon.QuantCommon()
 engine = common.create_engine()
diff --git a/streamlit-quant/src/current-financial-statements.py b/streamlit-quant/src/current-financial-statements.py
new file mode 100644
index 0000000..004eb8a
--- /dev/null
+++ b/streamlit-quant/src/current-financial-statements.py
@@ -0,0 +1,154 @@
+"""Crawl FnGuide financial statements and upsert them into the kor_fs table."""
+import re
+import time
+
+import pandas as pd
+import requests as rq
+from bs4 import BeautifulSoup
+from tqdm import tqdm
+
+import quantcommon
+
+# Seconds to wait for the page download before giving up on a ticker.
+REQUEST_TIMEOUT = 30
+
+
+def get_ticker_list():
+    """Return the newest common-stock rows of kor_ticker as a DataFrame.
+
+    The query keeps rows whose 기준일 equals the table maximum and whose
+    종목구분 is '보통주'. The engine is disposed even if the query raises.
+    """
+    engine = quantcommon.QuantCommon().create_engine()
+    try:
+        return pd.read_sql("""
+        select * from kor_ticker
+        where 기준일 = (select max(기준일) from kor_ticker)
+            and 종목구분 = '보통주';
+        """, con=engine)
+    finally:
+        engine.dispose()
+
+
+def clean_fs(df, ticker, frequency):
+    """Reshape one wide statement table into long rows for kor_fs.
+
+    Args:
+        df: Wide table with an account column named '계정' and one column
+            per period labelled 'YYYY/MM'.
+        ticker: Value written to the '종목코드' column of every row.
+        frequency: Value written to the '공시구분' column of every row
+            (the caller in this file passes 'y' or 'q').
+
+    Returns:
+        DataFrame with the columns 계정, 기준일, 값, 종목코드, 공시구분.
+    """
+    # Drop accounts that have no value in any period.
+    df = df[~df.loc[:, ~df.columns.isin(['계정'])].isna().all(axis=1)]
+    df = df.drop_duplicates(['계정'], keep='first')
+    df = pd.melt(df, id_vars='계정', var_name='기준일', value_name='값')
+    # Copy the filtered rows so the column assignments below do not raise
+    # pandas' SettingWithCopyWarning.
+    df = df[~pd.isnull(df['값'])].copy()
+    # Remove the '계산에 참여한 계정 펼치기' text from account names.
+    df['계정'] = df['계정'].replace({'계산에 참여한 계정 펼치기': ''}, regex=True)
+    # Period labels are 'YYYY/MM'; store them as the last day of that month.
+    df['기준일'] = pd.to_datetime(df['기준일'],
+                               format='%Y/%m') + pd.tseries.offsets.MonthEnd()
+    df['종목코드'] = ticker
+    df['공시구분'] = frequency
+
+    return df
+
+
+def _download_fs(ticker):
+    """Download one ticker's statement page and return its cleaned rows.
+
+    Returns the annual ('y') and quarterly ('q') rows produced by clean_fs,
+    concatenated into one DataFrame.
+    """
+    url = f'https://comp.fnguide.com/SVO2/ASP/SVD_Finance.asp?pGB=1&gicode=A{ticker}'
+    data = pd.read_html(url, displayed_only=False)
+
+    # Annual data: tables 0, 2 and 4, minus the '전년동기' columns of table 0.
+    data_fs_y = pd.concat([
+        data[0].iloc[:, ~data[0].columns.str.contains('전년동기')], data[2],
+        data[4]
+    ])
+    data_fs_y = data_fs_y.rename(columns={data_fs_y.columns[0]: "계정"})
+
+    # Read the fiscal-year-end digits from the second 'div.corp_group1 > h2'.
+    page_data = rq.get(url, timeout=REQUEST_TIMEOUT)
+    page_data_html = BeautifulSoup(page_data.content, 'html.parser')
+    fiscal_data = page_data_html.select('div.corp_group1 > h2')
+    fiscal_data_text = re.findall('[0-9]+', fiscal_data[1].text)
+
+    # Keep only the columns whose last two characters match those digits.
+    data_fs_y = data_fs_y.loc[:, (data_fs_y.columns == '계정') | (
+        data_fs_y.columns.str[-2:].isin(fiscal_data_text))]
+    data_fs_y_clean = clean_fs(data_fs_y, ticker, 'y')
+
+    # Quarterly data: tables 1, 3 and 5.
+    data_fs_q = pd.concat([
+        data[1].iloc[:, ~data[1].columns.str.contains('전년동기')], data[3],
+        data[5]
+    ])
+    data_fs_q = data_fs_q.rename(columns={data_fs_q.columns[0]: "계정"})
+    data_fs_q_clean = clean_fs(data_fs_q, ticker, 'q')
+
+    return pd.concat([data_fs_y_clean, data_fs_q_clean])
+
+
+def process_for_fs(ticker_list):
+    """Crawl every ticker's financial statements and upsert them into kor_fs.
+
+    A failing ticker is printed, recorded and skipped so one bad page does
+    not stop the run. Ctrl-C still aborts because only Exception is caught.
+
+    Args:
+        ticker_list: DataFrame with a '종목코드' column, such as the one
+            returned by get_ticker_list().
+
+    Returns:
+        list: Tickers that could not be downloaded, parsed or saved.
+    """
+    common = quantcommon.QuantCommon()
+    # NOTE(review): the engine is only created and disposed in this function;
+    # confirm whether connect() depends on it before removing it.
+    engine = common.create_engine()
+    con = common.connect()
+    mycursor = con.cursor()
+
+    # Upsert: a row whose key already exists only gets its 값 refreshed.
+    query = """
+        insert into kor_fs (계정, 기준일, 값, 종목코드, 공시구분)
+        values (%s,%s,%s,%s,%s) as new
+        on duplicate key update
+        값=new.값
+    """
+
+    error_list = []
+
+    try:
+        for ticker in tqdm(ticker_list['종목코드']):
+            try:
+                args = _download_fs(ticker).values.tolist()
+                mycursor.executemany(query, args)
+                con.commit()
+            except Exception:
+                print(ticker)
+                error_list.append(ticker)
+
+            # Throttle: wait two seconds before the next ticker.
+            time.sleep(2)
+    finally:
+        # Release the DB resources even when the loop is interrupted.
+        engine.dispose()
+        con.close()
+
+    return error_list
+
+
+if __name__ == '__main__':
+    tickers = get_ticker_list()
+    process_for_fs(tickers)
diff --git a/streamlit-quant/src/current-price.py b/streamlit-quant/src/current-price.py
new file mode 100644
index 0000000..9b3ed8e
--- /dev/null
+++ b/streamlit-quant/src/current-price.py
@@ -0,0 +1,112 @@
+"""Crawl daily prices from Naver's chart endpoint and upsert them into kor_price."""
+import time
+from datetime import date
+from io import BytesIO
+
+import pandas as pd
+import requests as rq
+from dateutil.relativedelta import relativedelta
+from tqdm import tqdm
+
+import quantcommon
+
+# Seconds to wait for one download before giving up on a ticker.
+REQUEST_TIMEOUT = 30
+
+
+def get_ticker_list():
+    """Return the newest common-stock rows of kor_ticker as a DataFrame.
+
+    The query keeps rows whose 기준일 equals the table maximum and whose
+    종목구분 is '보통주'. The engine is disposed even if the query raises.
+    """
+    engine = quantcommon.QuantCommon().create_engine()
+    try:
+        return pd.read_sql("""
+        select * from kor_ticker
+        where 기준일 = (select max(기준일) from kor_ticker)
+            and 종목구분 = '보통주';
+        """, con=engine)
+    finally:
+        engine.dispose()
+
+
+def process_for_price(ticker_list, fr='20250125', to=None):
+    """Download daily prices for every ticker and upsert them into kor_price.
+
+    A failing ticker is printed, recorded and skipped so one bad response
+    does not stop the run. Ctrl-C still aborts because only Exception is caught.
+
+    Args:
+        ticker_list: DataFrame with a '종목코드' column, such as the one
+            returned by get_ticker_list().
+        fr: First day to request, as 'YYYYMMDD'. For a five-year backfill
+            pass (date.today() + relativedelta(years=-5)).strftime("%Y%m%d").
+        to: Last day to request, as 'YYYYMMDD'. Defaults to today.
+
+    Returns:
+        list: Tickers that could not be downloaded, parsed or saved.
+    """
+    # Upsert: a row whose key already exists gets its price columns refreshed.
+    query = """
+        insert into kor_price (날짜, 시가, 고가, 저가, 종가, 거래량, 종목코드)
+        values (%s,%s,%s,%s,%s,%s,%s) as new
+        on duplicate key update
+        시가 = new.시가, 고가 = new.고가, 저가 = new.저가,
+        종가 = new.종가, 거래량 = new.거래량;
+    """
+
+    if to is None:
+        to = date.today().strftime("%Y%m%d")
+
+    common = quantcommon.QuantCommon()
+    # NOTE(review): the engine is only created and disposed in this function;
+    # confirm whether connect() depends on it before removing it.
+    engine = common.create_engine()
+    con = common.connect()
+    mycursor = con.cursor()
+
+    error_list = []
+
+    try:
+        for ticker in tqdm(ticker_list['종목코드']):
+            # Built from adjacent literals so no newline or indentation
+            # leaks into the query string.
+            url = (
+                'https://fchart.stock.naver.com/siseJson.nhn'
+                f'?symbol={ticker}&requestType=1'
+                f'&startTime={fr}&endTime={to}&timeframe=day'
+            )
+
+            try:
+                data = rq.get(url, timeout=REQUEST_TIMEOUT).content
+                data_price = pd.read_csv(BytesIO(data))
+
+                # Keep the first six columns and drop incomplete rows.
+                price = data_price.iloc[:, 0:6]
+                price.columns = ['날짜', '시가', '고가', '저가', '종가', '거래량']
+                price = price.dropna()
+                # Keep only the first run of digits in the date cell.
+                price['날짜'] = price['날짜'].str.extract(r"(\d+)")
+                price['날짜'] = pd.to_datetime(price['날짜'])
+                price['종목코드'] = ticker
+
+                mycursor.executemany(query, price.values.tolist())
+                con.commit()
+            except Exception:
+                print(ticker)
+                error_list.append(ticker)
+
+            # Throttle: wait two seconds before the next ticker.
+            time.sleep(2)
+    finally:
+        # Release the DB resources even when the loop is interrupted.
+        engine.dispose()
+        con.close()
+
+    return error_list
+
+
+if __name__ == '__main__':
+    ticker_list = get_ticker_list()
+    process_for_price(ticker_list)
diff --git a/streamlit-quant/src/current_stock.py b/streamlit-quant/src/current-stock.py
similarity index 99%
rename from streamlit-quant/src/current_stock.py
rename to streamlit-quant/src/current-stock.py
index 80d9529..0b06390 100644
--- a/streamlit-quant/src/current_stock.py
+++ b/streamlit-quant/src/current-stock.py
@@ -187,6 +187,7 @@ def save_sector(sector):
 
 
 if __name__ == '__main__':
+    # Refresh the sector and ticker data
     latest_biz_day = get_latest_biz_day()
     process_for_total_stock(latest_biz_day)
     process_for_wics(latest_biz_day)