From 18e085fd3c677d889aa9f6fcb856c49b7d3d1b6f Mon Sep 17 00:00:00 2001
From: ayuriel
Date: Fri, 31 Jan 2025 17:19:55 +0900
Subject: [PATCH] =?UTF-8?q?chore:=20=ED=85=8C=EC=8A=A4=ED=8A=B8=EC=9A=A9?=
 =?UTF-8?q?=20=EC=A0=9C=EB=AC=B4=EC=9E=AC=ED=91=9C=201=EA=B0=9C=20?=
 =?UTF-8?q?=EC=B2=98=EB=A6=AC=20=EC=B6=94=EA=B0=80,=20=EC=A0=9C=EB=AC=B4?=
 =?UTF-8?q?=EC=9E=AC=ED=91=9C=20=EC=88=98=EC=A7=91=20=EC=98=A4=EB=A5=98=20?=
 =?UTF-8?q?=EC=88=98=EC=A0=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
NOTE(review): this copy of the patch had been whitespace-collapsed; line
breaks, indentation and blank context lines below are reconstructed. Verify
them against the target files before applying (or use
`git apply --ignore-whitespace`). Comments in the newly added file are
translated to English, so blob id ded0d36 no longer describes its contents.
The hunks for example/10-financial-statements.py are left verbatim because
their context and '-' lines must match the existing file.
The subject decodes (Korean) roughly to: "chore: add single financial
statement processing for testing, fix financial statement collection error".

 example/10-financial-statements-one.py | 114 +++++++++++++++++++++++++
 example/10-financial-statements.py     |   6 +-
 2 files changed, 117 insertions(+), 3 deletions(-)
 create mode 100644 example/10-financial-statements-one.py

diff --git a/example/10-financial-statements-one.py b/example/10-financial-statements-one.py
new file mode 100644
index 0000000..ded0d36
--- /dev/null
+++ b/example/10-financial-statements-one.py
@@ -0,0 +1,114 @@
+import re
+import time
+
+import pandas as pd
+import requests as rq
+from bs4 import BeautifulSoup
+from tqdm import tqdm
+
+import quantcommon
+
+# Connect to the DB
+common = quantcommon.QuantCommon()
+engine = common.create_engine()
+con = common.connect()
+
+mycursor = con.cursor()
+
+# Crawl financial statements
+# Load the ticker list (latest 기준일 only, 종목구분 = '보통주')
+ticker_list = pd.read_sql("""
+select * from kor_ticker
+where 기준일 = (select max(기준일) from kor_ticker)
+    and 종목구분 = '보통주';
+""", con=engine)
+
+# DB save query: insert into kor_fs, overwriting 값 when the key already exists
+query = """
+    insert into kor_fs (계정, 기준일, 값, 종목코드, 공시구분)
+    values (%s,%s,%s,%s,%s) as new
+    on duplicate key update
+    값=new.값
+"""
+
+# List that collects tickers whose processing raised an error
+error_list = []
+
+
+# Financial-statement cleansing function
+def clean_fs(df, ticker, frequency):
+    """Reshape a wide statement table into long (계정, 기준일, 값) rows tagged with ticker and frequency; returns the new DataFrame."""
+    df = df[~df.loc[:, ~df.columns.isin(['계정'])].isna().all(axis=1)]  # drop rows where every non-'계정' column is NaN
+    df = df.drop_duplicates(['계정'], keep='first')  # keep only the first row for each 계정
+    df = pd.melt(df, id_vars='계정', var_name='기준일', value_name='값')  # wide -> long: former column labels become 기준일
+    df = df[~pd.isnull(df['값'])]  # drop cells without a value
+    df['계정'] = df['계정'].replace({'계산에 참여한 계정 펼치기': ''}, regex=True)  # strip this helper text from account names
+    print(df)  # NOTE(review): debug output; the clean_fs context shown for 10-financial-statements.py has no print here - confirm it is intended
+    df['기준일'] = pd.to_datetime(df['기준일'],
+                               format='%Y/%m') + pd.tseries.offsets.MonthEnd()  # parse 'YYYY/MM' labels, then move to the month end
+    df['종목코드'] = ticker
+    df['공시구분'] = frequency
+
+    return df
+
+
+i = 0  # index into ticker_list of the single ticker this script processes
+ticker = ticker_list['종목코드'][i]
+
+try:
+    # Build the url
+    url = f'https://comp.fnguide.com/SVO2/ASP/SVD_Finance.asp?pGB=1&gicode=A{ticker}'
+
+    # Fetch the data (every table on the page, hidden ones included)
+    data = pd.read_html(url, displayed_only=False)
+
+    # print([item.head(3) for item in data])
+    # Annual data: tables 0, 2 and 4, minus the '전년동기' columns of table 0
+    data_fs_y = pd.concat([
+        data[0].iloc[:, ~data[0].columns.str.contains('전년동기')], data[2],
+        data[4]
+    ])
+    data_fs_y = data_fs_y.rename(columns={data_fs_y.columns[0]: "계정"})  # first column holds the account names
+
+    # Find the fiscal closing year (original comment's wording; the digits found here are matched against column-label suffixes below)
+    page_data = rq.get(url)  # NOTE(review): no timeout is passed, and this url was already fetched by pd.read_html above
+    page_data_html = BeautifulSoup(page_data.content, 'html.parser')
+
+    fiscal_data = page_data_html.select('div.corp_group1 > h2')
+    fiscal_data_text = fiscal_data[1].text  # text of the second matching <h2>
+    fiscal_data_text = re.findall('[0-9]+', fiscal_data_text)  # every run of digits in that text
+
+    # Keep only the '계정' column and the columns whose last two characters are among those digit strings
+    data_fs_y = data_fs_y.loc[:, (data_fs_y.columns == '계정') | (
+        data_fs_y.columns.str[-2:].isin(fiscal_data_text))]
+
+    # Cleanse
+    data_fs_y_clean = clean_fs(data_fs_y, ticker, 'y')
+
+    # Quarterly data: tables 1, 3 and 5, minus the '전년동기' columns of table 1
+    data_fs_q = pd.concat([
+        data[1].iloc[:, ~data[1].columns.str.contains('전년동기')], data[3],
+        data[5]
+    ])
+    data_fs_q = data_fs_q.rename(columns={data_fs_q.columns[0]: "계정"})
+
+    data_fs_q_clean = clean_fs(data_fs_q, ticker, 'q')
+
+    # Combine the two
+    data_fs_bind = pd.concat([data_fs_y_clean, data_fs_q_clean])
+    print(data_fs_bind.head(3))
+
+    # Save the financial-statement data to the DB
+    args = data_fs_bind.values.tolist()
+    mycursor.executemany(query, args)
+    con.commit()
+
+except Exception as e:
+
+    # On error, print it and record the ticker (the original comment also says "move on to the next loop"; this script has no loop)
+    print(e)
+    error_list.append(ticker)
+
+# Close the DB connection
+engine.dispose()
+con.close()
\ No newline at end of file
diff --git a/example/10-financial-statements.py b/example/10-financial-statements.py
index 2eded4c..5b185e0 100644
--- a/example/10-financial-statements.py
+++ b/example/10-financial-statements.py
@@ -15,7 +15,7 @@ con = common.connect()
 
 mycursor = con.cursor()
 
-# 제무재표 크롤링
+# 재무제표 크롤링
 # 티커리스트 불러오기
 ticker_list = pd.read_sql("""
 select * from kor_ticker
@@ -44,7 +44,7 @@ def clean_fs(df, ticker, frequency):
     df = df[~pd.isnull(df['값'])]
     df['계정'] = df['계정'].replace({'계산에 참여한 계정 펼치기': ''}, regex=True)
     df['기준일'] = pd.to_datetime(df['기준일'],
-                               format='%Y-%m') + pd.tseries.offsets.MonthEnd()
+                               format='%Y/%m') + pd.tseries.offsets.MonthEnd()
     df['종목코드'] = ticker
     df['공시구분'] = frequency
 
@@ -61,7 +61,7 @@ for i in tqdm(range(0, len(ticker_list))):
 
     try:
         # url 생성
-        url = f'http://comp.fnguide.com/SVO2/ASP/SVD_Finance.asp?pGB=1&gicode=A{ticker}'
+        url = f'https://comp.fnguide.com/SVO2/ASP/SVD_Finance.asp?pGB=1&gicode=A{ticker}'
 
         # 데이터 받아오기
         data = pd.read_html(url, displayed_only=False)