Compare commits

..

No commits in common. "4f25956fe0f626a06d8102942420a8ca8cd9d645" and "aa5807690be358e0a767038404cc431f1055943c" have entirely different histories.

7 changed files with 85 additions and 204 deletions

View File

@ -1,14 +1,29 @@
# 패키지 불러오기 # 패키지 불러오기
import numpy as np import os
import pandas as pd from urllib.parse import quote_plus
import quantcommon import pymysql
from sqlalchemy import create_engine
import pandas as pd
import numpy as np
from dotenv import load_dotenv
load_dotenv()
user = os.getenv('DB_USER')
pw = os.getenv('DB_PW')
engine_for_pw = quote_plus(pw)
host = os.getenv('DB_HOST')
port = int(os.getenv('DB_PORT'))
db = os.getenv('DB_DB')
# DB 연결 # DB 연결
common = quantcommon.QuantCommon() engine = create_engine(f'mysql+pymysql://{user}:{engine_for_pw}@{host}:{port}/{db}')
engine = common.create_engine() con = pymysql.connect(user=user,
con = common.connect() passwd=pw,
host=host,
port=port,
db=db,
charset='utf8')
mycursor = con.cursor() mycursor = con.cursor()
# 가치 지표 계산 # 가치 지표 계산
# 분기 재무제표 불러오기 # 분기 재무제표 불러오기

View File

@ -1,114 +0,0 @@
import re
import time
import pandas as pd
import requests as rq
from bs4 import BeautifulSoup
from tqdm import tqdm
import quantcommon
# Connect to the database: one SQLAlchemy engine (used by pandas) and one raw
# connection with a cursor (used for the bulk insert further down).
common = quantcommon.QuantCommon()
engine = common.create_engine()
con = common.connect()
mycursor = con.cursor()
# Financial-statement crawling
# Load the ticker list: rows of kor_ticker for the most recent 기준일 whose
# 종목구분 is '보통주'.
ticker_list = pd.read_sql("""
select * from kor_ticker
where 기준일 = (select max(기준일) from kor_ticker)
and 종목구분 = '보통주';
""", con=engine)
# Upsert statement used to store rows in kor_fs.
# NOTE(review): the third entry of the column list and both sides of the
# "on duplicate key update" assignment are empty in this text, which is not
# valid SQL -- presumably a column name was lost; confirm against the kor_fs
# schema before running.
query = """
insert into kor_fs (계정, 기준일, , 종목코드, 공시구분)
values (%s,%s,%s,%s,%s) as new
on duplicate key update
=new.
"""
# Collects the tickers for which processing raised an error.
error_list = []
# 재무제표 클렌징 함수
def clean_fs(df, ticker, frequency):
    """Reshape one scraped financial-statement table into long format.

    Args:
        df: Wide table with an account-name column ``'계정'`` and one column
            per period; the period labels must parse with ``'%Y/%m'``.
        ticker: Value written to the ``'종목코드'`` column of every row.
        frequency: Value written to the ``'공시구분'`` column of every row
            (this file passes ``'y'`` and ``'q'``).

    Returns:
        A new DataFrame with the columns ``'계정'``, ``'기준일'``, ``''``,
        ``'종목코드'`` and ``'공시구분'``: one row per account and period
        that has a value, with ``'기준일'`` moved to the end of its month.
    """
    # NOTE(review): the value column is named with an empty string in this
    # source; presumably the real name was lost -- confirm against the
    # kor_fs table before changing it.
    value_col = ''

    # Drop rows in which every period column is empty.
    df = df[~df.loc[:, ~df.columns.isin(['계정'])].isna().all(axis=1)]
    # The same account can appear more than once; keep its first occurrence.
    df = df.drop_duplicates(['계정'], keep='first')
    # Wide -> long: one row per (account, period).
    df = pd.melt(df, id_vars='계정', var_name='기준일', value_name=value_col)
    # Take an explicit copy so the assignments below write to an independent
    # frame instead of a filtered slice (avoids SettingWithCopyWarning).
    df = df[~pd.isnull(df[value_col])].copy()
    # Strip the expand-widget caption that is embedded in some account names.
    df['계정'] = df['계정'].replace({'계산에 참여한 계정 펼치기': ''}, regex=True)
    # Period labels are year/month; store the last day of that month.
    df['기준일'] = pd.to_datetime(df['기준일'],
                               format='%Y/%m') + pd.tseries.offsets.MonthEnd()
    df['종목코드'] = ticker
    df['공시구분'] = frequency
    return df
# Only the ticker at position 0 of the list is processed here.
i = 0
ticker = ticker_list['종목코드'][i]

try:
    # Build the page URL for this ticker.
    url = f'https://comp.fnguide.com/SVO2/ASP/SVD_Finance.asp?pGB=1&gicode=A{ticker}'

    # Parse every table on the page, including the ones that are not displayed.
    data = pd.read_html(url, displayed_only=False)

    # Annual data: tables 0, 2 and 4 stacked, without the '전년동기' columns
    # of table 0.
    data_fs_y = pd.concat([
        data[0].iloc[:, ~data[0].columns.str.contains('전년동기')],
        data[2],
        data[4],
    ])
    data_fs_y = data_fs_y.rename(columns={data_fs_y.columns[0]: "계정"})

    # Read the digit groups from the second 'div.corp_group1 > h2' heading.
    # A timeout is set so a stalled server cannot block the script forever.
    page_data = rq.get(url, timeout=10)
    page_data_html = BeautifulSoup(page_data.content, 'html.parser')
    fiscal_data = page_data_html.select('div.corp_group1 > h2')
    fiscal_data_text = fiscal_data[1].text
    fiscal_data_text = re.findall('[0-9]+', fiscal_data_text)

    # Keep the account column plus the period columns whose last two
    # characters match one of those digit groups.
    data_fs_y = data_fs_y.loc[:, (data_fs_y.columns == '계정') | (
        data_fs_y.columns.str[-2:].isin(fiscal_data_text))]

    # Clean the annual table.
    data_fs_y_clean = clean_fs(data_fs_y, ticker, 'y')

    # Quarterly data: tables 1, 3 and 5, handled the same way.
    data_fs_q = pd.concat([
        data[1].iloc[:, ~data[1].columns.str.contains('전년동기')],
        data[3],
        data[5],
    ])
    data_fs_q = data_fs_q.rename(columns={data_fs_q.columns[0]: "계정"})
    data_fs_q_clean = clean_fs(data_fs_q, ticker, 'q')

    # Combine annual and quarterly rows.
    data_fs_bind = pd.concat([data_fs_y_clean, data_fs_q_clean])
    print(data_fs_bind.head(3))

    # Store the financial-statement rows in the DB.
    args = data_fs_bind.values.tolist()
    mycursor.executemany(query, args)
    con.commit()
except Exception as e:
    # Report the error and remember which ticker failed.
    print(e)
    error_list.append(ticker)
finally:
    # Close the DB connections on every exit path, including interrupts.
    engine.dispose()
    con.close()

View File

@ -1,21 +1,35 @@
import re import os
import time from urllib.parse import quote_plus
import pymysql
from sqlalchemy import create_engine
import pandas as pd import pandas as pd
import requests as rq import requests as rq
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import re
from tqdm import tqdm from tqdm import tqdm
import time
from dotenv import load_dotenv
import quantcommon load_dotenv()
user = os.getenv('DB_USER')
pw = os.getenv('DB_PW')
engine_for_pw = quote_plus(pw)
host = os.getenv('DB_HOST')
port = int(os.getenv('DB_PORT'))
db = os.getenv('DB_DB')
# DB 연결 # DB 연결
common = quantcommon.QuantCommon() engine = create_engine(f'mysql+pymysql://{user}:{engine_for_pw}@{host}:{port}/{db}')
engine = common.create_engine() con = pymysql.connect(user=user,
con = common.connect() passwd=pw,
host=host,
port=port,
db=db,
charset='utf8')
mycursor = con.cursor() mycursor = con.cursor()
# 재무제표 크롤링 # 재무제표 크롤링
# 티커리스트 불러오기 # 티커리스트 불러오기
ticker_list = pd.read_sql(""" ticker_list = pd.read_sql("""
select * from kor_ticker select * from kor_ticker
@ -44,7 +58,7 @@ def clean_fs(df, ticker, frequency):
df = df[~pd.isnull(df[''])] df = df[~pd.isnull(df[''])]
df['계정'] = df['계정'].replace({'계산에 참여한 계정 펼치기': ''}, regex=True) df['계정'] = df['계정'].replace({'계산에 참여한 계정 펼치기': ''}, regex=True)
df['기준일'] = pd.to_datetime(df['기준일'], df['기준일'] = pd.to_datetime(df['기준일'],
format='%Y/%m') + pd.tseries.offsets.MonthEnd() format='%Y-%m') + pd.tseries.offsets.MonthEnd()
df['종목코드'] = ticker df['종목코드'] = ticker
df['공시구분'] = frequency df['공시구분'] = frequency
@ -61,7 +75,7 @@ for i in tqdm(range(0, len(ticker_list))):
try: try:
# url 생성 # url 생성
url = f'https://comp.fnguide.com/SVO2/ASP/SVD_Finance.asp?pGB=1&gicode=A{ticker}' url = f'http://comp.fnguide.com/SVO2/ASP/SVD_Finance.asp?pGB=1&gicode=A{ticker}'
# 데이터 받아오기 # 데이터 받아오기
data = pd.read_html(url, displayed_only=False) data = pd.read_html(url, displayed_only=False)

View File

@ -1,3 +1,5 @@
import logging
import os
import re import re
import time import time
from io import BytesIO from io import BytesIO
@ -7,8 +9,8 @@ import pandas as pd
import requests as rq import requests as rq
from tqdm import tqdm from tqdm import tqdm
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import pymysql
from dotenv import load_dotenv from dotenv import load_dotenv
import quantcommon
load_dotenv() load_dotenv()
@ -123,7 +125,12 @@ def process_for_total_stock(biz_day):
def save_ticker(ticker): def save_ticker(ticker):
con = quantcommon.QuantCommon().connect() con = pymysql.connect(user=os.getenv('DB_USER'),
passwd=os.getenv('DB_PW'),
host=os.getenv('DB_HOST'),
port=int(os.getenv('DB_PORT')),
db=os.getenv('DB_DB'),
charset='utf8')
mycursor = con.cursor() mycursor = con.cursor()
query = f""" query = f"""
@ -168,7 +175,12 @@ def process_for_wics(biz_day):
def save_sector(sector): def save_sector(sector):
con = quantcommon.QuantCommon().connect() con = pymysql.connect(user=os.getenv('DB_USER'),
passwd=os.getenv('DB_PW'),
host=os.getenv('DB_HOST'),
port=int(os.getenv('DB_PORT')),
db=os.getenv('DB_DB'),
charset='utf8')
mycursor = con.cursor() mycursor = con.cursor()
query = f""" query = f"""
@ -189,4 +201,4 @@ def save_sector(sector):
if __name__ == '__main__': if __name__ == '__main__':
latest_biz_day = get_latest_biz_day() latest_biz_day = get_latest_biz_day()
process_for_total_stock(latest_biz_day) process_for_total_stock(latest_biz_day)
process_for_wics(latest_biz_day) # process_for_wics(latest_biz_day)

View File

@ -1,21 +1,34 @@
# 패키지 불러오기 # 패키지 불러오기
import os
from urllib.parse import quote_plus
import time import pymysql
from datetime import date from sqlalchemy import create_engine
from io import BytesIO
import pandas as pd import pandas as pd
import requests as rq from datetime import date
from dateutil.relativedelta import relativedelta from dateutil.relativedelta import relativedelta
import requests as rq
import time
from tqdm import tqdm from tqdm import tqdm
from io import BytesIO
from dotenv import load_dotenv
import quantcommon load_dotenv()
user = os.getenv('DB_USER')
pw = os.getenv('DB_PW')
engine_for_pw = quote_plus(pw)
host = os.getenv('DB_HOST')
port = int(os.getenv('DB_PORT'))
db = os.getenv('DB_DB')
# DB 연결 # DB 연결
common = quantcommon.QuantCommon() engine = create_engine(f'mysql+pymysql://{user}:{engine_for_pw}@{host}:{port}/{db}')
engine = common.create_engine() con = pymysql.connect(user=user,
con = common.connect() passwd=pw,
host=host,
port=port,
db=db,
charset='utf8')
mycursor = con.cursor() mycursor = con.cursor()
# 티커리스트 불러오기 # 티커리스트 불러오기

View File

@ -1,31 +0,0 @@
import pandas as pd
import numpy as np
import quantcommon
# Value-stock portfolio: companies with low PER and PBR (combined rank of 20
# or lower).

TICKER_SQL = """
select * from kor_ticker
where 기준일 = (select max(기준일) from kor_ticker)
and 종목구분 = '보통주';
"""
VALUE_SQL = """
select * from kor_value
where 기준일 = (select max(기준일) from kor_value);
"""

# Connect to the DB, run both reads, then release the engine.
engine = quantcommon.QuantCommon().create_engine()
ticker_list = pd.read_sql(TICKER_SQL, con=engine)
value_list = pd.read_sql(VALUE_SQL, con=engine)
engine.dispose()

# Values that are zero or negative are treated as missing.
# NOTE(review): the value column is addressed by an empty-string name in this
# source; presumably the real column name was lost -- confirm against the
# kor_value table.
non_positive = value_list[''] <= 0
value_list.loc[non_positive, ''] = np.nan

# One row per ticker, one column per metric.
value_pivot = value_list.pivot(index='종목코드', columns='지표', values='')
ticker_names = ticker_list[['종목코드', '종목명']]
data_bind = ticker_names.merge(value_pivot, on='종목코드', how='left')

# Rank each metric, then rank the sum of the two ranks; a ticker missing
# either metric gets no combined rank.
value_rank = data_bind[['PER', 'PBR']].rank(axis=0)
value_sum = value_rank.sum(axis=1, skipna=False).rank()

selected = value_sum <= 20
print(data_bind.loc[selected, ['종목코드', '종목명', 'PER', 'PBR']])

View File

@ -1,28 +0,0 @@
import os
from urllib.parse import quote_plus
import pymysql
from dotenv import load_dotenv
from sqlalchemy import create_engine
class QuantCommon:
    """Database connection settings shared by the quant scripts.

    The settings are read from the ``DB_USER``, ``DB_PW``, ``DB_HOST``,
    ``DB_PORT`` and ``DB_DB`` environment variables after ``load_dotenv()``
    has been called.
    """

    # Port used when DB_PORT is not set (MySQL's standard port).
    _DEFAULT_PORT = 3306

    def __init__(self):
        """Load the environment and capture the connection settings.

        Raises:
            TypeError: if ``DB_PW`` is not set.
            ValueError: if ``DB_PORT`` is set but is not an integer.
        """
        load_dotenv()
        self.user = os.getenv('DB_USER')
        self.pw = os.getenv('DB_PW')
        if self.pw is None:
            # quote_plus(None) used to fail here with an unrelated-looking
            # TypeError; the exception type is kept, the message is explicit.
            raise TypeError('DB_PW environment variable is not set')
        self.engine_for_pw = quote_plus(self.pw)
        self.host = os.getenv('DB_HOST')
        self.port = self._parse_port(os.getenv('DB_PORT'))
        self.db = os.getenv('DB_DB')

    @classmethod
    def _parse_port(cls, raw):
        """Return *raw* as an int, or the default port when *raw* is None."""
        if raw is None:
            return cls._DEFAULT_PORT
        return int(raw)

    def _engine_url(self):
        """Return the SQLAlchemy URL with user and password URL-escaped."""
        # str() keeps the previous rendering when DB_USER is unset.
        user = quote_plus(str(self.user))
        return (f'mysql+pymysql://{user}:{self.engine_for_pw}'
                f'@{self.host}:{self.port}/{self.db}')

    def create_engine(self):
        """Return a new SQLAlchemy engine for the configured database."""
        return create_engine(self._engine_url())

    def connect(self):
        """Return a new PyMySQL connection for the configured database."""
        return pymysql.connect(user=self.user,
                               passwd=self.pw,
                               host=self.host,
                               port=self.port,
                               db=self.db,
                               charset='utf8')