Compare commits
3 Commits
fcb0966a20
...
18016bfdec
| Author | SHA1 | Date | |
|---|---|---|---|
| 18016bfdec | |||
| 50c740501e | |||
| 02c4d58d8e |
119
example/10-stock-data.py
Normal file
119
example/10-stock-data.py
Normal file
@ -0,0 +1,119 @@
|
||||
import logging
|
||||
import re
|
||||
from io import BytesIO
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import requests as rq
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
GEN_OTP_URL = 'http://data.krx.co.kr/comm/fileDn/GenerateOTP/generate.cmd'
|
||||
DOWN_URL = 'http://data.krx.co.kr/comm/fileDn/download_csv/download.cmd'
|
||||
|
||||
def get_latest_biz_day():
    """Return the latest business day shown on Naver Finance as a digit string.

    The date text is read from the deposit-trend page and every run of digits
    in it is concatenated (presumably yielding ``YYYYMMDD`` -- confirm against
    the live page).

    Returns:
        str: The digits of the date text, joined without separators.

    Raises:
        requests.HTTPError: If the page request returns an error status.
        ValueError: If the date element is not found in the page.
    """
    url = 'https://finance.naver.com/sise/sise_deposit.nhn'
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = rq.post(url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'lxml')
    date_node = soup.select_one(
        'div.subtop_sise_graph2 > ul.subtop_chart_note > li > span.tah'
    )
    if date_node is None:
        # Fail with a clear message instead of an AttributeError on None.text.
        raise ValueError(f'Business-day element not found at {url}')

    return ''.join(re.findall(r'[0-9]+', date_node.text))
|
||||
|
||||
|
||||
def get_stock_data(biz_day, mkt_id):
    """Download the KRX sector-classification CSV for one market.

    The download is a two-step exchange: the query is posted to
    ``GEN_OTP_URL``, then the text of that response is posted as ``code`` to
    ``DOWN_URL``, whose body is parsed as an EUC-KR encoded CSV.

    Args:
        biz_day: Trading day sent as the ``trdDd`` field.
        mkt_id: Market id sent as ``mktId`` (STK: KOSPI, KSQ: KOSDAQ).

    Returns:
        pandas.DataFrame: The parsed CSV.

    Raises:
        requests.HTTPError: If either request returns an error status.
    """
    gen_otp_data = {
        'locale': 'ko_KR',
        'mktId': mkt_id,  # STK: KOSPI, KSQ: KOSDAQ
        'trdDd': biz_day,
        'money': '1',
        'csvxls_isNo': 'false',
        'name': 'fileDown',
        'url': 'dbms/MDC/STAT/standard/MDCSTAT03901',
    }
    headers = {
        'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201050201',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
    }

    # NOTE(review): verify=False is kept from the original; the URL is plain
    # HTTP, so it presumably only matters if the server redirects to HTTPS.
    otp = rq.post(url=GEN_OTP_URL, data=gen_otp_data, headers=headers,
                  verify=False, timeout=30)
    # Stop here on an error status rather than posting an error page as the code.
    otp.raise_for_status()

    down_sector = rq.post(url=DOWN_URL, data={'code': otp.text},
                          headers=headers, timeout=30)
    down_sector.raise_for_status()

    # Read the ticker code as text so it is never inferred as a number
    # (string operations are applied to this column later in the file).
    return pd.read_csv(BytesIO(down_sector.content), encoding='EUC-KR',
                       dtype={'종목코드': str})
|
||||
|
||||
|
||||
def get_ind_stock_data(biz_day):
    """Download the KRX per-stock indicator CSV for all markets.

    The download is a two-step exchange: the query is posted to
    ``GEN_OTP_URL``, then the text of that response is posted as ``code`` to
    ``DOWN_URL``, whose body is parsed as an EUC-KR encoded CSV.

    Args:
        biz_day: Trading day sent as the ``trdDd`` field.

    Returns:
        pandas.DataFrame: The parsed CSV.

    Raises:
        requests.HTTPError: If either request returns an error status.
    """
    gen_otp_data = {
        'locale': 'ko_KR',
        'searchType': '1',
        'mktId': 'ALL',
        'trdDd': biz_day,
        'csvxls_isNo': 'false',
        'name': 'fileDown',
        'url': 'dbms/MDC/STAT/standard/MDCSTAT03501',
    }
    headers = {
        'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201050201',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
    }

    # NOTE(review): verify=False is kept from the original; the URL is plain
    # HTTP, so it presumably only matters if the server redirects to HTTPS.
    otp = rq.post(url=GEN_OTP_URL, data=gen_otp_data, headers=headers,
                  verify=False, timeout=30)
    # Stop here on an error status rather than posting an error page as the code.
    otp.raise_for_status()

    down_ind_sector = rq.post(url=DOWN_URL, data={'code': otp.text},
                              headers=headers, timeout=30)
    down_ind_sector.raise_for_status()

    # Read the ticker code as text so it is never inferred as a number
    # (string operations are applied to this column later in the file).
    return pd.read_csv(BytesIO(down_ind_sector.content), encoding='EUC-KR',
                       dtype={'종목코드': str})
|
||||
|
||||
|
||||
def get_total_stock_data(biz_day):
    """Merge the sector and per-stock indicator tables and label each stock.

    The sector table is fetched once per market and stacked, the indicator
    table is fetched once, and the two are outer-merged on every column name
    they share. Each row then gets a '종목구분' label; the first matching rule
    wins:

    1. name contains '스팩' or matches '제[0-9]+호'  -> '스팩'
    2. ticker code does not end in '0'              -> '우선주'
    3. name ends with '리츠'                         -> '리츠'
    4. name appears in only one of the two tables   -> '기타'
    5. otherwise                                    -> '보통주'

    Args:
        biz_day: Trading day passed to both downloads and stored in '기준일'.

    Returns:
        pandas.DataFrame: Columns '종목코드', '종목명', '시장구분', '종가',
        '시가총액', '기준일', 'EPS', '선행EPS', 'BPS', '주당배당금', '종목구분',
        with NaN replaced by None and '기준일' converted to datetime.
    """
    # Sector classification per market (KOSPI, KOSDAQ), stacked into one table.
    # The market ids are upper-case ('STK', 'KSQ'), like the 'ALL' id used for
    # the indicator download.
    sector_stk = get_stock_data(biz_day, 'STK')
    sector_ksq = get_stock_data(biz_day, 'KSQ')
    krx_sector = pd.concat([sector_stk, sector_ksq]).reset_index(drop=True)
    krx_sector['종목명'] = krx_sector['종목명'].str.strip()
    krx_sector['기준일'] = biz_day

    # Per-stock indicators.
    krx_ind = get_ind_stock_data(biz_day)
    krx_ind['종목명'] = krx_ind['종목명'].str.strip()
    krx_ind['기준일'] = biz_day

    # Names present in only one of the two tables (per the original note: ship
    # funds, mineral funds, overseas listings, ...). They are labelled '기타'
    # below; no rows are dropped.
    one_sided_names = list(
        set(krx_sector['종목명']).symmetric_difference(set(krx_ind['종목명']))
    )

    shared_columns = krx_sector.columns.intersection(krx_ind.columns).tolist()
    kor_ticker = pd.merge(krx_sector, krx_ind, on=shared_columns, how='outer')

    # Separate SPACs, preferred shares, REITs and other listings from ordinary
    # shares. np.select takes the first condition that holds for each row.
    conditions = [
        kor_ticker['종목명'].str.contains('스팩|제[0-9]+호'),
        kor_ticker['종목코드'].str[-1:] != '0',
        kor_ticker['종목명'].str.endswith('리츠'),
        kor_ticker['종목명'].isin(one_sided_names),
    ]
    labels = ['스팩', '우선주', '리츠', '기타']
    kor_ticker['종목구분'] = np.select(conditions, labels, default='보통주')

    kor_ticker = kor_ticker.reset_index(drop=True)
    # Column names are stripped of spaces before the selection below.
    kor_ticker.columns = kor_ticker.columns.str.replace(' ', '')
    kor_ticker = kor_ticker[[
        '종목코드', '종목명', '시장구분', '종가',
        '시가총액', '기준일', 'EPS', '선행EPS', 'BPS', '주당배당금', '종목구분',
    ]]
    kor_ticker = kor_ticker.replace({np.nan: None})
    kor_ticker['기준일'] = pd.to_datetime(kor_ticker['기준일'])
    return kor_ticker
|
||||
|
||||
if __name__ == '__main__':
    # Look up the latest business day, build the merged stock table for it,
    # and print the result.
    latest_biz_day = get_latest_biz_day()
    data = get_total_stock_data(biz_day=latest_biz_day)
    print(data)
|
||||
|
||||
|
||||
24
example/8-2-scrape.py
Normal file
24
example/8-2-scrape.py
Normal file
@ -0,0 +1,24 @@
|
||||
"""Fetch a KIND disclosure list page and print the first rows of its table."""
from io import StringIO

import pandas as pd
import requests as rq
from bs4 import BeautifulSoup

url = 'https://kind.krx.co.kr/disclosure/todaydisclosure.do'
# Form fields posted to the page; the query date ('selDate') is fixed here.
payload = {
    'method': 'searchTodayDisclosureSub',
    'currentPageSize': '15',
    'pageIndex': '1',
    'orderMode': '0',
    'orderStat': 'D',
    'forward': 'todaydisclosure_sub',
    'chose': 'S',
    'todayFlag': 'N',
    'selDate': '2025-01-24',
}

# A timeout keeps the script from hanging on a stalled connection, and an
# error status stops it before an error page is parsed as a table.
data = rq.post(url, data=payload, timeout=30)
data.raise_for_status()
html = BeautifulSoup(data.content, 'html.parser')

html_unicode = html.prettify()
# Passing literal HTML to read_html is deprecated in pandas 2.1+, so the
# string is wrapped in a file-like object.
tbl = pd.read_html(StringIO(html_unicode))

print(tbl[0].head())
|
||||
58
example/9-1-selenium.py
Normal file
58
example/9-1-selenium.py
Normal file
@ -0,0 +1,58 @@
|
||||
import os
|
||||
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.chrome.service import Service
|
||||
from webdriver_manager.chrome import ChromeDriverManager
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
import time
|
||||
from bs4 import BeautifulSoup
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()

# Validate the credentials before a browser is launched: send_keys() cannot
# type None, and the resulting error would not name the missing variable.
dh_id = os.getenv('DH_ID')
dh_pw = os.getenv('DH_PW')
if not dh_id or not dh_pw:
    raise SystemExit('DH_ID and DH_PW must be set in the environment or in a .env file.')

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

# Open the Donghaeng Lottery login page directly.
url = 'https://dhlottery.co.kr/user.do?method=login&returnUrl='
driver.get(url)

time.sleep(2)

# Enter the user id.
driver.find_element(By.ID, value='userId').send_keys(dh_id)
# Enter the password.
driver.find_element(By.NAME, value='password').send_keys(dh_pw)
# Submit the login form.
driver.find_element(By.XPATH, value='//*[@id="article"]/div[2]/div/form/div/div[1]/fieldset/div[1]/a').click()

time.sleep(2)

# Close the pop-up windows opened on the main page, newest first, until only
# one window is left.
while len(driver.window_handles) > 1:
    driver.switch_to.window(driver.window_handles[-1])
    driver.close()

time.sleep(2)

driver.switch_to.window(driver.window_handles[0])

# Open the lotto purchase window from the top navigation menu.
driver.find_element(By.XPATH, value='//*[@id="gnb"]/ul/li[1]/a').click()
driver.find_element(By.XPATH, value='//*[@id="gnb"]/ul/li[1]/div/ul/li[1]/a').click()
time.sleep(2)

# The purchase page opens as a second window.
driver.switch_to.window(driver.window_handles[1])

# NOTE: everything from here on still needs to be re-checked (original note).
# Select automatic number picking.
driver.find_element(By.XPATH, value='//*[@id="num2"]').click()
time.sleep(1)

# Select the purchase quantity.
# NOTE(review): this sends CONTROL followed by '5' to the element; confirm
# that it actually selects a quantity of 5.
driver.find_element(By.XPATH, value='//*[@id="amoundApply"]').send_keys(Keys.CONTROL, '5')
time.sleep(1)
driver.find_element(By.XPATH, value='//*[@id="btnSelectNum"]').click()

# Idle forever; presumably this keeps the browser session open for manual
# follow-up.
while True:
    time.sleep(3)
|
||||
Loading…
x
Reference in New Issue
Block a user