# Source: https://teddylee777.github.io/python/lotto/
import warnings
import requests
from datetime import datetime
from tqdm import tqdm
import pandas as pd
from bs4 import BeautifulSoup
# Suppress warnings: no category, message, or module is given, so this
# 'ignore' filter matches every warning category from every module.
warnings.filterwarnings('ignore')
# Function to get the latest draw count
def get_max_count(timeout=10):
    """Return the number of the most recent lottery draw.

    Downloads the dhlottery.co.kr main page and converts the text of the
    ``<strong id="lottoDrwNo">`` element to an integer.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The latest draw number as an ``int``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        AttributeError: If the page has no ``strong#lottoDrwNo`` element
            (``soup.find`` returns ``None`` in that case).
        ValueError: If the element's text is not an integer.
    """
    url = 'https://dhlottery.co.kr/common.do?method=main'
    response = requests.get(url, timeout=timeout)
    # Stop on 4xx/5xx responses instead of trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    return int(soup.find('strong', id='lottoDrwNo').text)
# Function to fetch lottery result for a given draw count
def crawling_lotto(count, timeout=10):
    """Fetch and parse the result page of a single lottery draw.

    Args:
        count: Draw number, placed in the ``drwNo`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        A dict with three keys:
            ``'date'``: ``datetime`` parsed from the ``p.desc`` text.
            ``'win_number'``: list of ints read from the ``div.num.win``
                paragraph, in page order.
            ``'bonus_number'``: int read from the ``div.num.bonus``
                paragraph.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        AttributeError: If an expected element is missing from the page
            (``soup.find`` returns ``None`` in that case).
        ValueError: If the date or a number cannot be parsed.
    """
    url = f'https://dhlottery.co.kr/gameResult.do?method=byWin&drwNo={count}'
    response = requests.get(url, timeout=timeout)
    # Stop on 4xx/5xx responses instead of trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    # strptime needs an exact match, so drop surrounding whitespace first.
    date_text = soup.find('p', class_='desc').text.strip()
    date = datetime.strptime(date_text, '(%Y년 %m월 %d일 추첨)')

    # split() with no argument tolerates blank lines, tabs and repeated
    # whitespace between the numbers, not just single newlines.
    win_text = soup.find('div', class_='num win').find('p').text
    win_number = [int(token) for token in win_text.split()]

    bonus_text = soup.find('div', class_='num bonus').find('p').text
    bonus_number = int(bonus_text.strip())

    return {
        'date': date,
        'win_number': win_number,
        'bonus_number': bonus_number,
    }
# Get the latest draw count
max_count = get_max_count()


def _to_row(result):
    """Flatten one crawling_lotto() result dict into a single CSV row dict."""
    numbers = result['win_number']
    row = {'date': result['date']}
    # Columns num1..num6 take the first six winning numbers in list order.
    row.update((f'num{pos}', numbers[pos - 1]) for pos in range(1, 7))
    row['bonus'] = result['bonus_number']
    return row


# Crawl every draw from the first through the latest, one row per draw
data_list = [_to_row(crawling_lotto(i)) for i in tqdm(range(1, max_count + 1))]

# Build the table and write it to lotto-<latest draw>.csv without the index
data = pd.DataFrame(data_list)
data.to_csv(f'lotto-{max_count}.csv', index=False)