본문 바로가기

카테고리 없음

로또 번호: 2002년부터 지금까지

출처 : https://teddylee777.github.io/python/lotto/

import warnings
import requests
from datetime import datetime
from tqdm import tqdm
import pandas as pd
from bs4 import BeautifulSoup

# Suppress warnings
warnings.filterwarnings('ignore')

# Function to get the latest draw count
def get_max_count(timeout=10):
    """Return the number of the most recent lotto draw.

    Downloads the dhlottery main page and reads the draw number from the
    ``<strong id="lottoDrwNo">`` element.

    Args:
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The latest draw number as an ``int``.

    Raises:
        requests.RequestException: On a network failure, a timeout, or an
            HTTP error status.
        ValueError: If the draw-number element is missing from the page or
            its text is not an integer.
    """
    url = 'https://dhlottery.co.kr/common.do?method=main'
    response = requests.get(url, timeout=timeout)
    # Fail here on 4xx/5xx rather than trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    draw_tag = soup.find('strong', id='lottoDrwNo')
    if draw_tag is None:
        # find() returns None when nothing matches; report that clearly
        # instead of letting ``None.text`` raise an AttributeError.
        raise ValueError(f"element 'strong#lottoDrwNo' not found at {url}")
    return int(draw_tag.text)

# Function to fetch lottery result for a given draw count
def crawling_lotto(count, timeout=10):
    """Fetch and parse the result page of a single lotto draw.

    Args:
        count: Draw number to look up; it is placed in the ``drwNo`` query
            parameter of the result page URL.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        A dict with three keys:
            ``'date'``: the draw date as a ``datetime``.
            ``'win_number'``: list of the winning numbers as ``int``.
            ``'bonus_number'``: the bonus number as ``int``.

    Raises:
        requests.RequestException: On a network failure, a timeout, or an
            HTTP error status.
        AttributeError: If one of the expected page elements is missing
            (``find`` returned ``None``).
        ValueError: If the date text or a number cannot be parsed.
    """
    # Fetch the webpage for the given draw count
    url = f'https://dhlottery.co.kr/gameResult.do?method=byWin&drwNo={count}'
    response = requests.get(url, timeout=timeout)
    # Fail here on 4xx/5xx rather than trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    # Parse the result
    # strptime needs an exact match, so drop any surrounding whitespace.
    date_text = soup.find('p', class_='desc').text.strip()
    date = datetime.strptime(date_text, '(%Y년 %m월 %d일 추첨)')

    # Split on any run of whitespace: splitting on '\n' alone yields empty
    # tokens (and int('') fails) when blank lines separate the numbers.
    win_text = soup.find('div', class_='num win').find('p').text
    win_number = [int(token) for token in win_text.split()]

    bonus_number = int(soup.find('div', class_='num bonus').find('p').text.strip())

    return {
        'date': date,
        'win_number': win_number,
        'bonus_number': bonus_number
    }

# Ask the site for the number of the most recent draw.
max_count = get_max_count()

# Download every draw from 1 through max_count (tqdm shows progress) and
# flatten each result into one row: date, six winning numbers, bonus.
data_list = []
for i in tqdm(range(1, max_count + 1)):
    result = crawling_lotto(i)
    data_list.append({
        'date': result['date'],
        # Map the six column names onto the winning numbers by position.
        **{
            column: result['win_number'][position]
            for position, column in enumerate(
                ('num1', 'num2', 'num3', 'num4', 'num5', 'num6')
            )
        },
        'bonus': result['bonus_number'],
    })

# Build the table and write it to lotto-<max_count>.csv without the index.
data = pd.DataFrame(data_list)
data.to_csv(f'lotto-{max_count}.csv', index=False)