Writing dataframes to different Excel sheets


I am scraping some data from the web and writing it into roughly six dataframes. I then want to write each dataframe to a separate sheet in an Excel file. I have looked online and tried two different approaches, but I cannot get the result I am after. If I use the following code, only the last dataframe is written to Excel and everything else gets overwritten:

book = "Sample.xlsx"
rb = openpyxl.load_workbook(book)
rb.create_sheet(pitches[x] + ' Data')
activeSheet = pitches[x] + ' Data'
writer = pd.ExcelWriter(book, engine='xlsxwriter')
combinedDF.to_excel(writer, sheet_name=activeSheet,  index=False)
writer.save()

If I use the following code instead, it creates each individual sheet, but no dataframe data is written to the Excel file:

book = "Sample.xlsx"
rb = openpyxl.load_workbook(book)
rb.create_sheet(pitches[x] + ' Data')
activeSheet = pitches[x] + ' Data'
combinedDF.to_excel(book, sheet_name=activeSheet,  index=False)
rb.save(book)

Here is the full code:

from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.keys import Keys
import time
from bs4 import BeautifulSoup
import requests
import pandas as pd
import openpyxl

book = "Baseball Savant Data.xlsx"
rb = openpyxl.load_workbook(book)
pitches = ['Fastball', '2 Seam Fastball', 'Cut Fastball', 'Split-Finger Fastball', 'Sinker', 'Slider', 'Changeup', 'Curveball']


beginningTime = time.time()
browser = webdriver.Chrome()
browser.get('http://www.baseballsavant.com')
browser.maximize_window()
linkPage = browser.find_element_by_link_text('Statcast Search')
linkPage.click()
time.sleep(2)
myMinimumPitchCount = browser.find_element_by_xpath("""//*[@id="min_pitches"]/option[@value='500']""").click()

myMinimumResultCount = browser.find_element_by_xpath("""//*[@id="min_results"]/option[@value='50']""").click()

pitchCode = ['FF','FT','FC','FS','SI','SL','CH','CU']
time.sleep(2)
x = 0
y = 0


while x < len(pitchCode):
    if x == 0:
        current = ('chk_PT_' + pitchCode[x])
        pitchSelection = browser.find_element_by_class_name("mock-pulldown-container")
        pitchSelection.click()
        currentPitch = browser.find_element_by_id(current).click()
        searchButton = browser.find_element_by_xpath("""//*[@id="pfx_form"]/div[2]/div/input[1]""").click()
        time.sleep(3)

        while y < 2:
            if y == 0:
                currentURL = browser.current_url
                r = requests.get(currentURL)
                soup = BeautifulSoup(r.text, "html.parser")
                table_headers_data = soup.find("table", {"id": "search_results"})
                statistics = soup.findAll("tr", {"class": "search_row"})

                table_headers = [th.text.strip() for th in table_headers_data.findAll('th')[0:5]]
                data_rows = statistics[:]
                player_data = [[td.text.strip() for td in data_rows[i].findAll('td')[0:5]]
                               for i in range(len(data_rows))]

                dfPitchCount = pd.DataFrame(player_data, index=None, columns=table_headers)
                print('Y = ' + str(y))
                y += 1


            elif y != 0:
                wOBAAllowed = browser.find_element_by_xpath("""//*[@id="sort_col"]/option[@value='woba']""").click()
                searchButton = browser.find_element_by_xpath("""//*[@id="pfx_form"]/div[2]/div/input[1]""").click()
                time.sleep(2)
                currentURL = browser.current_url
                r = requests.get(currentURL)
                soup = BeautifulSoup(r.text, "html.parser")
                table_headers_data = soup.find("table", {"id": "search_results"})
                statistics = soup.findAll("tr", {"class": "search_row"})


                table_headers = [th.text.strip() for th in table_headers_data.findAll('th')[0:4]]

                data_rows = statistics[:]
                player_data = [[td.text.strip() for td in data_rows[i].findAll('td')[0:4]]
                               for i in range(len(data_rows))]

                dfwOBA = pd.DataFrame(player_data, index=None, columns=table_headers)
                combinedDF = pd.merge(dfPitchCount, dfwOBA, how='left', on="Player", sort=False, indicator="True")
                print(rb.get_sheet_names())

                rb.create_sheet(pitches[x] + ' Data')
                activeSheet = pitches[x] + ' Data'
                writer = pd.ExcelWriter(book, engine='xlsxwriter')
                combinedDF.to_excel(writer, sheet_name=activeSheet, index=False)
                writer.save()
                pitchSort = browser.find_element_by_xpath("""//*[@id="sort_col"]/option[@value='pitches']""").click()
                print('Y = ' + str(y))
                y += 1
                print('this is ' + str(x))
                x += 1


    elif x != 0:
        y = 0
        print('y boogers = ' + str(y))
        pitchSelection = browser.find_element_by_class_name("mock-pulldown-container")
        pitchSelection.click()
        time.sleep(5)
        current = ('chk_PT_' + pitchCode[x])
        previous = ('chk_PT_' + pitchCode[x-1])
        previousPitch = browser.find_element_by_id(previous)
        previousPitch.click()
        time.sleep(1)
        print(current)
        pitchSelection.click()
        currentPitch = browser.find_element_by_id(current)
        currentPitch.click()
        time.sleep(1)
        print(previous)
        pitchSort = browser.find_element_by_xpath("""//*[@id="sort_col"]/option[@value='pitches']""").click()
        searchButton = browser.find_element_by_xpath("""//*[@id="pfx_form"]/div[2]/div/input[1]""").click()

        while y < 2:
            if y == 0:
                currentURL = browser.current_url
                r = requests.get(currentURL)
                soup = BeautifulSoup(r.text, "html.parser")
                table_headers_data = soup.find("table", {"id": "search_results"})
                statistics = soup.findAll("tr", {"class": "search_row"})

                table_headers = [th.text.strip() for th in table_headers_data.findAll('th')[0:5]]
                data_rows = statistics[:]
                player_data = [[td.text.strip() for td in data_rows[i].findAll('td')[0:5]]
                               for i in range(len(data_rows))]

                dfPitchCount = pd.DataFrame(player_data, index=None, columns=table_headers)

                y += 1

            elif y != 0:
                wOBAAllowed = browser.find_element_by_xpath("""//*[@id="sort_col"]/option[@value='woba']""").click()
                searchButton = browser.find_element_by_xpath("""//*[@id="pfx_form"]/div[2]/div/input[1]""").click()
                time.sleep(2)
                currentURL = browser.current_url
                r = requests.get(currentURL)
                soup = BeautifulSoup(r.text, "html.parser")
                table_headers_data = soup.find("table", {"id": "search_results"})
                statistics = soup.findAll("tr", {"class": "search_row"})


                table_headers = [th.text.strip() for th in table_headers_data.findAll('th')[0:4]]

                data_rows = statistics[:]
                player_data = [[td.text.strip() for td in data_rows[i].findAll('td')[0:4]]
                               for i in range(len(data_rows))]

                dfwOBA = pd.DataFrame(player_data, index=None, columns=table_headers)
                combinedDF = pd.merge(dfPitchCount, dfwOBA, how='left', on="Player", sort=False, indicator="True")
                print(combinedDF)
                print(rb.get_sheet_names())

                rb.create_sheet(pitches[x] + ' Data')
                activeSheet = pitches[x] + ' Data'
                writer = pd.ExcelWriter(book, engine='xlsxwriter')

                combinedDF.to_excel(writer, sheet_name=activeSheet, index=False)
                writer.save()
                pitchSort = browser.find_element_by_xpath("""//*[@id="sort_col"]/option[@value='pitches']""").click()

                y += 1

                x += 1
1 Answer

It looks like you missed the most important source: the pandas documentation for to_excel: https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_excel.html

So move writer = pd.ExcelWriter(book, engine='xlsxwriter') and writer.save() out of the loop: put the first before the x loop starts and the second after it ends. You should open the Excel file once and save it once, not on every write.
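A minimal sketch of that pattern, assuming the dataframes have already been built (the file name, sheet names, and sample data below are placeholders, not taken from the original script):

import pandas as pd

# Hypothetical stand-ins for the six scraped dataframes.
frames = {
    'Fastball Data': pd.DataFrame({'Player': ['A'], 'Pitches': [500]}),
    'Slider Data': pd.DataFrame({'Player': ['B'], 'Pitches': [350]}),
    'Curveball Data': pd.DataFrame({'Player': ['C'], 'Pitches': [275]}),
}

# Open the writer once, before the loop.
writer = pd.ExcelWriter('Baseball Savant Data.xlsx', engine='xlsxwriter')

# Write each dataframe to its own sheet inside the loop.
for sheet_name, df in frames.items():
    df.to_excel(writer, sheet_name=sheet_name, index=False)

# Save once, after the loop, so later sheets do not overwrite earlier ones.
writer.save()

Note that the xlsxwriter engine always creates a brand-new file, so the openpyxl load_workbook/create_sheet calls are not needed for this approach; if you need to add sheets to an existing workbook instead, recent pandas versions support pd.ExcelWriter(book, engine='openpyxl', mode='a'). In newer pandas, writer.save() has also been replaced by writer.close(), or by using the writer as a context manager (with pd.ExcelWriter(...) as writer:), which saves automatically on exit.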
