我正在尝试通过 Python 将 .csv 文件上传到 Google 表格。
我自己做了一些研究,并参考下面链接中的帖子,已经让它能够运行了。但我正在寻找一种更完善的方法来完成这件事。
问题:
`API.spreadsheets().batchUpdate` 会覆盖所有现有数据。
目标:
将 .csv 数据上传到 Google 表格中,并跳过表格里已有的数据以避免重复。
Link to post for uploading a csv file to Google Sheets
注意:我使用的是 @Ufos 的解决方案。
到目前为止,我的代码如下:
"""
Gets a connection to the Google Sheets API, imports a csv file into a sheet and inserts all new data. If
data already exists in the sheet it is ignored, and only data that is not already in the sheet is inserted.
"""
# import pickle
from googleapiclient.discovery import build
from google.oauth2.service_account import Credentials
try:
# for Python2
from Tkinter import * ## notice capitalized T in Tkinter
import tkFileDialog as filedialog
except ImportError:
# for Python3
from tkinter import * ## notice lowercase 't' in tkinter here
from tkinter import messagebox
from tkinter import filedialog
import sys
# Target spreadsheet and worksheet; both are left empty here and must be
# filled in before the script is run (see the TODO notes).
SPREADSHEET_ID = '' # TODO: Get this one from the link in browser
worksheet_name = '' # TODO: Insert sheet name
# path_to_csv = ''
# path_to_credentials = 'Credentials/token.pickle'
#path_to_credentials = 'creds.json'
# OAuth scopes requested when the service-account credentials are created.
scopes = ["https://spreadsheets.google.com/feeds",
'https://www.googleapis.com/auth/spreadsheets',
"https://www.googleapis.com/auth/drive.file",
"https://www.googleapis.com/auth/drive"]
# convenience routines
def find_sheet_id_by_name(sheet_name):
    """Return the ``sheetId`` of the worksheet titled *sheet_name*.

    Fetches only the ``sheets.properties`` field of the spreadsheet
    identified by ``SPREADSHEET_ID`` through the module-level ``API`` client
    and scans the sheets for a matching title.

    Returns ``None`` when no sheet carries that title.
    """
    spreadsheet = API.spreadsheets().get(
        spreadsheetId=SPREADSHEET_ID, fields='sheets.properties'
    ).execute()
    for entry in spreadsheet.get('sheets'):
        props = entry['properties']
        if 'title' in props and props['title'] == sheet_name:
            return props['sheetId']
    return None
def push_csv_to_gsheet(csv_path, sheet_id, row_index=0, column_index=0, delimiter=';'):
    """Paste the contents of a CSV file into a sheet with one ``batchUpdate``.

    The whole file is sent as a single ``pasteData`` request anchored at
    (*row_index*, *column_index*). Cells inside the pasted range are
    overwritten, so pass a *row_index* below the existing rows to add data
    underneath what is already there instead of replacing it.

    Args:
        csv_path: Path of the CSV file to read.
        sheet_id: Id of the target sheet, e.g. as returned by
            ``find_sheet_id_by_name``.
        row_index: Zero-based row of the top-left cell of the paste.
        column_index: Zero-based column of the top-left cell of the paste.
        delimiter: Field separator used in the CSV data.

    Returns:
        The response returned by executing the ``batchUpdate`` request.
    """
    with open(csv_path, 'r') as csv_file:
        csv_contents = csv_file.read()

    paste_request = {
        'pasteData': {
            "coordinate": {
                "sheetId": sheet_id,
                # Grid coordinates are integers in the Sheets API.
                "rowIndex": int(row_index),
                "columnIndex": int(column_index),
            },
            "data": csv_contents,
            "type": 'PASTE_NORMAL',
            "delimiter": delimiter,
        }
    }
    body = {'requests': [paste_request]}
    request = API.spreadsheets().batchUpdate(spreadsheetId=SPREADSHEET_ID, body=body)
    return request.execute()
def get_csv_file():
    """Ask the user to pick a CSV file and return the selected path.

    A Tk root window is created and immediately withdrawn so that only the
    file dialog is shown. If the dialog yields an empty selection the program
    terminates through ``sys.exit``; otherwise the path is printed and
    returned.
    """
    hidden_root = Tk()
    hidden_root.withdraw()
    chosen_path = filedialog.askopenfilename(title='Choose .csv file')  # // initialdir = "C:/<whatever>"
    if not chosen_path:
        sys.exit("Stopped because no csv file was given")
    print(f'File path to csv is: {chosen_path}')
    return chosen_path
# Ask the user for the CSV file first; get_csv_file() exits the program when
# no file is chosen, so nothing below runs without a path.
csv_file_path = get_csv_file()
# upload
# (disabled alternative: read the credentials from a token file instead)
# with open(path_to_credentials, 'rb') as token:
# # credentials = pickle.load(token)
# credentials = token.read()
# Build service-account credentials from creds.json with the module-level scopes.
credentials = Credentials.from_service_account_file("creds.json", scopes=scopes)
# API is the module-level Sheets v4 client used by find_sheet_id_by_name and
# push_csv_to_gsheet; it must be bound before either of them is called.
API = build('sheets', 'v4', credentials=credentials)
# Resolve the worksheet title to its sheet id, then paste the CSV into that sheet.
push_csv_to_gsheet(
csv_path=csv_file_path,
sheet_id=find_sheet_id_by_name(worksheet_name)
)
您可以使用 Drive API 的 files.list 方法并配合 q 搜索参数来查找云端硬盘中的文件,
以检查是否已经存在同名文件,从而避免重复执行上传代码。我在 Quickstart 示例的基础上做了一些修改:
from __future__ import print_function
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
# If modifying these scopes, delete the file token.pickle.
SCOPES = ['https://www.googleapis.com/auth/drive.metadata.readonly']
def main(file_name='yourFile.csv'):
    """Search Google Drive for files named *file_name* before uploading.

    Loads cached user credentials from ``token.pickle``; when they are missing
    or invalid they are refreshed, or obtained through the local-server OAuth
    flow based on ``credentials.json``, and then written back to
    ``token.pickle``. Afterwards the Drive v3 API is queried for files whose
    name equals *file_name*. Matching files are printed as ``name (id)``;
    when there is no match, the upload placeholder branch runs.

    Args:
        file_name: Exact Drive file name to look for.
    """
    creds = None
    # The file token.pickle stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    # NOTE(security): pickle.load can execute arbitrary code; only load a
    # token.pickle that this program wrote itself.
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)
    service = build('drive', 'v3', credentials=creds)
    # Call the Drive v3 API
    # Backslashes and single quotes must be escaped inside a quoted query value.
    escaped_name = file_name.replace('\\', '\\\\').replace("'", "\\'")
    results = service.files().list(
        q="name = '{0}'".format(escaped_name),
        fields="nextPageToken, files(id, name)",
    ).execute()
    items = results.get('files', [])
    if not items:
        # ------------------------------------
        # Your uploading code here
        # ------------------------------------
        # A statement is required here: a branch holding only comments is a
        # syntax error.
        pass
    else:
        print('Files:')
        for item in items:
            print(u'{0} ({1})'.format(item['name'], item['id']))
if __name__ == '__main__':
main()
值得关注的部分从 `# Call the Drive v3 API` 开始,其余部分适用于任何 Drive API 的 Python 代码(当然,需要将 scope 改为您所需的范围)。
相对于快速入门示例,我只修改了 `service.files()` 调用。
请把上传文件的代码添加到 `if not items:` 条件分支中。