使用 Cloud Function 将数据加载到 Google Cloud Storage 和 BigQuery [重复]

问题描述 投票:0回答:1

我正在使用 Google Cloud 中的一个云函数,希望能够输出一个 csv,然后将其同时加载到存储桶和 bigquery 中。

到目前为止,我的代码看起来类似于下面的快照。我的云函数入口点为 pagespeedurls。

import requests
import os.path
import datetime
from google.cloud import storage
from google.cloud import bigquery

# Documentation: https://developers.google.com/speed/docs/insights/v5/get-started

# JSON paths: https://developers.google.com/speed/docs/insights/v4/reference/pagespeedapi/runpagespeed

# Populate 'pagespeed.txt' file with URLs to query against API.
with open('pagespeed.txt') as pagespeedurls:
    date = datetime.datetime.now()
    download_dir = 'pagespeed-results' + str(date.today()) + '.csv'
    file = open(download_dir, 'w')
    content = pagespeedurls.readlines()
    content = [line.rstrip('\n') for line in content]

    columnTitleRow = "URL, Cumulative Layout Shift Result, Largest Contentful Paint Result, First Input Delay Result, First Contentful Paint, First Interactive, Largest Contentful Paint, Cumulative Layout Shift, Max Potential FID\n"
    file.write(columnTitleRow)

    # This is the google pagespeed api url structure, using for loop to insert each url in .txt file
    for line in content:
        # If no "strategy" parameter is included, the query by default returns desktop data.
        key = "API_KEY"
        pagespeed = f'https://www.googleapis.com/pagespeedonline/v5/runPagespeed?url={line}&key={key}&strategy=mobile'
        print(f'Requesting {pagespeed}...')
        request = requests.get(pagespeed)
        final = request.json()
        
        try:
            urlid = final['id']
            split = urlid.split('?') # This splits the absolute url from the api key parameter
            urlid = split[0] # This reassigns urlid to the absolute url
            ID = f'URL ~ {urlid}'
            ID2 = str(urlid)
            cruxcls = final['loadingExperience']['metrics']['CUMULATIVE_LAYOUT_SHIFT_SCORE']['category']
            CRUXCLS = f'Cumulative Layout Shift Result ~ {str(cruxcls)}'
            CRUXCLS2 = str(cruxcls)
            cruxlcp = final['loadingExperience']['metrics']['LARGEST_CONTENTFUL_PAINT_MS']['category']
            CRUXLCP = f'Largest Contentful Paint Result ~ {str(cruxlcp)}'
            CRUXLCP2 = str(cruxlcp)
            cruxfid = final['loadingExperience']['metrics']['FIRST_INPUT_DELAY_MS']['category']
            CRUXFID = f'First Input Delay Result ~ {str(cruxfid)}'
            CRUXFID2 = str(cruxfid)
            urlfcp = final['lighthouseResult']['audits']['first-contentful-paint']['displayValue']
            FCP = f'First Contentful Paint ~ {str(urlfcp)}'
            FCP2 = str(urlfcp)
            urlfi = final['lighthouseResult']['audits']['interactive']['displayValue']
            FI = f'First Interactive ~ {str(urlfi)}'
            FI2 = str(urlfi)
            urllcp = final['lighthouseResult']['audits']['largest-contentful-paint']['displayValue']
            LCP = f'Largest Contentful Paint ~ {str(urllcp)}'
            LCP2 = str(urllcp)
            urlcls = final['lighthouseResult']['audits']['cumulative-layout-shift']['displayValue']
            CLS = f'Cumulative Layout Shift ~ {str(urlcls)}'
            CLS2 = str(urlcls)
            urlmaxfid = final['lighthouseResult']['audits']['max-potential-fid']['numericValue']
            MAXFID = f'Cumulative Layout Shift ~ {str(urlmaxfid)}'
            MAXFID2 = str(urlmaxfid)
        except KeyError:
            print(f'<KeyError> One or more keys not found {line}.')
        
        try:
            row = f'{ID2},{CRUXCLS2},{CRUXLCP2},{CRUXFID2},{FCP2},{FI2},{LCP2},{CLS2},{MAXFID2}\n'
            file.write(row)
        except NameError:
            print(f'<NameError> Failing because of KeyError {line}.')
            file.write(f'<KeyError> & <NameError> Failing because of nonexistant Key ~ {line}.' + '\n')
        
        try:
            print(ID) 
            print(FCP)
            print(FI)
        except NameError:
            print(f'<NameError> Failing because of KeyError {line}.')

    file.close()

def upload_file(bucket_name):
  """Uploads a file to the google storage bucket."""
    bucket_name = "BUCKET_NAME"
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    bucket.upload_from_filename(download_dir)

# Load data into BigQuery
def load_data_from_gcs(dataset, table1, source ):
    bigquery_client = bigquery.Client(dataset)
    dataset = bigquery_client.dataset('pageSpeed')
    table = dataset.table(table1)
    job_name = str(uuid.uuid4())

    job = bigquery_client.load_table_from_storage(
        job_name, table, "gs://psi-reports")

    job.source_format = 'NEWLINE_DELIMITED_JSON'
    job.begin()
    wait_for_job(job)
    print("state of job is: " + job.state)
    print("errors: " + job.errors)

部署此程序时,我收到以下错误:

    "Function failed on loading user code. This is likely due to a bug in the user code. Error message: Code in file main.py can't be loaded.
    Detailed stack trace:
    Traceback (most recent call last):
      File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker_v2.py", line 359, in check_or_load_user_function
        _function_handler.load_user_function()
      File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker_v2.py", line 236, in load_user_function
        spec.loader.exec_module(main_module)
      File "<frozen importlib._bootstrap_external>", line 724, in exec_module
      File "<frozen importlib._bootstrap_external>", line 860, in get_code
      File "<frozen importlib._bootstrap_external>", line 791, in source_to_code
      File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
      File "/user_code/main.py", line 82
        bucket_name = "BUCKET_NAME"
    ^
IndentationError: unexpected indent
. Please visit https://cloud.google.com/functions/docs/troubleshooting for in-depth troubleshooting documentation. "

当我在本地运行脚本并为输出添加一个简单的路径时,它工作得很好。我仍然是在云中工作的新手,因此非常感谢任何和所有帮助!

python google-cloud-platform google-bigquery google-cloud-functions google-cloud-storage
1个回答
0
投票

我认为原因是您的评论缩进可能是错误的。

def upload_file(bucket_name):
  """Uploads a file to the google storage bucket."""

相反,请尝试以下

def upload_file(bucket_name):
    """Uploads a file to the google storage bucket."""

def upload_file(bucket_name):
"""Uploads a file to the google storage bucket."""
© www.soinside.com 2019 - 2024. All rights reserved.