How do I define a schema in Google Apps Script instead of using schema auto-detection?

0 votes · 4 answers

I have been using the following Google Apps Script to load a table from Cloud Storage into BigQuery.

try {
  var source = "gs://path/table.csv";
  var ProjectId = "project";
  var datasetId = "dataset";
  var tableId = "tablename";
  // Note: this object is defined but never passed to the load job.
  var schema = {"configuration.load.autodetect": 'true'};

  // Reference to the destination table.
  var tableReference = BigQuery.newTableReference();
  tableReference.setProjectId(ProjectId);
  tableReference.setDatasetId(datasetId);
  tableReference.setTableId(tableId);

  // Load configuration: CSV from Cloud Storage, schema auto-detected.
  var load = BigQuery.newJobConfigurationLoad();
  load.setDestinationTable(tableReference);
  load.setSourceUris([source]);
  load.setSourceFormat('CSV');
  load.setAutodetect(true);
  load.setMaxBadRecords(0);
  load.setWriteDisposition('WRITE_TRUNCATE');

  var configuration = BigQuery.newJobConfiguration();
  configuration.setLoad(load);

  var newJob = BigQuery.newJob();
  newJob.setConfiguration(configuration);

  // Submit the load job.
  var job = BigQuery.Jobs.insert(newJob, ProjectId);

} catch (err) {
  Logger.log('Table upload error: %s', err);
}

This has been working fine, using schema auto-detection to set the table schema automatically. Now, however, I want to define the table schema myself. I tried the following changes to the code, without success.

source = "gs://path/table.csv";
ProjectId = "project";
datasetId = "dataset";
tableId = "tablename";
schema = {"configuration.load.autodetect": 'false'};

// Explicit schema definition for the table.
schema = {fields: [{name: 'A', type: 'STRING'},
                   {name: 'B', type: 'STRING'},
                   {name: 'C', type: 'INTEGER'},
                   {name: 'D', type: 'STRING'},
                   {name: 'E', type: 'STRING'}]};

Can someone tell me what I am doing wrong?

google-apps-script google-bigquery google-cloud-storage
4 Answers
1 vote

Could you try this schema definition?

schema = {
  fields: [
    {name: 'name', type: 'STRING'},
    {name: 'post_abbr', type: 'STRING'},
  ],
};

More examples here.
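
If you prefer to stay with the builder style used in the question, the same schema can presumably also be built with the advanced service's factory methods. This is only a sketch under the assumption that BigQuery.newTableSchema() and BigQuery.newTableFieldSchema() are available (they follow the same naming pattern as newTableReference() in the question):

// Sketch: building the schema with the advanced service's factories
// (assumed API, following the newTableReference() pattern from the question).
var nameField = BigQuery.newTableFieldSchema();
nameField.setName('name');
nameField.setType('STRING');

var abbrField = BigQuery.newTableFieldSchema();
abbrField.setName('post_abbr');
abbrField.setType('STRING');

var schema = BigQuery.newTableSchema();
schema.setFields([nameField, abbrField]);

// Either way, the schema still has to be attached to the load job,
// e.g. load.setSchema(schema); -- see the answer further down.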


1 vote

Usually I write the job configuration as a plain object in Apps Script and parameterize it through a function. I made a few changes to your current Jobs.insert load job. I hope it works :-)

function insertRows(projectId, datasetId, tableId, schema, partField, sourceUris) {
  // Create the data upload job.
  Logger.log("Create (or) insert rows process started");
  var _sourceFormat = "CSV";
  var _createDisposition = "CREATE_IF_NEEDED";
  var _writeDisposition = "WRITE_TRUNCATE";
  
  var job = {
    configuration: {
      load: {
        sourceUris: sourceUris,
        destinationTable: {
          projectId: projectId,
          datasetId: datasetId,
          tableId: tableId
        }, 
        schema: {
          fields: schema
        },
        timePartitioning: {
          type: 'DAY',
          requirePartitionFilter: true, 
          field: partField
        },
        sourceFormat: _sourceFormat,
        writeDisposition: _writeDisposition,
        createDisposition: _createDisposition
      }
    }
  };
  try {
    var bqRes = BigQuery.Jobs.insert(job, projectId);
    Logger.log(bqRes);
  }
  catch(err) {
    Logger.log("Table data insertion failed");
    Logger.log(err);
  }
}

Example:

...
var source_uris = ["gs://path/to/bucket/file"];
var schema = [
  {"type": "STRING", "name": "field_1", "mode": "NULLABLE"},
  {"type": "STRING", "name": "field_2", "mode": "NULLABLE"},
  {"type": "STRING", "name": "field_3", "mode": "NULLABLE"},
  {"type": "STRING", "name": "field_4", "mode": "NULLABLE"},
  {"type": "DATE", "name": "field_5", "mode": "NULLABLE"}
];
...
insertRows(project_id, dataset_id, table_id, schema, 'field_5', source_uris);
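
One thing to keep in mind: Jobs.insert only submits the load job and returns immediately, so a successful call does not mean the data has landed yet. If you want to wait for the load to finish, a minimal sketch like the following could be appended after the insert call (waitForJob is a hypothetical helper, not part of the answer above; it polls the standard jobs.get method of the advanced BigQuery service):

// Sketch: poll a submitted job until it reaches the DONE state and report any error.
// `waitForJob` is a hypothetical helper; `bqRes` is the response from Jobs.insert above.
function waitForJob(projectId, jobId) {
  var job = BigQuery.Jobs.get(projectId, jobId);
  while (job.status.state !== 'DONE') {
    Utilities.sleep(2000);                      // wait 2 seconds between polls
    job = BigQuery.Jobs.get(projectId, jobId);
  }
  if (job.status.errorResult) {
    Logger.log('Load failed: %s', job.status.errorResult.message);
  } else {
    Logger.log('Load finished successfully');
  }
  return job;
}

// Usage, right after BigQuery.Jobs.insert(job, projectId):
// waitForJob(projectId, bqRes.jobReference.jobId);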

0 votes

Did you remove

load.setAutodetect(true);

and add the following in its place?

load.setSchema(schema);

The original code is the one you posted here on SO 3 years ago.
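
Put together with the schema object from the question, the relevant part of the load configuration might look like this sketch (all variable names and calls are taken from the question's code, except setSchema, which is the call this answer suggests):

// Sketch: the question's load configuration with auto-detect dropped
// and the explicit schema attached instead.
var load = BigQuery.newJobConfigurationLoad();
load.setDestinationTable(tableReference);
load.setSourceUris([source]);
load.setSourceFormat('CSV');
load.setSchema(schema);              // use the schema defined above instead of setAutodetect(true)
load.setMaxBadRecords(0);
load.setWriteDisposition('WRITE_TRUNCATE');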


0 votes

You can use the prebuilt Apps Script types to get autocompletion.

/** @type {BigQuerySchema} */
const schema = {
  fields: [
    {
      name: "Foo",
      type: "STRING"
    }
  ]
}
  1. Add the type annotation:
     /** @type {BigQuerySchema} */
  2. Start typing
     const schema = {
     and press Alt+Space to see the autocompletion suggestions.