我有一个可变大小的二维数组:
// Initialize 2-D arrays of x and y values: each holds `rows` inner vectors of
// `cols` doubles, with every element set to 0.0.
std::vector<std::vector<double>> x(rows, std::vector<double>(cols, 0.0));
std::vector<std::vector<double>> y(rows, std::vector<double>(cols, 0.0));
}
如何将这些值写入具有以下格式的 Parquet 文件?
|   | x0 | y0 | ... | xn | yn |
|---|---|---|---|---|---|
| 0 |   |   | ... |   |   |
| ... | ... | ... | ... | ... | ... |
| N |   |   | ... |   |   |
其中 n = rows - 1,N = cols - 1。
到目前为止,这是我的尝试:
// Create schema for output file: two float64 fields ("x_<i>" and "y_<i>") are
// added for every i in [0, rows), i.e. 2 * rows fields in total.
arrow::FieldVector fields;
for(int i = 0; i < rows; i++) {
fields.push_back(arrow::field("x_" + std::to_string(i), arrow::float64()));
fields.push_back(arrow::field("y_" + std::to_string(i), arrow::float64()));
}
std::shared_ptr<arrow::Schema> schema = arrow::schema(fields);
// Store data into arrow::Table for output: one array is built per i from
// x[i][0 .. cols-1]. Only x is read in this loop, so `rows` arrays are
// produced in total.
arrow::ArrayVector array_vector;
for(int i = 0; i < rows; i++) {
arrow::FloatBuilder fbuilder;
std::shared_ptr<arrow::Array> data_array;
for(int j = 0; j < cols; j++) {
// The value returned by Append is discarded here.
fbuilder.Append(x[i][j]);
}
// The value returned by Finish is discarded here as well.
fbuilder.Finish(&data_array);
array_vector.push_back(data_array);
}
std::shared_ptr<arrow::Table> table = arrow::Table::Make(schema, array_vector);
// Write table to output file "test.parquet"; both calls below are wrapped in
// PARQUET_THROW_NOT_OK.
// NOTE(review): the trailing 3 passed to WriteTable is presumably the chunk
// size (rows per row group) -- confirm against the WriteTable signature.
std::shared_ptr<arrow::io::FileOutputStream> outfile;
PARQUET_THROW_NOT_OK(arrow::io::FileOutputStream::Open("test.parquet", &outfile));
PARQUET_THROW_NOT_OK(parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), outfile, 3));
生成的“test.parquet”不包含任何内容,并在尝试使用 python 读取它时抛出以下错误:
pyarrow.lib.ArrowInvalid: Could not open Parquet input source '<Buffer>': Parquet file size is 0 bytes
我在尝试运行您的代码时收到了许多错误和编译器警告,请务必留意它们。此外,按您现有的写法,
PARQUET_THROW_NOT_OK
应该会抛出异常。
i < rows
? for(int i = 0; i < rows; i++) {
fields.push_back(arrow::field("x_" + std::to_string(i), arrow::float64()));
fields.push_back(arrow::field("y_" + std::to_string(i), arrow::float64()));
}
您忽略了数组构建器方法
Append
和 Finish
可能返回的错误状态。
您正在定义
2*rows
列(一组x_
和一组y_
)但您只创建rows
数组。
构建模式时使用
float64
。但是,您使用的构建器类型是 FloatBuilder
,即 float32
。如果你想要 float64
,请使用 DoubleBuilder
。
您将
&outfile
作为 arrow::io::FileOutputStream::Open
的第二个参数传递,但 Open
返回的是一个 Result,并不接受输出参数。结果是,该指针被隐式转换为了 bool
.
这是一个大致相似的版本,它确实成功地创建了 Parquet 文件:
#include <iostream>
#include <memory>
#include <string>

#include <arrow/array/builder_primitive.h>
#include <arrow/io/api.h>
#include <arrow/result.h>
#include <arrow/status.h>
#include <arrow/table.h>
#include <parquet/arrow/writer.h>
using arrow::Status;
namespace
{
// Shape of the sample data. With the layout used by RunMain, kRows is the
// number of columns ("x_<i>") in the output table and kCols is the number of
// values stored in each of those columns.
constexpr int kRows = 100;
constexpr int kCols = 100;

/// @brief Builds a table of sample float32 values and writes it to
///        "test.parquet".
///
/// The table has kRows columns named "x_0" .. "x_<kRows-1>". Column "x_<i>"
/// holds the kCols values `i * kCols + j` for j in [0, kCols).
///
/// @return Status::OK() on success; otherwise the first non-OK status produced
///         by an array builder, by opening the output file, by WriteTable, or
///         by closing the file.
Status RunMain()
{
    // Create schema for output file: one float32 field per "x_<i>" column.
    // The field type has to agree with the builder used below
    // (arrow::FloatBuilder builds float32 arrays).
    arrow::FieldVector fields;
    fields.reserve(kRows);
    for (int i = 0; i < kRows; i++)
    {
        fields.push_back(arrow::field("x_" + std::to_string(i), arrow::float32()));
        // The "y_<i>" fields are left out of this example; every field that is
        // added to the schema needs its own array in array_vector below.
        // fields.push_back(arrow::field("y_" + std::to_string(i), arrow::float64()));
    }
    const std::shared_ptr<arrow::Schema> schema = arrow::schema(fields);

    // Store data into arrow::Table for output: one array per schema field.
    arrow::ArrayVector array_vector;
    array_vector.reserve(kRows);
    for (int i = 0; i < kRows; i++)
    {
        arrow::FloatBuilder fbuilder;
        // The number of values is known up front, so allocate once.
        ARROW_RETURN_NOT_OK(fbuilder.Reserve(kCols));
        for (int j = 0; j < kCols; j++)
        {
            // Row-major flat index: the stride is the inner dimension (kCols),
            // which keeps the values distinct even when kRows != kCols.
            ARROW_RETURN_NOT_OK(fbuilder.Append(static_cast<float>(i * kCols + j)));
        }
        std::shared_ptr<arrow::Array> data_array;
        ARROW_RETURN_NOT_OK(fbuilder.Finish(&data_array));
        array_vector.push_back(data_array);
    }
    const std::shared_ptr<arrow::Table> table = arrow::Table::Make(schema, array_vector);

    // Write table to output file. Open() returns a Result, so it is unwrapped
    // with ARROW_ASSIGN_OR_RAISE rather than through an out-parameter.
    std::shared_ptr<arrow::io::FileOutputStream> outfile;
    ARROW_ASSIGN_OR_RAISE(outfile, arrow::io::FileOutputStream::Open("test.parquet"));
    // The last argument is the chunk size (maximum rows per row group).
    ARROW_RETURN_NOT_OK(parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), outfile, 3));
    // Close explicitly so that a failure while closing reaches the caller.
    return outfile->Close();
}
} // namespace
int main()
{
Status st = RunMain();
if (!st.ok())
{
std::cerr << st << std::endl;
return 1;
}
return 0;
}