#include <exception>
#include <iostream>

#include <arrow/api.h>
#include <arrow/io/api.h>
#include <parquet/arrow/reader.h>
#include <parquet/arrow/writer.h>
#include <parquet/exception.h>
// #0 Build dummy data to pass around
// To have some input data, we first create an Arrow Table that holds
// some data.
std::shared_ptr<arrow::Table> generate_table()
{
arrow::Int64Builder i64builder;
PARQUET_THROW_NOT_OK(i64builder.AppendValues({ 1, 2, 3, 4, 5 }));
std::shared_ptr<arrow::Array> i64array;
PARQUET_THROW_NOT_OK(i64builder.Finish(&i64array));
arrow::StringBuilder strbuilder;
PARQUET_THROW_NOT_OK(strbuilder.Append("some"));
PARQUET_THROW_NOT_OK(strbuilder.Append("string"));
PARQUET_THROW_NOT_OK(strbuilder.Append("content"));
PARQUET_THROW_NOT_OK(strbuilder.Append("in"));
PARQUET_THROW_NOT_OK(strbuilder.Append("rows"));
std::shared_ptr<arrow::Array> strarray;
PARQUET_THROW_NOT_OK(strbuilder.Finish(&strarray));
std::shared_ptr<arrow::Schema> schema = arrow::schema(
{ arrow::field("int", arrow::int64()),
arrow::field("str", arrow::utf8()) });
return arrow::Table::Make(schema, { i64array, strarray });
}
// #1 Write out the data as a Parquet file
// Writes `table` to the relative path "parquet-arrow-example.parquet".
// A failure while opening, writing or closing the file is raised through the
// PARQUET_* macros.
void write_parquet_file(const arrow::Table& table)
{
    std::shared_ptr<arrow::io::FileOutputStream> outfile;
    PARQUET_ASSIGN_OR_THROW(
        outfile,
        arrow::io::FileOutputStream::Open("parquet-arrow-example.parquet"));
    // The last argument to the function call is the size of the RowGroup in
    // the parquet file. Normally you would choose this to be rather large but
    // for the example, we use a small value to have multiple RowGroups.
    PARQUET_THROW_NOT_OK(
        parquet::arrow::WriteTable(table, arrow::default_memory_pool(), outfile, 3));
    // Close explicitly so that an error while flushing/closing the file is
    // reported to the caller instead of being left to the stream's destructor.
    PARQUET_THROW_NOT_OK(outfile->Close());
}
// #2: Fully read in the file
void read_whole_file()
{
std::cout << "Reading parquet-arrow-example.parquet at once" << std::endl;
std::shared_ptr<arrow::io::ReadableFile> infile;
PARQUET_ASSIGN_OR_THROW(
infile,
arrow::io::ReadableFile::Open("parquet-arrow-example.parquet",
arrow::default_memory_pool()));
std::unique_ptr<parquet::arrow::FileReader> reader;
PARQUET_THROW_NOT_OK(
parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
std::shared_ptr<arrow::Table> table;
PARQUET_THROW_NOT_OK(reader->ReadTable(&table));
std::cout << "Loaded " << table->num_rows() << " rows in " << table->num_columns()
<< " columns." << std::endl;
}
// #3: Read only a single RowGroup of the parquet file
void read_single_rowgroup()
{
std::cout << "Reading first RowGroup of parquet-arrow-example.parquet" << std::endl;
std::shared_ptr<arrow::io::ReadableFile> infile;
PARQUET_ASSIGN_OR_THROW(
infile,
arrow::io::ReadableFile::Open("parquet-arrow-example.parquet",
arrow::default_memory_pool()));
std::unique_ptr<parquet::arrow::FileReader> reader;
PARQUET_THROW_NOT_OK(
parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
std::shared_ptr<arrow::Table> table;
PARQUET_THROW_NOT_OK(reader->RowGroup(0)->ReadTable(&table));
std::cout << "Loaded " << table->num_rows() << " rows in " << table->num_columns()
<< " columns." << std::endl;
}
// #4: Read only a single column of the whole parquet file
void read_single_column()
{
std::cout << "Reading first column of parquet-arrow-example.parquet" << std::endl;
std::shared_ptr<arrow::io::ReadableFile> infile;
PARQUET_ASSIGN_OR_THROW(
infile,
arrow::io::ReadableFile::Open("parquet-arrow-example.parquet",
arrow::default_memory_pool()));
std::unique_ptr<parquet::arrow::FileReader> reader;
PARQUET_THROW_NOT_OK(
parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
std::shared_ptr<arrow::ChunkedArray> array;
PARQUET_THROW_NOT_OK(reader->ReadColumn(0, &array));
PARQUET_THROW_NOT_OK(arrow::PrettyPrint(*array, 4, &std::cout));
std::cout << std::endl;
}
// #5: Read only a single column of a RowGroup (this is known as ColumnChunk)
// from the Parquet file.
void read_single_column_chunk()
{
std::cout << "Reading first ColumnChunk of the first RowGroup of "
"parquet-arrow-example.parquet"
<< std::endl;
std::shared_ptr<arrow::io::ReadableFile> infile;
PARQUET_ASSIGN_OR_THROW(
infile,
arrow::io::ReadableFile::Open("parquet-arrow-example.parquet",
arrow::default_memory_pool()));
std::unique_ptr<parquet::arrow::FileReader> reader;
PARQUET_THROW_NOT_OK(
parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
std::shared_ptr<arrow::ChunkedArray> array;
PARQUET_THROW_NOT_OK(reader->RowGroup(0)->Column(0)->Read(&array));
PARQUET_THROW_NOT_OK(arrow::PrettyPrint(*array, 4, &std::cout));
std::cout << std::endl;
}
// Entry point of the example: builds the sample table, writes it to
// "parquet-arrow-example.parquet" and then reads it back in four ways
// (whole file, one RowGroup, one column, one ColumnChunk).
// Returns 0 on success and 1 if any step throws.
int main(int /*argc*/, char** /*argv*/)
{
    try {
        const std::shared_ptr<arrow::Table> table = generate_table();
        write_parquet_file(*table);
        read_whole_file();
        read_single_rowgroup();
        read_single_column();
        read_single_column_chunk();
    } catch (const std::exception& e) {
        // The helpers report failures by throwing from the PARQUET_* macros.
        // Without this handler such an exception would escape main and end in
        // std::terminate with no message.
        // NOTE(review): assumes the thrown exception type derives from
        // std::exception (parquet::ParquetException) - confirm for the
        // Parquet version in use.
        std::cerr << "Parquet example failed: " << e.what() << std::endl;
        return 1;
    }
    return 0;
}
没有合适的资源?快使用搜索试试~ 我知道了~
C++ 读写 parquet 文件 Demo
共438个文件
h:341个
dll:24个
obj:18个
需积分: 50 23 下载量 54 浏览量
2020-04-17
14:28:42
上传
评论 2
收藏 53.15MB ZIP 举报
温馨提示
项目使用vs2015,使用的git上的三方开源框架,进行了裁剪,基于C++创建,读写parquet文件。
资源推荐
资源详情
资源评论
收起资源包目录
C++ 读写 parquet 文件 Demo (438个子文件)
main.cpp 5KB
parquettest.VC.db 36.32MB
gandiva.dll 21.17MB
gandiva.dll 21.17MB
arrow.dll 10.74MB
arrow.dll 10.74MB
libcrypto-1_1-x64.dll 3.07MB
libcrypto-1_1-x64.dll 3.07MB
libprotobuf.dll 2.54MB
libprotobuf.dll 2.54MB
arrow_flight.dll 2.11MB
arrow_flight.dll 2.11MB
parquet.dll 1.87MB
parquet.dll 1.87MB
arrow_python.dll 1.19MB
arrow_python.dll 1.19MB
libssl-1_1-x64.dll 633KB
libssl-1_1-x64.dll 633KB
arrow_dataset.dll 591KB
arrow_dataset.dll 591KB
arrow_python_flight.dll 91KB
arrow_python_flight.dll 91KB
zlib.dll 88KB
zlib.dll 88KB
cares.dll 80KB
cares.dll 80KB
parquettest.exe 568KB
parquettest.exe 49KB
parquettest.vcxproj.filters 945B
date.h 224KB
tz.h 82KB
bpacking.h 80KB
type.h 58KB
array.h 53KB
bit_util.h 40KB
hashing.h 29KB
double-conversion.h 27KB
test_util.h 25KB
rle_encoding.h 25KB
properties.h 24KB
type_traits.h 23KB
filter.h 23KB
stl.h 21KB
sparse_tensor.h 21KB
types.h 21KB
builder_binary.h 19KB
encryption.h 19KB
gtest_util.h 18KB
parsing.h 17KB
test_util.h 17KB
schema.h 16KB
builder_primitive.h 16KB
builder_dict.h 16KB
buffer.h 16KB
pyarrow_api.h 15KB
iterator.h 15KB
result.h 15KB
types.h 15KB
writer.h 15KB
encoding.h 15KB
status.h 15KB
table.h 14KB
builder_nested.h 14KB
test_util.h 14KB
scalar.h 14KB
column_reader.h 14KB
flight.h 14KB
bit_stream_utils.h 13KB
metadata.h 13KB
ieee.h 13KB
filesystem.h 13KB
buffer_builder.h 13KB
reader.h 13KB
utils.h 12KB
statistics.h 12KB
random.h 12KB
reader.h 12KB
visitor_inline.h 11KB
dex.h 11KB
llvm_generator.h 11KB
tz_private.h 10KB
kernel.h 10KB
function_registry_common.h 10KB
column_writer.h 10KB
interfaces.h 10KB
client.h 10KB
io_util.h 10KB
partition.h 10KB
server.h 9KB
type_traits.h 9KB
bloom_filter.h 9KB
common.h 9KB
file_base.h 9KB
file.h 9KB
file_writer.h 9KB
logging.h 9KB
message.h 9KB
column_scanner.h 9KB
stream_reader.h 9KB
record_batch.h 9KB
共 438 条
- 1
- 2
- 3
- 4
- 5
资源评论
新之野望游戏工作室
- 粉丝: 7
- 资源: 3
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功