|
|
|
@ -1,4 +1,4 @@ |
|
|
|
|
// Copyright 2020 Google LLC |
|
|
|
|
// Copyright 2023 Google LLC |
|
|
|
|
// |
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); |
|
|
|
|
// you may not use this file except in compliance with the License. |
|
|
|
@ -33,6 +33,11 @@ option java_package = "com.google.cloud.bigquery.storage.v1beta1"; |
|
|
|
|
// BigQuery storage API. |
|
|
|
|
// |
|
|
|
|
// The BigQuery storage API can be used to read data stored in BigQuery. |
|
|
|
|
// |
|
|
|
|
// The v1beta1 API is not yet officially deprecated, and will go through a full |
|
|
|
|
// deprecation cycle (https://cloud.google.com/products#product-launch-stages) |
|
|
|
|
// before the service is turned down. However, new code should use the v1 API |
|
|
|
|
// going forward. |
|
|
|
|
service BigQueryStorage { |
|
|
|
|
option (google.api.default_host) = "bigquerystorage.googleapis.com"; |
|
|
|
|
option (google.api.oauth_scopes) = |
|
|
|
@ -49,7 +54,7 @@ service BigQueryStorage { |
|
|
|
|
// reached the end of each stream in the session, then all the data in the |
|
|
|
|
// table has been read. |
|
|
|
|
// |
|
|
|
|
// Read sessions automatically expire 24 hours after they are created and do |
|
|
|
|
// Read sessions automatically expire 6 hours after they are created and do |
|
|
|
|
// not require manual clean-up by the caller. |
|
|
|
|
rpc CreateReadSession(CreateReadSessionRequest) returns (ReadSession) { |
|
|
|
|
option (google.api.http) = { |
|
|
|
@ -60,7 +65,8 @@ service BigQueryStorage { |
|
|
|
|
body: "*" |
|
|
|
|
} |
|
|
|
|
}; |
|
|
|
|
option (google.api.method_signature) = "table_reference,parent,requested_streams"; |
|
|
|
|
option (google.api.method_signature) = |
|
|
|
|
"table_reference,parent,requested_streams"; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Reads rows from the table in the format prescribed by the read session. |
|
|
|
@ -82,7 +88,8 @@ service BigQueryStorage { |
|
|
|
|
// Creates additional streams for a ReadSession. This API can be used to |
|
|
|
|
// dynamically adjust the parallelism of a batch processing task upwards by |
|
|
|
|
// adding additional workers. |
|
|
|
|
rpc BatchCreateReadSessionStreams(BatchCreateReadSessionStreamsRequest) returns (BatchCreateReadSessionStreamsResponse) { |
|
|
|
|
rpc BatchCreateReadSessionStreams(BatchCreateReadSessionStreamsRequest) |
|
|
|
|
returns (BatchCreateReadSessionStreamsResponse) { |
|
|
|
|
option (google.api.http) = { |
|
|
|
|
post: "/v1beta1/{session.name=projects/*/sessions/*}" |
|
|
|
|
body: "*" |
|
|
|
@ -90,7 +97,7 @@ service BigQueryStorage { |
|
|
|
|
option (google.api.method_signature) = "session,requested_streams"; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Triggers the graceful termination of a single stream in a ReadSession. This |
|
|
|
|
// Causes a single stream in a ReadSession to gracefully stop. This |
|
|
|
|
// API can be used to dynamically adjust the parallelism of a batch processing |
|
|
|
|
// task downwards without losing data. |
|
|
|
|
// |
|
|
|
@ -125,7 +132,8 @@ service BigQueryStorage { |
|
|
|
|
// completion. |
|
|
|
|
// |
|
|
|
|
// This method is guaranteed to be idempotent. |
|
|
|
|
rpc SplitReadStream(SplitReadStreamRequest) returns (SplitReadStreamResponse) { |
|
|
|
|
rpc SplitReadStream(SplitReadStreamRequest) |
|
|
|
|
returns (SplitReadStreamResponse) { |
|
|
|
|
option (google.api.http) = { |
|
|
|
|
get: "/v1beta1/{original_stream.name=projects/*/streams/*}" |
|
|
|
|
}; |
|
|
|
@ -193,6 +201,40 @@ message ReadSession { |
|
|
|
|
ShardingStrategy sharding_strategy = 9; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Data format for input or output data. |
|
|
|
|
enum DataFormat { |
|
|
|
|
// Data format is unspecified. |
|
|
|
|
DATA_FORMAT_UNSPECIFIED = 0; |
|
|
|
|
|
|
|
|
|
// Avro is a standard open source row-based file format. |
|
|
|
|
// See https://avro.apache.org/ for more details. |
|
|
|
|
AVRO = 1; |
|
|
|
|
|
|
|
|
|
// Arrow is a standard open source column-based message format. |
|
|
|
|
// See https://arrow.apache.org/ for more details. |
|
|
|
|
ARROW = 3; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Strategy for distributing data among multiple streams in a read session. |
|
|
|
|
enum ShardingStrategy { |
|
|
|
|
// Same as LIQUID. |
|
|
|
|
SHARDING_STRATEGY_UNSPECIFIED = 0; |
|
|
|
|
|
|
|
|
|
// Assigns data to each stream based on the client's read rate. The faster the |
|
|
|
|
// client reads from a stream, the more data is assigned to the stream. In |
|
|
|
|
// this strategy, it's possible to read all data from a single stream even if |
|
|
|
|
// there are other streams present. |
|
|
|
|
LIQUID = 1; |
|
|
|
|
|
|
|
|
|
// Assigns data to each stream such that roughly the same number of rows can |
|
|
|
|
// be read from each stream. Because the server-side unit for assigning data |
|
|
|
|
// is collections of rows, the API does not guarantee that each stream will |
|
|
|
|
// return the same number of rows. Additionally, the limits are enforced based |
|
|
|
|
// on the number of pre-filtering rows, so some filters can lead to lopsided |
|
|
|
|
// assignments. |
|
|
|
|
BALANCED = 2; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Creates a new read session, which may include additional options such as |
|
|
|
|
// requested parallelism, projection filters and constraints. |
|
|
|
|
message CreateReadSessionRequest { |
|
|
|
@ -225,6 +267,7 @@ message CreateReadSessionRequest { |
|
|
|
|
TableReadOptions read_options = 4; |
|
|
|
|
|
|
|
|
|
// Data output format. Currently defaults to Avro. |
|
|
|
|
// DATA_FORMAT_UNSPECIFIED is not supported. |
|
|
|
|
DataFormat format = 5; |
|
|
|
|
|
|
|
|
|
// The strategy to use for distributing data among multiple streams. Currently |
|
|
|
@ -232,38 +275,6 @@ message CreateReadSessionRequest { |
|
|
|
|
ShardingStrategy sharding_strategy = 7; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Data format for input or output data. |
|
|
|
|
enum DataFormat { |
|
|
|
|
// Data format is unspecified. |
|
|
|
|
DATA_FORMAT_UNSPECIFIED = 0; |
|
|
|
|
|
|
|
|
|
// Avro is a standard open source row-based file format. |
|
|
|
|
// See https://avro.apache.org/ for more details. |
|
|
|
|
AVRO = 1; |
|
|
|
|
|
|
|
|
|
ARROW = 3; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Strategy for distributing data among multiple streams in a read session. |
|
|
|
|
enum ShardingStrategy { |
|
|
|
|
// Same as LIQUID. |
|
|
|
|
SHARDING_STRATEGY_UNSPECIFIED = 0; |
|
|
|
|
|
|
|
|
|
// Assigns data to each stream based on the client's read rate. The faster the |
|
|
|
|
// client reads from a stream, the more data is assigned to the stream. In |
|
|
|
|
// this strategy, it's possible to read all data from a single stream even if |
|
|
|
|
// there are other streams present. |
|
|
|
|
LIQUID = 1; |
|
|
|
|
|
|
|
|
|
// Assigns data to each stream such that roughly the same number of rows can |
|
|
|
|
// be read from each stream. Because the server-side unit for assigning data |
|
|
|
|
// is collections of rows, the API does not guarantee that each stream will |
|
|
|
|
// return the same number of rows. Additionally, the limits are enforced based |
|
|
|
|
// on the number of pre-filtering rows, so some filters can lead to lopsided |
|
|
|
|
// assignments. |
|
|
|
|
BALANCED = 2; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// A `ReadRows` request for row data must provide Stream position information. |
|
|
|
|
message ReadRowsRequest { |
|
|
|
|
// Required. Identifier of the position in the stream to start reading from. |
|
|
|
@ -349,6 +360,19 @@ message ReadRowsResponse { |
|
|
|
|
// Throttling status. If unset, the latest response still describes |
|
|
|
|
// the current throttling status. |
|
|
|
|
ThrottleStatus throttle_status = 5; |
|
|
|
|
|
|
|
|
|
// The schema for the read. If read_options.selected_fields is set, the |
|
|
|
|
// schema may be different from the table schema as it will only contain |
|
|
|
|
// the selected fields. This schema is equivalent to the one returned by |
|
|
|
|
// CreateReadSession. This field is only populated in the first |
|
|
|
|
// ReadRowsResponse RPC. |
|
|
|
|
oneof schema { |
|
|
|
|
// Output only. Avro schema. |
|
|
|
|
AvroSchema avro_schema = 7 [(google.api.field_behavior) = OUTPUT_ONLY]; |
|
|
|
|
|
|
|
|
|
// Output only. Arrow schema. |
|
|
|
|
ArrowSchema arrow_schema = 8 [(google.api.field_behavior) = OUTPUT_ONLY]; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Information needed to request additional streams for an established read |
|
|
|
|