docs: update documentation for bigquerystorage v1beta1

PiperOrigin-RevId: 531289380
pull/804/head
Google APIs 2 years ago committed by Copybara-Service
parent ea7678c6e8
commit 3d497fa928
  1. 2
      google/cloud/bigquery/storage/v1beta1/arrow.proto
  2. 2
      google/cloud/bigquery/storage/v1beta1/avro.proto
  3. 1
      google/cloud/bigquery/storage/v1beta1/bigquerystorage_v1beta1.yaml
  4. 57
      google/cloud/bigquery/storage/v1beta1/read_options.proto
  5. 100
      google/cloud/bigquery/storage/v1beta1/storage.proto
  6. 2
      google/cloud/bigquery/storage/v1beta1/table_reference.proto

@ -1,4 +1,4 @@
// Copyright 2020 Google LLC
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.

@ -1,4 +1,4 @@
// Copyright 2020 Google LLC
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.

@ -19,5 +19,4 @@ authentication:
oauth:
canonical_scopes: |-
https://www.googleapis.com/auth/bigquery,
https://www.googleapis.com/auth/bigquery.readonly,
https://www.googleapis.com/auth/cloud-platform

@ -1,4 +1,4 @@
// Copyright 2020 Google LLC
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -21,19 +21,64 @@ option java_package = "com.google.cloud.bigquery.storage.v1beta1";
// Options dictating how we read a table.
message TableReadOptions {
// Optional. Names of the fields in the table that should be read. If empty,
// all fields will be read. If the specified field is a nested field, all the
// sub-fields in the field will be selected. The output field order is
// unrelated to the order of fields in selected_fields.
// Optional. The names of the fields in the table to be returned. If no
// field names are specified, then all fields in the table are returned.
//
// Nested fields -- the child elements of a STRUCT field -- can be selected
// individually using their fully-qualified names, and will be returned as
// record fields containing only the selected nested fields. If a STRUCT
// field is specified in the selected fields list, all of the child elements
// will be returned.
//
// As an example, consider a table with the following schema:
//
// {
// "name": "struct_field",
// "type": "RECORD",
// "mode": "NULLABLE",
// "fields": [
// {
// "name": "string_field1",
// "type": "STRING",
// "mode": "NULLABLE"
// },
// {
// "name": "string_field2",
// "type": "STRING",
// "mode": "NULLABLE"
// }
// ]
// }
//
// Specifying "struct_field" in the selected fields list will result in a
// read session schema with the following logical structure:
//
// struct_field {
// string_field1
// string_field2
// }
//
// Specifying "struct_field.string_field1" in the selected fields list will
// result in a read session schema with the following logical structure:
//
// struct_field {
// string_field1
// }
//
// The order of the fields in the read session schema is derived from the
// table schema and does not correspond to the order in which the fields are
// specified in this list.
repeated string selected_fields = 1;
// Optional. SQL text filtering statement, similar to a WHERE clause in
// a query. Aggregates are not supported.
// a SQL query. Aggregates are not supported.
//
// Examples: "int_field > 5"
// "date_field = CAST('2014-9-27' as DATE)"
// "nullable_field is not NULL"
// "st_equals(geo_field, st_geofromtext("POINT(2, 2)"))"
// "numeric_field BETWEEN 1.0 AND 5.0"
//
// Restricted to a maximum length of 1 MB.
string row_restriction = 2;
}

@ -1,4 +1,4 @@
// Copyright 2020 Google LLC
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -33,6 +33,11 @@ option java_package = "com.google.cloud.bigquery.storage.v1beta1";
// BigQuery storage API.
//
// The BigQuery storage API can be used to read data stored in BigQuery.
//
// The v1beta1 API is not yet officially deprecated, and will go through a full
// deprecation cycle (https://cloud.google.com/products#product-launch-stages)
// before the service is turned down. However, new code should use the v1 API
// going forward.
service BigQueryStorage {
option (google.api.default_host) = "bigquerystorage.googleapis.com";
option (google.api.oauth_scopes) =
@ -49,7 +54,7 @@ service BigQueryStorage {
// reached the end of each stream in the session, then all the data in the
// table has been read.
//
// Read sessions automatically expire 24 hours after they are created and do
// Read sessions automatically expire 6 hours after they are created and do
// not require manual clean-up by the caller.
rpc CreateReadSession(CreateReadSessionRequest) returns (ReadSession) {
option (google.api.http) = {
@ -60,7 +65,8 @@ service BigQueryStorage {
body: "*"
}
};
option (google.api.method_signature) = "table_reference,parent,requested_streams";
option (google.api.method_signature) =
"table_reference,parent,requested_streams";
}
// Reads rows from the table in the format prescribed by the read session.
@ -82,7 +88,8 @@ service BigQueryStorage {
// Creates additional streams for a ReadSession. This API can be used to
// dynamically adjust the parallelism of a batch processing task upwards by
// adding additional workers.
rpc BatchCreateReadSessionStreams(BatchCreateReadSessionStreamsRequest) returns (BatchCreateReadSessionStreamsResponse) {
rpc BatchCreateReadSessionStreams(BatchCreateReadSessionStreamsRequest)
returns (BatchCreateReadSessionStreamsResponse) {
option (google.api.http) = {
post: "/v1beta1/{session.name=projects/*/sessions/*}"
body: "*"
@ -90,7 +97,7 @@ service BigQueryStorage {
option (google.api.method_signature) = "session,requested_streams";
}
// Triggers the graceful termination of a single stream in a ReadSession. This
// Causes a single stream in a ReadSession to gracefully stop. This
// API can be used to dynamically adjust the parallelism of a batch processing
// task downwards without losing data.
//
@ -125,7 +132,8 @@ service BigQueryStorage {
// completion.
//
// This method is guaranteed to be idempotent.
rpc SplitReadStream(SplitReadStreamRequest) returns (SplitReadStreamResponse) {
rpc SplitReadStream(SplitReadStreamRequest)
returns (SplitReadStreamResponse) {
option (google.api.http) = {
get: "/v1beta1/{original_stream.name=projects/*/streams/*}"
};
@ -193,6 +201,40 @@ message ReadSession {
ShardingStrategy sharding_strategy = 9;
}
// Data format for input or output data.
enum DataFormat {
// Data format is unspecified.
DATA_FORMAT_UNSPECIFIED = 0;
// Avro is a standard open source row-based file format.
// See https://avro.apache.org/ for more details.
AVRO = 1;
// Arrow is a standard open source column-based message format.
// See https://arrow.apache.org/ for more details.
ARROW = 3;
}
// Strategy for distributing data among multiple streams in a read session.
enum ShardingStrategy {
// Same as LIQUID.
SHARDING_STRATEGY_UNSPECIFIED = 0;
// Assigns data to each stream based on the client's read rate. The faster the
// client reads from a stream, the more data is assigned to the stream. In
// this strategy, it's possible to read all data from a single stream even if
// there are other streams present.
LIQUID = 1;
// Assigns data to each stream such that roughly the same number of rows can
// be read from each stream. Because the server-side unit for assigning data
// is collections of rows, the API does not guarantee that each stream will
// return the same number of rows. Additionally, the limits are enforced based
// on the number of pre-filtering rows, so some filters can lead to lopsided
// assignments.
BALANCED = 2;
}
// Creates a new read session, which may include additional options such as
// requested parallelism, projection filters and constraints.
message CreateReadSessionRequest {
@ -225,6 +267,7 @@ message CreateReadSessionRequest {
TableReadOptions read_options = 4;
// Data output format. Currently defaults to Avro.
// DATA_FORMAT_UNSPECIFIED is not supported.
DataFormat format = 5;
// The strategy to use for distributing data among multiple streams. Currently
@ -232,38 +275,6 @@ message CreateReadSessionRequest {
ShardingStrategy sharding_strategy = 7;
}
// Data format for input or output data.
enum DataFormat {
// Data format is unspecified.
DATA_FORMAT_UNSPECIFIED = 0;
// Avro is a standard open source row based file format.
// See https://avro.apache.org/ for more details.
AVRO = 1;
ARROW = 3;
}
// Strategy for distributing data among multiple streams in a read session.
enum ShardingStrategy {
// Same as LIQUID.
SHARDING_STRATEGY_UNSPECIFIED = 0;
// Assigns data to each stream based on the client's read rate. The faster the
// client reads from a stream, the more data is assigned to the stream. In
// this strategy, it's possible to read all data from a single stream even if
// there are other streams present.
LIQUID = 1;
// Assigns data to each stream such that roughly the same number of rows can
// be read from each stream. Because the server-side unit for assigning data
// is collections of rows, the API does not guarantee that each stream will
// return the same number or rows. Additionally, the limits are enforced based
// on the number of pre-filtering rows, so some filters can lead to lopsided
// assignments.
BALANCED = 2;
}
// Requesting row data via `ReadRows` must provide Stream position information.
message ReadRowsRequest {
// Required. Identifier of the position in the stream to start reading from.
@ -349,6 +360,19 @@ message ReadRowsResponse {
// Throttling status. If unset, the latest response still describes
// the current throttling status.
ThrottleStatus throttle_status = 5;
// The schema for the read. If read_options.selected_fields is set, the
// schema may be different from the table schema as it will only contain
// the selected fields. This schema is equivalent to the one returned by
// CreateReadSession. This field is only populated in the first
// ReadRowsResponse RPC.
oneof schema {
// Output only. Avro schema.
AvroSchema avro_schema = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. Arrow schema.
ArrowSchema arrow_schema = 8 [(google.api.field_behavior) = OUTPUT_ONLY];
}
}
// Information needed to request additional streams for an established read

@ -1,4 +1,4 @@
// Copyright 2020 Google LLC
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.

Loading…
Cancel
Save