Committer: @aribray PiperOrigin-RevId: 373471501pull/653/head
parent
162641cfe5
commit
cc882d10fb
5 changed files with 333 additions and 47 deletions
@ -0,0 +1,30 @@ |
||||
// Copyright 2021 Google LLC |
||||
// |
||||
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||
// you may not use this file except in compliance with the License. |
||||
// You may obtain a copy of the License at |
||||
// |
||||
// http://www.apache.org/licenses/LICENSE-2.0 |
||||
// |
||||
// Unless required by applicable law or agreed to in writing, software |
||||
// distributed under the License is distributed on an "AS IS" BASIS, |
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
// See the License for the specific language governing permissions and |
||||
// limitations under the License. |
||||
|
||||
syntax = "proto3"; |
||||
|
||||
package google.cloud.aiplatform.v1beta1.schema.predict.prediction; |
||||
|
||||
import "google/api/annotations.proto"; |
||||
|
||||
option go_package = "google.golang.org/genproto/googleapis/cloud/aiplatform/v1beta1/schema/predict/prediction;prediction"; |
||||
option java_multiple_files = true; |
||||
option java_outer_classname = "TimeSeriesForecastingPredictionResultProto"; |
||||
option java_package = "com.google.cloud.aiplatform.v1beta1.schema.predict.prediction"; |
||||
|
||||
// Prediction output format for Time Series Forecasting. |
||||
message TimeSeriesForecastingPredictionResult { |
||||
// The regression value. |
||||
float value = 1; |
||||
} |
@ -0,0 +1,281 @@ |
||||
// Copyright 2021 Google LLC |
||||
// |
||||
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||
// you may not use this file except in compliance with the License. |
||||
// You may obtain a copy of the License at |
||||
// |
||||
// http://www.apache.org/licenses/LICENSE-2.0 |
||||
// |
||||
// Unless required by applicable law or agreed to in writing, software |
||||
// distributed under the License is distributed on an "AS IS" BASIS, |
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
// See the License for the specific language governing permissions and |
||||
// limitations under the License. |
||||
|
||||
syntax = "proto3"; |
||||
|
||||
package google.cloud.aiplatform.v1beta1.schema.trainingjob.definition; |
||||
|
||||
import "google/cloud/aiplatform/v1beta1/schema/trainingjob/definition/export_evaluated_data_items_config.proto"; |
||||
import "google/api/annotations.proto"; |
||||
|
||||
option go_package = "google.golang.org/genproto/googleapis/cloud/aiplatform/v1beta1/schema/trainingjob/definition;definition"; |
||||
option java_multiple_files = true; |
||||
option java_outer_classname = "AutoMLForecastingProto"; |
||||
option java_package = "com.google.cloud.aiplatform.v1beta1.schema.trainingjob.definition"; |
||||
|
||||
// A TrainingJob that trains and uploads an AutoML Forecasting Model. |
||||
message AutoMlForecasting { |
||||
// The input parameters of this TrainingJob. |
||||
AutoMlForecastingInputs inputs = 1; |
||||
|
||||
// The metadata information. |
||||
AutoMlForecastingMetadata metadata = 2; |
||||
} |
||||
|
||||
message AutoMlForecastingInputs { |
||||
message Transformation { |
||||
// Training pipeline will infer the proper transformation based on the |
||||
// statistic of dataset. |
||||
message AutoTransformation { |
||||
string column_name = 1; |
||||
} |
||||
|
||||
// Training pipeline will perform following transformation functions. |
||||
// |
||||
// * The value converted to float32. |
||||
// |
||||
// * The z_score of the value. |
||||
// |
||||
// * log(value+1) when the value is greater than or equal to 0. Otherwise, |
||||
// this transformation is not applied and the value is considered a |
||||
// missing value. |
||||
// |
||||
// * z_score of log(value+1) when the value is greater than or equal to 0. |
||||
// Otherwise, this transformation is not applied and the value is |
||||
// considered a missing value. |
||||
// |
||||
// * A boolean value that indicates whether the value is valid. |
||||
message NumericTransformation { |
||||
string column_name = 1; |
||||
} |
||||
|
||||
// Training pipeline will perform following transformation functions. |
||||
// |
||||
// * The categorical string as is--no change to case, punctuation, |
||||
// spelling, tense, and so on. |
||||
// |
||||
// * Convert the category name to a dictionary lookup index and generate an |
||||
// embedding for each index. |
||||
// |
||||
// * Categories that appear less than 5 times in the training dataset are |
||||
// treated as the "unknown" category. The "unknown" category gets its own |
||||
// special lookup index and resulting embedding. |
||||
message CategoricalTransformation { |
||||
string column_name = 1; |
||||
} |
||||
|
||||
// Training pipeline will perform following transformation functions. |
||||
// |
||||
// * Apply the transformation functions for Numerical columns. |
||||
// |
||||
// * Determine the year, month, day,and weekday. Treat each value from the |
||||
// timestamp as a Categorical column. |
||||
// |
||||
// * Invalid numerical values (for example, values that fall outside of a |
||||
// typical timestamp range, or are extreme values) receive no special |
||||
// treatment and are not removed. |
||||
message TimestampTransformation { |
||||
string column_name = 1; |
||||
|
||||
// The format in which that time field is expressed. The time_format must |
||||
// either be one of: |
||||
// |
||||
// * `unix-seconds` |
||||
// |
||||
// * `unix-milliseconds` |
||||
// |
||||
// * `unix-microseconds` |
||||
// |
||||
// * `unix-nanoseconds` |
||||
// |
||||
// (for respectively number of seconds, milliseconds, microseconds and |
||||
// nanoseconds since start of the Unix epoch); |
||||
// |
||||
// or be written in `strftime` syntax. |
||||
// |
||||
// If time_format is not set, then the |
||||
// default format is RFC 3339 `date-time` format, where |
||||
// `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z) |
||||
string time_format = 2; |
||||
} |
||||
|
||||
// Training pipeline will perform following transformation functions. |
||||
// |
||||
// * The text as is--no change to case, punctuation, spelling, tense, and |
||||
// so on. |
||||
// |
||||
// * Convert the category name to a dictionary lookup index and generate an |
||||
// embedding for each index. |
||||
message TextTransformation { |
||||
string column_name = 1; |
||||
} |
||||
|
||||
// The transformation that the training pipeline will apply to the input |
||||
// columns. |
||||
oneof transformation_detail { |
||||
AutoTransformation auto = 1; |
||||
|
||||
NumericTransformation numeric = 2; |
||||
|
||||
CategoricalTransformation categorical = 3; |
||||
|
||||
TimestampTransformation timestamp = 4; |
||||
|
||||
TextTransformation text = 5; |
||||
} |
||||
} |
||||
|
||||
// A duration of time expressed in time granularity units. |
||||
message Granularity { |
||||
// The time granularity unit of this time period. |
||||
// The supported units are: |
||||
// |
||||
// * "minute" |
||||
// |
||||
// * "hour" |
||||
// |
||||
// * "day" |
||||
// |
||||
// * "week" |
||||
// |
||||
// * "month" |
||||
// |
||||
// * "year" |
||||
string unit = 1; |
||||
|
||||
// The number of granularity_units between data points in the training |
||||
// data. If `granularity_unit` is `minute`, |
||||
// can be 1, 5, 10, 15, or 30. For all other values of `granularity_unit`, |
||||
// must be 1. |
||||
int64 quantity = 2; |
||||
} |
||||
|
||||
// The name of the column that the model is to predict. |
||||
string target_column = 1; |
||||
|
||||
// The name of the column that identifies the time series. |
||||
string time_series_identifier_column = 2; |
||||
|
||||
// The name of the column that identifies time order in the time series. |
||||
string time_column = 3; |
||||
|
||||
// Each transformation will apply transform function to given input column. |
||||
// And the result will be used for training. |
||||
// When creating transformation for BigQuery Struct column, the column should |
||||
// be flattened using "." as the delimiter. |
||||
repeated Transformation transformations = 4; |
||||
|
||||
// Objective function the model is optimizing towards. The training process |
||||
// creates a model that optimizes the value of the objective |
||||
// function over the validation set. |
||||
// |
||||
// The supported optimization objectives: |
||||
// |
||||
// * "minimize-rmse" (default) - Minimize root-mean-squared error (RMSE). |
||||
// |
||||
// * "minimize-mae" - Minimize mean-absolute error (MAE). |
||||
// |
||||
// * "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE). |
||||
// |
||||
// * "minimize-rmspe" - Minimize root-mean-squared percentage error (RMSPE). |
||||
// |
||||
// * "minimize-wape-mae" - Minimize the combination of weighted absolute |
||||
// percentage error (WAPE) and mean-absolute-error (MAE). |
||||
// |
||||
// * "minimize-quantile-loss" - Minimize the quantile loss at the quantiles |
||||
// defined in `quantiles`. |
||||
string optimization_objective = 5; |
||||
|
||||
// Required. The train budget of creating this model, expressed in milli node |
||||
// hours i.e. 1,000 value in this field means 1 node hour. |
||||
// |
||||
// The training cost of the model will not exceed this budget. The final cost |
||||
// will be attempted to be close to the budget, though may end up being (even) |
||||
// noticeably smaller - at the backend's discretion. This especially may |
||||
// happen when further model training ceases to provide any improvements. |
||||
// |
||||
// If the budget is set to a value known to be insufficient to train a |
||||
// model for the given dataset, the training won't be attempted and |
||||
// will error. |
||||
// |
||||
// The train budget must be between 1,000 and 72,000 milli node hours, |
||||
// inclusive. |
||||
int64 train_budget_milli_node_hours = 6; |
||||
|
||||
// Column name that should be used as the weight column. |
||||
// Higher values in this column give more importance to the row |
||||
// during model training. The column must have numeric values between 0 and |
||||
// 10000 inclusively; 0 means the row is ignored for training. If weight |
||||
// column field is not set, then all rows are assumed to have equal weight |
||||
// of 1. |
||||
string weight_column = 7; |
||||
|
||||
// Column names that should be used as attribute columns. |
||||
// The value of these columns does not vary as a function of time. |
||||
// For example, store ID or item color. |
||||
repeated string time_series_attribute_columns = 19; |
||||
|
||||
// Names of columns that are unavailable when a forecast is requested. |
||||
// This column contains information for the given entity (identified |
||||
// by the time_series_identifier_column) that is unknown before the forecast |
||||
// For example, actual weather on a given day. |
||||
repeated string unavailable_at_forecast_columns = 20; |
||||
|
||||
// Names of columns that are available and provided when a forecast |
||||
// is requested. These columns |
||||
// contain information for the given entity (identified by the |
||||
// time_series_identifier_column column) that is known at forecast. |
||||
// For example, predicted weather for a specific day. |
||||
repeated string available_at_forecast_columns = 21; |
||||
|
||||
// Expected difference in time granularity between rows in the data. |
||||
Granularity data_granularity = 22; |
||||
|
||||
// The amount of time into the future for which forecasted values for the |
||||
// target are returned. Expressed in number of units defined by the |
||||
// `data_granularity` field. |
||||
int64 forecast_horizon = 23; |
||||
|
||||
// The amount of time into the past training and prediction data is used |
||||
// for model training and prediction respectively. Expressed in number of |
||||
// units defined by the `data_granularity` field. |
||||
int64 context_window = 24; |
||||
|
||||
// Configuration for exporting test set predictions to a BigQuery table. If |
||||
// this configuration is absent, then the export is not performed. |
||||
ExportEvaluatedDataItemsConfig export_evaluated_data_items_config = 15; |
||||
|
||||
// Quantiles to use for minimize-quantile-loss `optimization_objective`. Up to |
||||
// 5 quantiles are allowed of values between 0 and 1, exclusive. Required if |
||||
// the value of optimization_objective is minimize-quantile-loss. Represents |
||||
// the percent quantiles to use for that objective. Quantiles must be unique. |
||||
repeated double quantiles = 16; |
||||
|
||||
// Validation options for the data validation component. The available options |
||||
// are: |
||||
// |
||||
// * "fail-pipeline" - default, will validate against the validation and |
||||
// fail the pipeline if it fails. |
||||
// |
||||
// * "ignore-validation" - ignore the results of the validation and continue |
||||
string validation_options = 17; |
||||
} |
||||
|
||||
// Model metadata specific to AutoML Forecasting. |
||||
message AutoMlForecastingMetadata { |
||||
// Output only. The actual training cost of the model, expressed in milli |
||||
// node hours, i.e. 1,000 value in this field means 1 node hour. Guaranteed |
||||
// to not exceed the train budget. |
||||
int64 train_cost_milli_node_hours = 1; |
||||
} |
Loading…
Reference in new issue