parent
b042947963
commit
7e973fab0e
14 changed files with 3741 additions and 0 deletions
@ -0,0 +1,14 @@ |
||||
Stores, processes, explores and shares genomic data. This API implements |
||||
the Global Alliance for Genomics and Health (GA4GH) v0.5.1 API as well as |
||||
several extensions. |
||||
|
||||
The Google Genomics API supports access via both |
||||
[JSON/REST](https://cloud.google.com/genomics/reference/rest) and |
||||
[gRPC](https://cloud.google.com/genomics/reference/rpc). JSON/REST is more |
||||
broadly available and is easier for getting started with Google Genomics; it |
||||
works well for small metadata resources (datasets, variant sets, read group |
||||
sets) and for browsing small genomic regions for datasets of any size. For |
||||
performant bulk data access (reads and variants), use gRPC. |
||||
|
||||
See also an [overview of genomic resources](https://cloud.google.com/genomics/v1/users-guide) |
||||
and an overview of [Genomics on Google Cloud](https://cloud.google.com/genomics/overview). |
@ -0,0 +1,662 @@ |
||||
// Copyright 2016 Google Inc. |
||||
// |
||||
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||
// you may not use this file except in compliance with the License. |
||||
// You may obtain a copy of the License at |
||||
// |
||||
// http://www.apache.org/licenses/LICENSE-2.0 |
||||
// |
||||
// Unless required by applicable law or agreed to in writing, software |
||||
// distributed under the License is distributed on an "AS IS" BASIS, |
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
// See the License for the specific language governing permissions and |
||||
// limitations under the License. |
||||
|
||||
syntax = "proto3"; |
||||
|
||||
package google.genomics.v1; |
||||
|
||||
import "google/api/annotations.proto"; |
||||
import "google/protobuf/empty.proto"; |
||||
import "google/protobuf/field_mask.proto"; |
||||
import "google/protobuf/struct.proto"; |
||||
import "google/protobuf/wrappers.proto"; |
||||
import "google/rpc/status.proto"; |
||||
|
||||
option cc_enable_arenas = true; |
||||
option java_multiple_files = true; |
||||
option java_outer_classname = "AnnotationsProto"; |
||||
option java_package = "com.google.genomics.v1"; |
||||
|
||||
|
||||
// Stores genomic reference annotations, variant annotations included, and
// retrieves them by position.
service AnnotationServiceV1 {
  // Creates a new annotation set. The caller needs WRITE permission for the
  // associated dataset.
  //
  // Required fields:
  //
  // * [datasetId][google.genomics.v1.AnnotationSet.dataset_id]
  // * [referenceSetId][google.genomics.v1.AnnotationSet.reference_set_id]
  //
  // Any other field is optional unless it is documented as server-generated
  // (for example, the `id` field).
  rpc CreateAnnotationSet(CreateAnnotationSetRequest)
      returns (AnnotationSet) {
    option (google.api.http) = {
      post: "/v1/annotationsets"
      body: "annotation_set"
    };
  }

  // Gets an annotation set. The caller needs READ permission for the
  // associated dataset.
  rpc GetAnnotationSet(GetAnnotationSetRequest) returns (AnnotationSet) {
    option (google.api.http) = {
      get: "/v1/annotationsets/{annotation_set_id}"
    };
  }

  // Updates an annotation set. The update must honor every mutability
  // restriction and other invariant described on the annotation set resource.
  // The caller needs WRITE permission for the associated dataset.
  rpc UpdateAnnotationSet(UpdateAnnotationSetRequest)
      returns (AnnotationSet) {
    option (google.api.http) = {
      put: "/v1/annotationsets/{annotation_set_id}"
      body: "annotation_set"
    };
  }

  // Deletes an annotation set. The caller needs WRITE permission for the
  // associated annotation set.
  rpc DeleteAnnotationSet(DeleteAnnotationSetRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/annotationsets/{annotation_set_id}"
    };
  }

  // Searches for annotation sets matching the given criteria. The result
  // order is unspecified but consistent: two queries for the same content
  // (regardless of page size) yield annotation sets in the same order across
  // their respective streams of paginated responses. The caller needs READ
  // permission for the queried datasets.
  rpc SearchAnnotationSets(SearchAnnotationSetsRequest)
      returns (SearchAnnotationSetsResponse) {
    option (google.api.http) = {
      post: "/v1/annotationsets/search"
      body: "*"
    };
  }

  // Creates a new annotation. The caller needs WRITE permission for the
  // associated annotation set.
  //
  // Required fields:
  //
  // * [annotationSetId][google.genomics.v1.Annotation.annotation_set_id]
  // * [referenceName][google.genomics.v1.Annotation.reference_name] or
  //   [referenceId][google.genomics.v1.Annotation.reference_id]
  //
  // ### Transcripts
  //
  // For annotations of type TRANSCRIPT, these fields of
  // [transcript][google.genomics.v1.Annotation.transcript] must be provided:
  //
  // * [exons.start][google.genomics.v1.Transcript.Exon.start]
  // * [exons.end][google.genomics.v1.Transcript.Exon.end]
  //
  // Any other field is optional unless it is documented as server-generated
  // (for example, the `id` field). The annotated range must be no longer than
  // 100Mbp (mega base pairs). See the
  // [Annotation resource][google.genomics.v1.Annotation]
  // for additional restrictions on each field.
  rpc CreateAnnotation(CreateAnnotationRequest) returns (Annotation) {
    option (google.api.http) = {
      post: "/v1/annotations"
      body: "annotation"
    };
  }

  // Atomically creates one or more new annotations. All of them must belong
  // to the same annotation set, and the caller needs WRITE permission for
  // that annotation set. For optimal performance, batch positionally adjacent
  // annotations together.
  //
  // When the request has a systemic issue, such as an attempt to write to an
  // inaccessible annotation set, the entire RPC fails accordingly. For lesser
  // data issues, when possible an error is isolated to the corresponding
  // batch entry in the response; the remaining well formed annotations are
  // created normally.
  //
  // For the requirements on each individual annotation resource, see
  // [CreateAnnotation][google.genomics.v1.AnnotationServiceV1.CreateAnnotation].
  rpc BatchCreateAnnotations(BatchCreateAnnotationsRequest)
      returns (BatchCreateAnnotationsResponse) {
    option (google.api.http) = {
      post: "/v1/annotations:batchCreate"
      body: "*"
    };
  }

  // Gets an annotation. The caller needs READ permission for the associated
  // annotation set.
  rpc GetAnnotation(GetAnnotationRequest) returns (Annotation) {
    option (google.api.http) = {
      get: "/v1/annotations/{annotation_id}"
    };
  }

  // Updates an annotation. The caller needs WRITE permission for the
  // associated dataset.
  rpc UpdateAnnotation(UpdateAnnotationRequest) returns (Annotation) {
    option (google.api.http) = {
      put: "/v1/annotations/{annotation_id}"
      body: "annotation"
    };
  }

  // Deletes an annotation. The caller needs WRITE permission for the
  // associated annotation set.
  rpc DeleteAnnotation(DeleteAnnotationRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/annotations/{annotation_id}"
    };
  }

  // Searches for annotations matching the given criteria. Results are ordered
  // by genomic coordinate (by reference sequence, then position). Annotations
  // with equivalent genomic coordinates come back in an unspecified but
  // consistent order: two queries for the same content (regardless of page
  // size) yield annotations in the same order across their respective streams
  // of paginated responses. The caller needs READ permission for the queried
  // annotation sets.
  rpc SearchAnnotations(SearchAnnotationsRequest)
      returns (SearchAnnotationsResponse) {
    option (google.api.http) = {
      post: "/v1/annotations/search"
      body: "*"
    };
  }
}
||||
|
||||
// A logical grouping of annotations that share consistent type information
// and provenance. Examples of annotation sets include 'all genes from
// refseq', and 'all variant annotations from ClinVar'.
message AnnotationSet {
  // Server-generated ID of this annotation set, unique across all annotation
  // sets.
  string id = 1;

  // The dataset this annotation set belongs to.
  string dataset_id = 2;

  // ID of the reference set defining the coordinate space for the
  // annotations in this set.
  string reference_set_id = 3;

  // Display name of this annotation set.
  string name = 4;

  // Source URI describing the file this annotation set was generated from,
  // if any.
  string source_uri = 5;

  // The type of the annotations contained in this set.
  AnnotationType type = 6;

  // A map of additional information about this annotation set. This must be
  // of the form map<string, string[]> (string key mapping to a list of string
  // values).
  map<string, google.protobuf.ListValue> info = 17;
}
||||
|
||||
// Describes a region of reference genome. An annotation's value may be one of
// several canonical types, supplemented by arbitrary info tags. Annotations
// are not inherently tied to a specific sample or individual (though a client
// could choose to use them that way). Example canonical annotation types are
// `GENE` and `VARIANT`.
message Annotation {
  // Server-generated ID of this annotation, unique across all annotations.
  string id = 1;

  // The annotation set this annotation belongs to.
  string annotation_set_id = 2;

  // Display name of this annotation.
  string name = 3;

  // ID of the Google Genomics reference associated with this range.
  string reference_id = 4;

  // Display name corresponding to the reference specified by `referenceId`,
  // for example `chr1`, `1`, or `chrX`.
  string reference_name = 5;

  // Start position of the range on the reference, 0-based inclusive.
  int64 start = 6;

  // End position of the range on the reference, 0-based exclusive.
  int64 end = 7;

  // Whether this range refers to the reverse strand, as opposed to the forward
  // strand. Regardless of this field, the start/end position of the range
  // always refer to the forward strand.
  bool reverse_strand = 8;

  // Data type of this annotation. Must match the type of the containing
  // annotation set.
  AnnotationType type = 9;

  oneof value {
    // A variant annotation, describing the effect of a variant on the genome,
    // the coding sequence, and/or higher level consequences at the organism
    // level e.g. pathogenicity. Only set for annotations of type `VARIANT`.
    VariantAnnotation variant = 10;

    // A transcript value, representing the assertion that a particular region
    // of the reference genome may be transcribed as RNA. An alternative
    // splicing pattern would be represented as a separate transcript object.
    // Only set for annotations of type `TRANSCRIPT`.
    Transcript transcript = 11;
  }

  // A map of additional information about this annotation. This must be of
  // the form map<string, string[]> (string key mapping to a list of string
  // values).
  map<string, google.protobuf.ListValue> info = 12;
}
||||
|
||||
// A variant annotation: describes the effect of a variant on the genome, the
// coding sequence, and/or higher level consequences at the organism level
// e.g. pathogenicity.
message VariantAnnotation {
  // A condition associated with a variant; it describes the way a variant
  // influences human health.
  message ClinicalCondition {
    // A set of names for the condition.
    repeated string names = 1;

    // The set of external IDs for this condition.
    repeated ExternalId external_ids = 2;

    // The MedGen concept id associated with this gene.
    // Search for these IDs at http://www.ncbi.nlm.nih.gov/medgen/
    // NOTE(review): this field sits on a condition, so "gene" may be a
    // copy-paste slip for "condition" -- confirm.
    string concept_id = 3;

    // The OMIM id for this condition.
    // Search for these IDs at http://omim.org/
    string omim_id = 4;
  }

  // The kind of variant. Adapted from ClinVar's list of variant types.
  enum Type {
    TYPE_UNSPECIFIED = 0;

    // Use `TYPE_OTHER` when no other Type will suffice. Further explanation
    // of the variant type may be included in the
    // [info][google.genomics.v1.Annotation.info] field.
    TYPE_OTHER = 1;

    // `INSERTION` indicates an insertion.
    INSERTION = 2;

    // `DELETION` indicates a deletion.
    DELETION = 3;

    // `SUBSTITUTION` indicates a block substitution of two or more
    // nucleotides.
    SUBSTITUTION = 4;

    // `SNP` indicates a single nucleotide polymorphism.
    SNP = 5;

    // `STRUCTURAL` indicates a large structural variant, including
    // chromosomal fusions, inversions, etc.
    STRUCTURAL = 6;

    // `CNV` indicates a variation in copy number.
    CNV = 7;
  }

  // The effect of a variant on the coding sequence.
  enum Effect {
    EFFECT_UNSPECIFIED = 0;

    // Use `EFFECT_OTHER` when no other Effect will suffice.
    EFFECT_OTHER = 1;

    // `FRAMESHIFT` indicates a mutation in which the insertion or deletion of
    // nucleotides resulted in a frameshift change.
    FRAMESHIFT = 2;

    // `FRAME_PRESERVING_INDEL` indicates a mutation in which a multiple of
    // three nucleotides has been inserted or deleted, resulting in no change
    // to the reading frame of the coding sequence.
    FRAME_PRESERVING_INDEL = 3;

    // `SYNONYMOUS_SNP` indicates a single nucleotide polymorphism mutation
    // that results in no amino acid change.
    SYNONYMOUS_SNP = 4;

    // `NONSYNONYMOUS_SNP` indicates a single nucleotide polymorphism mutation
    // that results in an amino acid change.
    NONSYNONYMOUS_SNP = 5;

    // `STOP_GAIN` indicates a mutation that leads to the creation of a stop
    // codon at the variant site. Frameshift mutations creating downstream
    // stop codons do not count as `STOP_GAIN`.
    STOP_GAIN = 6;

    // `STOP_LOSS` indicates a mutation that eliminates a stop codon at the
    // variant site.
    STOP_LOSS = 7;

    // `SPLICE_SITE_DISRUPTION` indicates that this variant is found in a
    // splice site for the associated transcript, and alters the normal
    // splicing pattern.
    SPLICE_SITE_DISRUPTION = 8;
  }

  // The clinical significance of a variant. Adapted from the ClinVar
  // controlled vocabulary for clinical significance described at:
  // http://www.ncbi.nlm.nih.gov/clinvar/docs/clinsig/
  enum ClinicalSignificance {
    CLINICAL_SIGNIFICANCE_UNSPECIFIED = 0;

    // Use `CLINICAL_SIGNIFICANCE_OTHER` when no other clinical significance
    // value will suffice.
    CLINICAL_SIGNIFICANCE_OTHER = 1;

    UNCERTAIN = 2;

    BENIGN = 3;

    LIKELY_BENIGN = 4;

    LIKELY_PATHOGENIC = 5;

    PATHOGENIC = 6;

    DRUG_RESPONSE = 7;

    HISTOCOMPATIBILITY = 8;

    CONFERS_SENSITIVITY = 9;

    RISK_FACTOR = 10;

    ASSOCIATION = 11;

    PROTECTIVE = 12;

    // Use `MULTIPLE_REPORTED` when multiple clinical significances are
    // reported for a variant. The original clinical significance values may
    // be provided in the `info` field.
    MULTIPLE_REPORTED = 13;
  }

  // Type has been adapted from ClinVar's list of variant types.
  Type type = 1;

  // Effect of the variant on the coding sequence.
  Effect effect = 2;

  // The alternate allele for this variant. When multiple alternate alleles
  // exist at this location, create a separate variant for each one, as they
  // may represent distinct conditions.
  string alternate_bases = 3;

  // Google annotation ID of the gene affected by this variant. This should be
  // provided when the variant is created.
  string gene_id = 4;

  // Google annotation IDs of the transcripts affected by this variant. These
  // should be provided when the variant is created.
  repeated string transcript_ids = 5;

  // The set of conditions associated with this variant.
  // A condition describes the way a variant influences human health.
  repeated ClinicalCondition conditions = 6;

  // Describes the clinical significance of a variant.
  // It is adapted from the ClinVar controlled vocabulary for clinical
  // significance described at:
  // http://www.ncbi.nlm.nih.gov/clinvar/docs/clinsig/
  ClinicalSignificance clinical_significance = 7;
}
||||
|
||||
// Represents the assertion that a particular region of the reference genome
// may be transcribed as RNA.
message Transcript {
  // A single exon of the transcript, positioned on the annotation's
  // reference sequence.
  message Exon {
    // Start position of the exon on this annotation's reference sequence,
    // 0-based inclusive. This is relative to the reference start, and *not*
    // the containing annotation start.
    int64 start = 1;

    // End position of the exon on this annotation's reference sequence,
    // 0-based exclusive. This is relative to the reference start, and *not*
    // the containing annotation start.
    int64 end = 2;

    // The frame of this exon. Holds a value of 0, 1, or 2, indicating the
    // offset of the first coding base of the exon within the reading frame of
    // the coding DNA sequence, if any. This field depends on the strandedness
    // of this annotation (see
    // [Annotation.reverse_strand][google.genomics.v1.Annotation.reverse_strand]).
    // For forward stranded annotations, the offset is relative to the
    // [exon.start][google.genomics.v1.Transcript.Exon.start]. For reverse
    // strand annotations, the offset is relative to the
    // [exon.end][google.genomics.v1.Transcript.Exon.end] `- 1`.
    //
    // Unset if this exon does not intersect the coding sequence. Upon creation
    // of a transcript, the frame must be populated for all or none of the
    // coding exons.
    google.protobuf.Int32Value frame = 3;
  }

  // The range of the coding sequence, positioned on the annotation's
  // reference sequence.
  message CodingSequence {
    // Start of the coding sequence on this annotation's reference sequence,
    // 0-based inclusive. This position is relative to the reference start,
    // and *not* the containing annotation start.
    int64 start = 1;

    // End of the coding sequence on this annotation's reference sequence,
    // 0-based exclusive. This position is relative to the reference start,
    // and *not* the containing annotation start.
    int64 end = 2;
  }

  // Annotation ID of the gene from which this transcript is transcribed.
  string gene_id = 1;

  // The <a href="http://en.wikipedia.org/wiki/Exon">exons</a> that compose
  // this transcript. Leave this field unset for genomes where transcript
  // splicing does not occur, for example prokaryotes.
  //
  // Introns are regions of the transcript that are not included in the
  // spliced RNA product. They are not explicitly modeled here, but intron
  // ranges can be deduced: all regions of this transcript that are not exons
  // are introns.
  //
  // Exonic sequences do not necessarily code for a translational product
  // (amino acids). Only the regions of exons bounded by the
  // [codingSequence][google.genomics.v1.Transcript.coding_sequence] correspond
  // to coding DNA sequence.
  //
  // Exons are ordered by start position and may not overlap.
  repeated Exon exons = 2;

  // The range of the coding sequence for this transcript, if any. To find the
  // exact ranges of coding sequence, intersect this range with those of the
  // [exons][google.genomics.v1.Transcript.exons], if any. If there are any
  // [exons][google.genomics.v1.Transcript.exons], the
  // [codingSequence][google.genomics.v1.Transcript.coding_sequence] must start
  // and end within them.
  //
  // In some cases the reference genome will not exactly match the observed
  // mRNA transcript, e.g. due to variance in the source genome from
  // reference. In these cases,
  // [exon.frame][google.genomics.v1.Transcript.Exon.frame] will not
  // necessarily match the expected reference reading frame and coding exon
  // reference bases cannot necessarily be concatenated to produce the
  // original transcript mRNA.
  CodingSequence coding_sequence = 3;
}
||||
|
||||
// An identifier assigned by an external data source, paired with the name of
// that source.
message ExternalId {
  // Name of the source of this data.
  string source_name = 1;

  // The id used by the source of this data.
  string id = 2;
}
||||
|
||||
// Request message for the CreateAnnotationSet RPC.
message CreateAnnotationSetRequest {
  // The annotation set to create.
  AnnotationSet annotation_set = 1;
}
||||
|
||||
// Request message for the GetAnnotationSet RPC.
message GetAnnotationSetRequest {
  // ID of the annotation set to retrieve.
  string annotation_set_id = 1;
}
||||
|
||||
// Request message for the UpdateAnnotationSet RPC.
message UpdateAnnotationSetRequest {
  // ID of the annotation set to update.
  string annotation_set_id = 1;

  // The new annotation set.
  AnnotationSet annotation_set = 2;

  // Optional mask naming the fields to update. Mutable fields are
  // [name][google.genomics.v1.AnnotationSet.name],
  // [source_uri][google.genomics.v1.AnnotationSet.source_uri], and
  // [info][google.genomics.v1.AnnotationSet.info]. When unspecified, all
  // mutable fields are updated.
  google.protobuf.FieldMask update_mask = 3;
}
||||
|
||||
// Request message for the DeleteAnnotationSet RPC.
message DeleteAnnotationSetRequest {
  // ID of the annotation set to delete.
  string annotation_set_id = 1;
}
||||
|
||||
// Request message for the SearchAnnotationSets RPC.
message SearchAnnotationSetsRequest {
  // Required. The dataset IDs to search within. The caller must have `READ`
  // access to these datasets.
  repeated string dataset_ids = 1;

  // When specified, only annotation sets associated with the given reference
  // set are returned.
  string reference_set_id = 2;

  // Only return annotation sets for which a substring of the name matches
  // this string (case insensitive).
  string name = 3;

  // When specified, only annotation sets that have any of these types are
  // returned.
  repeated AnnotationType types = 4;

  // The continuation token, used to page through large result sets. To get
  // the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 5;

  // Maximum number of results to return in a single page. Defaults to 128
  // when unspecified. The maximum value is 1024.
  int32 page_size = 6;
}
||||
|
||||
// Response message for the SearchAnnotationSets RPC.
message SearchAnnotationSetsResponse {
  // The matching annotation sets.
  repeated AnnotationSet annotation_sets = 1;

  // The continuation token, used to page through large result sets. Provide
  // this value in a subsequent request to return the next page of results.
  // This field is empty when there are no additional results.
  string next_page_token = 2;
}
||||
|
||||
// Request message for the CreateAnnotation RPC.
message CreateAnnotationRequest {
  // The annotation to create.
  Annotation annotation = 1;
}
||||
|
||||
// Request message for the BatchCreateAnnotations RPC.
message BatchCreateAnnotationsRequest {
  // The annotations to create. A single request may specify at most 4096.
  repeated Annotation annotations = 1;
}
||||
|
||||
// Response message for the BatchCreateAnnotations RPC.
message BatchCreateAnnotationsResponse {
  // The outcome for one annotation of the batch.
  message Entry {
    // The creation status.
    google.rpc.Status status = 1;

    // The created annotation, if creation succeeded.
    Annotation annotation = 2;
  }

  // The per-annotation result entries, ordered consistently with the original
  // request.
  repeated Entry entries = 1;
}
||||
|
||||
// Request message for the GetAnnotation RPC.
message GetAnnotationRequest {
  // ID of the annotation to retrieve.
  string annotation_id = 1;
}
||||
|
||||
// Request message for the UpdateAnnotation RPC.
message UpdateAnnotationRequest {
  // ID of the annotation to update.
  string annotation_id = 1;

  // The new annotation.
  Annotation annotation = 2;

  // Optional mask naming the fields to update. Mutable fields are
  // [name][google.genomics.v1.Annotation.name],
  // [variant][google.genomics.v1.Annotation.variant],
  // [transcript][google.genomics.v1.Annotation.transcript], and
  // [info][google.genomics.v1.Annotation.info]. When unspecified, all mutable
  // fields are updated.
  google.protobuf.FieldMask update_mask = 3;
}
||||
|
||||
// Request message for the DeleteAnnotation RPC.
message DeleteAnnotationRequest {
  // ID of the annotation to delete.
  string annotation_id = 1;
}
||||
|
||||
// Request message for the SearchAnnotations RPC.
message SearchAnnotationsRequest {
  // Required. The annotation sets to search within. The caller must have
  // `READ` access to these annotation sets.
  // All queried annotation sets must have the same type.
  repeated string annotation_set_ids = 1;

  // Required. `reference_id` or `reference_name` must be set.
  oneof reference {
    // ID of the reference to query.
    string reference_id = 2;

    // Name of the reference to query, within the reference set associated
    // with this query.
    string reference_name = 3;
  }

  // Start position of the range on the reference, 0-based inclusive. If
  // specified,
  // [referenceId][google.genomics.v1.SearchAnnotationsRequest.reference_id] or
  // [referenceName][google.genomics.v1.SearchAnnotationsRequest.reference_name]
  // must be specified. Defaults to 0.
  int64 start = 4;

  // End position of the range on the reference, 0-based exclusive. If
  // specified,
  // [referenceId][google.genomics.v1.SearchAnnotationsRequest.reference_id] or
  // [referenceName][google.genomics.v1.SearchAnnotationsRequest.reference_name]
  // must be specified. Defaults to the length of the reference.
  int64 end = 5;

  // The continuation token, used to page through large result sets. To get
  // the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 6;

  // Maximum number of results to return in a single page. Defaults to 256
  // when unspecified. The maximum value is 2048.
  int32 page_size = 7;
}
||||
|
||||
// Response message for the SearchAnnotations RPC.
message SearchAnnotationsResponse {
  // The matching annotations.
  repeated Annotation annotations = 1;

  // The continuation token, used to page through large result sets. Provide
  // this value in a subsequent request to return the next page of results.
  // This field is empty when there are no additional results.
  string next_page_token = 2;
}
||||
|
||||
// The type of an annotation or annotation set. When an
// [Annotation][google.genomics.v1.Annotation] or
// [AnnotationSet][google.genomics.v1.AnnotationSet] is created without a
// `type`, it is set to `GENERIC`.
enum AnnotationType {
  ANNOTATION_TYPE_UNSPECIFIED = 0;

  // Use the `GENERIC` annotation type when no other annotation type will
  // suffice. It represents an untyped annotation of the reference genome.
  GENERIC = 1;

  // A `VARIANT` annotation type.
  VARIANT = 2;

  // A `GENE` annotation type represents the existence of a gene at the
  // associated reference coordinates. The start coordinate is typically the
  // gene's transcription start site and the end is typically the end of the
  // gene's last exon.
  GENE = 3;

  // A `TRANSCRIPT` annotation type represents the assertion that a particular
  // region of the reference genome may be transcribed as RNA.
  TRANSCRIPT = 4;
}
@ -0,0 +1,98 @@ |
||||
// Copyright 2016 Google Inc. |
||||
// |
||||
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||
// you may not use this file except in compliance with the License. |
||||
// You may obtain a copy of the License at |
||||
// |
||||
// http://www.apache.org/licenses/LICENSE-2.0 |
||||
// |
||||
// Unless required by applicable law or agreed to in writing, software |
||||
// distributed under the License is distributed on an "AS IS" BASIS, |
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
// See the License for the specific language governing permissions and |
||||
// limitations under the License. |
||||
|
||||
syntax = "proto3"; |
||||
|
||||
package google.genomics.v1; |
||||
|
||||
import "google/api/annotations.proto"; |
||||
|
||||
option cc_enable_arenas = true; |
||||
option java_multiple_files = true; |
||||
option java_outer_classname = "CigarProto"; |
||||
option java_package = "com.google.genomics.v1"; |
||||
|
||||
|
||||
// One CIGAR operation.
message CigarUnit {
  // The different types of CIGAR alignment operations that exist.
  // Used wherever CIGAR alignments are used.
  enum Operation {
    OPERATION_UNSPECIFIED = 0;

    // An alignment match means a sequence can be aligned to the reference
    // without evidence of an INDEL. Unlike the `SEQUENCE_MATCH` and
    // `SEQUENCE_MISMATCH` operators, the `ALIGNMENT_MATCH` operator does not
    // indicate whether the reference and read sequences are an exact match.
    // This operator is equivalent to SAM's `M`.
    ALIGNMENT_MATCH = 1;

    // The insert operator means the read contains evidence of bases being
    // inserted into the reference. This operator is equivalent to SAM's `I`.
    INSERT = 2;

    // The delete operator means the read contains evidence of bases being
    // deleted from the reference. This operator is equivalent to SAM's `D`.
    DELETE = 3;

    // The skip operator means this read skips a long segment of the
    // reference, but the bases have not been deleted. This operator is
    // commonly used when working with RNA-seq data, where reads may skip long
    // segments of the reference between exons. This operator is equivalent to
    // SAM's `N`.
    SKIP = 4;

    // The soft clip operator means bases at the start/end of a read have not
    // been considered during alignment. This may occur if the majority of a
    // read maps, except for low quality bases at the start/end of a read.
    // This operator is equivalent to SAM's `S`. Bases that are soft clipped
    // will still be stored in the read.
    CLIP_SOFT = 5;

    // The hard clip operator means bases at the start/end of a read have been
    // omitted from this alignment. This may occur if this linear alignment is
    // part of a chimeric alignment, or if the read has been trimmed (for
    // example, during error correction or to trim poly-A tails for RNA-seq).
    // This operator is equivalent to SAM's `H`.
    CLIP_HARD = 6;

    // The pad operator means there is padding in an alignment. This operator
    // is equivalent to SAM's `P`.
    PAD = 7;

    // This operator means this portion of the aligned sequence exactly
    // matches the reference. This operator is equivalent to SAM's `=`.
    SEQUENCE_MATCH = 8;

    // This operator means this portion of the aligned sequence is an
    // alignment match to the reference, but a sequence mismatch. This can
    // indicate a SNP or a read error. This operator is equivalent to SAM's
    // `X`.
    SEQUENCE_MISMATCH = 9;
  }

  // The type of this CIGAR operation.
  Operation operation = 1;

  // Required. The number of genomic bases that the operation runs for.
  int64 operation_length = 2;

  // `referenceSequence` is only used at mismatches (`SEQUENCE_MISMATCH`) and
  // deletions (`DELETE`). Filling this field replaces SAM's MD tag. This
  // field is unset when the relevant information is not available.
  string reference_sequence = 3;
}
@ -0,0 +1,211 @@ |
||||
// Copyright 2016 Google Inc. |
||||
// |
||||
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||
// you may not use this file except in compliance with the License. |
||||
// You may obtain a copy of the License at |
||||
// |
||||
// http://www.apache.org/licenses/LICENSE-2.0 |
||||
// |
||||
// Unless required by applicable law or agreed to in writing, software |
||||
// distributed under the License is distributed on an "AS IS" BASIS, |
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
// See the License for the specific language governing permissions and |
||||
// limitations under the License. |
||||
|
||||
syntax = "proto3"; |
||||
|
||||
package google.genomics.v1; |
||||
|
||||
import "google/api/annotations.proto"; |
||||
import "google/iam/v1/iam_policy.proto"; |
||||
import "google/iam/v1/policy.proto"; |
||||
import "google/protobuf/empty.proto"; |
||||
import "google/protobuf/field_mask.proto"; |
||||
import "google/protobuf/timestamp.proto"; |
||||
|
||||
option cc_enable_arenas = true; |
||||
option java_multiple_files = true; |
||||
option java_outer_classname = "DatasetsProto"; |
||||
option java_package = "com.google.genomics.v1"; |
||||
|
||||
|
||||
// This service manages datasets, which are collections of genomic data.
service DatasetServiceV1 {
  // Lists datasets within a project.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc ListDatasets(ListDatasetsRequest) returns (ListDatasetsResponse) {
    option (google.api.http) = {
      get: "/v1/datasets"
    };
  }

  // Creates a new dataset.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc CreateDataset(CreateDatasetRequest) returns (Dataset) {
    option (google.api.http) = {
      post: "/v1/datasets"
      body: "dataset"
    };
  }

  // Gets a dataset by ID.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc GetDataset(GetDatasetRequest) returns (Dataset) {
    option (google.api.http) = {
      get: "/v1/datasets/{dataset_id}"
    };
  }

  // Updates a dataset.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // This method supports patch semantics.
  rpc UpdateDataset(UpdateDatasetRequest) returns (Dataset) {
    option (google.api.http) = {
      patch: "/v1/datasets/{dataset_id}"
      body: "dataset"
    };
  }

  // Deletes a dataset and all of its contents (all read group sets,
  // reference sets, variant sets, call sets, annotation sets, etc.)
  // This is reversible (up to one week after the deletion) via
  // the
  // [datasets.undelete][google.genomics.v1.DatasetServiceV1.UndeleteDataset]
  // operation.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc DeleteDataset(DeleteDatasetRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/datasets/{dataset_id}"
    };
  }

  // Undeletes a dataset by restoring a dataset which was deleted via this API.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // This operation is only possible for a week after the deletion occurred.
  rpc UndeleteDataset(UndeleteDatasetRequest) returns (Dataset) {
    option (google.api.http) = {
      post: "/v1/datasets/{dataset_id}:undelete"
      body: "*"
    };
  }

  // Sets the access control policy on the specified dataset. Replaces any
  // existing policy.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // See <a href="/iam/docs/managing-policies#setting_a_policy">Setting a
  // Policy</a> for more information.
  rpc SetIamPolicy(google.iam.v1.SetIamPolicyRequest)
      returns (google.iam.v1.Policy) {
    option (google.api.http) = {
      post: "/v1/{resource=datasets/*}:setIamPolicy"
      body: "*"
    };
  }

  // Gets the access control policy for the dataset. This is empty if the
  // policy or resource does not exist.
  //
  // See <a href="/iam/docs/managing-policies#getting_a_policy">Getting a
  // Policy</a> for more information.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc GetIamPolicy(google.iam.v1.GetIamPolicyRequest)
      returns (google.iam.v1.Policy) {
    option (google.api.http) = {
      post: "/v1/{resource=datasets/*}:getIamPolicy"
      body: "*"
    };
  }

  // Returns permissions that a caller has on the specified resource.
  // See <a href="/iam/docs/managing-policies#testing_permissions">Testing
  // Permissions</a> for more information.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc TestIamPermissions(google.iam.v1.TestIamPermissionsRequest)
      returns (google.iam.v1.TestIamPermissionsResponse) {
    option (google.api.http) = {
      post: "/v1/{resource=datasets/*}:testIamPermissions"
      body: "*"
    };
  }
}
||||
|
||||
// A Dataset is a collection of genomic data.
//
// For more genomics resource definitions, see [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message Dataset {
  // The server-generated dataset ID, unique across all datasets.
  string id = 1;

  // The Google Developers Console project ID that this dataset belongs to.
  string project_id = 2;

  // The dataset name.
  string name = 3;

  // The time at which this dataset was created.
  google.protobuf.Timestamp create_time = 4;
}
||||
|
||||
// The dataset list request.
message ListDatasetsRequest {
  // Required. The project to list datasets for.
  string project_id = 1;

  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 50. The maximum value is 1024.
  int32 page_size = 2;

  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 3;
}
||||
|
||||
// The dataset list response.
message ListDatasetsResponse {
  // The list of matching Datasets.
  repeated Dataset datasets = 1;

  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
}
||||
|
||||
// The dataset create request.
message CreateDatasetRequest {
  // The dataset to be created. Must contain projectId and name.
  Dataset dataset = 1;
}
||||
|
||||
// The dataset update request.
message UpdateDatasetRequest {
  // The ID of the dataset to be updated.
  string dataset_id = 1;

  // The new dataset data.
  Dataset dataset = 2;

  // An optional mask specifying which fields to update. At this time, the only
  // mutable field is [name][google.genomics.v1.Dataset.name]. The only
  // acceptable value is "name". If unspecified, all mutable fields will be
  // updated.
  google.protobuf.FieldMask update_mask = 3;
}
||||
|
||||
// The dataset delete request.
message DeleteDatasetRequest {
  // The ID of the dataset to be deleted.
  string dataset_id = 1;
}
||||
|
||||
// The dataset undelete request.
message UndeleteDatasetRequest {
  // The ID of the dataset to be undeleted.
  string dataset_id = 1;
}
||||
|
||||
// The dataset get request.
message GetDatasetRequest {
  // The ID of the dataset.
  string dataset_id = 1;
}
@ -0,0 +1,58 @@ |
||||
// Copyright 2016 Google Inc. |
||||
// |
||||
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||
// you may not use this file except in compliance with the License. |
||||
// You may obtain a copy of the License at |
||||
// |
||||
// http://www.apache.org/licenses/LICENSE-2.0 |
||||
// |
||||
// Unless required by applicable law or agreed to in writing, software |
||||
// distributed under the License is distributed on an "AS IS" BASIS, |
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
// See the License for the specific language governing permissions and |
||||
// limitations under the License. |
||||
|
||||
syntax = "proto3"; |
||||
|
||||
package google.genomics.v1; |
||||
|
||||
import "google/api/annotations.proto"; |
||||
import "google/protobuf/any.proto"; |
||||
import "google/protobuf/timestamp.proto"; |
||||
|
||||
option cc_enable_arenas = true; |
||||
option java_multiple_files = true; |
||||
option java_outer_classname = "OperationsProto"; |
||||
option java_package = "com.google.genomics.v1"; |
||||
|
||||
|
||||
// Metadata describing an [Operation][google.longrunning.Operation].
message OperationMetadata {
  // The Google Cloud Project in which the job is scoped.
  string project_id = 1;

  // The time at which the job was submitted to the Genomics service.
  google.protobuf.Timestamp create_time = 2;

  // The time at which the job stopped running.
  google.protobuf.Timestamp end_time = 4;

  // The original request that started the operation. Note that this will be in
  // the current version of the API. If the operation was started with the
  // v1beta2 API and a GetOperation is performed on the v1 API, a v1 request
  // will be returned.
  google.protobuf.Any request = 5;

  // Optional event messages that were generated during the job's execution.
  // This also contains any warnings that were generated during import
  // or export.
  repeated OperationEvent events = 6;

  // Runtime metadata on this Operation.
  google.protobuf.Any runtime_metadata = 8;
}
||||
|
||||
// An event that occurred during an [Operation][google.longrunning.Operation].
message OperationEvent {
  // Required description of event.
  string description = 3;
}
@ -0,0 +1,41 @@ |
||||
// Copyright 2016 Google Inc. |
||||
// |
||||
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||
// you may not use this file except in compliance with the License. |
||||
// You may obtain a copy of the License at |
||||
// |
||||
// http://www.apache.org/licenses/LICENSE-2.0 |
||||
// |
||||
// Unless required by applicable law or agreed to in writing, software |
||||
// distributed under the License is distributed on an "AS IS" BASIS, |
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
// See the License for the specific language governing permissions and |
||||
// limitations under the License. |
||||
|
||||
syntax = "proto3"; |
||||
|
||||
package google.genomics.v1; |
||||
|
||||
import "google/api/annotations.proto"; |
||||
|
||||
option cc_enable_arenas = true; |
||||
option java_multiple_files = true; |
||||
option java_outer_classname = "PositionProto"; |
||||
option java_package = "com.google.genomics.v1"; |
||||
|
||||
|
||||
// An abstraction for referring to a genomic position, in relation to some
// already known reference. For now, represents a genomic position as a
// reference name, a base number on that reference (0-based), and a
// determination of forward or reverse strand.
message Position {
  // The name of the reference in whatever reference set is being used.
  string reference_name = 1;

  // The 0-based offset from the start of the forward strand for that reference.
  int64 position = 2;

  // Whether this position is on the reverse strand, as opposed to the forward
  // strand.
  bool reverse_strand = 3;
}
@ -0,0 +1,38 @@ |
||||
// Copyright 2016 Google Inc. |
||||
// |
||||
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||
// you may not use this file except in compliance with the License. |
||||
// You may obtain a copy of the License at |
||||
// |
||||
// http://www.apache.org/licenses/LICENSE-2.0 |
||||
// |
||||
// Unless required by applicable law or agreed to in writing, software |
||||
// distributed under the License is distributed on an "AS IS" BASIS, |
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
// See the License for the specific language governing permissions and |
||||
// limitations under the License. |
||||
|
||||
syntax = "proto3"; |
||||
|
||||
package google.genomics.v1; |
||||
|
||||
import "google/api/annotations.proto"; |
||||
|
||||
option cc_enable_arenas = true; |
||||
option java_multiple_files = true; |
||||
option java_outer_classname = "RangeProto"; |
||||
option java_package = "com.google.genomics.v1"; |
||||
|
||||
|
||||
// A 0-based half-open genomic coordinate range for search requests.
message Range {
  // The reference sequence name, for example `chr1`,
  // `1`, or `chrX`.
  string reference_name = 1;

  // The start position of the range on the reference, 0-based inclusive.
  int64 start = 2;

  // The end position of the range on the reference, 0-based exclusive.
  int64 end = 3;
}
@ -0,0 +1,220 @@ |
||||
// Copyright 2016 Google Inc. |
||||
// |
||||
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||
// you may not use this file except in compliance with the License. |
||||
// You may obtain a copy of the License at |
||||
// |
||||
// http://www.apache.org/licenses/LICENSE-2.0 |
||||
// |
||||
// Unless required by applicable law or agreed to in writing, software |
||||
// distributed under the License is distributed on an "AS IS" BASIS, |
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
// See the License for the specific language governing permissions and |
||||
// limitations under the License. |
||||
|
||||
syntax = "proto3"; |
||||
|
||||
package google.genomics.v1; |
||||
|
||||
import "google/api/annotations.proto"; |
||||
import "google/genomics/v1/cigar.proto"; |
||||
import "google/genomics/v1/position.proto"; |
||||
import "google/protobuf/struct.proto"; |
||||
|
||||
option cc_enable_arenas = true; |
||||
option java_multiple_files = true; |
||||
option java_outer_classname = "ReadAlignmentProto"; |
||||
option java_package = "com.google.genomics.v1"; |
||||
|
||||
|
||||
// A linear alignment can be represented by one CIGAR string. Describes the
// mapped position and local alignment of the read to the reference.
message LinearAlignment {
  // The position of this alignment.
  Position position = 1;

  // The mapping quality of this alignment. Represents how likely
  // the read maps to this position as opposed to other locations.
  //
  // Specifically, this is -10 log10 Pr(mapping position is wrong), rounded to
  // the nearest integer.
  int32 mapping_quality = 2;

  // Represents the local alignment of this sequence (alignment matches, indels,
  // etc.) against the reference.
  repeated CigarUnit cigar = 3;
}
||||
|
||||
// A read alignment describes a linear alignment of a string of DNA to a
// [reference sequence][google.genomics.v1.Reference], in addition to metadata
// about the fragment (the molecule of DNA sequenced) and the read (the bases
// which were read by the sequencer). A read is equivalent to a line in a SAM
// file. A read belongs to exactly one read group and exactly one
// [read group set][google.genomics.v1.ReadGroupSet].
//
// For more genomics resource definitions, see [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
//
// ### Reverse-stranded reads
//
// Mapped reads (reads having a non-null `alignment`) can be aligned to either
// the forward or the reverse strand of their associated reference. Strandedness
// of a mapped read is encoded by `alignment.position.reverseStrand`.
//
// If we consider the reference to be a forward-stranded coordinate space of
// `[0, reference.length)` with `0` as the left-most position and
// `reference.length` as the right-most position, reads are always aligned left
// to right. That is, `alignment.position.position` always refers to the
// left-most reference coordinate and `alignment.cigar` describes the alignment
// of this read to the reference from left to right. All per-base fields such as
// `alignedSequence` and `alignedQuality` share this same left-to-right
// orientation; this is true of reads which are aligned to either strand. For
// reverse-stranded reads, this means that `alignedSequence` is the reverse
// complement of the bases that were originally reported by the sequencing
// machine.
//
// ### Generating a reference-aligned sequence string
//
// When interacting with mapped reads, it's often useful to produce a string
// representing the local alignment of the read to reference. The following
// pseudocode demonstrates one way of doing this:
//
//     out = ""
//     offset = 0
//     for c in read.alignment.cigar {
//       switch c.operation {
//       case "ALIGNMENT_MATCH", "SEQUENCE_MATCH", "SEQUENCE_MISMATCH":
//         out += read.alignedSequence[offset:offset+c.operationLength]
//         offset += c.operationLength
//         break
//       case "CLIP_SOFT", "INSERT":
//         offset += c.operationLength
//         break
//       case "PAD":
//         out += repeat("*", c.operationLength)
//         break
//       case "DELETE":
//         out += repeat("-", c.operationLength)
//         break
//       case "SKIP":
//         out += repeat(" ", c.operationLength)
//         break
//       case "CLIP_HARD":
//         break
//       }
//     }
//     return out
//
// ### Converting to SAM's CIGAR string
//
// The following pseudocode generates a SAM CIGAR string from the
// `cigar` field. Note that this is a lossy conversion
// (`cigar.referenceSequence` is lost).
//
//     cigarMap = {
//       "ALIGNMENT_MATCH": "M",
//       "INSERT": "I",
//       "DELETE": "D",
//       "SKIP": "N",
//       "CLIP_SOFT": "S",
//       "CLIP_HARD": "H",
//       "PAD": "P",
//       "SEQUENCE_MATCH": "=",
//       "SEQUENCE_MISMATCH": "X",
//     }
//     cigarStr = ""
//     for c in read.alignment.cigar {
//       cigarStr += c.operationLength + cigarMap[c.operation]
//     }
//     return cigarStr
message Read {
  // The server-generated read ID, unique across all reads. This is different
  // from the `fragmentName`.
  string id = 1;

  // The ID of the read group this read belongs to. A read belongs to exactly
  // one read group. This is a server-generated ID which is distinct from SAM's
  // RG tag (for that value, see
  // [ReadGroup.name][google.genomics.v1.ReadGroup.name]).
  string read_group_id = 2;

  // The ID of the read group set this read belongs to. A read belongs to
  // exactly one read group set.
  string read_group_set_id = 3;

  // The fragment name. Equivalent to QNAME (query template name) in SAM.
  string fragment_name = 4;

  // The orientation and the distance between reads from the fragment are
  // consistent with the sequencing protocol (SAM flag 0x2).
  bool proper_placement = 5;

  // The fragment is a PCR or optical duplicate (SAM flag 0x400).
  bool duplicate_fragment = 6;

  // The observed length of the fragment, equivalent to TLEN in SAM.
  int32 fragment_length = 7;

  // The read number in sequencing. 0-based and less than numberReads. This
  // field replaces SAM flag 0x40 and 0x80.
  int32 read_number = 8;

  // The number of reads in the fragment (extension to SAM flag 0x1).
  int32 number_reads = 9;

  // Whether this read did not pass filters, such as platform or vendor quality
  // controls (SAM flag 0x200).
  bool failed_vendor_quality_checks = 10;

  // The linear alignment for this alignment record. This field is null for
  // unmapped reads.
  LinearAlignment alignment = 11;

  // Whether this alignment is secondary. Equivalent to SAM flag 0x100.
  // A secondary alignment represents an alternative to the primary alignment
  // for this read. Aligners may return secondary alignments if a read can map
  // ambiguously to multiple coordinates in the genome. By convention, each read
  // has one and only one alignment where both `secondaryAlignment`
  // and `supplementaryAlignment` are false.
  bool secondary_alignment = 12;

  // Whether this alignment is supplementary. Equivalent to SAM flag 0x800.
  // Supplementary alignments are used in the representation of a chimeric
  // alignment. In a chimeric alignment, a read is split into multiple
  // linear alignments that map to different reference contigs. The first
  // linear alignment in the read will be designated as the representative
  // alignment; the remaining linear alignments will be designated as
  // supplementary alignments. These alignments may have different mapping
  // quality scores. In each linear alignment in a chimeric alignment, the read
  // will be hard clipped. The `alignedSequence` and
  // `alignedQuality` fields in the alignment record will only
  // represent the bases for its respective linear alignment.
  bool supplementary_alignment = 13;

  // The bases of the read sequence contained in this alignment record,
  // **without CIGAR operations applied** (equivalent to SEQ in SAM).
  // `alignedSequence` and `alignedQuality` may be
  // shorter than the full read sequence and quality. This will occur if the
  // alignment is part of a chimeric alignment, or if the read was trimmed. When
  // this occurs, the CIGAR for this read will begin/end with a hard clip
  // operator that will indicate the length of the excised sequence.
  string aligned_sequence = 14;

  // The quality of the read sequence contained in this alignment record
  // (equivalent to QUAL in SAM).
  // `alignedSequence` and `alignedQuality` may be shorter than the full read
  // sequence and quality. This will occur if the alignment is part of a
  // chimeric alignment, or if the read was trimmed. When this occurs, the CIGAR
  // for this read will begin/end with a hard clip operator that will indicate
  // the length of the excised sequence.
  repeated int32 aligned_quality = 15;

  // The mapping of the primary alignment of the
  // `(readNumber+1)%numberReads` read in the fragment. It replaces
  // mate position and mate strand in SAM.
  Position next_mate_position = 16;

  // A map of additional read alignment information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 17;
}
@ -0,0 +1,105 @@ |
||||
// Copyright 2016 Google Inc. |
||||
// |
||||
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||
// you may not use this file except in compliance with the License. |
||||
// You may obtain a copy of the License at |
||||
// |
||||
// http://www.apache.org/licenses/LICENSE-2.0 |
||||
// |
||||
// Unless required by applicable law or agreed to in writing, software |
||||
// distributed under the License is distributed on an "AS IS" BASIS, |
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
// See the License for the specific language governing permissions and |
||||
// limitations under the License. |
||||
|
||||
syntax = "proto3"; |
||||
|
||||
package google.genomics.v1; |
||||
|
||||
import "google/api/annotations.proto"; |
||||
import "google/protobuf/struct.proto"; |
||||
|
||||
option cc_enable_arenas = true; |
||||
option java_multiple_files = true; |
||||
option java_outer_classname = "ReadGroupProto"; |
||||
option java_package = "com.google.genomics.v1"; |
||||
|
||||
|
||||
// A read group is all the data that's processed the same way by the sequencer.
message ReadGroup {
  // Describes the experiment that produced a read group: the library, platform
  // unit, sequencing center and instrument model involved.
  message Experiment {
    // A client-supplied library identifier; a library is a collection of DNA
    // fragments which have been prepared for sequencing from a sample. This
    // field is important for quality control as error or bias can be introduced
    // during sample preparation.
    string library_id = 1;

    // The platform unit used as part of this experiment, for example
    // flowcell-barcode.lane for Illumina or slide for SOLiD. Corresponds to the
    // @RG PU field in the SAM spec.
    string platform_unit = 2;

    // The sequencing center used as part of this experiment.
    string sequencing_center = 3;

    // The instrument model used as part of this experiment. This maps to
    // sequencing technology in the SAM spec.
    string instrument_model = 4;
  }

  // Describes one program that was run to generate a read group.
  message Program {
    // The command line used to run this program.
    string command_line = 1;

    // The user specified locally unique ID of the program. Used along with
    // `prevProgramId` to define an ordering between programs.
    string id = 2;

    // The display name of the program. This is typically the colloquial name of
    // the tool used, for example 'bwa' or 'picard'.
    string name = 3;

    // The ID of the program run before this one.
    string prev_program_id = 4;

    // The version of the program run.
    string version = 5;
  }

  // The server-generated read group ID, unique for all read groups.
  // Note: This is different than the @RG ID field in the SAM spec. For that
  // value, see [name][google.genomics.v1.ReadGroup.name].
  string id = 1;

  // The dataset to which this read group belongs.
  string dataset_id = 2;

  // The read group name. This corresponds to the @RG ID field in the SAM spec.
  string name = 3;

  // A free-form text description of this read group.
  string description = 4;

  // A client-supplied sample identifier for the reads in this read group.
  string sample_id = 5;

  // The experiment used to generate this read group.
  Experiment experiment = 6;

  // The predicted insert size of this read group. The insert size is the length
  // of the sequenced DNA fragment from end-to-end, not including the adapters.
  int32 predicted_insert_size = 7;

  // The programs used to generate this read group. Programs are always
  // identical for all read groups within a read group set. For this reason,
  // only the first read group in a returned set will have this field
  // populated.
  repeated Program programs = 10;

  // The reference set the reads in this read group are aligned to.
  string reference_set_id = 11;

  // A map of additional read group information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 12;
}
@ -0,0 +1,63 @@ |
||||
// Copyright 2016 Google Inc. |
||||
// |
||||
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||
// you may not use this file except in compliance with the License. |
||||
// You may obtain a copy of the License at |
||||
// |
||||
// http://www.apache.org/licenses/LICENSE-2.0 |
||||
// |
||||
// Unless required by applicable law or agreed to in writing, software |
||||
// distributed under the License is distributed on an "AS IS" BASIS, |
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
// See the License for the specific language governing permissions and |
||||
// limitations under the License. |
||||
|
||||
syntax = "proto3"; |
||||
|
||||
package google.genomics.v1; |
||||
|
||||
import "google/api/annotations.proto"; |
||||
import "google/genomics/v1/readgroup.proto"; |
||||
import "google/protobuf/struct.proto"; |
||||
|
||||
option cc_enable_arenas = true; |
||||
option java_multiple_files = true; |
||||
option java_outer_classname = "ReadGroupSetProto"; |
||||
option java_package = "com.google.genomics.v1"; |
||||
|
||||
|
||||
// A read group set is a logical collection of read groups, which are |
||||
// collections of reads produced by a sequencer. A read group set typically |
||||
// models reads corresponding to one sample, sequenced one way, and aligned one |
||||
// way. |
||||
// |
||||
// * A read group set belongs to one dataset. |
||||
// * A read group belongs to one read group set. |
||||
// * A read belongs to one read group. |
||||
// |
||||
// For more genomics resource definitions, see [Fundamentals of Google |
||||
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) |
||||
message ReadGroupSet { |
||||
// The server-generated read group set ID, unique for all read group sets. |
||||
string id = 1; |
||||
|
||||
// The ID of the dataset to which this read group set belongs. |
||||
string dataset_id = 2; |
||||
|
||||
// The ID of the reference set to which the reads in this read group set are |
// aligned. |
||||
string reference_set_id = 3; |
||||
|
||||
// The read group set name. By default this will be initialized to the sample |
||||
// name of the sequenced data contained in this set. |
||||
string name = 4; |
||||
|
||||
// The filename of the original source file for this read group set, if any. |
||||
string filename = 5; |
||||
|
||||
// The read groups in this set. There are typically 1-10 read groups in a read |
||||
// group set. |
||||
repeated ReadGroup read_groups = 6; |
||||
|
||||
// A map of additional read group set information. Each string key maps to a |
// list of values (`google.protobuf.ListValue`). |
||||
map<string, google.protobuf.ListValue> info = 7; |
||||
} |
@ -0,0 +1,461 @@ |
||||
// Copyright 2016 Google Inc. |
||||
// |
||||
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||
// you may not use this file except in compliance with the License. |
||||
// You may obtain a copy of the License at |
||||
// |
||||
// http://www.apache.org/licenses/LICENSE-2.0 |
||||
// |
||||
// Unless required by applicable law or agreed to in writing, software |
||||
// distributed under the License is distributed on an "AS IS" BASIS, |
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
// See the License for the specific language governing permissions and |
||||
// limitations under the License. |
||||
|
||||
syntax = "proto3"; |
||||
|
||||
package google.genomics.v1; |
||||
|
||||
import "google/api/annotations.proto"; |
||||
import "google/genomics/v1/range.proto"; |
||||
import "google/genomics/v1/readalignment.proto"; |
||||
import "google/genomics/v1/readgroupset.proto"; |
||||
import "google/longrunning/operations.proto"; |
||||
import "google/protobuf/empty.proto"; |
||||
import "google/protobuf/field_mask.proto"; |
||||
|
||||
option cc_enable_arenas = true; |
||||
option java_multiple_files = true; |
||||
option java_outer_classname = "ReadsProto"; |
||||
option java_package = "com.google.genomics.v1"; |
||||
|
||||
|
||||
// Streaming access to reads. Exposes a single server-streaming RPC. |
service StreamingReadService { |
||||
// Returns a stream of all the reads matching the search request, ordered |
||||
// by reference name, position, and ID. |
// |
// Bound to HTTP `POST /v1/reads:stream`; the request message is the body. |
||||
rpc StreamReads(StreamReadsRequest) returns (stream StreamReadsResponse) { |
||||
option (google.api.http) = { post: "/v1/reads:stream" body: "*" }; |
||||
} |
||||
} |
||||
|
||||
// The Readstore. A data store for DNA sequencing Reads. |
||||
// ImportReadGroupSets and ExportReadGroupSet return long-running operations. |
||||
service ReadServiceV1 { |
||||
// Creates read group sets by asynchronously importing the provided |
||||
// information. |
||||
// |
||||
// For the definitions of read group sets and other genomics resources, see |
||||
// [Fundamentals of Google |
||||
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) |
||||
// |
||||
// The caller must have WRITE permissions to the dataset. |
||||
// |
||||
// ## Notes on [BAM](https://samtools.github.io/hts-specs/SAMv1.pdf) import |
||||
// |
||||
// - Tags will be converted to strings - tag types are not preserved |
||||
// - Comments (`@CO`) in the input file header will not be preserved |
||||
// - Original header order of references (`@SQ`) will not be preserved |
||||
// - Any reverse stranded unmapped reads will be reverse complemented, and |
||||
// their qualities (also the "BQ" and "OQ" tags, if any) will be reversed |
||||
// - Unmapped reads will be stripped of positional information (reference name |
||||
// and position) |
||||
rpc ImportReadGroupSets(ImportReadGroupSetsRequest) returns (google.longrunning.Operation) { |
||||
option (google.api.http) = { post: "/v1/readgroupsets:import" body: "*" }; |
||||
} |
||||
|
||||
// Exports a read group set to a BAM file in Google Cloud Storage. |
||||
// |
||||
// For the definitions of read group sets and other genomics resources, see |
||||
// [Fundamentals of Google |
||||
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) |
||||
// |
||||
// Note that currently there may be some differences between exported BAM |
||||
// files and the original BAM file at the time of import. See |
||||
// [ImportReadGroupSets](google.genomics.v1.ReadServiceV1.ImportReadGroupSets) |
||||
// for caveats. |
||||
rpc ExportReadGroupSet(ExportReadGroupSetRequest) returns (google.longrunning.Operation) { |
||||
option (google.api.http) = { post: "/v1/readgroupsets/{read_group_set_id}:export" body: "*" }; |
||||
} |
||||
|
||||
// Searches for read group sets matching the criteria. |
||||
// |
||||
// For the definitions of read group sets and other genomics resources, see |
||||
// [Fundamentals of Google |
||||
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) |
||||
// |
||||
// Implements |
||||
// [GlobalAllianceApi.searchReadGroupSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L135). |
||||
rpc SearchReadGroupSets(SearchReadGroupSetsRequest) returns (SearchReadGroupSetsResponse) { |
||||
option (google.api.http) = { post: "/v1/readgroupsets/search" body: "*" }; |
||||
} |
||||
|
||||
// Updates a read group set. |
||||
// |
||||
// For the definitions of read group sets and other genomics resources, see |
||||
// [Fundamentals of Google |
||||
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) |
||||
// |
||||
// This method supports patch semantics. |
||||
rpc UpdateReadGroupSet(UpdateReadGroupSetRequest) returns (ReadGroupSet) { |
||||
option (google.api.http) = { patch: "/v1/readgroupsets/{read_group_set_id}" body: "read_group_set" }; |
||||
} |
||||
|
||||
// Deletes a read group set. |
||||
// |
||||
// For the definitions of read group sets and other genomics resources, see |
||||
// [Fundamentals of Google |
||||
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) |
||||
rpc DeleteReadGroupSet(DeleteReadGroupSetRequest) returns (google.protobuf.Empty) { |
||||
option (google.api.http) = { delete: "/v1/readgroupsets/{read_group_set_id}" }; |
||||
} |
||||
|
||||
// Gets a read group set by ID. |
||||
// |
||||
// For the definitions of read group sets and other genomics resources, see |
||||
// [Fundamentals of Google |
||||
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) |
||||
rpc GetReadGroupSet(GetReadGroupSetRequest) returns (ReadGroupSet) { |
||||
option (google.api.http) = { get: "/v1/readgroupsets/{read_group_set_id}" }; |
||||
} |
||||
|
||||
// Lists fixed width coverage buckets for a read group set, each of which |
||||
// corresponds to a range of a reference sequence. Each bucket summarizes |
||||
// coverage information across its corresponding genomic range. |
||||
// |
||||
// For the definitions of read group sets and other genomics resources, see |
||||
// [Fundamentals of Google |
||||
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) |
||||
// |
||||
// Coverage is defined as the number of reads which are aligned to a given |
||||
// base in the reference sequence. Coverage buckets are available at several |
||||
// precomputed bucket widths, enabling retrieval of various coverage 'zoom |
||||
// levels'. The caller must have READ permissions for the target read group |
||||
// set. |
||||
rpc ListCoverageBuckets(ListCoverageBucketsRequest) returns (ListCoverageBucketsResponse) { |
||||
option (google.api.http) = { get: "/v1/readgroupsets/{read_group_set_id}/coveragebuckets" }; |
||||
} |
||||
|
||||
// Gets a list of reads for one or more read group sets. |
||||
// |
||||
// For the definitions of read group sets and other genomics resources, see |
||||
// [Fundamentals of Google |
||||
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) |
||||
// |
||||
// Reads search operates over a genomic coordinate space of reference sequence |
||||
// & position defined over the reference sequences to which the requested |
||||
// read group sets are aligned. |
||||
// |
||||
// If a target positional range is specified, search returns all reads whose |
||||
// alignment to the reference genome overlaps the range. A query which |
||||
// specifies only read group set IDs yields all reads in those read group |
||||
// sets, including unmapped reads. |
||||
// |
||||
// All reads returned (including reads on subsequent pages) are ordered by |
||||
// genomic coordinate (by reference sequence, then position). Reads with |
||||
// equivalent genomic coordinates are returned in an unspecified order. This |
||||
// order is consistent, such that two queries for the same content (regardless |
||||
// of page size) yield reads in the same order across their respective streams |
||||
// of paginated responses. |
||||
// |
||||
// Implements |
||||
// [GlobalAllianceApi.searchReads](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L85). |
||||
rpc SearchReads(SearchReadsRequest) returns (SearchReadsResponse) { |
||||
option (google.api.http) = { post: "/v1/reads/search" body: "*" }; |
||||
} |
||||
} |
||||
|
||||
// The request message for `ReadServiceV1.SearchReadGroupSets`. |
||||
message SearchReadGroupSetsRequest { |
||||
// Restricts this query to read group sets within the given datasets. At least |
||||
// one ID must be provided. |
||||
repeated string dataset_ids = 1; |
||||
|
||||
// Only return read group sets for which a substring of the name matches this |
||||
// string. |
||||
string name = 3; |
||||
|
||||
// The continuation token, which is used to page through large result sets. |
||||
// To get the next page of results, set this parameter to the value of |
||||
// `nextPageToken` from the previous response. |
||||
string page_token = 2; |
||||
|
||||
// The maximum number of results to return in a single page. If unspecified, |
||||
// defaults to 256. The maximum value is 1024. |
||||
int32 page_size = 4; |
||||
} |
||||
|
||||
// The response message for `ReadServiceV1.SearchReadGroupSets`. |
||||
message SearchReadGroupSetsResponse { |
||||
// The list of matching read group sets. |
||||
repeated ReadGroupSet read_group_sets = 1; |
||||
|
||||
// The continuation token, which is used to page through large result sets. |
||||
// Provide this value in a subsequent request to return the next page of |
||||
// results. This field will be empty if there aren't any additional results. |
||||
string next_page_token = 2; |
||||
} |
||||
|
||||
// The request message for `ReadServiceV1.ImportReadGroupSets`. |
||||
message ImportReadGroupSetsRequest { |
||||
// How imported read groups are partitioned into read group sets. |
enum PartitionStrategy { |
||||
// No strategy was specified. NOTE(review): presumably handled as |
// `PER_FILE_PER_SAMPLE`, which is documented as the default - confirm. |
PARTITION_STRATEGY_UNSPECIFIED = 0; |
||||
|
||||
// In most cases, this strategy yields one read group set per file. This is |
||||
// the default behavior. |
||||
// |
||||
// Allocate one read group set per file per sample. For BAM files, read |
||||
// groups are considered to share a sample if they have identical sample |
||||
// names. Furthermore, all reads for each file which do not belong to a read |
||||
// group, if any, will be grouped into a single read group set per-file. |
||||
PER_FILE_PER_SAMPLE = 1; |
||||
|
||||
// Includes all read groups in all imported files into a single read group |
||||
// set. Requires that the headers for all imported files are equivalent. All |
||||
// reads which do not belong to a read group, if any, will be grouped into a |
||||
// separate read group set. |
||||
MERGE_ALL = 2; |
||||
} |
||||
|
||||
// Required. The ID of the dataset these read group sets will belong to. The |
||||
// caller must have WRITE permissions to this dataset. |
||||
string dataset_id = 1; |
||||
|
||||
// The reference set to which the imported read group sets are aligned, if |
||||
// any. The reference names of this reference set must be a superset of those |
||||
// found in the imported file headers. If no reference set id is provided, a |
||||
// best effort is made to associate with a matching reference set. |
||||
string reference_set_id = 4; |
||||
|
||||
// A list of URIs pointing at [BAM |
||||
// files](https://samtools.github.io/hts-specs/SAMv1.pdf) |
||||
// in Google Cloud Storage. |
||||
repeated string source_uris = 2; |
||||
|
||||
// The partition strategy describes how read groups are partitioned into read |
||||
// group sets. |
||||
PartitionStrategy partition_strategy = 5; |
||||
} |
||||
|
||||
// The read group set import response. |
// |
// NOTE(review): no RPC in this file returns this message directly; presumably |
// it is the result carried by the `google.longrunning.Operation` returned from |
// `ReadServiceV1.ImportReadGroupSets` - confirm. |
||||
message ImportReadGroupSetsResponse { |
||||
// IDs of the read group sets that were created. |
||||
repeated string read_group_set_ids = 1; |
||||
} |
||||
|
||||
// The request message for `ReadServiceV1.ExportReadGroupSet`. |
||||
message ExportReadGroupSetRequest { |
||||
// Required. The Google Developers Console project ID that owns this |
||||
// export. The caller must have WRITE access to this project. |
||||
string project_id = 1; |
||||
|
||||
// Required. A Google Cloud Storage URI for the exported BAM file. |
||||
// The currently authenticated user must have write access to the new file. |
||||
// An error will be returned if the URI already contains data. |
||||
string export_uri = 2; |
||||
|
||||
// Required. The ID of the read group set to export. The caller must have |
||||
// READ access to this read group set. In the HTTP mapping this field is taken |
// from the `{read_group_set_id}` segment of the URL path. |
||||
string read_group_set_id = 3; |
||||
|
||||
// The reference names to export. If this is not specified, all reference |
||||
// sequences, including unmapped reads, are exported. |
||||
// Use `*` to export only unmapped reads. |
||||
repeated string reference_names = 4; |
||||
} |
||||
|
||||
// The request message for `ReadServiceV1.UpdateReadGroupSet`. |
message UpdateReadGroupSetRequest { |
||||
// The ID of the read group set to be updated. The caller must have WRITE |
||||
// permissions to the dataset associated with this read group set. In the HTTP |
// mapping this field is taken from the URL path. |
||||
string read_group_set_id = 1; |
||||
|
||||
// The new read group set data. See `updateMask` for details on mutability of |
||||
// fields. In the HTTP mapping this field is the request body. |
||||
ReadGroupSet read_group_set = 2; |
||||
|
||||
// An optional mask specifying which fields to update. Supported fields: |
||||
// |
||||
// * [name][google.genomics.v1.ReadGroupSet.name]. |
||||
// * [referenceSetId][google.genomics.v1.ReadGroupSet.reference_set_id]. |
||||
// |
||||
// Leaving `updateMask` unset is equivalent to specifying all mutable |
||||
// fields. |
||||
google.protobuf.FieldMask update_mask = 3; |
||||
} |
||||
|
||||
// The request message for `ReadServiceV1.DeleteReadGroupSet`. |
message DeleteReadGroupSetRequest { |
||||
// The ID of the read group set to be deleted. The caller must have WRITE |
||||
// permissions to the dataset associated with this read group set. |
||||
string read_group_set_id = 1; |
||||
} |
||||
|
||||
// The request message for `ReadServiceV1.GetReadGroupSet`. |
message GetReadGroupSetRequest { |
||||
// The ID of the read group set. |
||||
string read_group_set_id = 1; |
||||
} |
||||
|
||||
// The request message for `ReadServiceV1.ListCoverageBuckets`. |
message ListCoverageBucketsRequest { |
||||
// Required. The ID of the read group set over which coverage is requested. |
||||
string read_group_set_id = 1; |
||||
|
||||
// The name of the reference to query, within the reference set associated |
||||
// with this query. Optional. |
||||
string reference_name = 3; |
||||
|
||||
// The start position of the range on the reference, 0-based inclusive. If |
||||
// specified, `referenceName` must also be specified. Defaults to 0. |
||||
int64 start = 4; |
||||
|
||||
// The end position of the range on the reference, 0-based exclusive. If |
||||
// specified, `referenceName` must also be specified. If unset or 0, defaults |
||||
// to the length of the reference. |
||||
int64 end = 5; |
||||
|
||||
// The desired width of each reported coverage bucket in base pairs. This |
||||
// will be rounded down to the nearest precomputed bucket width; the value |
||||
// of which is returned as `bucketWidth` in the response. Defaults |
||||
// to infinity (each bucket spans an entire reference sequence) or the length |
||||
// of the target range, if specified. The smallest precomputed |
||||
// `bucketWidth` is currently 2048 base pairs; this is subject to |
||||
// change. |
||||
int64 target_bucket_width = 6; |
||||
|
||||
// The continuation token, which is used to page through large result sets. |
||||
// To get the next page of results, set this parameter to the value of |
||||
// `nextPageToken` from the previous response. |
||||
string page_token = 7; |
||||
|
||||
// The maximum number of results to return in a single page. If unspecified, |
||||
// defaults to 1024. The maximum value is 2048. |
||||
int32 page_size = 8; |
||||
} |
||||
|
||||
// A bucket over which read coverage has been precomputed. A bucket corresponds |
||||
// to a specific range of the reference sequence. Buckets are returned in |
// `ListCoverageBucketsResponse.coverage_buckets`. |
||||
message CoverageBucket { |
||||
// The genomic coordinate range spanned by this bucket. |
||||
Range range = 1; |
||||
|
||||
// The average number of reads which are aligned to each individual |
||||
// reference base in this bucket. |
||||
float mean_coverage = 2; |
||||
} |
||||
|
||||
// The response message for `ReadServiceV1.ListCoverageBuckets`. |
message ListCoverageBucketsResponse { |
||||
// The length of each coverage bucket in base pairs. Note that buckets at the |
||||
// end of a reference sequence may be shorter. This value is omitted if the |
||||
// bucket width is infinity (the default behaviour, with no range or |
||||
// `targetBucketWidth`). |
||||
int64 bucket_width = 1; |
||||
|
||||
// The coverage buckets. The list of buckets is sparse; a bucket with 0 |
||||
// overlapping reads is not returned. A bucket never crosses more than one |
||||
// reference sequence. Each bucket has width `bucketWidth`, unless |
||||
// its end is the end of the reference sequence. |
||||
repeated CoverageBucket coverage_buckets = 2; |
||||
|
||||
// The continuation token, which is used to page through large result sets. |
||||
// Provide this value in a subsequent request to return the next page of |
||||
// results. This field will be empty if there aren't any additional results. |
||||
string next_page_token = 3; |
||||
} |
||||
|
||||
// The request message for `ReadServiceV1.SearchReads`. |
||||
message SearchReadsRequest { |
||||
// The IDs of the read group sets within which to search for reads. All |
||||
// specified read group sets must be aligned against a common set of reference |
||||
// sequences; this defines the genomic coordinates for the query. Must specify |
||||
// one of `readGroupSetIds` or `readGroupIds`. |
||||
repeated string read_group_set_ids = 1; |
||||
|
||||
// The IDs of the read groups within which to search for reads. All specified |
||||
// read groups must belong to the same read group sets. Must specify one of |
||||
// `readGroupSetIds` or `readGroupIds`. |
||||
repeated string read_group_ids = 5; |
||||
|
||||
// The reference sequence name, for example `chr1`, `1`, or `chrX`. If set to |
||||
// `*`, only unmapped reads are returned. If unspecified, all reads (mapped |
||||
// and unmapped) are returned. |
||||
string reference_name = 7; |
||||
|
||||
// The start position of the range on the reference, 0-based inclusive. If |
||||
// specified, `referenceName` must also be specified. |
||||
int64 start = 8; |
||||
|
||||
// The end position of the range on the reference, 0-based exclusive. If |
||||
// specified, `referenceName` must also be specified. |
||||
int64 end = 9; |
||||
|
||||
// The continuation token, which is used to page through large result sets. |
||||
// To get the next page of results, set this parameter to the value of |
||||
// `nextPageToken` from the previous response. |
||||
string page_token = 3; |
||||
|
||||
// The maximum number of results to return in a single page. If unspecified, |
||||
// defaults to 256. The maximum value is 2048. |
||||
int32 page_size = 4; |
||||
} |
||||
|
||||
// The response message for `ReadServiceV1.SearchReads`. |
||||
message SearchReadsResponse { |
||||
// The list of matching alignments sorted by mapped genomic coordinate, |
||||
// if any, ascending in position within the same reference. Unmapped reads, |
||||
// which have no position, are returned contiguously and are sorted in |
||||
// ascending lexicographic order by fragment name. |
||||
repeated Read alignments = 1; |
||||
|
||||
// The continuation token, which is used to page through large result sets. |
||||
// Provide this value in a subsequent request to return the next page of |
||||
// results. This field will be empty if there aren't any additional results. |
||||
string next_page_token = 2; |
||||
} |
||||
|
||||
// The request message for `StreamingReadService.StreamReads`. |
||||
message StreamReadsRequest { |
||||
// The Google Developers Console project ID or number which will be billed |
||||
// for this access. The caller must have WRITE access to this project. |
||||
// Required. |
||||
string project_id = 1; |
||||
|
||||
// The ID of the read group set from which to stream reads. |
||||
string read_group_set_id = 2; |
||||
|
||||
// The reference sequence name, for example `chr1`, |
||||
// `1`, or `chrX`. If set to `*`, only unmapped reads are |
||||
// returned. |
||||
string reference_name = 3; |
||||
|
||||
// The start position of the range on the reference, 0-based inclusive. If |
||||
// specified, `referenceName` must also be specified. |
||||
int64 start = 4; |
||||
|
||||
// The end position of the range on the reference, 0-based exclusive. If |
||||
// specified, `referenceName` must also be specified. |
||||
int64 end = 5; |
||||
|
||||
// Restricts results to a shard containing approximately `1/totalShards` |
||||
// of the normal response payload for this query. Results from a sharded |
||||
// request are disjoint from those returned by all queries which differ only |
||||
// in their shard parameter. A shard may yield 0 results; this is especially |
||||
// likely for large values of `totalShards`. |
||||
// |
||||
// Valid values are `[0, totalShards)`. |
||||
int32 shard = 6; |
||||
|
||||
// Specifying `totalShards` causes a disjoint subset of the normal response |
||||
// payload to be returned for each query with a unique `shard` parameter |
||||
// specified. A best effort is made to yield equally sized shards. Sharding |
||||
// can be used to distribute processing amongst workers, where each worker is |
||||
// assigned a unique `shard` number and all workers specify the same |
||||
// `totalShards` number. The union of reads returned for all sharded queries |
||||
// `[0, totalShards)` is equal to those returned by a single unsharded query. |
||||
// |
||||
// Queries for different values of `totalShards` with common divisors will |
||||
// share shard boundaries. For example, streaming `shard` 2 of 5 |
||||
// `totalShards` yields the same results as streaming `shard`s 4 and 5 of 10 |
||||
// `totalShards`. This property can be leveraged for adaptive retries. |
||||
int32 total_shards = 7; |
||||
} |
||||
|
||||
// One message of the response stream returned by |
// `StreamingReadService.StreamReads`. |
message StreamReadsResponse { |
||||
// The reads carried by this message. |
repeated Read alignments = 1; |
||||
} |
@ -0,0 +1,281 @@ |
||||
// Copyright 2016 Google Inc. |
||||
// |
||||
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||
// you may not use this file except in compliance with the License. |
||||
// You may obtain a copy of the License at |
||||
// |
||||
// http://www.apache.org/licenses/LICENSE-2.0 |
||||
// |
||||
// Unless required by applicable law or agreed to in writing, software |
||||
// distributed under the License is distributed on an "AS IS" BASIS, |
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
// See the License for the specific language governing permissions and |
||||
// limitations under the License. |
||||
|
||||
syntax = "proto3"; |
||||
|
||||
package google.genomics.v1; |
||||
|
||||
import "google/api/annotations.proto"; |
||||
|
||||
option cc_enable_arenas = true; |
||||
option java_multiple_files = true; |
||||
option java_outer_classname = "ReferencesProto"; |
||||
option java_package = "com.google.genomics.v1"; |
||||
|
||||
|
||||
// Provides search and get RPCs for references and reference sets, and an RPC |
// for listing the bases of a reference. |
service ReferenceServiceV1 { |
||||
// Searches for reference sets which match the given criteria. |
||||
// |
||||
// For the definitions of references and other genomics resources, see |
||||
// [Fundamentals of Google |
||||
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) |
||||
// |
||||
// Implements |
||||
// [GlobalAllianceApi.searchReferenceSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L71). |
||||
rpc SearchReferenceSets(SearchReferenceSetsRequest) returns (SearchReferenceSetsResponse) { |
||||
option (google.api.http) = { post: "/v1/referencesets/search" body: "*" }; |
||||
} |
||||
|
||||
// Gets a reference set. |
||||
// |
||||
// For the definitions of references and other genomics resources, see |
||||
// [Fundamentals of Google |
||||
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) |
||||
// |
||||
// Implements |
||||
// [GlobalAllianceApi.getReferenceSet](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L83). |
||||
rpc GetReferenceSet(GetReferenceSetRequest) returns (ReferenceSet) { |
||||
option (google.api.http) = { get: "/v1/referencesets/{reference_set_id}" }; |
||||
} |
||||
|
||||
// Searches for references which match the given criteria. |
||||
// |
||||
// For the definitions of references and other genomics resources, see |
||||
// [Fundamentals of Google |
||||
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) |
||||
// |
||||
// Implements |
||||
// [GlobalAllianceApi.searchReferences](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L146). |
||||
rpc SearchReferences(SearchReferencesRequest) returns (SearchReferencesResponse) { |
||||
option (google.api.http) = { post: "/v1/references/search" body: "*" }; |
||||
} |
||||
|
||||
// Gets a reference. |
||||
// |
||||
// For the definitions of references and other genomics resources, see |
||||
// [Fundamentals of Google |
||||
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) |
||||
// |
||||
// Implements |
||||
// [GlobalAllianceApi.getReference](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L158). |
||||
rpc GetReference(GetReferenceRequest) returns (Reference) { |
||||
option (google.api.http) = { get: "/v1/references/{reference_id}" }; |
||||
} |
||||
|
||||
// Lists the bases in a reference, optionally restricted to a range. |
||||
// |
||||
// For the definitions of references and other genomics resources, see |
||||
// [Fundamentals of Google |
||||
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) |
||||
// |
||||
// Implements |
||||
// [GlobalAllianceApi.getReferenceBases](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L221). |
||||
rpc ListBases(ListBasesRequest) returns (ListBasesResponse) { |
||||
option (google.api.http) = { get: "/v1/references/{reference_id}/bases" }; |
||||
} |
||||
} |
||||
|
||||
// A reference is a canonical assembled DNA sequence, intended to act as a |
||||
// reference coordinate space for other genomic annotations. A single reference |
||||
// might represent the human chromosome 1 or mitochondrial DNA, for instance. A |
||||
// reference belongs to one or more reference sets. |
||||
// |
||||
// For more genomics resource definitions, see [Fundamentals of Google |
||||
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) |
||||
message Reference { |
||||
// The server-generated reference ID, unique across all references. |
||||
string id = 1; |
||||
|
||||
// The length of this reference's sequence. |
||||
int64 length = 2; |
||||
|
||||
// MD5 of the upper-case sequence excluding all whitespace characters (this |
||||
// is equivalent to SQ:M5 in SAM). This value is represented in lower case |
||||
// hexadecimal format. |
||||
string md5checksum = 3; |
||||
|
||||
// The name of this reference, for example `22`. |
||||
string name = 4; |
||||
|
||||
// The URI from which the sequence was obtained. Typically specifies a FASTA |
||||
// format file. |
||||
string source_uri = 5; |
||||
|
||||
// All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ) ideally |
||||
// with a version number, for example `GCF_000001405.26`. |
||||
repeated string source_accessions = 6; |
||||
|
||||
// ID from http://www.ncbi.nlm.nih.gov/taxonomy. For example, 9606 for human. |
||||
int32 ncbi_taxon_id = 7; |
||||
} |
||||
|
||||
// A reference set is a set of references which typically comprise a reference |
||||
// assembly for a species, such as `GRCh38` which is representative |
||||
// of the human genome. A reference set defines a common coordinate space for |
||||
// comparing reference-aligned experimental data. A reference set contains 1 or |
||||
// more references. |
||||
// |
||||
// For more genomics resource definitions, see [Fundamentals of Google |
||||
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) |
||||
message ReferenceSet { |
||||
// The server-generated reference set ID, unique across all reference sets. |
||||
string id = 1; |
||||
|
||||
// The IDs of the reference objects that are part of this set. |
||||
// `Reference.md5checksum` must be unique within this set. |
||||
repeated string reference_ids = 2; |
||||
|
||||
// Order-independent MD5 checksum which identifies this reference set. The |
||||
// checksum is computed by sorting all lower case hexadecimal string |
||||
// `reference.md5checksum` (for all references in this set) in |
||||
// ascending lexicographic order, concatenating, and taking the MD5 of that |
||||
// value. The resulting value is represented in lower case hexadecimal format. |
||||
string md5checksum = 3; |
||||
|
||||
// ID from http://www.ncbi.nlm.nih.gov/taxonomy (for example, 9606 for human) |
||||
// indicating the species which this reference set is intended to model. Note |
||||
// that contained references may specify a different `ncbiTaxonId`, as |
||||
// assemblies may contain reference sequences which do not belong to the |
||||
// modeled species, for example EBV in a human reference genome. |
||||
int32 ncbi_taxon_id = 4; |
||||
|
||||
// Free text description of this reference set. |
||||
string description = 5; |
||||
|
||||
// Public id of this reference set, such as `GRCh37`. |
||||
string assembly_id = 6; |
||||
|
||||
// The URI from which the references were obtained. |
||||
string source_uri = 7; |
||||
|
||||
// All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ) ideally |
||||
// with a version number, for example `NC_000001.11`. |
||||
repeated string source_accessions = 8; |
||||
} |
||||
|
||||
// The request message for `ReferenceServiceV1.SearchReferenceSets`. |
message SearchReferenceSetsRequest { |
||||
// If present, return reference sets for which the |
||||
// [md5checksum][google.genomics.v1.ReferenceSet.md5checksum] matches exactly. |
||||
repeated string md5checksums = 1; |
||||
|
||||
// If present, return reference sets for which a prefix of any of |
||||
// [sourceAccessions][google.genomics.v1.ReferenceSet.source_accessions] |
||||
// match any of these strings. Accession numbers typically have a main number |
||||
// and a version, for example `NC_000001.11`. |
||||
repeated string accessions = 2; |
||||
|
||||
// If present, return reference sets for which a substring of their |
||||
// `assemblyId` matches this string (case insensitive). |
||||
string assembly_id = 3; |
||||
|
||||
// The continuation token, which is used to page through large result sets. |
||||
// To get the next page of results, set this parameter to the value of |
||||
// `nextPageToken` from the previous response. |
||||
string page_token = 4; |
||||
|
||||
// The maximum number of results to return in a single page. If unspecified, |
||||
// defaults to 1024. The maximum value is 4096. |
||||
int32 page_size = 5; |
||||
} |
||||
|
||||
// One page of results from a reference set search.
message SearchReferenceSetsResponse {
  // The matching reference sets.
  repeated ReferenceSet reference_sets = 1;

  // Continuation token used to page through large result sets. Pass this
  // value in a subsequent request to obtain the next page of results. The
  // field is empty when there are no additional results.
  string next_page_token = 2;
}
||||
|
||||
// Request that names a single reference set by its ID.
message GetReferenceSetRequest {
  // ID of the reference set.
  string reference_set_id = 1;
}
||||
|
||||
// Filter criteria and paging options for a reference search.
message SearchReferencesRequest {
  // When set, only references whose
  // [md5checksum][google.genomics.v1.Reference.md5checksum] matches exactly
  // are returned.
  repeated string md5checksums = 1;

  // When set, only references for which a prefix of any of
  // [sourceAccessions][google.genomics.v1.Reference.source_accessions]
  // matches any of these strings are returned. Accession numbers typically
  // consist of a main number and a version, for example `GCF_000001405.26`.
  repeated string accessions = 2;

  // When set, only references belonging to this reference set are returned.
  string reference_set_id = 3;

  // Continuation token used to page through large result sets. To fetch the
  // next page of results, set this to the `nextPageToken` value from the
  // previous response.
  string page_token = 4;

  // Maximum number of results returned in a single page. Defaults to 1024
  // when unspecified. The maximum value is 4096.
  int32 page_size = 5;
}
||||
|
||||
// One page of results from a reference search.
message SearchReferencesResponse {
  // The references that matched.
  repeated Reference references = 1;

  // Continuation token used to page through large result sets. Pass this
  // value in a subsequent request to obtain the next page of results. The
  // field is empty when there are no additional results.
  string next_page_token = 2;
}
||||
|
||||
// Request that names a single reference by its ID.
message GetReferenceRequest {
  // ID of the reference.
  string reference_id = 1;
}
||||
|
||||
// Selects a range of bases from one reference, with paging options.
message ListBasesRequest {
  // ID of the reference.
  string reference_id = 1;

  // Start position (0-based) of this query. Defaults to 0.
  int64 start = 2;

  // End position (0-based, exclusive) of this query. Defaults to the length
  // of this reference.
  int64 end = 3;

  // Continuation token used to page through large result sets. To fetch the
  // next page of results, set this to the `nextPageToken` value from the
  // previous response.
  string page_token = 4;

  // Maximum number of bases returned in a single page. Defaults to 200Kbp
  // (kilo base pairs) when unspecified. The maximum value is 10Mbp (mega base
  // pairs).
  int32 page_size = 5;
}
||||
|
||||
// One page of bases from a reference.
message ListBasesResponse {
  // Offset position (0-based) of the given `sequence` from the start of this
  // `Reference`. In a paginated request this value differs for each page.
  int64 offset = 1;

  // A substring of the bases that make up this reference.
  string sequence = 2;

  // Continuation token used to page through large result sets. Pass this
  // value in a subsequent request to obtain the next page of results. The
  // field is empty when there are no additional results.
  string next_page_token = 3;
}
@ -0,0 +1,903 @@ |
||||
// Copyright 2016 Google Inc. |
||||
// |
||||
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||
// you may not use this file except in compliance with the License. |
||||
// You may obtain a copy of the License at |
||||
// |
||||
// http://www.apache.org/licenses/LICENSE-2.0 |
||||
// |
||||
// Unless required by applicable law or agreed to in writing, software |
||||
// distributed under the License is distributed on an "AS IS" BASIS, |
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
// See the License for the specific language governing permissions and |
||||
// limitations under the License. |
||||
|
||||
syntax = "proto3"; |
||||
|
||||
package google.genomics.v1; |
||||
|
||||
import "google/api/annotations.proto"; |
||||
import "google/longrunning/operations.proto"; |
||||
import "google/protobuf/empty.proto"; |
||||
import "google/protobuf/field_mask.proto"; |
||||
import "google/protobuf/struct.proto"; |
||||
|
||||
option cc_enable_arenas = true; |
||||
option java_multiple_files = true; |
||||
option java_outer_classname = "VariantsProto"; |
||||
option java_package = "com.google.genomics.v1"; |
||||
|
||||
|
||||
// Service exposing the server-streaming variant search RPC.
service StreamingVariantService {
  // Streams every variant that matches the search request. Results are
  // ordered by reference name, position, and ID.
  rpc StreamVariants(StreamVariantsRequest)
      returns (stream StreamVariantsResponse) {
    option (google.api.http) = {
      post: "/v1/variants:stream"
      body: "*"
    };
  }
}
||||
|
||||
// RPCs for importing, exporting, and managing variant sets, variants, and
// call sets.
service VariantServiceV1 {
  // Creates variant data by asynchronously importing the provided information.
  //
  // For the definitions of variant sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics).
  //
  // The variants for import will be merged with any existing variant that
  // matches its reference sequence, start, end, reference bases, and
  // alternative bases. If no such variant exists, a new one will be created.
  //
  // When variants are merged, the call information from the new variant
  // is added to the existing variant, and Variant info fields are merged
  // as specified in
  // [infoMergeConfig][google.genomics.v1.ImportVariantsRequest.info_merge_config].
  // As a special case, for single-sample VCF files, QUAL and FILTER fields will
  // be moved to the call level; these are sometimes interpreted in a
  // call-specific context.
  // Imported VCF headers are appended to the metadata already in a variant set.
  rpc ImportVariants(ImportVariantsRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/variants:import"
      body: "*"
    };
  }

  // Creates a new variant set.
  //
  // For the definitions of variant sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics).
  //
  // The provided variant set must have a valid `datasetId` set - all other
  // fields are optional. Note that the `id` field will be ignored, as this is
  // assigned by the server.
  rpc CreateVariantSet(CreateVariantSetRequest) returns (VariantSet) {
    option (google.api.http) = {
      post: "/v1/variantsets"
      body: "variant_set"
    };
  }

  // Exports variant set data to an external destination.
  //
  // For the definitions of variant sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics).
  rpc ExportVariantSet(ExportVariantSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/variantsets/{variant_set_id}:export"
      body: "*"
    };
  }

  // Gets a variant set by ID.
  //
  // For the definitions of variant sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics).
  rpc GetVariantSet(GetVariantSetRequest) returns (VariantSet) {
    option (google.api.http) = {
      get: "/v1/variantsets/{variant_set_id}"
    };
  }

  // Returns a list of all variant sets matching search criteria.
  //
  // For the definitions of variant sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics).
  //
  // Implements
  // [GlobalAllianceApi.searchVariantSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variantmethods.avdl#L49).
  rpc SearchVariantSets(SearchVariantSetsRequest)
      returns (SearchVariantSetsResponse) {
    option (google.api.http) = {
      post: "/v1/variantsets/search"
      body: "*"
    };
  }

  // Deletes a variant set including all variants, call sets, and calls within.
  // This is not reversible.
  //
  // For the definitions of variant sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics).
  rpc DeleteVariantSet(DeleteVariantSetRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/variantsets/{variant_set_id}"
    };
  }

  // Updates a variant set using patch semantics.
  //
  // For the definitions of variant sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics).
  rpc UpdateVariantSet(UpdateVariantSetRequest) returns (VariantSet) {
    option (google.api.http) = {
      patch: "/v1/variantsets/{variant_set_id}"
      body: "variant_set"
    };
  }

  // Gets a list of variants matching the criteria.
  //
  // For the definitions of variants and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics).
  //
  // Implements
  // [GlobalAllianceApi.searchVariants](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variantmethods.avdl#L126).
  rpc SearchVariants(SearchVariantsRequest) returns (SearchVariantsResponse) {
    option (google.api.http) = {
      post: "/v1/variants/search"
      body: "*"
    };
  }

  // Creates a new variant.
  //
  // For the definitions of variants and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics).
  rpc CreateVariant(CreateVariantRequest) returns (Variant) {
    option (google.api.http) = {
      post: "/v1/variants"
      body: "variant"
    };
  }

  // Updates a variant.
  //
  // For the definitions of variants and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics).
  //
  // This method supports patch semantics. Returns the modified variant without
  // its calls.
  rpc UpdateVariant(UpdateVariantRequest) returns (Variant) {
    option (google.api.http) = {
      patch: "/v1/variants/{variant_id}"
      body: "variant"
    };
  }

  // Deletes a variant.
  //
  // For the definitions of variants and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics).
  rpc DeleteVariant(DeleteVariantRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/variants/{variant_id}"
    };
  }

  // Gets a variant by ID.
  //
  // For the definitions of variants and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics).
  rpc GetVariant(GetVariantRequest) returns (Variant) {
    option (google.api.http) = {
      get: "/v1/variants/{variant_id}"
    };
  }

  // Merges the given variants with existing variants.
  //
  // For the definitions of variants and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics).
  //
  // Each variant will be merged with an existing variant that matches its
  // reference sequence, start, end, reference bases, and alternative bases.
  // If no such variant exists, a new one will be created.
  //
  // When variants are merged, the call information from the new variant
  // is added to the existing variant. Variant info fields are merged as
  // specified in the
  // [infoMergeConfig][google.genomics.v1.MergeVariantsRequest.info_merge_config]
  // field of the MergeVariantsRequest.
  //
  // Please exercise caution when using this method! It is easy to introduce
  // mistakes in existing variants and difficult to back out of them. For
  // example, suppose you were trying to merge a new variant with an existing
  // one and both variants contain calls that belong to callsets with the same
  // callset ID.
  //
  //     // Existing variant - irrelevant fields trimmed for clarity
  //     {
  //       "variantSetId": "10473108253681171589",
  //       "referenceName": "1",
  //       "start": "10582",
  //       "referenceBases": "G",
  //       "alternateBases": [
  //         "A"
  //       ],
  //       "calls": [
  //         {
  //           "callSetId": "10473108253681171589-0",
  //           "callSetName": "CALLSET0",
  //           "genotype": [
  //             0,
  //             1
  //           ]
  //         }
  //       ]
  //     }
  //
  //     // New variant with conflicting call information
  //     {
  //       "variantSetId": "10473108253681171589",
  //       "referenceName": "1",
  //       "start": "10582",
  //       "referenceBases": "G",
  //       "alternateBases": [
  //         "A"
  //       ],
  //       "calls": [
  //         {
  //           "callSetId": "10473108253681171589-0",
  //           "callSetName": "CALLSET0",
  //           "genotype": [
  //             1,
  //             1
  //           ]
  //         }
  //       ]
  //     }
  //
  // The resulting merged variant would overwrite the existing calls with those
  // from the new variant:
  //
  //     {
  //       "variantSetId": "10473108253681171589",
  //       "referenceName": "1",
  //       "start": "10582",
  //       "referenceBases": "G",
  //       "alternateBases": [
  //         "A"
  //       ],
  //       "calls": [
  //         {
  //           "callSetId": "10473108253681171589-0",
  //           "callSetName": "CALLSET0",
  //           "genotype": [
  //             1,
  //             1
  //           ]
  //         }
  //       ]
  //     }
  //
  // This may be the desired outcome, but it is up to the user to determine
  // if that is indeed the case.
  rpc MergeVariants(MergeVariantsRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      post: "/v1/variants:merge"
      body: "*"
    };
  }

  // Gets a list of call sets matching the criteria.
  //
  // For the definitions of call sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics).
  //
  // Implements
  // [GlobalAllianceApi.searchCallSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variantmethods.avdl#L178).
  rpc SearchCallSets(SearchCallSetsRequest) returns (SearchCallSetsResponse) {
    option (google.api.http) = {
      post: "/v1/callsets/search"
      body: "*"
    };
  }

  // Creates a new call set.
  //
  // For the definitions of call sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics).
  rpc CreateCallSet(CreateCallSetRequest) returns (CallSet) {
    option (google.api.http) = {
      post: "/v1/callsets"
      body: "call_set"
    };
  }

  // Updates a call set.
  //
  // For the definitions of call sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics).
  //
  // This method supports patch semantics.
  rpc UpdateCallSet(UpdateCallSetRequest) returns (CallSet) {
    option (google.api.http) = {
      patch: "/v1/callsets/{call_set_id}"
      body: "call_set"
    };
  }

  // Deletes a call set.
  //
  // For the definitions of call sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics).
  rpc DeleteCallSet(DeleteCallSetRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/callsets/{call_set_id}"
    };
  }

  // Gets a call set by ID.
  //
  // For the definitions of call sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics).
  rpc GetCallSet(GetCallSetRequest) returns (CallSet) {
    option (google.api.http) = {
      get: "/v1/callsets/{call_set_id}"
    };
  }
}
||||
|
||||
// VariantSetMetadata describes a single piece of variant call metadata.
// It holds a top-level key and either a single string value (`value`) or a
// list of key-value pairs (`info`). `value` and `info` are mutually
// exclusive.
message VariantSetMetadata {
  // The kind of data a metadata entry describes.
  enum Type {
    // No type was specified.
    TYPE_UNSPECIFIED = 0;

    // Integer data.
    INTEGER = 1;

    // Float data.
    FLOAT = 2;

    // Flag data.
    FLAG = 3;

    // Character data.
    CHARACTER = 4;

    // String data.
    STRING = 5;
  }

  // The top-level key.
  string key = 1;

  // The value field for simple metadata.
  string value = 2;

  // User-provided ID field, not enforced by this API.
  // Two or more pieces of structured metadata with identical
  // id and key fields are considered equivalent.
  string id = 4;

  // The type of data. Possible types include: Integer, Float,
  // Flag, Character, and String.
  Type type = 5;

  // The number of values that can be included in a field described by this
  // metadata.
  string number = 8;

  // A textual description of this metadata.
  string description = 7;

  // Remaining structured metadata key-value pairs. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 3;
}
||||
|
||||
// A variant set groups call sets and variants together and carries summary
// statistics about them. Every variant set belongs to a dataset.
//
// For more genomics resource definitions, see [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message VariantSet {
  // ID of the dataset that owns this variant set.
  string dataset_id = 1;

  // Server-generated variant set ID. It is unique across all variant sets.
  string id = 2;

  // The reference set this variant set is mapped to. The reference set
  // describes the alignment provenance of the variant set, whereas
  // `referenceBounds` describe the shape of the actual variant data. The
  // reference names of the reference set are a superset of those found in
  // `referenceBounds`.
  //
  // For example, for a variant set mapped to the GRCh38 reference set that
  // holds a single variant on reference 'X', `referenceBounds` would contain
  // only an entry for 'X', while the associated reference set enumerates all
  // possible references: '1', '2', 'X', 'Y', 'MT', etc.
  string reference_set_id = 6;

  // All references used by the variants in this variant set, each with its
  // associated coordinate upper bound.
  repeated ReferenceBound reference_bounds = 5;

  // The metadata associated with this variant set.
  repeated VariantSetMetadata metadata = 4;

  // User-specified, mutable name.
  string name = 7;

  // A textual description of this variant set.
  string description = 8;
}
||||
|
||||
// A variant is a change in DNA sequence relative to a reference sequence,
// for example a SNP or an insertion. Variants belong to a variant set.
//
// For more genomics resource definitions, see [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
//
// Each call on a variant is a determination of genotype with respect to that
// variant. For example, a call might assign probability of 0.32 to the
// occurrence of a SNP named rs1234 in a sample named NA12345. A call belongs
// to a call set, which contains related calls typically from one sample.
message Variant {
  // ID of the variant set this variant belongs to.
  string variant_set_id = 15;

  // Server-generated variant ID. It is unique across all variants.
  string id = 2;

  // Names for the variant, for example a RefSNP ID.
  repeated string names = 3;

  // Date this variant was created, in milliseconds from the epoch.
  int64 created = 12;

  // The reference on which this variant occurs (such as `chr20` or `X`).
  string reference_name = 14;

  // Position at which this variant occurs (0-based). This corresponds to the
  // first base of the string of reference bases.
  int64 start = 16;

  // End position (0-based) of this variant. This corresponds to the first
  // base after the last base in the reference allele, so the length of the
  // reference allele is (end - start). This is useful for variants that
  // don't explicitly give alternate bases, for example large deletions.
  int64 end = 13;

  // The reference bases for this variant. They start at the given position.
  string reference_bases = 6;

  // The bases that appear instead of the reference bases.
  repeated string alternate_bases = 7;

  // A measure of how likely this variant is to be real. A higher value is
  // better.
  double quality = 8;

  // Filters (normally quality filters) that this variant has failed. `PASS`
  // indicates this variant has passed all filters.
  repeated string filter = 9;

  // Additional variant information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 10;

  // The variant calls for this particular variant. Each one represents the
  // determination of genotype with respect to this variant.
  repeated VariantCall calls = 11;
}
||||
|
||||
// A call is the determination of genotype with respect to a particular
// variant. It may carry associated information such as quality and phasing.
// For example, a call might assign a probability of 0.32 to the occurrence of
// a SNP named rs1234 in a call set with the name NA12345.
message VariantCall {
  // ID of the call set this variant call belongs to.
  string call_set_id = 8;

  // Name of the call set this variant call belongs to.
  string call_set_name = 9;

  // The genotype of this variant call. Each value represents either the value
  // of the `referenceBases` field or a 1-based index into `alternateBases`.
  // If a variant had a `referenceBases` value of `T` and an `alternateBases`
  // value of `["A", "C"]`, and the `genotype` was `[2, 1]`, that would mean
  // the call represented the heterozygous value `CA` for this variant. If the
  // `genotype` was instead `[0, 1]`, the represented value would be `TA`.
  // Ordering of the genotype values is important if the `phaseset` is
  // present. If a genotype is not called (that is, a `.` is present in the
  // GT string) -1 is returned.
  repeated int32 genotype = 7;

  // If this field is present, this variant call's genotype ordering implies
  // the phase of the bases and is consistent with any other variant calls in
  // the same reference sequence which have the same phaseset value. When
  // importing data from VCF, if the genotype data was phased but no phase set
  // was specified, this field will be set to `*`.
  string phaseset = 5;

  // The genotype likelihoods for this variant call. Each array entry
  // represents how likely a specific genotype is for this call. The value
  // ordering is defined by the GL tag in the VCF spec. If Phred-scaled
  // genotype likelihood scores (PL) are available and log10(P) genotype
  // likelihood scores (GL) are not, PL scores are converted to GL scores. If
  // both are available, PL scores are stored in `info`.
  repeated double genotype_likelihood = 6;

  // Additional variant call information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 2;
}
||||
|
||||
// A call set groups variant calls, typically those for one sample. It belongs
// to a variant set.
//
// For more genomics resource definitions, see [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message CallSet {
  // Server-generated call set ID. It is unique across all call sets.
  string id = 1;

  // The call set name.
  string name = 2;

  // The sample ID this call set corresponds to.
  string sample_id = 7;

  // IDs of the variant sets this call set belongs to. This field must have
  // exactly length one, as a call set belongs to a single variant set. The
  // field is repeated for compatibility with the
  // [GA4GH 0.5.1
  // API](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variants.avdl#L76).
  repeated string variant_set_ids = 6;

  // Date this call set was created, in milliseconds from the epoch.
  int64 created = 5;

  // Additional call set information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 4;
}
||||
|
||||
// ReferenceBound holds an upper bound on the starting coordinate of the
// variants in one particular reference.
message ReferenceBound {
  // Name of the reference associated with this reference bound.
  string reference_name = 1;

  // Upper bound (inclusive) on the starting coordinate of any variant in the
  // reference sequence.
  int64 upper_bound = 2;
}
||||
|
||||
// The variant data import request.
message ImportVariantsRequest {
  // File formats accepted for imported variant data.
  enum Format {
    // No format was specified.
    FORMAT_UNSPECIFIED = 0;

    // VCF (Variant Call Format). The VCF files should be uncompressed. gVCF is
    // also supported.
    FORMAT_VCF = 1;

    // Complete Genomics masterVarBeta format. The masterVarBeta files should
    // be bzip2 compressed.
    FORMAT_COMPLETE_GENOMICS = 2;
  }

  // Required. The variant set to which variant data should be imported.
  string variant_set_id = 1;

  // A list of URIs referencing variant files in Google Cloud Storage. URIs can
  // include wildcards [as described
  // here](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames).
  // Note that recursive wildcards ('**') are not supported.
  repeated string source_uris = 2;

  // The format of the variant data being imported. If unspecified, defaults
  // to `VCF`.
  Format format = 3;

  // Convert reference names to the canonical representation.
  // hg19 haplotypes (those reference names containing "_hap") are not
  // modified in any way. All other reference names are modified according to
  // the following rules:
  //
  // * The reference name is capitalized.
  // * The "chr" prefix is dropped for all autosomes and sex chromosomes.
  //   For example "chr17" becomes "17" and "chrX" becomes "X".
  // * All mitochondrial chromosomes ("chrM", "chrMT", etc) become "MT".
  bool normalize_reference_names = 5;

  // A mapping between info field keys and the InfoMergeOperations to
  // be performed on them. This is plumbed down to the MergeVariantRequests
  // generated by the resulting import job.
  map<string, InfoMergeOperation> info_merge_config = 6;
}
||||
|
||||
// Response to a variant data import.
message ImportVariantsResponse {
  // IDs of the call sets that the import created.
  repeated string call_set_ids = 1;
}
||||
|
||||
// Request message for CreateVariantSet.
message CreateVariantSetRequest {
  // Required. The variant set to create. It must have a valid `datasetId`.
  VariantSet variant_set = 1;
}
||||
|
||||
// Request to export variant data.
message ExportVariantSetRequest {
  // Destination formats for exported variant data.
  enum Format {
    FORMAT_UNSPECIFIED = 0;

    // Export the data to Google BigQuery.
    FORMAT_BIGQUERY = 1;
  }

  // Required. ID of the variant set containing the variant data to export.
  // The caller must have READ access to this variant set.
  string variant_set_id = 1;

  // When provided, only variant call information from the specified call
  // sets is exported. By default all variant calls are exported.
  repeated string call_set_ids = 2;

  // Required. The Google Cloud project ID that owns the destination BigQuery
  // dataset. The caller must have WRITE access to this project. This project
  // will also own the resulting export job.
  string project_id = 3;

  // The format for the exported data.
  Format format = 4;

  // Required. The BigQuery dataset to export data to. This dataset must
  // already exist. Note that this is distinct from the Genomics concept of
  // "dataset".
  string bigquery_dataset = 5;

  // Required. The BigQuery table to export data to. The table is created if
  // it doesn't exist, and overwritten if it already exists.
  string bigquery_table = 6;
}
||||
|
||||
// Request message for retrieving a variant set.
message GetVariantSetRequest {
  // Required. The ID of the variant set.
  string variant_set_id = 1;
}
||||
|
||||
// Request message for searching variant sets.
message SearchVariantSetsRequest {
  // Exactly one dataset ID must be provided here. Only variant sets that
  // belong to this dataset are returned.
  repeated string dataset_ids = 1;

  // The continuation token, used to page through large result sets. To get
  // the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 2;

  // The maximum number of results to return in a single page. Defaults to
  // 1024 if unspecified.
  int32 page_size = 3;
}
||||
|
||||
// Response message for searching variant sets.
message SearchVariantSetsResponse {
  // The variant sets that belong to the requested dataset.
  repeated VariantSet variant_sets = 1;

  // The continuation token, used to page through large result sets. Pass
  // this value in a subsequent request to return the next page of results.
  // This field is empty if there aren't any additional results.
  string next_page_token = 2;
}
||||
|
||||
// Request message for deleting a variant set.
message DeleteVariantSetRequest {
  // The ID of the variant set to delete.
  string variant_set_id = 1;
}
||||
|
||||
// The update variant set request.
message UpdateVariantSetRequest {
  // The ID of the variant set to be updated (must already exist).
  string variant_set_id = 1;

  // The new variant set data. Only the variant_set.metadata will be
  // considered for update.
  VariantSet variant_set = 2;

  // An optional mask specifying which fields to update. Supported fields:
  //
  // * [metadata][google.genomics.v1.VariantSet.metadata].
  // * [name][google.genomics.v1.VariantSet.name].
  // * [description][google.genomics.v1.VariantSet.description].
  //
  // Leaving `updateMask` unset is equivalent to specifying all mutable
  // fields.
  google.protobuf.FieldMask update_mask = 5;
}
||||
|
||||
// Request message for searching variants.
message SearchVariantsRequest {
  // At most one variant set ID must be provided. Only variants from this
  // variant set are returned. If omitted, a call set ID must be included in
  // the request.
  repeated string variant_set_ids = 1;

  // Only return variants that have exactly this name.
  string variant_name = 2;

  // Only return variant calls that belong to call sets with these IDs.
  // Leaving this blank returns all variant calls. A variant that has no
  // calls belonging to any of these call sets is not returned at all.
  // Currently, variants with no calls from any call set are never returned.
  repeated string call_set_ids = 3;

  // Required. Only return variants in this reference sequence.
  string reference_name = 4;

  // The beginning of the window (0-based, inclusive) for which overlapping
  // variants should be returned. Defaults to 0 if unspecified.
  int64 start = 5;

  // The end of the window (0-based, exclusive). If unspecified or 0,
  // defaults to the length of the reference.
  int64 end = 6;

  // The continuation token, used to page through large result sets. To get
  // the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 7;

  // The maximum number of variants to return in a single page. Defaults to
  // 5000 if unspecified. The maximum value is 10000.
  int32 page_size = 8;

  // The maximum number of calls to return in a single page. Note that this
  // limit may be exceeded when a matching variant contains more calls than
  // the requested maximum. Defaults to 5000 if unspecified. The maximum
  // value is 10000.
  int32 max_calls = 9;
}
||||
|
||||
// Response message for searching variants.
message SearchVariantsResponse {
  // The variants that matched the search.
  repeated Variant variants = 1;

  // The continuation token, used to page through large result sets. Pass
  // this value in a subsequent request to return the next page of results.
  // This field is empty if there aren't any additional results.
  string next_page_token = 2;
}
||||
|
||||
// Request message for creating a variant.
message CreateVariantRequest {
  // The variant to create.
  Variant variant = 1;
}
||||
|
||||
// Request message for updating a variant.
message UpdateVariantRequest {
  // The ID of the variant to update.
  string variant_id = 1;

  // The new variant data.
  Variant variant = 2;

  // An optional mask specifying which fields to update. At this time, the
  // mutable fields are [names][google.genomics.v1.Variant.names] and
  // [info][google.genomics.v1.Variant.info]. Acceptable values are "names"
  // and "info". If unspecified, all mutable fields are updated.
  google.protobuf.FieldMask update_mask = 3;
}
||||
|
||||
// Request message for deleting a variant.
message DeleteVariantRequest {
  // The ID of the variant to delete.
  string variant_id = 1;
}
||||
|
||||
// Request message for retrieving a variant.
message GetVariantRequest {
  // The ID of the variant.
  string variant_id = 1;
}
||||
|
||||
// Request message for merging variants into a variant set.
message MergeVariantsRequest {
  // The destination variant set.
  string variant_set_id = 1;

  // The variants to merge with the existing variants.
  repeated Variant variants = 2;

  // A mapping from info field keys to the InfoMergeOperations to perform on
  // them.
  map<string, InfoMergeOperation> info_merge_config = 3;
}
||||
|
||||
// Request message for searching call sets.
message SearchCallSetsRequest {
  // Restrict the query to call sets within the given variant sets. At least
  // one ID must be provided.
  repeated string variant_set_ids = 1;

  // Only return call sets for which a substring of the name matches this
  // string.
  string name = 2;

  // The continuation token, used to page through large result sets. To get
  // the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 3;

  // The maximum number of results to return in a single page. Defaults to
  // 1024 if unspecified.
  int32 page_size = 4;
}
||||
|
||||
// Response message for searching call sets.
message SearchCallSetsResponse {
  // The call sets that matched the search.
  repeated CallSet call_sets = 1;

  // The continuation token, used to page through large result sets. Pass
  // this value in a subsequent request to return the next page of results.
  // This field is empty if there aren't any additional results.
  string next_page_token = 2;
}
||||
|
||||
// Request message for creating a call set.
message CreateCallSetRequest {
  // The call set to create.
  CallSet call_set = 1;
}
||||
|
||||
// Request message for updating a call set.
message UpdateCallSetRequest {
  // The ID of the call set to update.
  string call_set_id = 1;

  // The new call set data.
  CallSet call_set = 2;

  // An optional mask specifying which fields to update. At this time, the
  // only mutable field is [name][google.genomics.v1.CallSet.name], so the
  // only acceptable value is "name". If unspecified, all mutable fields are
  // updated.
  google.protobuf.FieldMask update_mask = 3;
}
||||
|
||||
// Request message for deleting a call set.
message DeleteCallSetRequest {
  // The ID of the call set to delete.
  string call_set_id = 1;
}
||||
|
||||
// Request message for retrieving a call set.
message GetCallSetRequest {
  // The ID of the call set.
  string call_set_id = 1;
}
||||
|
||||
// Request message for streaming variants.
message StreamVariantsRequest {
  // Required. The Google Developers Console project ID or number that will
  // be billed for this access. The caller must have WRITE access to this
  // project.
  string project_id = 1;

  // The ID of the variant set from which to stream variants.
  string variant_set_id = 2;

  // Only return variant calls that belong to call sets with these IDs.
  // Leaving this blank returns all variant calls.
  repeated string call_set_ids = 3;

  // Required. Only return variants in this reference sequence.
  string reference_name = 4;

  // The beginning of the window (0-based, inclusive) for which overlapping
  // variants should be returned.
  int64 start = 5;

  // The end of the window (0-based, exclusive) for which overlapping
  // variants should be returned.
  int64 end = 6;
}
||||
|
||||
// Response message for streaming variants.
message StreamVariantsResponse {
  // The variants carried by this response.
  repeated Variant variants = 1;
}
||||
|
||||
// Operations to be performed during import on Variant info fields.
// These operations are set for each info field in the info_merge_config
// map of ImportVariantsRequest, which is plumbed down to the
// MergeVariantsRequests generated by the import job.
//
// Value names are intentionally left unprefixed: renaming them would break
// existing callers.
enum InfoMergeOperation {
  // No merge operation was specified.
  INFO_MERGE_OPERATION_UNSPECIFIED = 0;

  // By default, Variant info fields are persisted if the Variant doesn't
  // already exist in the variant set. If the Variant is equivalent to a
  // Variant already in the variant set, the incoming Variant's info field
  // is ignored in favor of that of the already persisted Variant.
  IGNORE_NEW = 1;

  // This operation removes an info field from the incoming Variant
  // and persists this info field in each of the incoming Variant's Calls.
  MOVE_TO_CALLS = 2;
}
@ -0,0 +1,586 @@ |
||||
// Copyright 2016 Google Inc. |
||||
// |
||||
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||
// you may not use this file except in compliance with the License. |
||||
// You may obtain a copy of the License at |
||||
// |
||||
// http://www.apache.org/licenses/LICENSE-2.0 |
||||
// |
||||
// Unless required by applicable law or agreed to in writing, software |
||||
// distributed under the License is distributed on an "AS IS" BASIS, |
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
// See the License for the specific language governing permissions and |
||||
// limitations under the License. |
||||
|
||||
syntax = "proto3"; |
||||
|
||||
package google.genomics.v1alpha2; |
||||
|
||||
import "google/api/annotations.proto"; |
||||
import "google/longrunning/operations.proto"; |
||||
import "google/protobuf/empty.proto"; |
||||
import "google/protobuf/timestamp.proto"; |
||||
import "google/rpc/code.proto"; |
||||
|
||||
option cc_enable_arenas = true; |
||||
option java_multiple_files = true; |
||||
option java_outer_classname = "PipelinesProto"; |
||||
option java_package = "com.google.genomics.v1a"; |
||||
|
||||
|
||||
// Service for creating, running and managing genomics pipelines.
service PipelinesV1Alpha2 {
  // Creates a pipeline that can be run later. Create takes a Pipeline that
  // has every field other than `pipelineId` populated, and returns the same
  // pipeline with `pipelineId` populated. This ID can be used to run the
  // pipeline.
  //
  // Caller must have WRITE permission to the project.
  rpc CreatePipeline(CreatePipelineRequest) returns (Pipeline) {
    option (google.api.http) = {
      post: "/v1alpha2/pipelines"
      body: "pipeline"
    };
  }

  // Runs a pipeline. If `pipelineId` is specified in the request, a saved
  // pipeline is run. If `ephemeralPipeline` is specified, that pipeline is
  // run once without saving a copy.
  //
  // The caller must have READ permission to the project where the pipeline
  // is stored and WRITE permission to the project where the pipeline will be
  // run, as VMs will be created and storage will be used.
  rpc RunPipeline(RunPipelineRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1alpha2/pipelines:run"
      body: "*"
    };
  }

  // Retrieves a pipeline based on ID.
  //
  // Caller must have READ permission to the project.
  rpc GetPipeline(GetPipelineRequest) returns (Pipeline) {
    option (google.api.http) = {
      get: "/v1alpha2/pipelines/{pipeline_id}"
    };
  }

  // Lists pipelines.
  //
  // Caller must have READ permission to the project.
  rpc ListPipelines(ListPipelinesRequest) returns (ListPipelinesResponse) {
    option (google.api.http) = {
      get: "/v1alpha2/pipelines"
    };
  }

  // Deletes a pipeline based on ID.
  //
  // Caller must have WRITE permission to the project.
  rpc DeletePipeline(DeletePipelineRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1alpha2/pipelines/{pipeline_id}"
    };
  }

  // Gets controller configuration information. Should only be called
  // by VMs created by the Pipelines Service and not by end users.
  rpc GetControllerConfig(GetControllerConfigRequest)
      returns (ControllerConfig) {
    option (google.api.http) = {
      get: "/v1alpha2/pipelines:getControllerConfig"
    };
  }

  // Sets the status of a given operation. All timestamps are sent on each
  // call, and the whole series of events is replaced, in case intermediate
  // calls are lost. Should only be called by VMs created by the Pipelines
  // Service and not by end users.
  rpc SetOperationStatus(SetOperationStatusRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      put: "/v1alpha2/pipelines:setOperationStatus"
      body: "*"
    };
  }
}
||||
|
||||
// A GCE resource managed by a running
// [pipeline][google.genomics.v1alpha2.Pipeline].
message GCE {
  // The instance on which the operation is running.
  string instance_name = 1;

  // The availability zone in which the instance resides.
  string zone = 2;

  // The machine type of the instance.
  string machine_type = 3;

  // The names of the disks that were created for this pipeline.
  repeated string disk_names = 4;
}
||||
|
||||
// Runtime metadata, populated in the
// [runtimeMetadata][google.genomics.v1.OperationMetadata.runtime_metadata]
// field of the Operation associated with a RunPipeline execution.
message RuntimeMetadata {
  // Execution information specific to Google Compute Engine.
  GCE gce = 1;
}
||||
|
||||
// The pipeline object. Represents a transformation from a set of input
// parameters to a set of output parameters. The transformation is defined
// as a docker image and a command to run within that image. Each pipeline
// is run on a Google Compute Engine VM. A pipeline can either be created
// with the `create` method and run later with the `run` method, or be
// defined and run all at once with the `run` method.
message Pipeline {
  // Required. The project in which to create the pipeline. The caller must
  // have WRITE access.
  string project_id = 1;

  // Required. A user-specified pipeline name, which does not have to be
  // unique. This name can be used for filtering Pipelines in ListPipelines.
  string name = 2;

  // User-specified description.
  string description = 3;

  // Input parameters of the pipeline.
  repeated PipelineParameter input_parameters = 8;

  // Output parameters of the pipeline.
  repeated PipelineParameter output_parameters = 9;

  // Required. The executor indicates in which environment the pipeline runs.
  oneof executor {
    // Specifies the docker run information.
    DockerExecutor docker = 5;
  }

  // Required. Specifies resource requirements for the pipeline run.
  // Required fields:
  //
  // *
  // [minimumCpuCores][google.genomics.v1alpha2.PipelineResources.minimum_cpu_cores]
  //
  // *
  // [minimumRamGb][google.genomics.v1alpha2.PipelineResources.minimum_ram_gb]
  PipelineResources resources = 6;

  // Unique pipeline ID, generated by the service when CreatePipeline is
  // called. It cannot be specified in the Pipeline used in the
  // CreatePipelineRequest, and is populated in the response to
  // CreatePipeline and in all subsequent Get and List calls. Indicates that
  // the service has registered this pipeline.
  string pipeline_id = 7;
}
||||
|
||||
// Request message for creating a pipeline. The pipeline field should not
// have `pipelineId` populated, because the server populates it.
message CreatePipelineRequest {
  // The pipeline to create. Should not have `pipelineId` populated.
  Pipeline pipeline = 1;
}
||||
|
||||
// The pipeline run arguments.
message RunPipelineArgs {
  // Required. The project in which to run the pipeline. The caller must have
  // WRITE access to all Google Cloud services and resources (e.g. Google
  // Compute Engine) that will be used.
  string project_id = 1;

  // Pipeline input arguments; keys are defined in the pipeline documentation.
  // All input parameters that do not have default values must be specified.
  // If parameters with defaults are specified here, the defaults will be
  // overridden.
  map<string, string> inputs = 2;

  // Pipeline output arguments; keys are defined in the pipeline
  // documentation. All output parameters without default values
  // must be specified. If parameters with defaults are specified
  // here, the defaults will be overridden.
  map<string, string> outputs = 3;

  // The Google Cloud Service Account that will be used to access data and
  // services. By default, the compute service account associated with
  // `projectId` is used.
  ServiceAccount service_account = 4;

  // Client-specified pipeline operation identifier.
  string client_id = 5;

  // Specifies resource requirements/overrides for the pipeline run.
  PipelineResources resources = 6;

  // Required. Logging options. Used by the service to communicate results
  // to the user.
  LoggingOptions logging = 7;
}
||||
|
||||
// Request message for running a pipeline. If `pipelineId` is specified, it
// refers to a saved pipeline that was created with CreatePipeline and set
// as the `pipelineId` of the returned Pipeline object. If
// `ephemeralPipeline` is specified, that pipeline is run once with the
// given args and is not saved. Specifying both `pipelineId` and
// `ephemeralPipeline` is an error. `pipelineArgs` must be specified.
message RunPipelineRequest {
  oneof pipeline {
    // The already created pipeline to run.
    string pipeline_id = 1;

    // A new pipeline object to run once and then delete.
    Pipeline ephemeral_pipeline = 2;
  }

  // The arguments to use when running this pipeline.
  RunPipelineArgs pipeline_args = 3;
}
||||
|
||||
// Request message for retrieving a saved pipeline by ID.
message GetPipelineRequest {
  // Caller must have READ access to the project in which this pipeline
  // is defined.
  string pipeline_id = 1;
}
||||
|
||||
// Request message for listing the pipelines in a given project. Pipelines
// can be filtered by name using `namePrefix`: every pipeline whose name
// begins with `namePrefix` is returned. Uses standard pagination:
// `pageSize` indicates how many pipelines to return, and `pageToken` comes
// from a previous ListPipelinesResponse to indicate the offset.
message ListPipelinesRequest {
  // Required. The name of the project to search for pipelines. Caller
  // must have READ access to this project.
  string project_id = 1;

  // Pipelines with names that match this prefix should be returned. If
  // unspecified, all pipelines in the project, up to `pageSize`, are
  // returned.
  string name_prefix = 2;

  // Number of pipelines to return at once. Defaults to 256, and the maximum
  // is 2048.
  int32 page_size = 3;

  // Token indicating where to start getting results. If unspecified, the
  // first page of results is returned.
  string page_token = 4;
}
||||
|
||||
// Response message for ListPipelines. Contains at most `pageSize`
// pipelines. If it contains `pageSize` pipelines and more pipelines exist,
// `nextPageToken` is populated and should be used as the `pageToken`
// argument to a subsequent ListPipelines request.
message ListPipelinesResponse {
  // The matched pipelines.
  repeated Pipeline pipelines = 1;

  // The token to use to get the next page of results.
  string next_page_token = 2;
}
||||
|
||||
// Request message for deleting a saved pipeline by ID.
message DeletePipelineRequest {
  // Caller must have WRITE access to the project in which this pipeline
  // is defined.
  string pipeline_id = 1;
}
||||
|
||||
// Request to get controller configuration. Should only be used
// by VMs created by the Pipelines Service and not by end users.
message GetControllerConfigRequest {
  // The operation to retrieve controller configuration for.
  string operation_id = 1;

  uint64 validation_token = 2;
}
||||
|
||||
// The information that the controller fetches from the server in order to
// run. Should only be used by VMs created by the Pipelines Service and not
// by end users.
message ControllerConfig {
  message RepeatedString {
    repeated string values = 1;
  }

  string image = 1;

  string cmd = 2;

  string gcs_log_path = 3;

  string machine_type = 4;

  map<string, string> vars = 5;

  map<string, string> disks = 6;

  map<string, RepeatedString> gcs_sources = 7;

  map<string, RepeatedString> gcs_sinks = 8;
}
||||
|
||||
// Describes a major event in job execution and the time at which it
// occurred.
message TimestampEvent {
  // String indicating the type of event.
  string description = 1;

  // The time this event occurred.
  google.protobuf.Timestamp timestamp = 2;
}
||||
|
||||
// Request message for setting the status of an operation. Should only be
// used by VMs created by the Pipelines Service and not by end users.
message SetOperationStatusRequest {
  string operation_id = 1;

  repeated TimestampEvent timestamp_events = 2;

  google.rpc.Code error_code = 3;

  string error_message = 4;

  uint64 validation_token = 5;
}
||||
|
||||
// Describes a Google Cloud Service Account.
message ServiceAccount {
  // Email address of the service account. Defaults to `default`, which uses
  // the compute service account associated with the project.
  string email = 1;

  // List of scopes to be enabled for this service account on the pipeline
  // virtual machine.
  // The following scopes are automatically included:
  //
  // * https://www.googleapis.com/auth/genomics
  // * https://www.googleapis.com/auth/compute
  // * https://www.googleapis.com/auth/devstorage.full_control
  repeated string scopes = 2;
}
||||
|
||||
// Logging options for the pipeline run.
message LoggingOptions {
  // The location in Google Cloud Storage to which the pipeline logs will be
  // copied. It can be given either as a fully qualified directory path, in
  // which case logs are written to that directory with a unique identifier
  // as the filename, or as a fully specified path that must end in `.log`,
  // in which case that path is used and the user must ensure that logs are
  // not overwritten. Stdout and stderr logs from the run are also generated
  // and output as `-stdout.log` and `-stderr.log`.
  string gcs_path = 1;
}
||||
|
||||
// The system resources for the pipeline run.
message PipelineResources {
  // A Google Compute Engine disk resource specification.
  message Disk {
    // The types of disks that may be attached to VMs.
    enum Type {
      // Default disk type. Use one of the other options below.
      TYPE_UNSPECIFIED = 0;

      // Specifies a Google Compute Engine persistent hard disk. See
      // https://cloud.google.com/compute/docs/disks/persistent-disks#typeofdisks
      // for details.
      PERSISTENT_HDD = 1;

      // Specifies a Google Compute Engine persistent solid-state disk. See
      // https://cloud.google.com/compute/docs/disks/persistent-disks#typeofdisks
      // for details.
      PERSISTENT_SSD = 2;

      // Specifies a Google Compute Engine local SSD.
      // See https://cloud.google.com/compute/docs/disks/local-ssd for details.
      LOCAL_SSD = 3;
    }

    // Required. The name of the disk that can be used in the pipeline
    // parameters. Must be 1 - 63 characters.
    // The name "boot" is reserved for system use.
    string name = 1;

    // Required. The type of the disk to create.
    Type type = 2;

    // The size of the disk. Defaults to 500 (GB).
    // This field is not applicable for local SSD.
    int32 size_gb = 3;

    // The full or partial URL of the persistent disk to attach. See
    // https://cloud.google.com/compute/docs/reference/latest/instances#resource
    // and
    // https://cloud.google.com/compute/docs/disks/persistent-disks#snapshots
    // for more details.
    string source = 4;

    // Specifies whether or not to delete the disk when the pipeline
    // completes. This field is applicable only for newly created disks. See
    // https://cloud.google.com/compute/docs/reference/latest/instances#resource
    // for more details.
    // By default, `autoDelete` is `false`. `autoDelete` will be enabled if set
    // to `true` at create time or run time.
    bool auto_delete = 6;

    // Specifies how a source-based persistent disk will be mounted. See
    // https://cloud.google.com/compute/docs/disks/persistent-disks#use_multi_instances
    // for more details.
    // Can only be set at create time.
    bool read_only = 7;

    // Required at create time and cannot be overridden at run time.
    // Specifies the path in the docker container where files on
    // this disk should be located. For example, if `mountPoint`
    // is `/mnt/disk`, and the parameter has `localPath`
    // `inputs/file.txt`, the docker container can access the data at
    // `/mnt/disk/inputs/file.txt`.
    string mount_point = 8;
  }

  // The minimum number of cores to use. Defaults to 1.
  int32 minimum_cpu_cores = 1;

  // At create time means that preemptible machines may be
  // used for the run. At run time, means they should be used. Cannot
  // be true at run time if false at create time.
  // Defaults to `false`.
  bool preemptible = 2;

  // The minimum amount of RAM to use. Defaults to 3.75 (GB).
  double minimum_ram_gb = 3;

  // Disks to attach.
  repeated Disk disks = 4;

  // List of Google Compute Engine availability zones to which resource
  // creation will be restricted. If empty, any zone may be chosen.
  repeated string zones = 5;

  // The size of the boot disk. Defaults to 10 (GB).
  int32 boot_disk_size_gb = 6;
}
||||
|
||||
// Parameters facilitate setting and delivering data into the |
||||
// pipeline's execution environment. They are defined at create time, |
||||
// with optional defaults, and can be overridden at run time. |
||||
// |
||||
// If `localCopy` is unset, then the parameter specifies a string that |
||||
// is passed as-is into the pipeline, as the value of the environment |
||||
// variable with the given name. A default value can be optionally |
||||
// specified at create time. The default can be overridden at run time |
||||
// using the inputs map. If no default is given, a value must be |
||||
// supplied at runtime. |
||||
// |
||||
// If `localCopy` is defined, then the parameter specifies a data |
||||
// source or sink, both in Google Cloud Storage and on the Docker container |
||||
// where the pipeline computation is run. The [service account associated with |
||||
// the Pipeline][google.genomics.v1alpha2.RunPipelineArgs.service_account] (by |
||||
// default the project's Compute Engine service account) must have access to the |
||||
// Google Cloud Storage paths. |
||||
// |
||||
// At run time, the Google Cloud Storage paths can be overridden if a default |
||||
// was provided at create time, or must be set otherwise. The pipeline runner |
||||
// should add a key/value pair to either the inputs or outputs map. The |
||||
// indicated data copies will be carried out before/after pipeline execution, |
||||
// just as if the corresponding arguments were provided to `gsutil cp`. |
||||
// |
||||
// For example: Given the following `PipelineParameter`, specified |
||||
// in the `inputParameters` list: |
||||
// |
||||
// ``` |
||||
// {name: "input_file", localCopy: {path: "file.txt", disk: "pd1"}} |
||||
// ``` |
||||
// |
||||
// where `disk` is defined in the `PipelineResources` object as: |
||||
// |
||||
// ``` |
||||
// {name: "pd1", mountPoint: "/mnt/disk/"} |
||||
// ``` |
||||
// |
||||
// We create a disk named `pd1`, mount it on the host VM, and map |
||||
// `/mnt/pd1` to `/mnt/disk` in the docker container. At |
||||
// runtime, an entry for `input_file` would be required in the inputs |
||||
// map, such as: |
||||
// |
||||
// ``` |
||||
// inputs["input_file"] = "gs://my-bucket/bar.txt" |
||||
// ``` |
||||
// |
||||
// This would generate the following gsutil call: |
||||
// |
||||
// ``` |
||||
// gsutil cp gs://my-bucket/bar.txt /mnt/pd1/file.txt |
||||
// ``` |
||||
// |
||||
// The file `/mnt/pd1/file.txt` maps to `/mnt/disk/file.txt` in the |
||||
// Docker container. Acceptable paths are: |
||||
// |
||||
// <table> |
||||
// <thead> |
||||
// <tr><th>Google Cloud Storage path</th><th>Local path</th></tr> |
||||
// </thead> |
||||
// <tbody> |
||||
// <tr><td>file</td><td>file</td></tr> |
||||
// <tr><td>glob</td><td>directory</td></tr> |
||||
// </tbody> |
||||
// </table> |
||||
// |
||||
// For outputs, the direction of the copy is reversed: |
||||
// |
||||
// ``` |
||||
// gsutil cp /mnt/disk/file.txt gs://my-bucket/bar.txt |
||||
// ``` |
||||
// |
||||
// Acceptable paths are: |
||||
// |
||||
// <table> |
||||
// <thead> |
||||
// <tr><th>Local path</th><th>Google Cloud Storage path</th></tr> |
||||
// </thead> |
||||
// <tbody> |
||||
// <tr><td>file</td><td>file</td></tr> |
||||
// <tr> |
||||
// <td>file</td> |
||||
// <td>directory - directory must already exist</td> |
||||
// </tr> |
||||
// <tr> |
||||
// <td>glob</td> |
||||
// <td>directory - directory will be created if it doesn't exist</td></tr> |
||||
// </tbody> |
||||
// </table> |
||||
// |
||||
// One restriction, due to Docker limitations, is that for outputs that are |
||||
// found on the boot disk, the local path cannot be a glob and must be a file. |
||||
message PipelineParameter { |
||||
// LocalCopy defines how a remote file should be copied to and from the VM. |
||||
message LocalCopy { |
||||
// Required. The path within the user's Docker container where |
||||
// this input should be localized to and from, relative to the specified |
||||
// disk's mount point. For example: `file.txt`. |
||||
string path = 1; |
||||
|
||||
// Required. The name of the disk where this parameter is |
||||
// located. Can be the name of one of the disks specified in the |
||||
// Resources field, or "boot", which represents the Docker |
||||
// instance's boot disk and has a mount point of `/`. |
||||
string disk = 2; |
||||
} |
||||
|
||||
// Required. Name of the parameter - the pipeline runner uses this string |
||||
// as the key to the input and output maps in RunPipeline. |
||||
string name = 1; |
||||
|
||||
// Human-readable description. |
||||
string description = 2; |
||||
|
||||
// The default value for this parameter. Can be overridden at runtime. |
||||
// If `localCopy` is present, then this must be a Google Cloud Storage path |
||||
// beginning with `gs://`. |
||||
string default_value = 5; |
||||
|
||||
// If present, this parameter is marked for copying to and from the VM. |
||||
// `LocalCopy` indicates where on the VM the file should be. The value |
||||
// given to this parameter (either at runtime or using `defaultValue`) |
||||
// must be the remote path where the file should be. |
||||
LocalCopy local_copy = 6; |
||||
} |
||||
|
||||
// The Docker executor specification. |
||||
message DockerExecutor { |
||||
// Required. Image name from either Docker Hub or Google Container Registry. |
||||
// Users that run pipelines must have READ access to the image. |
||||
string image_name = 1; |
||||
|
||||
// Required. The command string to run. Parameters that do not have |
||||
// `localCopy` specified should be used as environment variables, while |
||||
// those that do can be accessed at the defined paths. |
||||
string cmd = 2; |
||||
} |
Loading…
Reference in new issue