parent
b042947963
commit
7e973fab0e
14 changed files with 3741 additions and 0 deletions
@ -0,0 +1,14 @@ |
|||||||
|
Stores, processes, explores and shares genomic data. This API implements
the Global Alliance for Genomics and Health (GA4GH) v0.5.1 API as well as
several extensions.
||||||
|
|
||||||
|
The Google Genomics API supports access via both
[JSON/REST](https://cloud.google.com/genomics/reference/rest) and
[gRPC](https://cloud.google.com/genomics/reference/rpc). JSON/REST is more
broadly available and is easier for getting started with Google Genomics; it
works well for small metadata resources (datasets, variant sets, read group
sets) and for browsing small genomic regions for datasets of any size. For
performant bulk data access (reads and variants), use gRPC.
||||||
|
|
||||||
|
See also an [overview of genomic resources](https://cloud.google.com/genomics/v1/users-guide)
and an overview of [Genomics on Google Cloud](https://cloud.google.com/genomics/overview).
@ -0,0 +1,662 @@ |
|||||||
|
// Copyright 2016 Google Inc. |
||||||
|
// |
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
// you may not use this file except in compliance with the License. |
||||||
|
// You may obtain a copy of the License at |
||||||
|
// |
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
// |
||||||
|
// Unless required by applicable law or agreed to in writing, software |
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
// See the License for the specific language governing permissions and |
||||||
|
// limitations under the License. |
||||||
|
|
||||||
|
syntax = "proto3";

package google.genomics.v1;

import "google/api/annotations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/wrappers.proto";
import "google/rpc/status.proto";

// File-level code generation options. These affect generated class names
// and packages, so they are part of the published API surface.
option cc_enable_arenas = true;
option java_multiple_files = true;
option java_outer_classname = "AnnotationsProto";
option java_package = "com.google.genomics.v1";
||||||
|
|
||||||
|
|
||||||
|
// This service provides storage and positional retrieval of genomic
// reference annotations, including variant annotations.
service AnnotationServiceV1 {
  // Creates a new annotation set. Caller must have WRITE permission for the
  // associated dataset.
  //
  // The following fields are required:
  //
  //   * [datasetId][google.genomics.v1.AnnotationSet.dataset_id]
  //   * [referenceSetId][google.genomics.v1.AnnotationSet.reference_set_id]
  //
  // All other fields may be optionally specified, unless documented as being
  // server-generated (for example, the `id` field).
  rpc CreateAnnotationSet(CreateAnnotationSetRequest) returns (AnnotationSet) {
    option (google.api.http) = {
      post: "/v1/annotationsets"
      body: "annotation_set"
    };
  }

  // Gets an annotation set. Caller must have READ permission for
  // the associated dataset.
  rpc GetAnnotationSet(GetAnnotationSetRequest) returns (AnnotationSet) {
    option (google.api.http) = {
      get: "/v1/annotationsets/{annotation_set_id}"
    };
  }

  // Updates an annotation set. The update must respect all mutability
  // restrictions and other invariants described on the annotation set
  // resource. Caller must have WRITE permission for the associated dataset.
  rpc UpdateAnnotationSet(UpdateAnnotationSetRequest) returns (AnnotationSet) {
    option (google.api.http) = {
      put: "/v1/annotationsets/{annotation_set_id}"
      body: "annotation_set"
    };
  }

  // Deletes an annotation set. Caller must have WRITE permission
  // for the associated annotation set.
  rpc DeleteAnnotationSet(DeleteAnnotationSetRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/annotationsets/{annotation_set_id}"
    };
  }

  // Searches for annotation sets that match the given criteria. Annotation
  // sets are returned in an unspecified order. This order is consistent, such
  // that two queries for the same content (regardless of page size) yield
  // annotation sets in the same order across their respective streams of
  // paginated responses. Caller must have READ permission for the queried
  // datasets.
  rpc SearchAnnotationSets(SearchAnnotationSetsRequest)
      returns (SearchAnnotationSetsResponse) {
    option (google.api.http) = {
      post: "/v1/annotationsets/search"
      body: "*"
    };
  }

  // Creates a new annotation. Caller must have WRITE permission
  // for the associated annotation set.
  //
  // The following fields are required:
  //
  // * [annotationSetId][google.genomics.v1.Annotation.annotation_set_id]
  // * [referenceName][google.genomics.v1.Annotation.reference_name] or
  //   [referenceId][google.genomics.v1.Annotation.reference_id]
  //
  // ### Transcripts
  //
  // For annotations of type TRANSCRIPT, the following fields of
  // [transcript][google.genomics.v1.Annotation.transcript] must be provided:
  //
  // * [exons.start][google.genomics.v1.Transcript.Exon.start]
  // * [exons.end][google.genomics.v1.Transcript.Exon.end]
  //
  // All other fields may be optionally specified, unless documented as being
  // server-generated (for example, the `id` field). The annotated
  // range must be no longer than 100Mbp (mega base pairs). See the
  // [Annotation resource][google.genomics.v1.Annotation]
  // for additional restrictions on each field.
  rpc CreateAnnotation(CreateAnnotationRequest) returns (Annotation) {
    option (google.api.http) = {
      post: "/v1/annotations"
      body: "annotation"
    };
  }

  // Creates one or more new annotations atomically. All annotations must
  // belong to the same annotation set. Caller must have WRITE
  // permission for this annotation set. For optimal performance, batch
  // positionally adjacent annotations together.
  //
  // If the request has a systemic issue, such as an attempt to write to
  // an inaccessible annotation set, the entire RPC will fail accordingly. For
  // lesser data issues, when possible an error will be isolated to the
  // corresponding batch entry in the response; the remaining well formed
  // annotations will be created normally.
  //
  // For details on the requirements for each individual annotation resource,
  // see
  // [CreateAnnotation][google.genomics.v1.AnnotationServiceV1.CreateAnnotation].
  rpc BatchCreateAnnotations(BatchCreateAnnotationsRequest)
      returns (BatchCreateAnnotationsResponse) {
    option (google.api.http) = {
      post: "/v1/annotations:batchCreate"
      body: "*"
    };
  }

  // Gets an annotation. Caller must have READ permission
  // for the associated annotation set.
  rpc GetAnnotation(GetAnnotationRequest) returns (Annotation) {
    option (google.api.http) = {
      get: "/v1/annotations/{annotation_id}"
    };
  }

  // Updates an annotation. Caller must have
  // WRITE permission for the associated dataset.
  rpc UpdateAnnotation(UpdateAnnotationRequest) returns (Annotation) {
    option (google.api.http) = {
      put: "/v1/annotations/{annotation_id}"
      body: "annotation"
    };
  }

  // Deletes an annotation. Caller must have WRITE permission for
  // the associated annotation set.
  rpc DeleteAnnotation(DeleteAnnotationRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/annotations/{annotation_id}"
    };
  }

  // Searches for annotations that match the given criteria. Results are
  // ordered by genomic coordinate (by reference sequence, then position).
  // Annotations with equivalent genomic coordinates are returned in an
  // unspecified order. This order is consistent, such that two queries for the
  // same content (regardless of page size) yield annotations in the same order
  // across their respective streams of paginated responses. Caller must have
  // READ permission for the queried annotation sets.
  rpc SearchAnnotations(SearchAnnotationsRequest)
      returns (SearchAnnotationsResponse) {
    option (google.api.http) = {
      post: "/v1/annotations/search"
      body: "*"
    };
  }
}
||||||
|
|
||||||
|
// An annotation set is a logical grouping of annotations that share consistent
// type information and provenance. Examples of annotation sets include 'all
// genes from refseq', and 'all variant annotations from ClinVar'.
message AnnotationSet {
  // The server-generated annotation set ID, unique across all annotation sets.
  string id = 1;

  // The dataset to which this annotation set belongs.
  string dataset_id = 2;

  // The ID of the reference set that defines the coordinate space for this
  // set's annotations.
  string reference_set_id = 3;

  // The display name for this annotation set.
  string name = 4;

  // The source URI describing the file from which this annotation set was
  // generated, if any.
  string source_uri = 5;

  // The type of annotations contained within this set.
  AnnotationType type = 6;

  // A map of additional information about this annotation set. This must be
  // of the form map<string, string[]> (string key mapping to a list of string
  // values).
  //
  // Note: the field number (17) is not contiguous with the fields above; it is
  // part of the wire contract and must not be renumbered.
  map<string, google.protobuf.ListValue> info = 17;
}
||||||
|
|
||||||
|
// An annotation describes a region of reference genome. The value of an
// annotation may be one of several canonical types, supplemented by arbitrary
// info tags. An annotation is not inherently associated with a specific
// sample or individual (though a client could choose to use annotations in
// this way). Example canonical annotation types are `GENE` and
// `VARIANT`.
message Annotation {
  // The server-generated annotation ID, unique across all annotations.
  string id = 1;

  // The annotation set to which this annotation belongs.
  string annotation_set_id = 2;

  // The display name of this annotation.
  string name = 3;

  // The ID of the Google Genomics reference associated with this range.
  string reference_id = 4;

  // The display name corresponding to the reference specified by
  // `referenceId`, for example `chr1`, `1`, or `chrX`.
  string reference_name = 5;

  // The start position of the range on the reference, 0-based inclusive.
  int64 start = 6;

  // The end position of the range on the reference, 0-based exclusive.
  int64 end = 7;

  // Whether this range refers to the reverse strand, as opposed to the forward
  // strand. Note that regardless of this field, the start/end position of the
  // range always refer to the forward strand.
  bool reverse_strand = 8;

  // The data type for this annotation. Must match the containing annotation
  // set's type.
  AnnotationType type = 9;

  // The typed payload of the annotation. At most one member is set; which one
  // is expected depends on `type` (see the per-member comments).
  oneof value {
    // A variant annotation, which describes the effect of a variant on the
    // genome, the coding sequence, and/or higher level consequences at the
    // organism level e.g. pathogenicity. This field is only set for
    // annotations of type `VARIANT`.
    VariantAnnotation variant = 10;

    // A transcript value represents the assertion that a particular region of
    // the reference genome may be transcribed as RNA. An alternative splicing
    // pattern would be represented as a separate transcript object. This field
    // is only set for annotations of type `TRANSCRIPT`.
    Transcript transcript = 11;
  }

  // A map of additional information about this annotation. This must be of
  // the form map<string, string[]> (string key mapping to a list of string
  // values).
  map<string, google.protobuf.ListValue> info = 12;
}
||||||
|
|
||||||
|
// The value of an annotation of type `VARIANT`: describes the effect of a
// variant on the genome, the coding sequence, and/or higher level consequences
// at the organism level, e.g. pathogenicity. Carried in
// [Annotation.variant][google.genomics.v1.Annotation.variant].
message VariantAnnotation {
  // A condition associated with a variant. A condition describes the way a
  // variant influences human health.
  message ClinicalCondition {
    // A set of names for the condition.
    repeated string names = 1;

    // The set of external IDs for this condition.
    repeated ExternalId external_ids = 2;

    // The MedGen concept id associated with this condition.
    // Search for these IDs at http://www.ncbi.nlm.nih.gov/medgen/
    string concept_id = 3;

    // The OMIM id for this condition.
    // Search for these IDs at http://omim.org/
    string omim_id = 4;
  }

  // The kind of variant. Adapted from ClinVar's list of variant types.
  enum Type {
    // Default value; no variant type was specified.
    TYPE_UNSPECIFIED = 0;

    // `TYPE_OTHER` should be used when no other Type will suffice.
    // Further explanation of the variant type may be included in the
    // [info][google.genomics.v1.Annotation.info] field.
    TYPE_OTHER = 1;

    // `INSERTION` indicates an insertion.
    INSERTION = 2;

    // `DELETION` indicates a deletion.
    DELETION = 3;

    // `SUBSTITUTION` indicates a block substitution of
    // two or more nucleotides.
    SUBSTITUTION = 4;

    // `SNP` indicates a single nucleotide polymorphism.
    SNP = 5;

    // `STRUCTURAL` indicates a large structural variant,
    // including chromosomal fusions, inversions, etc.
    STRUCTURAL = 6;

    // `CNV` indicates a variation in copy number.
    CNV = 7;
  }

  // The effect of the variant on the coding sequence.
  enum Effect {
    // Default value; no effect was specified.
    EFFECT_UNSPECIFIED = 0;

    // `EFFECT_OTHER` should be used when no other Effect
    // will suffice.
    EFFECT_OTHER = 1;

    // `FRAMESHIFT` indicates a mutation in which the insertion or
    // deletion of nucleotides resulted in a frameshift change.
    FRAMESHIFT = 2;

    // `FRAME_PRESERVING_INDEL` indicates a mutation in which a
    // multiple of three nucleotides has been inserted or deleted, resulting
    // in no change to the reading frame of the coding sequence.
    FRAME_PRESERVING_INDEL = 3;

    // `SYNONYMOUS_SNP` indicates a single nucleotide polymorphism
    // mutation that results in no amino acid change.
    SYNONYMOUS_SNP = 4;

    // `NONSYNONYMOUS_SNP` indicates a single nucleotide
    // polymorphism mutation that results in an amino acid change.
    NONSYNONYMOUS_SNP = 5;

    // `STOP_GAIN` indicates a mutation that leads to the creation
    // of a stop codon at the variant site. Frameshift mutations creating
    // downstream stop codons do not count as `STOP_GAIN`.
    STOP_GAIN = 6;

    // `STOP_LOSS` indicates a mutation that eliminates a
    // stop codon at the variant site.
    STOP_LOSS = 7;

    // `SPLICE_SITE_DISRUPTION` indicates that this variant is
    // found in a splice site for the associated transcript, and alters the
    // normal splicing pattern.
    SPLICE_SITE_DISRUPTION = 8;
  }

  // The clinical significance of a variant. Adapted from the ClinVar
  // controlled vocabulary for clinical significance described at:
  // http://www.ncbi.nlm.nih.gov/clinvar/docs/clinsig/
  // Values without their own comment take their meaning from that vocabulary.
  enum ClinicalSignificance {
    // Default value; no clinical significance was specified.
    CLINICAL_SIGNIFICANCE_UNSPECIFIED = 0;

    // `CLINICAL_SIGNIFICANCE_OTHER` should be used when no other clinical
    // significance value will suffice.
    CLINICAL_SIGNIFICANCE_OTHER = 1;

    UNCERTAIN = 2;

    BENIGN = 3;

    LIKELY_BENIGN = 4;

    LIKELY_PATHOGENIC = 5;

    PATHOGENIC = 6;

    DRUG_RESPONSE = 7;

    HISTOCOMPATIBILITY = 8;

    CONFERS_SENSITIVITY = 9;

    RISK_FACTOR = 10;

    ASSOCIATION = 11;

    PROTECTIVE = 12;

    // `MULTIPLE_REPORTED` should be used when multiple clinical
    // significances are reported for a variant. The original clinical
    // significance values may be provided in the `info` field.
    MULTIPLE_REPORTED = 13;
  }

  // Type has been adapted from ClinVar's list of variant types.
  Type type = 1;

  // Effect of the variant on the coding sequence.
  Effect effect = 2;

  // The alternate allele for this variant. If multiple alternate alleles
  // exist at this location, create a separate variant for each one, as they
  // may represent distinct conditions.
  string alternate_bases = 3;

  // Google annotation ID of the gene affected by this variant. This should
  // be provided when the variant is created.
  string gene_id = 4;

  // Google annotation IDs of the transcripts affected by this variant. These
  // should be provided when the variant is created.
  repeated string transcript_ids = 5;

  // The set of conditions associated with this variant.
  // A condition describes the way a variant influences human health.
  repeated ClinicalCondition conditions = 6;

  // Describes the clinical significance of a variant.
  // It is adapted from the ClinVar controlled vocabulary for clinical
  // significance described at:
  // http://www.ncbi.nlm.nih.gov/clinvar/docs/clinsig/
  ClinicalSignificance clinical_significance = 7;
}
||||||
|
|
||||||
|
// A transcript represents the assertion that a particular region of the
// reference genome may be transcribed as RNA.
message Transcript {
  // A single exon of a transcript: a 0-based, half-open range on the
  // annotation's reference sequence, plus an optional reading frame.
  message Exon {
    // The start position of the exon on this annotation's reference sequence,
    // 0-based inclusive. Note that this is relative to the reference start,
    // and *not* the containing annotation start.
    int64 start = 1;

    // The end position of the exon on this annotation's reference sequence,
    // 0-based exclusive. Note that this is relative to the reference start,
    // and *not* the containing annotation start.
    int64 end = 2;

    // The frame of this exon. Contains a value of 0, 1, or 2, which indicates
    // the offset of the first coding base of the exon within the reading frame
    // of the coding DNA sequence, if any. This field is dependent on the
    // strandedness of this annotation (see
    // [Annotation.reverse_strand][google.genomics.v1.Annotation.reverse_strand]).
    // For forward stranded annotations, this offset is relative to the
    // [exon.start][google.genomics.v1.Transcript.Exon.start]. For reverse
    // strand annotations, this offset is relative to the
    // [exon.end][google.genomics.v1.Transcript.Exon.end] `- 1`.
    //
    // Unset if this exon does not intersect the coding sequence. Upon creation
    // of a transcript, the frame must be populated for all or none of the
    // coding exons.
    //
    // A wrapper type is used so that "unset" can be distinguished from a
    // frame of 0.
    google.protobuf.Int32Value frame = 3;
  }

  // The range of the coding sequence of a transcript: a 0-based, half-open
  // range on the annotation's reference sequence.
  message CodingSequence {
    // The start of the coding sequence on this annotation's reference
    // sequence, 0-based inclusive. Note that this position is relative to the
    // reference start, and *not* the containing annotation start.
    int64 start = 1;

    // The end of the coding sequence on this annotation's reference sequence,
    // 0-based exclusive. Note that this position is relative to the reference
    // start, and *not* the containing annotation start.
    int64 end = 2;
  }

  // The annotation ID of the gene from which this transcript is transcribed.
  string gene_id = 1;

  // The <a href="http://en.wikipedia.org/wiki/Exon">exons</a> that compose
  // this transcript. This field should be unset for genomes where transcript
  // splicing does not occur, for example prokaryotes.
  //
  // Introns are regions of the transcript that are not included in the
  // spliced RNA product. Though not explicitly modeled here, intron ranges can
  // be deduced; all regions of this transcript that are not exons are introns.
  //
  // Exonic sequences do not necessarily code for a translational product
  // (amino acids). Only the regions of exons bounded by the
  // [codingSequence][google.genomics.v1.Transcript.coding_sequence] correspond
  // to coding DNA sequence.
  //
  // Exons are ordered by start position and may not overlap.
  repeated Exon exons = 2;

  // The range of the coding sequence for this transcript, if any. To determine
  // the exact ranges of coding sequence, intersect this range with those of
  // the [exons][google.genomics.v1.Transcript.exons], if any. If there are any
  // [exons][google.genomics.v1.Transcript.exons], the
  // [codingSequence][google.genomics.v1.Transcript.coding_sequence] must start
  // and end within them.
  //
  // Note that in some cases, the reference genome will not exactly match the
  // observed mRNA transcript e.g. due to variance in the source genome from
  // reference. In these cases,
  // [exon.frame][google.genomics.v1.Transcript.Exon.frame] will not
  // necessarily match the expected reference reading frame and coding exon
  // reference bases cannot necessarily be concatenated to produce the original
  // transcript mRNA.
  CodingSequence coding_sequence = 3;
}
||||||
|
|
||||||
|
// An identifier for a record in an external data source, expressed as the
// source's name together with the ID that source uses.
message ExternalId {
  // The name of the source of this data.
  string source_name = 1;

  // The id used by the source of this data.
  string id = 2;
}
||||||
|
|
||||||
|
// Request message for
// [CreateAnnotationSet][google.genomics.v1.AnnotationServiceV1.CreateAnnotationSet].
message CreateAnnotationSetRequest {
  // The annotation set to create.
  AnnotationSet annotation_set = 1;
}
||||||
|
|
||||||
|
// Request message for
// [GetAnnotationSet][google.genomics.v1.AnnotationServiceV1.GetAnnotationSet].
message GetAnnotationSetRequest {
  // The ID of the annotation set to be retrieved.
  string annotation_set_id = 1;
}
||||||
|
|
||||||
|
// Request message for
// [UpdateAnnotationSet][google.genomics.v1.AnnotationServiceV1.UpdateAnnotationSet].
message UpdateAnnotationSetRequest {
  // The ID of the annotation set to be updated.
  string annotation_set_id = 1;

  // The new annotation set.
  AnnotationSet annotation_set = 2;

  // An optional mask specifying which fields to update. Mutable fields are
  // [name][google.genomics.v1.AnnotationSet.name],
  // [source_uri][google.genomics.v1.AnnotationSet.source_uri], and
  // [info][google.genomics.v1.AnnotationSet.info]. If unspecified, all
  // mutable fields will be updated.
  google.protobuf.FieldMask update_mask = 3;
}
||||||
|
|
||||||
|
// Request message for
// [DeleteAnnotationSet][google.genomics.v1.AnnotationServiceV1.DeleteAnnotationSet].
message DeleteAnnotationSetRequest {
  // The ID of the annotation set to be deleted.
  string annotation_set_id = 1;
}
||||||
|
|
||||||
|
// Request message for
// [SearchAnnotationSets][google.genomics.v1.AnnotationServiceV1.SearchAnnotationSets].
message SearchAnnotationSetsRequest {
  // Required. The dataset IDs to search within. Caller must have `READ` access
  // to these datasets.
  repeated string dataset_ids = 1;

  // If specified, only annotation sets associated with the given reference set
  // are returned.
  string reference_set_id = 2;

  // Only return annotation sets for which a substring of the name matches
  // this string (case insensitive).
  string name = 3;

  // If specified, only annotation sets that have any of these types are
  // returned.
  repeated AnnotationType types = 4;

  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 5;

  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 128. The maximum value is 1024.
  int32 page_size = 6;
}
||||||
|
|
||||||
|
// Response message for
// [SearchAnnotationSets][google.genomics.v1.AnnotationServiceV1.SearchAnnotationSets].
message SearchAnnotationSetsResponse {
  // The matching annotation sets.
  repeated AnnotationSet annotation_sets = 1;

  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
}
||||||
|
|
||||||
|
// Request message for
// [CreateAnnotation][google.genomics.v1.AnnotationServiceV1.CreateAnnotation].
message CreateAnnotationRequest {
  // The annotation to be created.
  Annotation annotation = 1;
}
||||||
|
|
||||||
|
// Request message for
// [BatchCreateAnnotations][google.genomics.v1.AnnotationServiceV1.BatchCreateAnnotations].
message BatchCreateAnnotationsRequest {
  // The annotations to be created. At most 4096 can be specified in a single
  // request.
  repeated Annotation annotations = 1;
}
||||||
|
|
||||||
|
// Response message for
// [BatchCreateAnnotations][google.genomics.v1.AnnotationServiceV1.BatchCreateAnnotations].
message BatchCreateAnnotationsResponse {
  // The outcome of creating one annotation from the batch request.
  message Entry {
    // The creation status.
    google.rpc.Status status = 1;

    // The created annotation, if creation was successful.
    Annotation annotation = 2;
  }

  // The resulting per-annotation entries, ordered consistently with the
  // original request.
  repeated Entry entries = 1;
}
||||||
|
|
||||||
|
// Request message for
// [GetAnnotation][google.genomics.v1.AnnotationServiceV1.GetAnnotation].
message GetAnnotationRequest {
  // The ID of the annotation to be retrieved.
  string annotation_id = 1;
}
||||||
|
|
||||||
|
// Request message for
// [UpdateAnnotation][google.genomics.v1.AnnotationServiceV1.UpdateAnnotation].
message UpdateAnnotationRequest {
  // The ID of the annotation to be updated.
  string annotation_id = 1;

  // The new annotation.
  Annotation annotation = 2;

  // An optional mask specifying which fields to update. Mutable fields are
  // [name][google.genomics.v1.Annotation.name],
  // [variant][google.genomics.v1.Annotation.variant],
  // [transcript][google.genomics.v1.Annotation.transcript], and
  // [info][google.genomics.v1.Annotation.info]. If unspecified, all mutable
  // fields will be updated.
  google.protobuf.FieldMask update_mask = 3;
}
||||||
|
|
||||||
|
// Request message for
// [DeleteAnnotation][google.genomics.v1.AnnotationServiceV1.DeleteAnnotation].
message DeleteAnnotationRequest {
  // The ID of the annotation to be deleted.
  string annotation_id = 1;
}
||||||
|
|
||||||
|
// Request message for
// [SearchAnnotations][google.genomics.v1.AnnotationServiceV1.SearchAnnotations].
message SearchAnnotationsRequest {
  // Required. The annotation sets to search within. The caller must have
  // `READ` access to these annotation sets.
  // All queried annotation sets must have the same type.
  repeated string annotation_set_ids = 1;

  // Required. `reference_id` or `reference_name` must be set.
  oneof reference {
    // The ID of the reference to query.
    string reference_id = 2;

    // The name of the reference to query, within the reference set associated
    // with this query.
    string reference_name = 3;
  }

  // The start position of the range on the reference, 0-based inclusive. If
  // specified,
  // [referenceId][google.genomics.v1.SearchAnnotationsRequest.reference_id] or
  // [referenceName][google.genomics.v1.SearchAnnotationsRequest.reference_name]
  // must be specified. Defaults to 0.
  int64 start = 4;

  // The end position of the range on the reference, 0-based exclusive. If
  // specified,
  // [referenceId][google.genomics.v1.SearchAnnotationsRequest.reference_id] or
  // [referenceName][google.genomics.v1.SearchAnnotationsRequest.reference_name]
  // must be specified. Defaults to the length of the reference.
  int64 end = 5;

  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 6;

  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 256. The maximum value is 2048.
  int32 page_size = 7;
}
||||||
|
|
||||||
|
// Response message for
// [SearchAnnotations][google.genomics.v1.AnnotationServiceV1.SearchAnnotations].
message SearchAnnotationsResponse {
  // The matching annotations.
  repeated Annotation annotations = 1;

  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
}
||||||
|
|
||||||
|
// The canonical type of an annotation or annotation set.
//
// When an [Annotation][google.genomics.v1.Annotation] or
// [AnnotationSet][google.genomics.v1.AnnotationSet] is created, if `type` is
// not specified it will be set to `GENERIC`.
enum AnnotationType {
  // Default value; no annotation type was specified.
  ANNOTATION_TYPE_UNSPECIFIED = 0;

  // A `GENERIC` annotation type should be used when no other annotation
  // type will suffice. This represents an untyped annotation of the reference
  // genome.
  GENERIC = 1;

  // A `VARIANT` annotation type.
  VARIANT = 2;

  // A `GENE` annotation type represents the existence of a gene at the
  // associated reference coordinates. The start coordinate is typically the
  // gene's transcription start site and the end is typically the end of the
  // gene's last exon.
  GENE = 3;

  // A `TRANSCRIPT` annotation type represents the assertion that a
  // particular region of the reference genome may be transcribed as RNA.
  TRANSCRIPT = 4;
}
@ -0,0 +1,98 @@ |
|||||||
|
// Copyright 2016 Google Inc. |
||||||
|
// |
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
// you may not use this file except in compliance with the License. |
||||||
|
// You may obtain a copy of the License at |
||||||
|
// |
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
// |
||||||
|
// Unless required by applicable law or agreed to in writing, software |
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
// See the License for the specific language governing permissions and |
||||||
|
// limitations under the License. |
||||||
|
|
||||||
|
syntax = "proto3"; |
||||||
|
|
||||||
|
package google.genomics.v1; |
||||||
|
|
||||||
|
import "google/api/annotations.proto"; |
||||||
|
|
||||||
|
option cc_enable_arenas = true; |
||||||
|
option java_multiple_files = true; |
||||||
|
option java_outer_classname = "CigarProto"; |
||||||
|
option java_package = "com.google.genomics.v1"; |
||||||
|
|
||||||
|
|
||||||
|
// A single CIGAR operation.
message CigarUnit {
  // The different types of CIGAR alignment operations that exist. Used
  // wherever CIGAR alignments are used.
  enum Operation {
    // Zero value: no operation was specified.
    OPERATION_UNSPECIFIED = 0;

    // Alignment match: the sequence can be aligned to the reference without
    // evidence of an INDEL. Unlike the `SEQUENCE_MATCH` and
    // `SEQUENCE_MISMATCH` operators, `ALIGNMENT_MATCH` does not indicate
    // whether the reference and read sequences are an exact match.
    // Equivalent to SAM's `M`.
    ALIGNMENT_MATCH = 1;

    // Insert: the read contains evidence of bases being inserted into the
    // reference. Equivalent to SAM's `I`.
    INSERT = 2;

    // Delete: the read contains evidence of bases being deleted from the
    // reference. Equivalent to SAM's `D`.
    DELETE = 3;

    // Skip: this read skips a long segment of the reference, but the bases
    // have not been deleted. Commonly used when working with RNA-seq data,
    // where reads may skip long segments of the reference between exons.
    // Equivalent to SAM's `N`.
    SKIP = 4;

    // Soft clip: bases at the start/end of a read have not been considered
    // during alignment. This may occur if the majority of a read maps, except
    // for low quality bases at the start/end of a read. Bases that are soft
    // clipped will still be stored in the read. Equivalent to SAM's `S`.
    CLIP_SOFT = 5;

    // Hard clip: bases at the start/end of a read have been omitted from this
    // alignment. This may occur if this linear alignment is part of a
    // chimeric alignment, or if the read has been trimmed (for example,
    // during error correction or to trim poly-A tails for RNA-seq).
    // Equivalent to SAM's `H`.
    CLIP_HARD = 6;

    // Pad: there is padding in an alignment. Equivalent to SAM's `P`.
    PAD = 7;

    // Sequence match: this portion of the aligned sequence exactly matches
    // the reference. Equivalent to SAM's `=`.
    SEQUENCE_MATCH = 8;

    // Sequence mismatch: this portion of the aligned sequence is an alignment
    // match to the reference, but a sequence mismatch. This can indicate a
    // SNP or a read error. Equivalent to SAM's `X`.
    SEQUENCE_MISMATCH = 9;
  }

  // The CIGAR operation type of this unit; one of the `Operation` values.
  Operation operation = 1;

  // Required. The number of genomic bases that the operation runs for.
  int64 operation_length = 2;

  // `referenceSequence` is only used at mismatches (`SEQUENCE_MISMATCH`) and
  // deletions (`DELETE`). Filling this field replaces SAM's MD tag. The field
  // is unset when the relevant information is not available.
  string reference_sequence = 3;
}
@ -0,0 +1,211 @@ |
|||||||
|
// Copyright 2016 Google Inc. |
||||||
|
// |
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
// you may not use this file except in compliance with the License. |
||||||
|
// You may obtain a copy of the License at |
||||||
|
// |
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
// |
||||||
|
// Unless required by applicable law or agreed to in writing, software |
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
// See the License for the specific language governing permissions and |
||||||
|
// limitations under the License. |
||||||
|
|
||||||
|
syntax = "proto3"; |
||||||
|
|
||||||
|
package google.genomics.v1; |
||||||
|
|
||||||
|
import "google/api/annotations.proto"; |
||||||
|
import "google/iam/v1/iam_policy.proto"; |
||||||
|
import "google/iam/v1/policy.proto"; |
||||||
|
import "google/protobuf/empty.proto"; |
||||||
|
import "google/protobuf/field_mask.proto"; |
||||||
|
import "google/protobuf/timestamp.proto"; |
||||||
|
|
||||||
|
option cc_enable_arenas = true; |
||||||
|
option java_multiple_files = true; |
||||||
|
option java_outer_classname = "DatasetsProto"; |
||||||
|
option java_package = "com.google.genomics.v1"; |
||||||
|
|
||||||
|
|
||||||
|
// Manages datasets, which are collections of genomic data.
service DatasetServiceV1 {
  // Lists the datasets within a project.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc ListDatasets(ListDatasetsRequest) returns (ListDatasetsResponse) {
    option (google.api.http) = {
      get: "/v1/datasets"
    };
  }

  // Creates a new dataset.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc CreateDataset(CreateDatasetRequest) returns (Dataset) {
    option (google.api.http) = {
      post: "/v1/datasets"
      body: "dataset"
    };
  }

  // Gets a dataset by ID.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc GetDataset(GetDatasetRequest) returns (Dataset) {
    option (google.api.http) = {
      get: "/v1/datasets/{dataset_id}"
    };
  }

  // Updates a dataset.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // This method supports patch semantics.
  rpc UpdateDataset(UpdateDatasetRequest) returns (Dataset) {
    option (google.api.http) = {
      patch: "/v1/datasets/{dataset_id}"
      body: "dataset"
    };
  }

  // Deletes a dataset and all of its contents (all read group sets,
  // reference sets, variant sets, call sets, annotation sets, etc.).
  // The deletion is reversible (up to one week after it happened) via the
  // [datasets.undelete][google.genomics.v1.DatasetServiceV1.UndeleteDataset]
  // operation.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc DeleteDataset(DeleteDatasetRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/datasets/{dataset_id}"
    };
  }

  // Undeletes a dataset by restoring a dataset which was deleted via this API.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // This operation is only possible for a week after the deletion occurred.
  rpc UndeleteDataset(UndeleteDatasetRequest) returns (Dataset) {
    option (google.api.http) = {
      post: "/v1/datasets/{dataset_id}:undelete"
      body: "*"
    };
  }

  // Sets the access control policy on the specified dataset, replacing any
  // existing policy.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // See <a href="/iam/docs/managing-policies#setting_a_policy">Setting a
  // Policy</a> for more information.
  rpc SetIamPolicy(google.iam.v1.SetIamPolicyRequest)
      returns (google.iam.v1.Policy) {
    option (google.api.http) = {
      post: "/v1/{resource=datasets/*}:setIamPolicy"
      body: "*"
    };
  }

  // Gets the access control policy for the dataset. The result is empty if
  // the policy or resource does not exist.
  //
  // See <a href="/iam/docs/managing-policies#getting_a_policy">Getting a
  // Policy</a> for more information.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc GetIamPolicy(google.iam.v1.GetIamPolicyRequest)
      returns (google.iam.v1.Policy) {
    option (google.api.http) = {
      post: "/v1/{resource=datasets/*}:getIamPolicy"
      body: "*"
    };
  }

  // Returns the permissions that a caller has on the specified resource.
  // See <a href="/iam/docs/managing-policies#testing_permissions">Testing
  // Permissions</a> for more information.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc TestIamPermissions(google.iam.v1.TestIamPermissionsRequest)
      returns (google.iam.v1.TestIamPermissionsResponse) {
    option (google.api.http) = {
      post: "/v1/{resource=datasets/*}:testIamPermissions"
      body: "*"
    };
  }
}
||||||
|
|
||||||
|
// A collection of genomic data.
//
// For more genomics resource definitions, see [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message Dataset {
  // The dataset ID. Generated by the server and unique across all datasets.
  string id = 1;

  // The ID of the Google Developers Console project that this dataset
  // belongs to.
  string project_id = 2;

  // The name of the dataset.
  string name = 3;

  // The time at which this dataset was created. Carried as a
  // `google.protobuf.Timestamp` (seconds and nanoseconds relative to the Unix
  // epoch).
  google.protobuf.Timestamp create_time = 4;
}
||||||
|
|
||||||
|
// Request message for listing datasets.
message ListDatasetsRequest {
  // Required. The project whose datasets should be listed.
  string project_id = 1;

  // The maximum number of results to return in a single page. Defaults to 50
  // when unspecified. The maximum value is 1024.
  int32 page_size = 2;

  // Continuation token for paging through large result sets. To fetch the
  // next page of results, set this parameter to the value of `nextPageToken`
  // from the previous response.
  string page_token = 3;
}
||||||
|
|
||||||
|
// Response message for listing datasets.
message ListDatasetsResponse {
  // The datasets that matched the request.
  repeated Dataset datasets = 1;

  // Continuation token for paging through large result sets. Pass this value
  // in a subsequent request to retrieve the next page of results. The field is
  // empty when there are no additional results.
  string next_page_token = 2;
}
||||||
|
|
||||||
|
// Request message for the `CreateDataset` method.
message CreateDatasetRequest {
  // The dataset to create. It must contain projectId and name.
  Dataset dataset = 1;
}
||||||
|
|
||||||
|
// Request message for the `UpdateDataset` method.
message UpdateDatasetRequest {
  // The ID of the dataset to update.
  string dataset_id = 1;

  // The new dataset data.
  Dataset dataset = 2;

  // Optional mask selecting which fields to update. At this time the only
  // mutable field is [name][google.genomics.v1.Dataset.name], so the only
  // acceptable value is "name". When unspecified, all mutable fields will be
  // updated.
  google.protobuf.FieldMask update_mask = 3;
}
||||||
|
|
||||||
|
// Request message for the `DeleteDataset` method.
message DeleteDatasetRequest {
  // The ID of the dataset to delete.
  string dataset_id = 1;
}
||||||
|
|
||||||
|
// Request message for the `UndeleteDataset` method.
message UndeleteDatasetRequest {
  // The ID of the dataset to undelete.
  string dataset_id = 1;
}
||||||
|
|
||||||
|
// Request message for the `GetDataset` method.
message GetDatasetRequest {
  // The ID of the dataset to fetch.
  string dataset_id = 1;
}
@ -0,0 +1,58 @@ |
|||||||
|
// Copyright 2016 Google Inc. |
||||||
|
// |
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
// you may not use this file except in compliance with the License. |
||||||
|
// You may obtain a copy of the License at |
||||||
|
// |
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
// |
||||||
|
// Unless required by applicable law or agreed to in writing, software |
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
// See the License for the specific language governing permissions and |
||||||
|
// limitations under the License. |
||||||
|
|
||||||
|
syntax = "proto3"; |
||||||
|
|
||||||
|
package google.genomics.v1; |
||||||
|
|
||||||
|
import "google/api/annotations.proto"; |
||||||
|
import "google/protobuf/any.proto"; |
||||||
|
import "google/protobuf/timestamp.proto"; |
||||||
|
|
||||||
|
option cc_enable_arenas = true; |
||||||
|
option java_multiple_files = true; |
||||||
|
option java_outer_classname = "OperationsProto"; |
||||||
|
option java_package = "com.google.genomics.v1"; |
||||||
|
|
||||||
|
|
||||||
|
// Metadata that describes an [Operation][google.longrunning.Operation].
message OperationMetadata {
  // The Google Cloud Project in which the job is scoped.
  string project_id = 1;

  // When the job was submitted to the Genomics service.
  google.protobuf.Timestamp create_time = 2;

  // When the job stopped running.
  google.protobuf.Timestamp end_time = 4;

  // The original request that started the operation. Note that this will be
  // in the current version of the API: if the operation was started with the
  // v1beta2 API and a GetOperation is performed on the v1 API, a v1 request
  // will be returned.
  google.protobuf.Any request = 5;

  // Optional event messages generated during the job's execution. This also
  // contains any warnings that were generated during import or export.
  repeated OperationEvent events = 6;

  // Runtime metadata on this Operation.
  google.protobuf.Any runtime_metadata = 8;
}
||||||
|
|
||||||
|
// An event which took place during an
// [Operation][google.longrunning.Operation].
message OperationEvent {
  // Required. A description of the event.
  string description = 3;
}
@ -0,0 +1,41 @@ |
|||||||
|
// Copyright 2016 Google Inc. |
||||||
|
// |
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
// you may not use this file except in compliance with the License. |
||||||
|
// You may obtain a copy of the License at |
||||||
|
// |
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
// |
||||||
|
// Unless required by applicable law or agreed to in writing, software |
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
// See the License for the specific language governing permissions and |
||||||
|
// limitations under the License. |
||||||
|
|
||||||
|
syntax = "proto3"; |
||||||
|
|
||||||
|
package google.genomics.v1; |
||||||
|
|
||||||
|
import "google/api/annotations.proto"; |
||||||
|
|
||||||
|
option cc_enable_arenas = true; |
||||||
|
option java_multiple_files = true; |
||||||
|
option java_outer_classname = "PositionProto"; |
||||||
|
option java_package = "com.google.genomics.v1"; |
||||||
|
|
||||||
|
|
||||||
|
// An abstraction for referring to a genomic position relative to some already
// known reference. For now, a genomic position is represented as a reference
// name, a base number on that reference (0-based), and a determination of
// forward or reverse strand.
message Position {
  // The name of the reference, in whatever reference set is being used.
  string reference_name = 1;

  // The offset, 0-based, from the start of the forward strand for that
  // reference.
  int64 position = 2;

  // True when this position is on the reverse strand rather than the forward
  // strand.
  bool reverse_strand = 3;
}
@ -0,0 +1,38 @@ |
|||||||
|
// Copyright 2016 Google Inc. |
||||||
|
// |
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
// you may not use this file except in compliance with the License. |
||||||
|
// You may obtain a copy of the License at |
||||||
|
// |
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
// |
||||||
|
// Unless required by applicable law or agreed to in writing, software |
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
// See the License for the specific language governing permissions and |
||||||
|
// limitations under the License. |
||||||
|
|
||||||
|
syntax = "proto3"; |
||||||
|
|
||||||
|
package google.genomics.v1; |
||||||
|
|
||||||
|
import "google/api/annotations.proto"; |
||||||
|
|
||||||
|
option cc_enable_arenas = true; |
||||||
|
option java_multiple_files = true; |
||||||
|
option java_outer_classname = "RangeProto"; |
||||||
|
option java_package = "com.google.genomics.v1"; |
||||||
|
|
||||||
|
|
||||||
|
// A genomic coordinate range for search requests; 0-based and half-open.
message Range {
  // The name of the reference sequence, for example `chr1`, `1`, or `chrX`.
  string reference_name = 1;

  // Start of the range on the reference, 0-based inclusive.
  int64 start = 2;

  // End of the range on the reference, 0-based exclusive.
  int64 end = 3;
}
@ -0,0 +1,220 @@ |
|||||||
|
// Copyright 2016 Google Inc. |
||||||
|
// |
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
// you may not use this file except in compliance with the License. |
||||||
|
// You may obtain a copy of the License at |
||||||
|
// |
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
// |
||||||
|
// Unless required by applicable law or agreed to in writing, software |
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
// See the License for the specific language governing permissions and |
||||||
|
// limitations under the License. |
||||||
|
|
||||||
|
syntax = "proto3"; |
||||||
|
|
||||||
|
package google.genomics.v1; |
||||||
|
|
||||||
|
import "google/api/annotations.proto"; |
||||||
|
import "google/genomics/v1/cigar.proto"; |
||||||
|
import "google/genomics/v1/position.proto"; |
||||||
|
import "google/protobuf/struct.proto"; |
||||||
|
|
||||||
|
option cc_enable_arenas = true; |
||||||
|
option java_multiple_files = true; |
||||||
|
option java_outer_classname = "ReadAlignmentProto"; |
||||||
|
option java_package = "com.google.genomics.v1"; |
||||||
|
|
||||||
|
|
||||||
|
// A linear alignment is one that can be represented by a single CIGAR string.
// It describes the mapped position and local alignment of the read to the
// reference.
message LinearAlignment {
  // Where this alignment is positioned.
  Position position = 1;

  // The mapping quality of this alignment: how likely it is that the read
  // maps to this position as opposed to other locations.
  //
  // Specifically, this is -10 log10 Pr(mapping position is wrong), rounded to
  // the nearest integer.
  int32 mapping_quality = 2;

  // The local alignment of this sequence (alignment matches, indels, etc.)
  // against the reference.
  repeated CigarUnit cigar = 3;
}
||||||
|
|
||||||
|
// A read alignment describes a linear alignment of a string of DNA to a
// [reference sequence][google.genomics.v1.Reference], together with metadata
// about the fragment (the molecule of DNA sequenced) and the read (the bases
// which were read by the sequencer). A read is equivalent to a line in a SAM
// file. Every read belongs to exactly one read group and exactly one
// [read group set][google.genomics.v1.ReadGroupSet].
//
// For more genomics resource definitions, see [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
//
// ### Reverse-stranded reads
//
// Mapped reads (reads having a non-null `alignment`) can be aligned to either
// the forward or the reverse strand of their associated reference. The
// strandedness of a mapped read is encoded by
// `alignment.position.reverseStrand`.
//
// If we consider the reference to be a forward-stranded coordinate space of
// `[0, reference.length)` with `0` as the left-most position and
// `reference.length` as the right-most position, reads are always aligned left
// to right. That is, `alignment.position.position` always refers to the
// left-most reference coordinate and `alignment.cigar` describes the alignment
// of this read to the reference from left to right. All per-base fields such
// as `alignedSequence` and `alignedQuality` share this same left-to-right
// orientation; this holds for reads aligned to either strand. For
// reverse-stranded reads, this means that `alignedSequence` is the reverse
// complement of the bases that were originally reported by the sequencing
// machine.
//
// ### Generating a reference-aligned sequence string
//
// When interacting with mapped reads, it's often useful to produce a string
// representing the local alignment of the read to reference. The following
// pseudocode demonstrates one way of doing this:
//
//     out = ""
//     offset = 0
//     for c in read.alignment.cigar {
//       switch c.operation {
//       case "ALIGNMENT_MATCH", "SEQUENCE_MATCH", "SEQUENCE_MISMATCH":
//         out += read.alignedSequence[offset:offset+c.operationLength]
//         offset += c.operationLength
//         break
//       case "CLIP_SOFT", "INSERT":
//         offset += c.operationLength
//         break
//       case "PAD":
//         out += repeat("*", c.operationLength)
//         break
//       case "DELETE":
//         out += repeat("-", c.operationLength)
//         break
//       case "SKIP":
//         out += repeat(" ", c.operationLength)
//         break
//       case "CLIP_HARD":
//         break
//       }
//     }
//     return out
//
// ### Converting to SAM's CIGAR string
//
// The following pseudocode generates a SAM CIGAR string from the
// `cigar` field. Note that this is a lossy conversion
// (`cigar.referenceSequence` is lost).
//
//     cigarMap = {
//       "ALIGNMENT_MATCH": "M",
//       "INSERT": "I",
//       "DELETE": "D",
//       "SKIP": "N",
//       "CLIP_SOFT": "S",
//       "CLIP_HARD": "H",
//       "PAD": "P",
//       "SEQUENCE_MATCH": "=",
//       "SEQUENCE_MISMATCH": "X",
//     }
//     cigarStr = ""
//     for c in read.alignment.cigar {
//       cigarStr += c.operationLength + cigarMap[c.operation]
//     }
//     return cigarStr
message Read {
  // The read ID. Generated by the server and unique across all reads. This is
  // different from the `fragmentName`.
  string id = 1;

  // The ID of the read group this read belongs to. A read belongs to exactly
  // one read group. This is a server-generated ID which is distinct from
  // SAM's RG tag (for that value, see
  // [ReadGroup.name][google.genomics.v1.ReadGroup.name]).
  string read_group_id = 2;

  // The ID of the read group set this read belongs to. A read belongs to
  // exactly one read group set.
  string read_group_set_id = 3;

  // The name of the fragment. Equivalent to QNAME (query template name) in
  // SAM.
  string fragment_name = 4;

  // The orientation and the distance between reads from the fragment are
  // consistent with the sequencing protocol (SAM flag 0x2).
  bool proper_placement = 5;

  // The fragment is a PCR or optical duplicate (SAM flag 0x400).
  bool duplicate_fragment = 6;

  // The observed length of the fragment. Equivalent to TLEN in SAM.
  int32 fragment_length = 7;

  // The read number in sequencing. 0-based and less than numberReads. This
  // field replaces SAM flag 0x40 and 0x80.
  int32 read_number = 8;

  // The number of reads in the fragment (extension to SAM flag 0x1).
  int32 number_reads = 9;

  // Whether this read did not pass filters, such as platform or vendor
  // quality controls (SAM flag 0x200).
  bool failed_vendor_quality_checks = 10;

  // The linear alignment for this alignment record. This field is null for
  // unmapped reads.
  LinearAlignment alignment = 11;

  // Whether this alignment is secondary. Equivalent to SAM flag 0x100.
  // A secondary alignment represents an alternative to the primary alignment
  // for this read. Aligners may return secondary alignments if a read can map
  // ambiguously to multiple coordinates in the genome. By convention, each
  // read has one and only one alignment where both `secondaryAlignment`
  // and `supplementaryAlignment` are false.
  bool secondary_alignment = 12;

  // Whether this alignment is supplementary. Equivalent to SAM flag 0x800.
  // Supplementary alignments are used in the representation of a chimeric
  // alignment. In a chimeric alignment, a read is split into multiple
  // linear alignments that map to different reference contigs. The first
  // linear alignment in the read will be designated as the representative
  // alignment; the remaining linear alignments will be designated as
  // supplementary alignments. These alignments may have different mapping
  // quality scores. In each linear alignment in a chimeric alignment, the
  // read will be hard clipped. The `alignedSequence` and `alignedQuality`
  // fields in the alignment record will only represent the bases for its
  // respective linear alignment.
  bool supplementary_alignment = 13;

  // The bases of the read sequence contained in this alignment record,
  // **without CIGAR operations applied** (equivalent to SEQ in SAM).
  // `alignedSequence` and `alignedQuality` may be shorter than the full read
  // sequence and quality. This will occur if the alignment is part of a
  // chimeric alignment, or if the read was trimmed. When this occurs, the
  // CIGAR for this read will begin/end with a hard clip operator that will
  // indicate the length of the excised sequence.
  string aligned_sequence = 14;

  // The quality of the read sequence contained in this alignment record
  // (equivalent to QUAL in SAM).
  // `alignedSequence` and `alignedQuality` may be shorter than the full read
  // sequence and quality. This will occur if the alignment is part of a
  // chimeric alignment, or if the read was trimmed. When this occurs, the
  // CIGAR for this read will begin/end with a hard clip operator that will
  // indicate the length of the excised sequence.
  repeated int32 aligned_quality = 15;

  // The mapping of the primary alignment of the
  // `(readNumber+1)%numberReads` read in the fragment. It replaces mate
  // position and mate strand in SAM.
  Position next_mate_position = 16;

  // A map of additional read alignment information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 17;
}
@ -0,0 +1,105 @@ |
|||||||
|
// Copyright 2016 Google Inc. |
||||||
|
// |
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
// you may not use this file except in compliance with the License. |
||||||
|
// You may obtain a copy of the License at |
||||||
|
// |
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
// |
||||||
|
// Unless required by applicable law or agreed to in writing, software |
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
// See the License for the specific language governing permissions and |
||||||
|
// limitations under the License. |
||||||
|
|
||||||
|
syntax = "proto3"; |
||||||
|
|
||||||
|
package google.genomics.v1; |
||||||
|
|
||||||
|
import "google/api/annotations.proto"; |
||||||
|
import "google/protobuf/struct.proto"; |
||||||
|
|
||||||
|
option cc_enable_arenas = true; |
||||||
|
option java_multiple_files = true; |
||||||
|
option java_outer_classname = "ReadGroupProto"; |
||||||
|
option java_package = "com.google.genomics.v1"; |
||||||
|
|
||||||
|
|
||||||
|
// A read group is all the data that's processed the same way by the sequencer.
message ReadGroup {
  // Describes the experiment used to generate a read group.
  message Experiment {
    // A client-supplied library identifier; a library is a collection of DNA
    // fragments which have been prepared for sequencing from a sample. This
    // field is important for quality control as error or bias can be
    // introduced during sample preparation.
    string library_id = 1;

    // The platform unit used as part of this experiment, for example
    // flowcell-barcode.lane for Illumina or slide for SOLiD. Corresponds to
    // the @RG PU field in the SAM spec.
    string platform_unit = 2;

    // The sequencing center used as part of this experiment.
    string sequencing_center = 3;

    // The instrument model used as part of this experiment. This maps to
    // sequencing technology in the SAM spec.
    string instrument_model = 4;
  }

  // Describes a program used to generate a read group.
  message Program {
    // The command line used to run this program.
    string command_line = 1;

    // The user specified locally unique ID of the program. Used along with
    // `prevProgramId` to define an ordering between programs.
    string id = 2;

    // The display name of the program. This is typically the colloquial name
    // of the tool used, for example 'bwa' or 'picard'.
    string name = 3;

    // The ID of the program run before this one.
    string prev_program_id = 4;

    // The version of the program run.
    string version = 5;
  }

  // The server-generated read group ID, unique for all read groups.
  // Note: This is different than the @RG ID field in the SAM spec. For that
  // value, see [name][google.genomics.v1.ReadGroup.name].
  string id = 1;

  // The dataset to which this read group belongs.
  string dataset_id = 2;

  // The read group name. This corresponds to the @RG ID field in the SAM spec.
  string name = 3;

  // A free-form text description of this read group.
  string description = 4;

  // A client-supplied sample identifier for the reads in this read group.
  string sample_id = 5;

  // The experiment used to generate this read group.
  Experiment experiment = 6;

  // The predicted insert size of this read group. The insert size is the
  // length of the sequenced DNA fragment from end-to-end, not including the
  // adapters.
  int32 predicted_insert_size = 7;

  // The programs used to generate this read group. Programs are always
  // identical for all read groups within a read group set. For this reason,
  // only the first read group in a returned set will have this field
  // populated.
  repeated Program programs = 10;

  // The reference set the reads in this read group are aligned to.
  string reference_set_id = 11;

  // A map of additional read group information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 12;
}
@ -0,0 +1,63 @@ |
|||||||
|
// Copyright 2016 Google Inc. |
||||||
|
// |
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
// you may not use this file except in compliance with the License. |
||||||
|
// You may obtain a copy of the License at |
||||||
|
// |
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
// |
||||||
|
// Unless required by applicable law or agreed to in writing, software |
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
// See the License for the specific language governing permissions and |
||||||
|
// limitations under the License. |
||||||
|
|
||||||
|
syntax = "proto3"; |
||||||
|
|
||||||
|
package google.genomics.v1; |
||||||
|
|
||||||
|
import "google/api/annotations.proto"; |
||||||
|
import "google/genomics/v1/readgroup.proto"; |
||||||
|
import "google/protobuf/struct.proto"; |
||||||
|
|
||||||
|
option cc_enable_arenas = true; |
||||||
|
option java_multiple_files = true; |
||||||
|
option java_outer_classname = "ReadGroupSetProto"; |
||||||
|
option java_package = "com.google.genomics.v1"; |
||||||
|
|
||||||
|
|
||||||
|
// A logical collection of read groups, where each read group is a collection
// of reads produced by a sequencer. A read group set typically models reads
// corresponding to one sample, sequenced one way, and aligned one way.
//
// * A read group set belongs to one dataset.
// * A read group belongs to one read group set.
// * A read belongs to one read group.
//
// For more genomics resource definitions, see [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message ReadGroupSet {
  // Server-generated ID of this read group set, unique for all read group
  // sets.
  string id = 1;

  // The dataset that this read group set belongs to.
  string dataset_id = 2;

  // The reference set that the reads in this read group set are aligned to.
  string reference_set_id = 3;

  // Name of the read group set. By default this will be initialized to the
  // sample name of the sequenced data contained in this set.
  string name = 4;

  // Filename of the original source file for this read group set, if any.
  string filename = 5;

  // Read groups contained in this set. There are typically 1-10 read groups
  // in a read group set.
  repeated ReadGroup read_groups = 6;

  // Map of additional read group set information.
  map<string, google.protobuf.ListValue> info = 7;
}
@ -0,0 +1,461 @@ |
|||||||
|
// Copyright 2016 Google Inc. |
||||||
|
// |
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
// you may not use this file except in compliance with the License. |
||||||
|
// You may obtain a copy of the License at |
||||||
|
// |
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
// |
||||||
|
// Unless required by applicable law or agreed to in writing, software |
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
// See the License for the specific language governing permissions and |
||||||
|
// limitations under the License. |
||||||
|
|
||||||
|
syntax = "proto3"; |
||||||
|
|
||||||
|
package google.genomics.v1; |
||||||
|
|
||||||
|
import "google/api/annotations.proto"; |
||||||
|
import "google/genomics/v1/range.proto"; |
||||||
|
import "google/genomics/v1/readalignment.proto"; |
||||||
|
import "google/genomics/v1/readgroupset.proto"; |
||||||
|
import "google/longrunning/operations.proto"; |
||||||
|
import "google/protobuf/empty.proto"; |
||||||
|
import "google/protobuf/field_mask.proto"; |
||||||
|
|
||||||
|
option cc_enable_arenas = true; |
||||||
|
option java_multiple_files = true; |
||||||
|
option java_outer_classname = "ReadsProto"; |
||||||
|
option java_package = "com.google.genomics.v1"; |
||||||
|
|
||||||
|
|
||||||
|
// Service for streaming reads that match a search request.
service StreamingReadService {
  // Returns a stream of all the reads matching the search request, ordered
  // by reference name, position, and ID.
  rpc StreamReads(StreamReadsRequest) returns (stream StreamReadsResponse) {
    option (google.api.http) = {
      post: "/v1/reads:stream"
      body: "*"
    };
  }
}
||||||
|
|
||||||
|
// The Readstore. A data store for DNA sequencing Reads.
service ReadServiceV1 {
  // Creates read group sets by asynchronously importing the provided
  // information.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // The caller must have WRITE permissions to the dataset.
  //
  // ## Notes on [BAM](https://samtools.github.io/hts-specs/SAMv1.pdf) import
  //
  // - Tags will be converted to strings - tag types are not preserved
  // - Comments (`@CO`) in the input file header will not be preserved
  // - Original header order of references (`@SQ`) will not be preserved
  // - Any reverse stranded unmapped reads will be reverse complemented, and
  //   their qualities (also the "BQ" and "OQ" tags, if any) will be reversed
  // - Unmapped reads will be stripped of positional information (reference
  //   name and position)
  rpc ImportReadGroupSets(ImportReadGroupSetsRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/readgroupsets:import"
      body: "*"
    };
  }

  // Exports a read group set to a BAM file in Google Cloud Storage.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Note that currently there may be some differences between exported BAM
  // files and the original BAM file at the time of import. See
  // [ImportReadGroupSets][google.genomics.v1.ReadServiceV1.ImportReadGroupSets]
  // for caveats.
  rpc ExportReadGroupSet(ExportReadGroupSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/readgroupsets/{read_group_set_id}:export"
      body: "*"
    };
  }

  // Searches for read group sets matching the criteria.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.searchReadGroupSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L135).
  rpc SearchReadGroupSets(SearchReadGroupSetsRequest)
      returns (SearchReadGroupSetsResponse) {
    option (google.api.http) = {
      post: "/v1/readgroupsets/search"
      body: "*"
    };
  }

  // Updates a read group set.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // This method supports patch semantics.
  rpc UpdateReadGroupSet(UpdateReadGroupSetRequest) returns (ReadGroupSet) {
    option (google.api.http) = {
      patch: "/v1/readgroupsets/{read_group_set_id}"
      body: "read_group_set"
    };
  }

  // Deletes a read group set.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc DeleteReadGroupSet(DeleteReadGroupSetRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/readgroupsets/{read_group_set_id}"
    };
  }

  // Gets a read group set by ID.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc GetReadGroupSet(GetReadGroupSetRequest) returns (ReadGroupSet) {
    option (google.api.http) = {
      get: "/v1/readgroupsets/{read_group_set_id}"
    };
  }

  // Lists fixed width coverage buckets for a read group set, each of which
  // corresponds to a range of a reference sequence. Each bucket summarizes
  // coverage information across its corresponding genomic range.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Coverage is defined as the number of reads which are aligned to a given
  // base in the reference sequence. Coverage buckets are available at several
  // precomputed bucket widths, enabling retrieval of various coverage 'zoom
  // levels'. The caller must have READ permissions for the target read group
  // set.
  rpc ListCoverageBuckets(ListCoverageBucketsRequest)
      returns (ListCoverageBucketsResponse) {
    option (google.api.http) = {
      get: "/v1/readgroupsets/{read_group_set_id}/coveragebuckets"
    };
  }

  // Gets a list of reads for one or more read group sets.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Reads search operates over a genomic coordinate space of reference
  // sequence & position defined over the reference sequences to which the
  // requested read group sets are aligned.
  //
  // If a target positional range is specified, search returns all reads whose
  // alignment to the reference genome overlaps the range. A query which
  // specifies only read group set IDs yields all reads in those read group
  // sets, including unmapped reads.
  //
  // All reads returned (including reads on subsequent pages) are ordered by
  // genomic coordinate (by reference sequence, then position). Reads with
  // equivalent genomic coordinates are returned in an unspecified order. This
  // order is consistent, such that two queries for the same content
  // (regardless of page size) yield reads in the same order across their
  // respective streams of paginated responses.
  //
  // Implements
  // [GlobalAllianceApi.searchReads](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L85).
  rpc SearchReads(SearchReadsRequest) returns (SearchReadsResponse) {
    option (google.api.http) = {
      post: "/v1/reads/search"
      body: "*"
    };
  }
}
||||||
|
|
||||||
|
// Request message for searching read group sets.
message SearchReadGroupSetsRequest {
  // Limits the search to read group sets that belong to the given datasets.
  // At least one ID must be provided.
  repeated string dataset_ids = 1;

  // Only read group sets whose name contains a substring matching this string
  // are returned.
  string name = 3;

  // Continuation token used to page through large result sets. To fetch the
  // next page of results, set this to the `nextPageToken` value from the
  // previous response.
  string page_token = 2;

  // Maximum number of results to return in a single page. Defaults to 256
  // when unspecified. The maximum value is 1024.
  int32 page_size = 4;
}
||||||
|
|
||||||
|
// Response message for searching read group sets.
message SearchReadGroupSetsResponse {
  // Read group sets that matched the search.
  repeated ReadGroupSet read_group_sets = 1;

  // Continuation token used to page through large result sets. Pass this
  // value in a subsequent request to get the next page of results. Empty when
  // there are no additional results.
  string next_page_token = 2;
}
||||||
|
|
||||||
|
// The read group set import request.
message ImportReadGroupSetsRequest {
  // Describes how read groups are partitioned into read group sets on import.
  enum PartitionStrategy {
    // No partition strategy was specified.
    PARTITION_STRATEGY_UNSPECIFIED = 0;

    // In most cases, this strategy yields one read group set per file. This is
    // the default behavior.
    //
    // Allocate one read group set per file per sample. For BAM files, read
    // groups are considered to share a sample if they have identical sample
    // names. Furthermore, all reads for each file which do not belong to a
    // read group, if any, will be grouped into a single read group set
    // per-file.
    PER_FILE_PER_SAMPLE = 1;

    // Includes all read groups in all imported files into a single read group
    // set. Requires that the headers for all imported files are equivalent.
    // All reads which do not belong to a read group, if any, will be grouped
    // into a separate read group set.
    MERGE_ALL = 2;
  }

  // Required. The ID of the dataset these read group sets will belong to. The
  // caller must have WRITE permissions to this dataset.
  string dataset_id = 1;

  // The reference set to which the imported read group sets are aligned, if
  // any. The reference names of this reference set must be a superset of those
  // found in the imported file headers. If no reference set ID is provided, a
  // best effort is made to associate with a matching reference set.
  string reference_set_id = 4;

  // A list of URIs pointing at [BAM
  // files](https://samtools.github.io/hts-specs/SAMv1.pdf)
  // in Google Cloud Storage.
  repeated string source_uris = 2;

  // The partition strategy describes how read groups are partitioned into read
  // group sets.
  PartitionStrategy partition_strategy = 5;
}
||||||
|
|
||||||
|
// Response message for importing read group sets.
message ImportReadGroupSetsResponse {
  // The IDs of the read group sets created by the import.
  repeated string read_group_set_ids = 1;
}
||||||
|
|
||||||
|
// Request message for exporting a read group set.
message ExportReadGroupSetRequest {
  // Required. ID of the Google Developers Console project that owns this
  // export. The caller must have WRITE access to this project.
  string project_id = 1;

  // Required. Google Cloud Storage URI for the exported BAM file. The
  // currently authenticated user must have write access to the new file. If
  // the URI already contains data, an error will be returned.
  string export_uri = 2;

  // Required. ID of the read group set to export. The caller must have READ
  // access to this read group set.
  string read_group_set_id = 3;

  // Names of the references to export. If this is not specified, all
  // reference sequences, including unmapped reads, are exported. Use `*` to
  // export only unmapped reads.
  repeated string reference_names = 4;
}
||||||
|
|
||||||
|
// The read group set update request.
message UpdateReadGroupSetRequest {
  // The ID of the read group set to be updated. The caller must have WRITE
  // permissions to the dataset associated with this read group set.
  string read_group_set_id = 1;

  // The new read group set data. See `updateMask` for details on mutability of
  // fields.
  ReadGroupSet read_group_set = 2;

  // An optional mask specifying which fields to update. Supported fields:
  //
  // * [name][google.genomics.v1.ReadGroupSet.name].
  // * [referenceSetId][google.genomics.v1.ReadGroupSet.reference_set_id].
  //
  // Leaving `updateMask` unset is equivalent to specifying all mutable
  // fields.
  google.protobuf.FieldMask update_mask = 3;
}
||||||
|
|
||||||
|
// The read group set delete request.
message DeleteReadGroupSetRequest {
  // The ID of the read group set to be deleted. The caller must have WRITE
  // permissions to the dataset associated with this read group set.
  string read_group_set_id = 1;
}
||||||
|
|
||||||
|
// The read group set get request.
message GetReadGroupSetRequest {
  // The ID of the read group set.
  string read_group_set_id = 1;
}
||||||
|
|
||||||
|
// The coverage buckets list request for a read group set.
message ListCoverageBucketsRequest {
  // Required. The ID of the read group set over which coverage is requested.
  string read_group_set_id = 1;

  // The name of the reference to query, within the reference set associated
  // with this query. Optional.
  string reference_name = 3;

  // The start position of the range on the reference, 0-based inclusive. If
  // specified, `referenceName` must also be specified. Defaults to 0.
  int64 start = 4;

  // The end position of the range on the reference, 0-based exclusive. If
  // specified, `referenceName` must also be specified. If unset or 0, defaults
  // to the length of the reference.
  int64 end = 5;

  // The desired width of each reported coverage bucket in base pairs. This
  // will be rounded down to the nearest precomputed bucket width; the value
  // of which is returned as `bucketWidth` in the response. Defaults
  // to infinity (each bucket spans an entire reference sequence) or the length
  // of the target range, if specified. The smallest precomputed
  // `bucketWidth` is currently 2048 base pairs; this is subject to
  // change.
  int64 target_bucket_width = 6;

  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 7;

  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 1024. The maximum value is 2048.
  int32 page_size = 8;
}
||||||
|
|
||||||
|
// A bucket for which read coverage has been precomputed. Each bucket
// corresponds to a specific range of the reference sequence.
message CoverageBucket {
  // Genomic coordinate range that this bucket spans.
  Range range = 1;

  // Average number of reads aligned to each individual reference base in this
  // bucket.
  float mean_coverage = 2;
}
||||||
|
|
||||||
|
// The coverage buckets list response.
message ListCoverageBucketsResponse {
  // The length of each coverage bucket in base pairs. Note that buckets at the
  // end of a reference sequence may be shorter. This value is omitted if the
  // bucket width is infinity (the default behavior, with no range or
  // `targetBucketWidth`).
  int64 bucket_width = 1;

  // The coverage buckets. The list of buckets is sparse; a bucket with 0
  // overlapping reads is not returned. A bucket never crosses more than one
  // reference sequence. Each bucket has width `bucketWidth`, unless
  // its end is the end of the reference sequence.
  repeated CoverageBucket coverage_buckets = 2;

  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 3;
}
||||||
|
|
||||||
|
// The read search request.
message SearchReadsRequest {
  // The IDs of the read group sets within which to search for reads. All
  // specified read group sets must be aligned against a common set of
  // reference sequences; this defines the genomic coordinates for the query.
  // Must specify one of `readGroupSetIds` or `readGroupIds`.
  repeated string read_group_set_ids = 1;

  // The IDs of the read groups within which to search for reads. All specified
  // read groups must belong to the same read group sets. Must specify one of
  // `readGroupSetIds` or `readGroupIds`.
  repeated string read_group_ids = 5;

  // The reference sequence name, for example `chr1`, `1`, or `chrX`. If set to
  // `*`, only unmapped reads are returned. If unspecified, all reads (mapped
  // and unmapped) are returned.
  string reference_name = 7;

  // The start position of the range on the reference, 0-based inclusive. If
  // specified, `referenceName` must also be specified.
  int64 start = 8;

  // The end position of the range on the reference, 0-based exclusive. If
  // specified, `referenceName` must also be specified.
  int64 end = 9;

  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 3;

  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 256. The maximum value is 2048.
  int32 page_size = 4;
}
||||||
|
|
||||||
|
// Response message for searching reads.
message SearchReadsResponse {
  // Matching alignments, sorted by mapped genomic coordinate, if any, in
  // ascending position order within the same reference. Unmapped reads, which
  // have no position, are returned contiguously and sorted in ascending
  // lexicographic order by fragment name.
  repeated Read alignments = 1;

  // Continuation token used to page through large result sets. Pass this
  // value in a subsequent request to get the next page of results. Empty when
  // there are no additional results.
  string next_page_token = 2;
}
||||||
|
|
||||||
|
// The stream reads request.
message StreamReadsRequest {
  // The Google Developers Console project ID or number which will be billed
  // for this access. The caller must have WRITE access to this project.
  // Required.
  string project_id = 1;

  // The ID of the read group set from which to stream reads.
  string read_group_set_id = 2;

  // The reference sequence name, for example `chr1`, `1`, or `chrX`. If set to
  // `*`, only unmapped reads are returned.
  string reference_name = 3;

  // The start position of the range on the reference, 0-based inclusive. If
  // specified, `referenceName` must also be specified.
  int64 start = 4;

  // The end position of the range on the reference, 0-based exclusive. If
  // specified, `referenceName` must also be specified.
  int64 end = 5;

  // Restricts results to a shard containing approximately `1/totalShards`
  // of the normal response payload for this query. Results from a sharded
  // request are disjoint from those returned by all queries which differ only
  // in their shard parameter. A shard may yield 0 results; this is especially
  // likely for large values of `totalShards`.
  //
  // Valid values are `[0, totalShards)`.
  int32 shard = 6;

  // Specifying `totalShards` causes a disjoint subset of the normal response
  // payload to be returned for each query with a unique `shard` parameter
  // specified. A best effort is made to yield equally sized shards. Sharding
  // can be used to distribute processing amongst workers, where each worker is
  // assigned a unique `shard` number and all workers specify the same
  // `totalShards` number. The union of reads returned for all sharded queries
  // `[0, totalShards)` is equal to those returned by a single unsharded query.
  //
  // Queries for different values of `totalShards` with common divisors will
  // share shard boundaries. For example, streaming `shard` 2 of 5
  // `totalShards` yields the same results as streaming `shard`s 4 and 5 of 10
  // `totalShards`. This property can be leveraged for adaptive retries.
  int32 total_shards = 7;
}
||||||
|
|
||||||
|
// The stream reads response. One message in the stream returned by
// `StreamReads`.
message StreamReadsResponse {
  // Reads matching the stream request that are carried by this message.
  repeated Read alignments = 1;
}
@ -0,0 +1,281 @@ |
|||||||
|
// Copyright 2016 Google Inc. |
||||||
|
// |
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
// you may not use this file except in compliance with the License. |
||||||
|
// You may obtain a copy of the License at |
||||||
|
// |
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
// |
||||||
|
// Unless required by applicable law or agreed to in writing, software |
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
// See the License for the specific language governing permissions and |
||||||
|
// limitations under the License. |
||||||
|
|
||||||
|
syntax = "proto3"; |
||||||
|
|
||||||
|
package google.genomics.v1; |
||||||
|
|
||||||
|
import "google/api/annotations.proto"; |
||||||
|
|
||||||
|
option cc_enable_arenas = true; |
||||||
|
option java_multiple_files = true; |
||||||
|
option java_outer_classname = "ReferencesProto"; |
||||||
|
option java_package = "com.google.genomics.v1"; |
||||||
|
|
||||||
|
|
||||||
|
// Search, get and list-bases operations over reference sets and references.
service ReferenceServiceV1 {
  // Finds the reference sets that match the supplied criteria.
  //
  // For the definitions of references and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.searchReferenceSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L71).
  rpc SearchReferenceSets(SearchReferenceSetsRequest)
      returns (SearchReferenceSetsResponse) {
    option (google.api.http) = {
      post: "/v1/referencesets/search"
      body: "*"
    };
  }

  // Retrieves a single reference set.
  //
  // For the definitions of references and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.getReferenceSet](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L83).
  rpc GetReferenceSet(GetReferenceSetRequest) returns (ReferenceSet) {
    option (google.api.http) = {
      get: "/v1/referencesets/{reference_set_id}"
    };
  }

  // Finds the references that match the supplied criteria.
  //
  // For the definitions of references and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.searchReferences](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L146).
  rpc SearchReferences(SearchReferencesRequest)
      returns (SearchReferencesResponse) {
    option (google.api.http) = {
      post: "/v1/references/search"
      body: "*"
    };
  }

  // Retrieves a single reference.
  //
  // For the definitions of references and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.getReference](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L158).
  rpc GetReference(GetReferenceRequest) returns (Reference) {
    option (google.api.http) = {
      get: "/v1/references/{reference_id}"
    };
  }

  // Lists the bases of a reference; the listing may be restricted to a range.
  //
  // For the definitions of references and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.getReferenceBases](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L221).
  rpc ListBases(ListBasesRequest) returns (ListBasesResponse) {
    option (google.api.http) = {
      get: "/v1/references/{reference_id}/bases"
    };
  }
}
||||||
|
|
||||||
|
// A reference is a canonical assembled DNA sequence that serves as a
// reference coordinate space for other genomic annotations. For instance, a
// single reference might represent human chromosome 1 or mitochondrial DNA.
// Every reference belongs to one or more reference sets.
//
// For more genomics resource definitions, see [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message Reference {
  // Server-generated ID of this reference; unique across all references.
  string id = 1;

  // Length of this reference's sequence.
  int64 length = 2;

  // MD5 of the upper-case sequence with all whitespace characters excluded
  // (equivalent to SQ:M5 in SAM), represented in lower case hexadecimal
  // format.
  string md5checksum = 3;

  // Name of this reference, for example `22`.
  string name = 4;

  // URI from which the sequence was obtained. Typically specifies a FASTA
  // format file.
  string source_uri = 5;

  // All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ),
  // ideally with a version number, for example `GCF_000001405.26`.
  repeated string source_accessions = 6;

  // ID from http://www.ncbi.nlm.nih.gov/taxonomy. For example, 9606 for human.
  int32 ncbi_taxon_id = 7;
}
||||||
|
|
||||||
|
// A reference set is a collection of references that typically make up a
// reference assembly for a species; `GRCh38`, for example, is representative
// of the human genome. It defines a common coordinate space for comparing
// reference-aligned experimental data, and contains 1 or more references.
//
// For more genomics resource definitions, see [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message ReferenceSet {
  // Server-generated ID of this reference set; unique across all reference
  // sets.
  string id = 1;

  // IDs of the reference objects that are part of this set.
  // `Reference.md5checksum` must be unique within this set.
  repeated string reference_ids = 2;

  // Order-independent MD5 checksum identifying this reference set. It is
  // computed by sorting the lower case hexadecimal `reference.md5checksum`
  // strings of all references in this set in ascending lexicographic order,
  // concatenating them, and taking the MD5 of that value. The result is
  // represented in lower case hexadecimal format.
  string md5checksum = 3;

  // ID from http://www.ncbi.nlm.nih.gov/taxonomy (for example, 9606 for human)
  // indicating the species this reference set is intended to model. Contained
  // references may specify a different `ncbiTaxonId`, because assemblies may
  // include reference sequences that do not belong to the modeled species,
  // for example EBV in a human reference genome.
  int32 ncbi_taxon_id = 4;

  // Free text description of this reference set.
  string description = 5;

  // Public id of this reference set, such as `GRCh37`.
  string assembly_id = 6;

  // URI from which the references were obtained.
  string source_uri = 7;

  // All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ),
  // ideally with a version number, for example `NC_000001.11`.
  repeated string source_accessions = 8;
}
||||||
|
|
||||||
|
// Request message for `ReferenceServiceV1.SearchReferenceSets`.
message SearchReferenceSetsRequest {
  // When present, only reference sets whose
  // [md5checksum][google.genomics.v1.ReferenceSet.md5checksum] matches exactly
  // are returned.
  repeated string md5checksums = 1;

  // When present, only reference sets for which a prefix of any of
  // [sourceAccessions][google.genomics.v1.ReferenceSet.source_accessions]
  // matches any of these strings are returned. Accession numbers typically
  // have a main number and a version, for example `NC_000001.11`.
  repeated string accessions = 2;

  // When present, only reference sets for which a substring of their
  // `assemblyId` matches this string (case insensitive) are returned.
  string assembly_id = 3;

  // Continuation token used to page through large result sets. To fetch the
  // next page of results, set this to the `nextPageToken` value from the
  // previous response.
  string page_token = 4;

  // Maximum number of results to return in a single page. Defaults to 1024
  // when unspecified. The maximum value is 4096.
  int32 page_size = 5;
}
||||||
|
|
||||||
|
// Response message for `ReferenceServiceV1.SearchReferenceSets`.
message SearchReferenceSetsResponse {
  // The matching reference sets.
  repeated ReferenceSet reference_sets = 1;

  // Continuation token used to page through large result sets. Pass this
  // value in a subsequent request to obtain the next page of results. It is
  // empty when there are no additional results.
  string next_page_token = 2;
}
||||||
|
|
||||||
|
// Request message for `ReferenceServiceV1.GetReferenceSet`.
message GetReferenceSetRequest {
  // ID of the reference set to get.
  string reference_set_id = 1;
}
||||||
|
|
||||||
|
// Request message for `ReferenceServiceV1.SearchReferences`.
message SearchReferencesRequest {
  // When present, only references whose
  // [md5checksum][google.genomics.v1.Reference.md5checksum] matches exactly
  // are returned.
  repeated string md5checksums = 1;

  // When present, only references for which a prefix of any of
  // [sourceAccessions][google.genomics.v1.Reference.source_accessions] matches
  // any of these strings are returned. Accession numbers typically have a
  // main number and a version, for example `GCF_000001405.26`.
  repeated string accessions = 2;

  // When present, only references belonging to this reference set are
  // returned.
  string reference_set_id = 3;

  // Continuation token used to page through large result sets. To fetch the
  // next page of results, set this to the `nextPageToken` value from the
  // previous response.
  string page_token = 4;

  // Maximum number of results to return in a single page. Defaults to 1024
  // when unspecified. The maximum value is 4096.
  int32 page_size = 5;
}
||||||
|
|
||||||
|
// Response message for `ReferenceServiceV1.SearchReferences`.
message SearchReferencesResponse {
  // The matching references.
  repeated Reference references = 1;

  // Continuation token used to page through large result sets. Pass this
  // value in a subsequent request to obtain the next page of results. It is
  // empty when there are no additional results.
  string next_page_token = 2;
}
||||||
|
|
||||||
|
// Request message for `ReferenceServiceV1.GetReference`.
message GetReferenceRequest {
  // ID of the reference to get.
  string reference_id = 1;
}
||||||
|
|
||||||
|
// Request message for `ReferenceServiceV1.ListBases`.
message ListBasesRequest {
  // ID of the reference whose bases are listed.
  string reference_id = 1;

  // Start position (0-based) of this query. Defaults to 0.
  int64 start = 2;

  // End position (0-based, exclusive) of this query. Defaults to the length
  // of this reference.
  int64 end = 3;

  // Continuation token used to page through large result sets. To fetch the
  // next page of results, set this to the `nextPageToken` value from the
  // previous response.
  string page_token = 4;

  // Maximum number of bases to return in a single page. Defaults to 200Kbp
  // (kilo base pairs) when unspecified. The maximum value is 10Mbp (mega base
  // pairs).
  int32 page_size = 5;
}
||||||
|
|
||||||
|
// Response message for `ReferenceServiceV1.ListBases`.
message ListBasesResponse {
  // Offset (0-based) of the given `sequence` from the start of this
  // `Reference`. The value differs for each page of a paginated request.
  int64 offset = 1;

  // A substring of the bases that make up this reference.
  string sequence = 2;

  // Continuation token used to page through large result sets. Pass this
  // value in a subsequent request to obtain the next page of results. It is
  // empty when there are no additional results.
  string next_page_token = 3;
}
@ -0,0 +1,903 @@ |
|||||||
|
// Copyright 2016 Google Inc. |
||||||
|
// |
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
// you may not use this file except in compliance with the License. |
||||||
|
// You may obtain a copy of the License at |
||||||
|
// |
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
// |
||||||
|
// Unless required by applicable law or agreed to in writing, software |
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
// See the License for the specific language governing permissions and |
||||||
|
// limitations under the License. |
||||||
|
|
||||||
|
syntax = "proto3"; |
||||||
|
|
||||||
|
package google.genomics.v1; |
||||||
|
|
||||||
|
import "google/api/annotations.proto"; |
||||||
|
import "google/longrunning/operations.proto"; |
||||||
|
import "google/protobuf/empty.proto"; |
||||||
|
import "google/protobuf/field_mask.proto"; |
||||||
|
import "google/protobuf/struct.proto"; |
||||||
|
|
||||||
|
option cc_enable_arenas = true; |
||||||
|
option java_multiple_files = true; |
||||||
|
option java_outer_classname = "VariantsProto"; |
||||||
|
option java_package = "com.google.genomics.v1"; |
||||||
|
|
||||||
|
|
||||||
|
// Server-streaming access to variants.
service StreamingVariantService {
  // Streams every variant that matches the search request. Results are
  // ordered by reference name, position, and ID.
  rpc StreamVariants(StreamVariantsRequest)
      returns (stream StreamVariantsResponse) {
    option (google.api.http) = {
      post: "/v1/variants:stream"
      body: "*"
    };
  }
}
||||||
|
|
||||||
|
// Operations on variant sets, variants, and call sets.
service VariantServiceV1 {
  // Asynchronously imports the provided information to create variant data.
  //
  // For the definitions of variant sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Each variant for import is merged with any existing variant that matches
  // its reference sequence, start, end, reference bases, and alternative
  // bases. If no such variant exists, a new one is created.
  //
  // When variants are merged, the call information from the new variant is
  // added to the existing variant, and Variant info fields are merged as
  // specified in
  // [infoMergeConfig][google.genomics.v1.ImportVariantsRequest.info_merge_config].
  // As a special case, for single-sample VCF files, QUAL and FILTER fields
  // are moved to the call level; these are sometimes interpreted in a
  // call-specific context.
  // Imported VCF headers are appended to the metadata already in a variant
  // set.
  rpc ImportVariants(ImportVariantsRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/variants:import"
      body: "*"
    };
  }

  // Creates a new variant set.
  //
  // For the definitions of variant sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // The provided variant set must have a valid `datasetId` set; all other
  // fields are optional. The `id` field is ignored, because it is assigned
  // by the server.
  rpc CreateVariantSet(CreateVariantSetRequest) returns (VariantSet) {
    option (google.api.http) = {
      post: "/v1/variantsets"
      body: "variant_set"
    };
  }

  // Exports variant set data to an external destination.
  //
  // For the definitions of variant sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc ExportVariantSet(ExportVariantSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/variantsets/{variant_set_id}:export"
      body: "*"
    };
  }

  // Gets a variant set by ID.
  //
  // For the definitions of variant sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc GetVariantSet(GetVariantSetRequest) returns (VariantSet) {
    option (google.api.http) = {
      get: "/v1/variantsets/{variant_set_id}"
    };
  }

  // Returns a list of all variant sets matching the search criteria.
  //
  // For the definitions of variant sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.searchVariantSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variantmethods.avdl#L49).
  rpc SearchVariantSets(SearchVariantSetsRequest)
      returns (SearchVariantSetsResponse) {
    option (google.api.http) = {
      post: "/v1/variantsets/search"
      body: "*"
    };
  }

  // Deletes a variant set, including all variants, call sets, and calls
  // within it. This is not reversible.
  //
  // For the definitions of variant sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc DeleteVariantSet(DeleteVariantSetRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/variantsets/{variant_set_id}"
    };
  }

  // Updates a variant set using patch semantics.
  //
  // For the definitions of variant sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc UpdateVariantSet(UpdateVariantSetRequest) returns (VariantSet) {
    option (google.api.http) = {
      patch: "/v1/variantsets/{variant_set_id}"
      body: "variant_set"
    };
  }

  // Gets a list of variants matching the criteria.
  //
  // For the definitions of variants and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.searchVariants](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variantmethods.avdl#L126).
  rpc SearchVariants(SearchVariantsRequest) returns (SearchVariantsResponse) {
    option (google.api.http) = {
      post: "/v1/variants/search"
      body: "*"
    };
  }

  // Creates a new variant.
  //
  // For the definitions of variants and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc CreateVariant(CreateVariantRequest) returns (Variant) {
    option (google.api.http) = {
      post: "/v1/variants"
      body: "variant"
    };
  }

  // Updates a variant.
  //
  // For the definitions of variants and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // This method supports patch semantics. Returns the modified variant
  // without its calls.
  rpc UpdateVariant(UpdateVariantRequest) returns (Variant) {
    option (google.api.http) = {
      patch: "/v1/variants/{variant_id}"
      body: "variant"
    };
  }

  // Deletes a variant.
  //
  // For the definitions of variants and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc DeleteVariant(DeleteVariantRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/variants/{variant_id}"
    };
  }

  // Gets a variant by ID.
  //
  // For the definitions of variants and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc GetVariant(GetVariantRequest) returns (Variant) {
    option (google.api.http) = {
      get: "/v1/variants/{variant_id}"
    };
  }

  // Merges the given variants with existing variants.
  //
  // For the definitions of variants and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Each variant is merged with an existing variant that matches its
  // reference sequence, start, end, reference bases, and alternative bases.
  // If no such variant exists, a new one is created.
  //
  // When variants are merged, the call information from the new variant is
  // added to the existing variant. Variant info fields are merged as
  // specified in the
  // [infoMergeConfig][google.genomics.v1.MergeVariantsRequest.info_merge_config]
  // field of the MergeVariantsRequest.
  //
  // Please exercise caution when using this method! It is easy to introduce
  // mistakes in existing variants and difficult to back out of them. For
  // example, suppose you were trying to merge a new variant with an existing
  // one and both variants contain calls that belong to callsets with the same
  // callset ID.
  //
  //     // Existing variant - irrelevant fields trimmed for clarity
  //     {
  //         "variantSetId": "10473108253681171589",
  //         "referenceName": "1",
  //         "start": "10582",
  //         "referenceBases": "G",
  //         "alternateBases": [
  //             "A"
  //         ],
  //         "calls": [
  //             {
  //                 "callSetId": "10473108253681171589-0",
  //                 "callSetName": "CALLSET0",
  //                 "genotype": [
  //                     0,
  //                     1
  //                 ]
  //             }
  //         ]
  //     }
  //
  //     // New variant with conflicting call information
  //     {
  //         "variantSetId": "10473108253681171589",
  //         "referenceName": "1",
  //         "start": "10582",
  //         "referenceBases": "G",
  //         "alternateBases": [
  //             "A"
  //         ],
  //         "calls": [
  //             {
  //                 "callSetId": "10473108253681171589-0",
  //                 "callSetName": "CALLSET0",
  //                 "genotype": [
  //                     1,
  //                     1
  //                 ]
  //             }
  //         ]
  //     }
  //
  // The resulting merged variant would overwrite the existing calls with
  // those from the new variant:
  //
  //     {
  //         "variantSetId": "10473108253681171589",
  //         "referenceName": "1",
  //         "start": "10582",
  //         "referenceBases": "G",
  //         "alternateBases": [
  //             "A"
  //         ],
  //         "calls": [
  //             {
  //                 "callSetId": "10473108253681171589-0",
  //                 "callSetName": "CALLSET0",
  //                 "genotype": [
  //                     1,
  //                     1
  //                 ]
  //             }
  //         ]
  //     }
  //
  // This may be the desired outcome, but it is up to the user to determine
  // if that is indeed the case.
  rpc MergeVariants(MergeVariantsRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      post: "/v1/variants:merge"
      body: "*"
    };
  }

  // Gets a list of call sets matching the criteria.
  //
  // For the definitions of call sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.searchCallSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variantmethods.avdl#L178).
  rpc SearchCallSets(SearchCallSetsRequest) returns (SearchCallSetsResponse) {
    option (google.api.http) = {
      post: "/v1/callsets/search"
      body: "*"
    };
  }

  // Creates a new call set.
  //
  // For the definitions of call sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc CreateCallSet(CreateCallSetRequest) returns (CallSet) {
    option (google.api.http) = {
      post: "/v1/callsets"
      body: "call_set"
    };
  }

  // Updates a call set.
  //
  // For the definitions of call sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // This method supports patch semantics.
  rpc UpdateCallSet(UpdateCallSetRequest) returns (CallSet) {
    option (google.api.http) = {
      patch: "/v1/callsets/{call_set_id}"
      body: "call_set"
    };
  }

  // Deletes a call set.
  //
  // For the definitions of call sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc DeleteCallSet(DeleteCallSetRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/callsets/{call_set_id}"
    };
  }

  // Gets a call set by ID.
  //
  // For the definitions of call sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc GetCallSet(GetCallSetRequest) returns (CallSet) {
    option (google.api.http) = {
      get: "/v1/callsets/{call_set_id}"
    };
  }
}
||||||
|
|
||||||
|
// Describes a single piece of variant call metadata. Each entry has a
// top-level key together with either a single string value (`value`) or a
// list of key-value pairs (`info`). `value` and `info` are mutually
// exclusive.
message VariantSetMetadata {
  // The kinds of data a metadata entry can describe.
  enum Type {
    TYPE_UNSPECIFIED = 0;

    INTEGER = 1;

    FLOAT = 2;

    FLAG = 3;

    CHARACTER = 4;

    STRING = 5;
  }

  // The top-level key.
  string key = 1;

  // The value field, used for simple metadata.
  string value = 2;

  // User-provided ID field; this API does not enforce it. Two or more pieces
  // of structured metadata whose id and key fields are identical are
  // considered equivalent.
  string id = 4;

  // The type of data. Possible types include: Integer, Float, Flag,
  // Character, and String.
  Type type = 5;

  // The number of values that can be included in a field described by this
  // metadata.
  string number = 8;

  // A textual description of this metadata.
  string description = 7;

  // Remaining structured metadata key-value pairs. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 3;
}
||||||
|
|
||||||
|
// A variant set is a collection of call sets and variants, and contains
// summary statistics of those contents. Every variant set belongs to a
// dataset.
//
// For more genomics resource definitions, see [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message VariantSet {
  // The dataset this variant set belongs to.
  string dataset_id = 1;

  // Server-generated ID of this variant set; unique across all variant sets.
  string id = 2;

  // The reference set this variant set is mapped to. The reference set
  // describes the alignment provenance of the variant set, whereas the
  // `referenceBounds` describe the shape of the actual variant data. The
  // reference names of the reference set are a superset of those found in
  // the `referenceBounds`.
  //
  // For example, given a variant set that is mapped to the GRCh38 reference
  // set and contains a single variant on reference 'X', `referenceBounds`
  // would contain only an entry for 'X', while the associated reference set
  // enumerates all possible references: '1', '2', 'X', 'Y', 'MT', etc.
  string reference_set_id = 6;

  // All references used by the variants in this variant set, each with its
  // associated coordinate upper bound.
  repeated ReferenceBound reference_bounds = 5;

  // The metadata associated with this variant set.
  repeated VariantSetMetadata metadata = 4;

  // User-specified, mutable name.
  string name = 7;

  // A textual description of this variant set.
  string description = 8;
}
||||||
|
|
||||||
|
// A variant represents a change in DNA sequence relative to a reference
// sequence. For example, a variant could represent a SNP or an insertion.
// Variants belong to a variant set.
//
// For more genomics resource definitions, see [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
//
// Each of the calls on a variant represents a determination of genotype with
// respect to that variant. For example, a call might assign probability of 0.32
// to the occurrence of a SNP named rs1234 in a sample named NA12345. A call
// belongs to a call set, which contains related calls typically from one
// sample.
message Variant {
  // The ID of the variant set this variant belongs to.
  string variant_set_id = 15;

  // The server-generated variant ID, unique across all variants.
  string id = 2;

  // Names for the variant, for example a RefSNP ID.
  repeated string names = 3;

  // The date this variant was created, in milliseconds from the epoch.
  int64 created = 12;

  // The reference on which this variant occurs.
  // (such as `chr20` or `X`)
  string reference_name = 14;

  // The position at which this variant occurs (0-based).
  // This corresponds to the first base of the string of reference bases.
  int64 start = 16;

  // The end position (0-based) of this variant. This corresponds to the first
  // base after the last base in the reference allele. So, the length of
  // the reference allele is (end - start). This is useful for variants
  // that don't explicitly give alternate bases, for example large deletions.
  int64 end = 13;

  // The reference bases for this variant. They start at the given
  // position.
  string reference_bases = 6;

  // The bases that appear instead of the reference bases.
  repeated string alternate_bases = 7;

  // A measure of how likely this variant is to be real.
  // A higher value is better.
  double quality = 8;

  // A list of filters (normally quality filters) this variant has failed.
  // `PASS` indicates this variant has passed all filters.
  repeated string filter = 9;

  // A map of additional variant information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 10;

  // The variant calls for this particular variant. Each one represents the
  // determination of genotype with respect to this variant.
  repeated VariantCall calls = 11;
}
||||||
|
|
||||||
|
// A call represents the determination of genotype with respect to a particular
// variant. It may include associated information such as quality and phasing.
// For example, a call might assign a probability of 0.32 to the occurrence of
// a SNP named rs1234 in a call set with the name NA12345.
message VariantCall {
  // The ID of the call set this variant call belongs to.
  string call_set_id = 8;

  // The name of the call set this variant call belongs to.
  string call_set_name = 9;

  // The genotype of this variant call. Each value represents either the value
  // of the `referenceBases` field or a 1-based index into
  // `alternateBases`. If a variant had a `referenceBases`
  // value of `T` and an `alternateBases`
  // value of `["A", "C"]`, and the `genotype` was
  // `[2, 1]`, that would mean the call
  // represented the heterozygous value `CA` for this variant.
  // If the `genotype` was instead `[0, 1]`, the
  // represented value would be `TA`. Ordering of the
  // genotype values is important if the `phaseset` is present.
  // If a genotype is not called (that is, a `.` is present in the
  // GT string) -1 is returned.
  repeated int32 genotype = 7;

  // If this field is present, this variant call's genotype ordering implies
  // the phase of the bases and is consistent with any other variant calls in
  // the same reference sequence which have the same phaseset value.
  // When importing data from VCF, if the genotype data was phased but no
  // phase set was specified this field will be set to `*`.
  string phaseset = 5;

  // The genotype likelihoods for this variant call. Each array entry
  // represents how likely a specific genotype is for this call. The value
  // ordering is defined by the GL tag in the VCF spec.
  // If Phred-scaled genotype likelihood scores (PL) are available and
  // log10(P) genotype likelihood scores (GL) are not, PL scores are converted
  // to GL scores. If both are available, PL scores are stored in `info`.
  repeated double genotype_likelihood = 6;

  // A map of additional variant call information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 2;
}
||||||
|
|
||||||
|
// A call set is a collection of variant calls, typically for one sample. It
// belongs to a variant set.
//
// For more genomics resource definitions, see [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message CallSet {
  // The server-generated call set ID, unique across all call sets.
  string id = 1;

  // The call set name.
  string name = 2;

  // The sample ID this call set corresponds to.
  string sample_id = 7;

  // The IDs of the variant sets this call set belongs to. This field must
  // have exactly length one, as a call set belongs to a single variant set.
  // This field is repeated for compatibility with the
  // [GA4GH 0.5.1
  // API](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variants.avdl#L76).
  repeated string variant_set_ids = 6;

  // The date this call set was created in milliseconds from the epoch.
  int64 created = 5;

  // A map of additional call set information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 4;
}
||||||
|
|
||||||
|
// ReferenceBound records an upper bound for the starting coordinate of
// variants in a particular reference.
message ReferenceBound {
  // The name of the reference associated with this reference bound.
  string reference_name = 1;

  // An upper bound (inclusive) on the starting coordinate of any
  // variant in the reference sequence.
  int64 upper_bound = 2;
}
||||||
|
|
||||||
|
// The variant data import request.
message ImportVariantsRequest {
  // The supported formats for variant data being imported.
  enum Format {
    FORMAT_UNSPECIFIED = 0;

    // VCF (Variant Call Format). The VCF files should be uncompressed. gVCF is
    // also supported.
    FORMAT_VCF = 1;

    // Complete Genomics masterVarBeta format. The masterVarBeta files should
    // be bzip2 compressed.
    FORMAT_COMPLETE_GENOMICS = 2;
  }

  // Required. The variant set to which variant data should be imported.
  string variant_set_id = 1;

  // A list of URIs referencing variant files in Google Cloud Storage. URIs can
  // include wildcards [as described
  // here](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames).
  // Note that recursive wildcards ('**') are not supported.
  repeated string source_uris = 2;

  // The format of the variant data being imported. If unspecified, defaults to
  // `VCF`.
  Format format = 3;

  // Convert reference names to the canonical representation.
  // hg19 haplotypes (those reference names containing "_hap")
  // are not modified in any way.
  // All other reference names are modified according to the following rules:
  //
  // * The reference name is capitalized.
  // * The "chr" prefix is dropped for all autosomes and sex chromosomes.
  //   For example "chr17" becomes "17" and "chrX" becomes "X".
  // * All mitochondrial chromosomes ("chrM", "chrMT", etc) become "MT".
  bool normalize_reference_names = 5;

  // A mapping between info field keys and the InfoMergeOperations to
  // be performed on them. This is plumbed down to the MergeVariantsRequests
  // generated by the resulting import job.
  map<string, InfoMergeOperation> info_merge_config = 6;
}
||||||
|
|
||||||
|
// The variant data import response.
message ImportVariantsResponse {
  // IDs of the call sets created during the import.
  repeated string call_set_ids = 1;
}
||||||
|
|
||||||
|
// The CreateVariantSet request.
message CreateVariantSetRequest {
  // Required. The variant set to be created. Must have a valid `datasetId`.
  VariantSet variant_set = 1;
}
||||||
|
|
||||||
|
// The variant data export request.
message ExportVariantSetRequest {
  // The supported formats for exported variant data.
  enum Format {
    FORMAT_UNSPECIFIED = 0;

    // Export the data to Google BigQuery.
    FORMAT_BIGQUERY = 1;
  }

  // Required. The ID of the variant set that contains variant data which
  // should be exported. The caller must have READ access to this variant set.
  string variant_set_id = 1;

  // If provided, only variant call information from the specified call sets
  // will be exported. By default all variant calls are exported.
  repeated string call_set_ids = 2;

  // Required. The Google Cloud project ID that owns the destination
  // BigQuery dataset. The caller must have WRITE access to this project. This
  // project will also own the resulting export job.
  string project_id = 3;

  // The format for the exported data.
  Format format = 4;

  // Required. The BigQuery dataset to export data to. This dataset must already
  // exist. Note that this is distinct from the Genomics concept of "dataset".
  string bigquery_dataset = 5;

  // Required. The BigQuery table to export data to.
  // If the table doesn't exist, it will be created. If it already exists, it
  // will be overwritten.
  string bigquery_table = 6;
}
||||||
|
|
||||||
|
// The variant set request.
message GetVariantSetRequest {
  // Required. The ID of the variant set.
  string variant_set_id = 1;
}
||||||
|
|
||||||
|
// The search variant sets request.
message SearchVariantSetsRequest {
  // Exactly one dataset ID must be provided here. Only variant sets which
  // belong to this dataset will be returned.
  repeated string dataset_ids = 1;

  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 2;

  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 1024.
  int32 page_size = 3;
}
||||||
|
|
||||||
|
// The search variant sets response.
message SearchVariantSetsResponse {
  // The variant sets belonging to the requested dataset.
  repeated VariantSet variant_sets = 1;

  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
}
||||||
|
|
||||||
|
// The delete variant set request.
message DeleteVariantSetRequest {
  // The ID of the variant set to be deleted.
  string variant_set_id = 1;
}
||||||
|
|
||||||
|
// The update variant set request.
message UpdateVariantSetRequest {
  // The ID of the variant set to be updated (must already exist).
  string variant_set_id = 1;

  // The new variant set data. Only the variant_set.metadata will be considered
  // for update.
  //
  // NOTE(review): the sentence above mentions only metadata, while
  // `update_mask` below also lists name and description as supported fields;
  // confirm which statement is current.
  VariantSet variant_set = 2;

  // An optional mask specifying which fields to update. Supported fields:
  //
  // * [metadata][google.genomics.v1.VariantSet.metadata].
  // * [name][google.genomics.v1.VariantSet.name].
  // * [description][google.genomics.v1.VariantSet.description].
  //
  // Leaving `updateMask` unset is equivalent to specifying all mutable
  // fields.
  google.protobuf.FieldMask update_mask = 5;
}
||||||
|
|
||||||
|
// The variant search request.
message SearchVariantsRequest {
  // At most one variant set ID must be provided. Only variants from this
  // variant set will be returned. If omitted, a call set id must be included in
  // the request.
  repeated string variant_set_ids = 1;

  // Only return variants which have exactly this name.
  string variant_name = 2;

  // Only return variant calls which belong to call sets with these ids.
  // Leaving this blank returns all variant calls. If a variant has no
  // calls belonging to any of these call sets, it won't be returned at all.
  // Currently, variants with no calls from any call set will never be returned.
  repeated string call_set_ids = 3;

  // Required. Only return variants in this reference sequence.
  string reference_name = 4;

  // The beginning of the window (0-based, inclusive) for which
  // overlapping variants should be returned. If unspecified, defaults to 0.
  int64 start = 5;

  // The end of the window, 0-based exclusive. If unspecified or 0, defaults to
  // the length of the reference.
  int64 end = 6;

  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 7;

  // The maximum number of variants to return in a single page. If unspecified,
  // defaults to 5000. The maximum value is 10000.
  int32 page_size = 8;

  // The maximum number of calls to return in a single page. Note that this
  // limit may be exceeded in the event that a matching variant contains more
  // calls than the requested maximum. If unspecified, defaults to 5000. The
  // maximum value is 10000.
  int32 max_calls = 9;
}
||||||
|
|
||||||
|
// The variant search response.
message SearchVariantsResponse {
  // The list of matching Variants.
  repeated Variant variants = 1;

  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
}
||||||
|
|
||||||
|
// The create variant request.
message CreateVariantRequest {
  // The variant to be created.
  Variant variant = 1;
}
||||||
|
|
||||||
|
// The update variant request.
message UpdateVariantRequest {
  // The ID of the variant to be updated.
  string variant_id = 1;

  // The new variant data.
  Variant variant = 2;

  // An optional mask specifying which fields to update. At this time, mutable
  // fields are [names][google.genomics.v1.Variant.names] and
  // [info][google.genomics.v1.Variant.info]. Acceptable values are "names" and
  // "info". If unspecified, all mutable fields will be updated.
  google.protobuf.FieldMask update_mask = 3;
}
||||||
|
|
||||||
|
// The delete variant request.
message DeleteVariantRequest {
  // The ID of the variant to be deleted.
  string variant_id = 1;
}
||||||
|
|
||||||
|
// The get variant request.
message GetVariantRequest {
  // The ID of the variant.
  string variant_id = 1;
}
||||||
|
|
||||||
|
// The merge variants request.
message MergeVariantsRequest {
  // The destination variant set.
  string variant_set_id = 1;

  // The variants to be merged with existing variants.
  repeated Variant variants = 2;

  // A mapping between info field keys and the InfoMergeOperations to
  // be performed on them.
  map<string, InfoMergeOperation> info_merge_config = 3;
}
||||||
|
|
||||||
|
// The call set search request.
message SearchCallSetsRequest {
  // Restrict the query to call sets within the given variant sets. At least one
  // ID must be provided.
  repeated string variant_set_ids = 1;

  // Only return call sets for which a substring of the name matches this
  // string.
  string name = 2;

  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 3;

  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 1024.
  int32 page_size = 4;
}
||||||
|
|
||||||
|
// The call set search response.
message SearchCallSetsResponse {
  // The list of matching call sets.
  repeated CallSet call_sets = 1;

  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
}
||||||
|
|
||||||
|
// The create call set request.
message CreateCallSetRequest {
  // The call set to be created.
  CallSet call_set = 1;
}
||||||
|
|
||||||
|
// The update call set request.
message UpdateCallSetRequest {
  // The ID of the call set to be updated.
  string call_set_id = 1;

  // The new call set data.
  CallSet call_set = 2;

  // An optional mask specifying which fields to update. At this time, the only
  // mutable field is [name][google.genomics.v1.CallSet.name]. The only
  // acceptable value is "name". If unspecified, all mutable fields will be
  // updated.
  google.protobuf.FieldMask update_mask = 3;
}
||||||
|
|
||||||
|
// The delete call set request.
message DeleteCallSetRequest {
  // The ID of the call set to be deleted.
  string call_set_id = 1;
}
||||||
|
|
||||||
|
// The get call set request.
message GetCallSetRequest {
  // The ID of the call set.
  string call_set_id = 1;
}
||||||
|
|
||||||
|
// The stream variants request.
message StreamVariantsRequest {
  // The Google Developers Console project ID or number which will be billed
  // for this access. The caller must have WRITE access to this project.
  // Required.
  string project_id = 1;

  // The variant set ID from which to stream variants.
  string variant_set_id = 2;

  // Only return variant calls which belong to call sets with these IDs.
  // Leaving this blank returns all variant calls.
  repeated string call_set_ids = 3;

  // Required. Only return variants in this reference sequence.
  string reference_name = 4;

  // The beginning of the window (0-based, inclusive) for which
  // overlapping variants should be returned.
  int64 start = 5;

  // The end of the window (0-based, exclusive) for which overlapping
  // variants should be returned.
  int64 end = 6;
}
||||||
|
|
||||||
|
// The stream variants response.
message StreamVariantsResponse {
  // The variants carried by this response message.
  repeated Variant variants = 1;
}
||||||
|
|
||||||
|
// Operations to be performed during import on Variant info fields.
// These operations are set for each info field in the info_merge_config
// map of ImportVariantsRequest, which is plumbed down to the
// MergeVariantsRequests generated by the import job.
enum InfoMergeOperation {
  INFO_MERGE_OPERATION_UNSPECIFIED = 0;

  // By default, Variant info fields are persisted if the Variant doesn't
  // already exist in the variantset. If the Variant is equivalent to a
  // Variant already in the variantset, the incoming Variant's info field
  // is ignored in favor of that of the already persisted Variant.
  IGNORE_NEW = 1;

  // This operation removes an info field from the incoming Variant
  // and persists this info field in each of the incoming Variant's Calls.
  MOVE_TO_CALLS = 2;
}
@ -0,0 +1,586 @@ |
|||||||
|
// Copyright 2016 Google Inc. |
||||||
|
// |
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
// you may not use this file except in compliance with the License. |
||||||
|
// You may obtain a copy of the License at |
||||||
|
// |
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
// |
||||||
|
// Unless required by applicable law or agreed to in writing, software |
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
// See the License for the specific language governing permissions and |
||||||
|
// limitations under the License. |
||||||
|
|
||||||
|
syntax = "proto3"; |
||||||
|
|
||||||
|
package google.genomics.v1alpha2; |
||||||
|
|
||||||
|
import "google/api/annotations.proto"; |
||||||
|
import "google/longrunning/operations.proto"; |
||||||
|
import "google/protobuf/empty.proto"; |
||||||
|
import "google/protobuf/timestamp.proto"; |
||||||
|
import "google/rpc/code.proto"; |
||||||
|
|
||||||
|
option cc_enable_arenas = true; |
||||||
|
option java_multiple_files = true; |
||||||
|
option java_outer_classname = "PipelinesProto"; |
||||||
|
option java_package = "com.google.genomics.v1a"; |
||||||
|
|
||||||
|
|
||||||
|
// A service for running genomics pipelines.
service PipelinesV1Alpha2 {
  // Creates a pipeline that can be run later. Create takes a Pipeline that
  // has all fields other than `pipelineId` populated, and then returns
  // the same pipeline with `pipelineId` populated. This id can be used
  // to run the pipeline.
  //
  // Caller must have WRITE permission to the project.
  rpc CreatePipeline(CreatePipelineRequest) returns (Pipeline) {
    option (google.api.http) = {
      post: "/v1alpha2/pipelines"
      body: "pipeline"
    };
  }

  // Runs a pipeline. If `pipelineId` is specified in the request, then
  // run a saved pipeline. If `ephemeralPipeline` is specified, then run
  // that pipeline once without saving a copy.
  //
  // The caller must have READ permission to the project where the pipeline
  // is stored and WRITE permission to the project where the pipeline will be
  // run, as VMs will be created and storage will be used.
  rpc RunPipeline(RunPipelineRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1alpha2/pipelines:run"
      body: "*"
    };
  }

  // Retrieves a pipeline based on ID.
  //
  // Caller must have READ permission to the project.
  rpc GetPipeline(GetPipelineRequest) returns (Pipeline) {
    option (google.api.http) = {
      get: "/v1alpha2/pipelines/{pipeline_id}"
    };
  }

  // Lists pipelines.
  //
  // Caller must have READ permission to the project.
  rpc ListPipelines(ListPipelinesRequest) returns (ListPipelinesResponse) {
    option (google.api.http) = {
      get: "/v1alpha2/pipelines"
    };
  }

  // Deletes a pipeline based on ID.
  //
  // Caller must have WRITE permission to the project.
  rpc DeletePipeline(DeletePipelineRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1alpha2/pipelines/{pipeline_id}"
    };
  }

  // Gets controller configuration information. Should only be called
  // by VMs created by the Pipelines Service and not by end users.
  rpc GetControllerConfig(GetControllerConfigRequest)
      returns (ControllerConfig) {
    option (google.api.http) = {
      get: "/v1alpha2/pipelines:getControllerConfig"
    };
  }

  // Sets status of a given operation. All timestamps are sent on each
  // call, and the whole series of events is replaced, in case
  // intermediate calls are lost. Should only be called by VMs created
  // by the Pipelines Service and not by end users.
  rpc SetOperationStatus(SetOperationStatusRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      put: "/v1alpha2/pipelines:setOperationStatus"
      body: "*"
    };
  }
}
||||||
|
|
||||||
|
// Describes a GCE resource that is being managed by a running
// [pipeline][google.genomics.v1alpha2.Pipeline].
message GCE {
  // The instance on which the operation is running.
  string instance_name = 1;

  // The availability zone in which the instance resides.
  string zone = 2;

  // The machine type of the instance.
  string machine_type = 3;

  // The names of the disks that were created for this pipeline.
  repeated string disk_names = 4;
}
||||||
|
|
||||||
|
// Runtime metadata that will be populated in the
// [runtimeMetadata][google.genomics.v1.OperationMetadata.runtime_metadata]
// field of the Operation associated with a RunPipeline execution.
message RuntimeMetadata {
  // Execution information specific to Google Compute Engine.
  GCE gce = 1;
}
||||||
|
|
||||||
|
// The pipeline object. Represents a transformation from a set of input
// parameters to a set of output parameters. The transformation is defined
// as a docker image and command to run within that image. Each pipeline
// is run on a Google Compute Engine VM. A pipeline can be created with the
// `create` method and then later run with the `run` method, or a pipeline can
// be defined and run all at once with the `run` method.
message Pipeline {
  // Required. The project in which to create the pipeline. The caller must have
  // WRITE access.
  string project_id = 1;

  // Required. A user specified pipeline name that does not have to be unique.
  // This name can be used for filtering Pipelines in ListPipelines.
  string name = 2;

  // User-specified description.
  string description = 3;

  // Input parameters of the pipeline.
  repeated PipelineParameter input_parameters = 8;

  // Output parameters of the pipeline.
  repeated PipelineParameter output_parameters = 9;

  // Required. The executor indicates in which environment the pipeline runs.
  oneof executor {
    // Specifies the docker run information.
    DockerExecutor docker = 5;
  }

  // Required. Specifies resource requirements for the pipeline run.
  // Required fields:
  //
  // *
  // [minimumCpuCores][google.genomics.v1alpha2.PipelineResources.minimum_cpu_cores]
  //
  // *
  // [minimumRamGb][google.genomics.v1alpha2.PipelineResources.minimum_ram_gb]
  PipelineResources resources = 6;

  // Unique pipeline id that is generated by the service when CreatePipeline
  // is called. Cannot be specified in the Pipeline used in the
  // CreatePipelineRequest, and will be populated in the response to
  // CreatePipeline and all subsequent Get and List calls. Indicates that the
  // service has registered this pipeline.
  string pipeline_id = 7;
}
||||||
|
|
||||||
|
// The request to create a pipeline. The pipeline field here should not have
// `pipelineId` populated, as that will be populated by the server.
message CreatePipelineRequest {
  // The pipeline to create. Should not have `pipelineId` populated.
  Pipeline pipeline = 1;
}
||||||
|
|
||||||
|
// The pipeline run arguments.
message RunPipelineArgs {
  // Required. The project in which to run the pipeline. The caller must have
  // WRITER access to all Google Cloud services and resources (e.g. Google
  // Compute Engine) that will be used.
  string project_id = 1;

  // Pipeline input arguments; keys are defined in the pipeline documentation.
  // All input parameters that do not have default values must be specified.
  // If parameters with defaults are specified here, the defaults will be
  // overridden.
  map<string, string> inputs = 2;

  // Pipeline output arguments; keys are defined in the pipeline
  // documentation. All output parameters without default values
  // must be specified. If parameters with defaults are specified
  // here, the defaults will be overridden.
  map<string, string> outputs = 3;

  // The Google Cloud Service Account that will be used to access data and
  // services. By default, the compute service account associated with
  // `projectId` is used.
  ServiceAccount service_account = 4;

  // Client-specified pipeline operation identifier.
  string client_id = 5;

  // Specifies resource requirements/overrides for the pipeline run.
  PipelineResources resources = 6;

  // Required. Logging options. Used by the service to communicate results
  // to the user.
  LoggingOptions logging = 7;
}
||||||
|
|
||||||
|
// The request to run a pipeline. If `pipelineId` is specified, it
// refers to a saved pipeline created with CreatePipeline and set as
// the `pipelineId` of the returned Pipeline object. If
// `ephemeralPipeline` is specified, that pipeline is run once
// with the given args and not saved. It is an error to specify both
// `pipelineId` and `ephemeralPipeline`. `pipelineArgs`
// must be specified.
message RunPipelineRequest {
  // Selects the pipeline to run: either a saved pipeline referenced by id or
  // an ephemeral pipeline definition. Only one of the two may be set.
  oneof pipeline {
    // The already created pipeline to run.
    string pipeline_id = 1;

    // A new pipeline object to run once and then delete.
    Pipeline ephemeral_pipeline = 2;
  }

  // The arguments to use when running this pipeline.
  RunPipelineArgs pipeline_args = 3;
}
||||||
|
|
||||||
|
// A request to get a saved pipeline by id.
message GetPipelineRequest {
  // The id of the saved pipeline to get. Caller must have READ access to the
  // project in which this pipeline is defined.
  string pipeline_id = 1;
}
||||||
|
|
||||||
|
// A request to list pipelines in a given project. Pipelines can be
// filtered by name using `namePrefix`: all pipelines with names that
// begin with `namePrefix` will be returned. Uses standard pagination:
// `pageSize` indicates how many pipelines to return, and
// `pageToken` comes from a previous ListPipelinesResponse to
// indicate offset.
message ListPipelinesRequest {
  // Required. The name of the project to search for pipelines. Caller
  // must have READ access to this project.
  string project_id = 1;

  // Pipelines with names that match this prefix should be
  // returned. If unspecified, all pipelines in the project, up to
  // `pageSize`, will be returned.
  string name_prefix = 2;

  // Number of pipelines to return at once. Defaults to 256, and max
  // is 2048.
  int32 page_size = 3;

  // Token to use to indicate where to start getting results.
  // If unspecified, returns the first page of results.
  string page_token = 4;
}
||||||
|
|
||||||
|
// The response of ListPipelines. Contains at most `pageSize`
// pipelines. If it contains `pageSize` pipelines, and more pipelines
// exist, then `nextPageToken` will be populated and should be
// used as the `pageToken` argument to a subsequent ListPipelines
// request.
message ListPipelinesResponse {
  // The matched pipelines.
  repeated Pipeline pipelines = 1;

  // The token to use to get the next page of results.
  string next_page_token = 2;
}
||||||
|
|
||||||
|
// The request to delete a saved pipeline by ID.
message DeletePipelineRequest {
  // The id of the saved pipeline to delete. Caller must have WRITE access to
  // the project in which this pipeline is defined.
  string pipeline_id = 1;
}
||||||
|
|
||||||
|
// Request to get controller configuration. Should only be used
// by VMs created by the Pipelines Service and not by end users.
message GetControllerConfigRequest {
  // The operation to retrieve controller configuration for.
  string operation_id = 1;

  // NOTE(review): undocumented in the original definition. By its name this
  // is presumably a token the service uses to validate the calling VM -
  // confirm against the service implementation.
  uint64 validation_token = 2;
}
||||||
|
|
||||||
|
// Stores the information that the controller will fetch from the
// server in order to run. Should only be used by VMs created by the
// Pipelines Service and not by end users.
//
// NOTE(review): none of the fields of this message are documented in the
// original definition. The per-field notes below are inferred from the field
// names and types only and must be confirmed against the service
// implementation.
message ControllerConfig {
  // A list of strings wrapped in a message so that it can be used as a map
  // value (map values cannot be `repeated`).
  message RepeatedString {
    // The wrapped string values.
    repeated string values = 1;
  }

  // Presumably the Docker image the controller runs - TODO confirm.
  string image = 1;

  // Presumably the command to execute - TODO confirm.
  string cmd = 2;

  // Presumably the Google Cloud Storage path that logs are written to -
  // TODO confirm.
  string gcs_log_path = 3;

  // Presumably the Google Compute Engine machine type of the VM - TODO
  // confirm.
  string machine_type = 4;

  // String-to-string map; presumably variables made available to the
  // command - TODO confirm keys and values.
  map<string, string> vars = 5;

  // String-to-string map; presumably describes the disks attached to the VM -
  // TODO confirm keys and values.
  map<string, string> disks = 6;

  // Maps a string key to a list of strings; presumably the Google Cloud
  // Storage sources to copy from - TODO confirm keys and values.
  map<string, RepeatedString> gcs_sources = 7;

  // Maps a string key to a list of strings; presumably the Google Cloud
  // Storage sinks to copy to - TODO confirm keys and values.
  map<string, RepeatedString> gcs_sinks = 8;
}
||||||
|
|
||||||
|
// Stores the list of events and times they occurred for major events in job
// execution.
message TimestampEvent {
  // String indicating the type of event.
  string description = 1;

  // The time this event occurred.
  google.protobuf.Timestamp timestamp = 2;
}
||||||
|
|
||||||
|
// Request to set operation status. Should only be used by VMs
// created by the Pipelines Service and not by end users.
//
// NOTE(review): none of the fields of this message are documented in the
// original definition. The per-field notes below are inferred from the field
// names and types only and must be confirmed against the service
// implementation.
message SetOperationStatusRequest {
  // Presumably the id of the operation whose status is being set - TODO
  // confirm.
  string operation_id = 1;

  // Events, each with a description and a timestamp (see `TimestampEvent`);
  // presumably the events recorded for this operation - TODO confirm.
  repeated TimestampEvent timestamp_events = 2;

  // Presumably the error code to record for the operation - TODO confirm,
  // including which value denotes success.
  google.rpc.Code error_code = 3;

  // Presumably a human-readable message accompanying `errorCode` - TODO
  // confirm.
  string error_message = 4;

  // Presumably a token the service uses to validate the calling VM - TODO
  // confirm.
  uint64 validation_token = 5;
}
||||||
|
|
||||||
|
// A Google Cloud Service Account.
message ServiceAccount {
  // Email address of the service account. Defaults to `default`,
  // which uses the compute service account associated with the project.
  string email = 1;

  // List of scopes to be enabled for this service account on the
  // pipeline virtual machine.
  // The following scopes are automatically included:
  // * https://www.googleapis.com/auth/genomics
  // * https://www.googleapis.com/auth/compute
  // * https://www.googleapis.com/auth/devstorage.full_control
  repeated string scopes = 2;
}
||||||
|
|
||||||
|
// The logging options for the pipeline run.
message LoggingOptions {
  // The location in Google Cloud Storage to which the pipeline logs
  // will be copied. Can be specified as a fully qualified directory
  // path, in which case logs will be output with a unique identifier
  // as the filename in that directory, or as a fully specified path,
  // which must end in `.log`, in which case that path will be
  // used, and the user must ensure that logs are not
  // overwritten. Stdout and stderr logs from the run are also
  // generated and output as `-stdout.log` and `-stderr.log`.
  string gcs_path = 1;
}
||||||
|
|
||||||
|
// The system resources for the pipeline run.
message PipelineResources {
  // A Google Compute Engine disk resource specification.
  message Disk {
    // The types of disks that may be attached to VMs.
    enum Type {
      // Default disk type. Use one of the other options below.
      TYPE_UNSPECIFIED = 0;

      // Specifies a Google Compute Engine persistent hard disk. See
      // https://cloud.google.com/compute/docs/disks/persistent-disks#typeofdisks
      // for details.
      PERSISTENT_HDD = 1;

      // Specifies a Google Compute Engine persistent solid-state disk. See
      // https://cloud.google.com/compute/docs/disks/persistent-disks#typeofdisks
      // for details.
      PERSISTENT_SSD = 2;

      // Specifies a Google Compute Engine local SSD.
      // See https://cloud.google.com/compute/docs/disks/local-ssd for details.
      LOCAL_SSD = 3;
    }

    // Required. The name of the disk that can be used in the pipeline
    // parameters. Must be 1 - 63 characters.
    // The name "boot" is reserved for system use.
    string name = 1;

    // Required. The type of the disk to create.
    Type type = 2;

    // The size of the disk. Defaults to 500 (GB).
    // This field is not applicable for local SSD.
    int32 size_gb = 3;

    // The full or partial URL of the persistent disk to attach. See
    // https://cloud.google.com/compute/docs/reference/latest/instances#resource
    // and
    // https://cloud.google.com/compute/docs/disks/persistent-disks#snapshots
    // for more details.
    string source = 4;

    // NOTE(review): field number 5 is not assigned in this message. If it
    // belonged to a removed field, declare `reserved 5;` so that it cannot be
    // reused - confirm against the schema history.

    // Specifies whether or not to delete the disk when the pipeline
    // completes. This field is applicable only for newly created disks. See
    // https://cloud.google.com/compute/docs/reference/latest/instances#resource
    // for more details.
    // By default, `autoDelete` is `false`. `autoDelete` will be enabled if set
    // to `true` at create time or run time.
    bool auto_delete = 6;

    // Specifies how a source-based persistent disk will be mounted. See
    // https://cloud.google.com/compute/docs/disks/persistent-disks#use_multi_instances
    // for more details.
    // Can only be set at create time.
    bool read_only = 7;

    // Required at create time and cannot be overridden at run time.
    // Specifies the path in the docker container where files on
    // this disk should be located. For example, if `mountPoint`
    // is `/mnt/disk`, and the parameter has `localPath`
    // `inputs/file.txt`, the docker container can access the data at
    // `/mnt/disk/inputs/file.txt`.
    string mount_point = 8;
  }

  // The minimum number of cores to use. Defaults to 1.
  int32 minimum_cpu_cores = 1;

  // At create time, indicates that preemptible machines may be
  // used for the run. At run time, indicates that they should be used.
  // Cannot be true at run time if false at create time.
  // Defaults to `false`.
  bool preemptible = 2;

  // The minimum amount of RAM to use. Defaults to 3.75 (GB).
  double minimum_ram_gb = 3;

  // Disks to attach.
  repeated Disk disks = 4;

  // List of Google Compute Engine availability zones to which resource
  // creation will be restricted. If empty, any zone may be chosen.
  repeated string zones = 5;

  // The size of the boot disk. Defaults to 10 (GB).
  int32 boot_disk_size_gb = 6;
}
||||||
|
|
||||||
|
// Parameters facilitate setting and delivering data into the
// pipeline's execution environment. They are defined at create time,
// with optional defaults, and can be overridden at run time.
//
// If `localCopy` is unset, then the parameter specifies a string that
// is passed as-is into the pipeline, as the value of the environment
// variable with the given name. A default value can be optionally
// specified at create time. The default can be overridden at run time
// using the inputs map. If no default is given, a value must be
// supplied at runtime.
//
// If `localCopy` is defined, then the parameter specifies a data
// source or sink, both in Google Cloud Storage and on the Docker container
// where the pipeline computation is run. The [service account associated with
// the Pipeline][google.genomics.v1alpha2.RunPipelineArgs.service_account] (by
// default the project's Compute Engine service account) must have access to the
// Google Cloud Storage paths.
//
// At run time, the Google Cloud Storage paths can be overridden if a default
// was provided at create time, or must be set otherwise. The pipeline runner
// should add a key/value pair to either the inputs or outputs map. The
// indicated data copies will be carried out before/after pipeline execution,
// just as if the corresponding arguments were provided to `gsutil cp`.
//
// For example: Given the following `PipelineParameter`, specified
// in the `inputParameters` list:
//
// ```
// {name: "input_file", localCopy: {path: "file.txt", disk: "pd1"}}
// ```
//
// where `disk` is defined in the `PipelineResources` object as:
//
// ```
// {name: "pd1", mountPoint: "/mnt/disk/"}
// ```
//
// We create a disk named `pd1`, mount it on the host VM, and map
// `/mnt/pd1` to `/mnt/disk` in the docker container. At
// runtime, an entry for `input_file` would be required in the inputs
// map, such as:
//
// ```
//   inputs["input_file"] = "gs://my-bucket/bar.txt"
// ```
//
// This would generate the following gsutil call:
//
// ```
//   gsutil cp gs://my-bucket/bar.txt /mnt/pd1/file.txt
// ```
//
// The file `/mnt/pd1/file.txt` maps to `/mnt/disk/file.txt` in the
// Docker container. Acceptable paths are:
//
// <table>
//   <thead>
//     <tr><th>Google Cloud Storage path</th><th>Local path</th></tr>
//   </thead>
//   <tbody>
//     <tr><td>file</td><td>file</td></tr>
//     <tr><td>glob</td><td>directory</td></tr>
//   </tbody>
// </table>
//
// For outputs, the direction of the copy is reversed:
//
// ```
//   gsutil cp /mnt/disk/file.txt gs://my-bucket/bar.txt
// ```
//
// Acceptable paths are:
//
// <table>
//   <thead>
//     <tr><th>Local path</th><th>Google Cloud Storage path</th></tr>
//   </thead>
//   <tbody>
//     <tr><td>file</td><td>file</td></tr>
//     <tr>
//       <td>file</td>
//       <td>directory - directory must already exist</td>
//     </tr>
//     <tr>
//       <td>glob</td>
//       <td>directory - directory will be created if it doesn't exist</td>
//     </tr>
//   </tbody>
// </table>
//
// One restriction, due to docker limitations, is that for outputs that are
// found on the boot disk, the local path cannot be a glob and must be a file.
message PipelineParameter {
  // LocalCopy defines how a remote file should be copied to and from the VM.
  message LocalCopy {
    // Required. The path within the user's docker container where
    // this input should be localized to and from, relative to the specified
    // disk's mount point. For example: `file.txt`.
    string path = 1;

    // Required. The name of the disk where this parameter is
    // located. Can be the name of one of the disks specified in the
    // Resources field, or "boot", which represents the Docker
    // instance's boot disk and has a mount point of `/`.
    string disk = 2;
  }

  // Required. Name of the parameter - the pipeline runner uses this string
  // as the key to the input and output maps in RunPipeline.
  string name = 1;

  // Human-readable description.
  string description = 2;

  // NOTE(review): field numbers 3 and 4 are not assigned in this message. If
  // they belonged to removed fields, declare `reserved 3, 4;` so that they
  // cannot be reused - confirm against the schema history.

  // The default value for this parameter. Can be overridden at runtime.
  // If `localCopy` is present, then this must be a Google Cloud Storage path
  // beginning with `gs://`.
  string default_value = 5;

  // If present, this parameter is marked for copying to and from the VM.
  // `LocalCopy` indicates where on the VM the file should be. The value
  // given to this parameter (either at runtime or using `defaultValue`)
  // must be the remote path where the file should be.
  LocalCopy local_copy = 6;
}
||||||
|
|
||||||
|
// The Docker executor specification.
message DockerExecutor {
  // Required. Image name from either Docker Hub or Google Container Repository.
  // Users that run pipelines must have READ access to the image.
  string image_name = 1;

  // Required. The command string to run. Parameters that do not have
  // `localCopy` specified should be used as environment variables, while
  // those that do can be accessed at the defined paths.
  string cmd = 2;
}
Loading…
Reference in new issue