import protos for google genomics API.

9 years ago · 7e973fab0e
parent b042947963
commit 7e973fab0e
14 changed files with 3741 additions and 0 deletions
--- a/google/genomics/README.md
+++ b/google/genomics/README.md
@ -0,0 +1,14 @@
 Stores, processes, explores and shares genomic data. This API implements
 the Global Alliance for Genomics and Health (GA4GH) v0.5.1 API as well as
 several extensions.
 The Google Genomics API supports access via both
 [JSON/REST](https://cloud.google.com/genomics/reference/rest) and
 [gRPC](https://cloud.google.com/genomics/reference/rpc). JSON/REST is more
 broadly available and is easier for getting started with Google Genomics; it
 works well for small metadata resources (datasets, variant sets, read group
 sets) and for browsing small genomic regions for datasets of any size. For
 performant bulk data access (reads and variants), use gRPC.
 See also an [overview of genomic resources](https://cloud.google.com/genomics/v1/users-guide)
 and an overview of [Genomics on Google Cloud](https://cloud.google.com/genomics/overview).
--- a/google/genomics/v1/annotations.proto
+++ b/google/genomics/v1/annotations.proto
@ -0,0 +1,662 @@
 // Copyright 2016 Google Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 syntax = "proto3";
 package google.genomics.v1;
 import "google/api/annotations.proto";
 import "google/protobuf/empty.proto";
 import "google/protobuf/field_mask.proto";
 import "google/protobuf/struct.proto";
 import "google/protobuf/wrappers.proto";
 import "google/rpc/status.proto";
 option cc_enable_arenas = true;
 option java_multiple_files = true;
 option java_outer_classname = "AnnotationsProto";
 option java_package = "com.google.genomics.v1";
 // Stores genomic reference annotations (variant annotations included) and
 // retrieves them by position.
 service AnnotationServiceV1 {
  // Creates a new annotation set. The caller needs WRITE permission on the
  // associated dataset.
  //
  // Required fields:
  //
  // * [datasetId][google.genomics.v1.AnnotationSet.dataset_id]
  // * [referenceSetId][google.genomics.v1.AnnotationSet.reference_set_id]
  //
  // Every other field is optional, except those documented as being
  // server-generated (for example, the `id` field).
  rpc CreateAnnotationSet(CreateAnnotationSetRequest) returns (AnnotationSet) {
    option (google.api.http) = {
      post: "/v1/annotationsets"
      body: "annotation_set"
    };
  }
  // Gets an annotation set. The caller needs READ permission on the
  // associated dataset.
  rpc GetAnnotationSet(GetAnnotationSetRequest) returns (AnnotationSet) {
    option (google.api.http) = {
      get: "/v1/annotationsets/{annotation_set_id}"
    };
  }
  // Updates an annotation set. The update must honour every mutability
  // restriction and other invariant described on the annotation set resource.
  // The caller needs WRITE permission on the associated dataset.
  rpc UpdateAnnotationSet(UpdateAnnotationSetRequest) returns (AnnotationSet) {
    option (google.api.http) = {
      put: "/v1/annotationsets/{annotation_set_id}"
      body: "annotation_set"
    };
  }
  // Deletes an annotation set. The caller needs WRITE permission on the
  // associated annotation set.
  rpc DeleteAnnotationSet(DeleteAnnotationSetRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/annotationsets/{annotation_set_id}"
    };
  }
  // Searches for annotation sets matching the given criteria. The order of
  // the returned annotation sets is unspecified but consistent: two queries
  // for the same content (regardless of page size) yield annotation sets in
  // the same order across their respective streams of paginated responses.
  // The caller needs READ permission on the queried datasets.
  rpc SearchAnnotationSets(SearchAnnotationSetsRequest) returns (SearchAnnotationSetsResponse) {
    option (google.api.http) = {
      post: "/v1/annotationsets/search"
      body: "*"
    };
  }
  // Creates a new annotation. The caller needs WRITE permission on the
  // associated annotation set.
  //
  // Required fields:
  //
  // * [annotationSetId][google.genomics.v1.Annotation.annotation_set_id]
  // * [referenceName][google.genomics.v1.Annotation.reference_name] or
  //   [referenceId][google.genomics.v1.Annotation.reference_id]
  //
  // ### Transcripts
  //
  // Annotations of type TRANSCRIPT must additionally provide these fields of
  // [transcript][google.genomics.v1.Annotation.transcript]:
  //
  // * [exons.start][google.genomics.v1.Transcript.Exon.start]
  // * [exons.end][google.genomics.v1.Transcript.Exon.end]
  //
  // Every other field is optional, except those documented as being
  // server-generated (for example, the `id` field). The annotated range must
  // be no longer than 100Mbp (mega base pairs). The
  // [Annotation resource][google.genomics.v1.Annotation] lists additional
  // restrictions on each field.
  rpc CreateAnnotation(CreateAnnotationRequest) returns (Annotation) {
    option (google.api.http) = {
      post: "/v1/annotations"
      body: "annotation"
    };
  }
  // Atomically creates one or more new annotations, all of which must belong
  // to the same annotation set. The caller needs WRITE permission on that
  // annotation set. For optimal performance, batch positionally adjacent
  // annotations together.
  //
  // A systemic problem with the request, such as an attempt to write to an
  // inaccessible annotation set, fails the entire RPC accordingly. Lesser
  // data issues are, when possible, isolated to the corresponding batch entry
  // in the response, and the remaining well formed annotations are created
  // normally.
  //
  // The requirements for each individual annotation resource are described on
  // [CreateAnnotation][google.genomics.v1.AnnotationServiceV1.CreateAnnotation].
  rpc BatchCreateAnnotations(BatchCreateAnnotationsRequest) returns (BatchCreateAnnotationsResponse) {
    option (google.api.http) = {
      post: "/v1/annotations:batchCreate"
      body: "*"
    };
  }
  // Gets an annotation. The caller needs READ permission on the associated
  // annotation set.
  rpc GetAnnotation(GetAnnotationRequest) returns (Annotation) {
    option (google.api.http) = {
      get: "/v1/annotations/{annotation_id}"
    };
  }
  // Updates an annotation. The caller needs WRITE permission on the
  // associated dataset.
  rpc UpdateAnnotation(UpdateAnnotationRequest) returns (Annotation) {
    option (google.api.http) = {
      put: "/v1/annotations/{annotation_id}"
      body: "annotation"
    };
  }
  // Deletes an annotation. The caller needs WRITE permission on the
  // associated annotation set.
  rpc DeleteAnnotation(DeleteAnnotationRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/annotations/{annotation_id}"
    };
  }
  // Searches for annotations matching the given criteria. Results are sorted
  // by genomic coordinate (by reference sequence, then position). The order
  // among annotations with equivalent genomic coordinates is unspecified but
  // consistent: two queries for the same content (regardless of page size)
  // yield annotations in the same order across their respective streams of
  // paginated responses. The caller needs READ permission on the queried
  // annotation sets.
  rpc SearchAnnotations(SearchAnnotationsRequest) returns (SearchAnnotationsResponse) {
    option (google.api.http) = {
      post: "/v1/annotations/search"
      body: "*"
    };
  }
 }
 // An annotation set is a logical grouping of annotations that share consistent
 // type information and provenance. Examples of annotation sets include 'all
 // genes from refseq', and 'all variant annotations from ClinVar'.
 message AnnotationSet {
  // The server-generated annotation set ID, unique across all annotation sets.
  string id = 1;
  // The ID of the dataset to which this annotation set belongs. Required when
  // the annotation set is created.
  string dataset_id = 2;
  // The ID of the reference set that defines the coordinate space for this
  // set's annotations. Required when the annotation set is created.
  string reference_set_id = 3;
  // The display name for this annotation set. Mutable after creation.
  string name = 4;
  // The source URI describing the file from which this annotation set was
  // generated, if any. Mutable after creation.
  string source_uri = 5;
  // The type of annotations contained within this set.
  AnnotationType type = 6;
  // A map of additional annotation set information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  // Mutable after creation.
  map<string, google.protobuf.ListValue> info = 17;
 }
 // An annotation describes a region of reference genome. The value of an
 // annotation may be one of several canonical types, supplemented by arbitrary
 // info tags. An annotation is not inherently associated with a specific
 // sample or individual (though a client could choose to use annotations in
 // this way). Example canonical annotation types are `GENE` and
 // `VARIANT`.
 message Annotation {
  // The server-generated annotation ID, unique across all annotations.
  string id = 1;
  // The ID of the annotation set to which this annotation belongs. Required
  // when the annotation is created.
  string annotation_set_id = 2;
  // The display name of this annotation.
  string name = 3;
  // The ID of the Google Genomics reference associated with this range.
  // Either this field or `reference_name` is required when the annotation is
  // created.
  string reference_id = 4;
  // The display name corresponding to the reference specified by
  // `referenceId`, for example `chr1`, `1`, or `chrX`.
  string reference_name = 5;
  // The start position of the range on the reference, 0-based inclusive.
  int64 start = 6;
  // The end position of the range on the reference, 0-based exclusive.
  int64 end = 7;
  // Whether this range refers to the reverse strand, as opposed to the forward
  // strand. Note that regardless of this field, the start/end position of the
  // range always refer to the forward strand.
  bool reverse_strand = 8;
  // The data type for this annotation. Must match the containing annotation
  // set's type.
  AnnotationType type = 9;
  // The type-specific value of this annotation. Being a oneof, at most one of
  // the members below is set at any time.
  oneof value {
    // A variant annotation, which describes the effect of a variant on the
    // genome, the coding sequence, and/or higher level consequences at the
    // organism level e.g. pathogenicity. This field is only set for annotations
    // of type `VARIANT`.
    VariantAnnotation variant = 10;
    // A transcript value represents the assertion that a particular region of
    // the reference genome may be transcribed as RNA. An alternative splicing
    // pattern would be represented as a separate transcript object. This field
    // is only set for annotations of type `TRANSCRIPT`.
    Transcript transcript = 11;
  }
  // A map of additional annotation information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 12;
 }
 // The value of an annotation of type `VARIANT`: the variant's type, its
 // effect on the coding sequence, the gene and transcripts it affects, and its
 // associated clinical conditions and clinical significance.
 message VariantAnnotation {
  // A condition associated with a variant. A condition describes the way a
  // variant influences human health.
  message ClinicalCondition {
    // A set of names for the condition.
    repeated string names = 1;
    // The set of external IDs for this condition.
    repeated ExternalId external_ids = 2;
    // The MedGen concept id associated with this gene.
    // Search for these IDs at http://www.ncbi.nlm.nih.gov/medgen/
    // NOTE(review): the text says "gene", but this field belongs to
    // ClinicalCondition; presumably "condition" is meant -- confirm.
    string concept_id = 3;
    // The OMIM id for this condition.
    // Search for these IDs at http://omim.org/
    string omim_id = 4;
  }
  // The kind of variant being annotated.
  enum Type {
    // The zero (default) value; no variant type was specified.
    TYPE_UNSPECIFIED = 0;
    // `TYPE_OTHER` should be used when no other Type will suffice.
    // Further explanation of the variant type may be included in the
    // [info][google.genomics.v1.Annotation.info] field.
    TYPE_OTHER = 1;
    // `INSERTION` indicates an insertion.
    INSERTION = 2;
    // `DELETION` indicates a deletion.
    DELETION = 3;
    // `SUBSTITUTION` indicates a block substitution of
    // two or more nucleotides.
    SUBSTITUTION = 4;
    // `SNP` indicates a single nucleotide polymorphism.
    SNP = 5;
    // `STRUCTURAL` indicates a large structural variant,
    // including chromosomal fusions, inversions, etc.
    STRUCTURAL = 6;
    // `CNV` indicates a variation in copy number.
    CNV = 7;
  }
  // The effect of a variant on the coding sequence.
  enum Effect {
    // The zero (default) value; no effect was specified.
    EFFECT_UNSPECIFIED = 0;
    // `EFFECT_OTHER` should be used when no other Effect
    // will suffice.
    EFFECT_OTHER = 1;
    // `FRAMESHIFT` indicates a mutation in which the insertion or
    // deletion of nucleotides resulted in a frameshift change.
    FRAMESHIFT = 2;
    // `FRAME_PRESERVING_INDEL` indicates a mutation in which a
    // multiple of three nucleotides has been inserted or deleted, resulting
    // in no change to the reading frame of the coding sequence.
    FRAME_PRESERVING_INDEL = 3;
    // `SYNONYMOUS_SNP` indicates a single nucleotide polymorphism
    // mutation that results in no amino acid change.
    SYNONYMOUS_SNP = 4;
    // `NONSYNONYMOUS_SNP` indicates a single nucleotide
    // polymorphism mutation that results in an amino acid change.
    NONSYNONYMOUS_SNP = 5;
    // `STOP_GAIN` indicates a mutation that leads to the creation
    // of a stop codon at the variant site. Frameshift mutations creating
    // downstream stop codons do not count as `STOP_GAIN`.
    STOP_GAIN = 6;
    // `STOP_LOSS` indicates a mutation that eliminates a
    // stop codon at the variant site.
    STOP_LOSS = 7;
    // `SPLICE_SITE_DISRUPTION` indicates that this variant is
    // found in a splice site for the associated transcript, and alters the
    // normal splicing pattern.
    SPLICE_SITE_DISRUPTION = 8;
  }
  // The clinical significance of a variant. Values without an individual
  // comment are not described further in this file.
  // NOTE(review): presumably they mirror the ClinVar clinical significance
  // terms referenced on the `clinical_significance` field -- confirm.
  enum ClinicalSignificance {
    // The zero (default) value; no clinical significance was specified.
    CLINICAL_SIGNIFICANCE_UNSPECIFIED = 0;
    // `CLINICAL_SIGNIFICANCE_OTHER` should be used when no other clinical
    // significance value will suffice.
    CLINICAL_SIGNIFICANCE_OTHER = 1;
    UNCERTAIN = 2;
    BENIGN = 3;
    LIKELY_BENIGN = 4;
    LIKELY_PATHOGENIC = 5;
    PATHOGENIC = 6;
    DRUG_RESPONSE = 7;
    HISTOCOMPATIBILITY = 8;
    CONFERS_SENSITIVITY = 9;
    RISK_FACTOR = 10;
    ASSOCIATION = 11;
    PROTECTIVE = 12;
    // `MULTIPLE_REPORTED` should be used when multiple clinical
    // significances are reported for a variant. The original clinical
    // significance values may be provided in the `info` field.
    MULTIPLE_REPORTED = 13;
  }
  // Type has been adapted from ClinVar's list of variant types.
  Type type = 1;
  // Effect of the variant on the coding sequence.
  Effect effect = 2;
  // The alternate allele for this variant. If multiple alternate alleles
  // exist at this location, create a separate variant for each one, as they
  // may represent distinct conditions.
  string alternate_bases = 3;
  // Google annotation ID of the gene affected by this variant. This should
  // be provided when the variant is created.
  string gene_id = 4;
  // Google annotation IDs of the transcripts affected by this variant. These
  // should be provided when the variant is created.
  repeated string transcript_ids = 5;
  // The set of conditions associated with this variant.
  // A condition describes the way a variant influences human health.
  repeated ClinicalCondition conditions = 6;
  // Describes the clinical significance of a variant.
  // It is adapted from the ClinVar controlled vocabulary for clinical
  // significance described at:
  // http://www.ncbi.nlm.nih.gov/clinvar/docs/clinsig/
  ClinicalSignificance clinical_significance = 7;
 }
 // A transcript represents the assertion that a particular region of the
 // reference genome may be transcribed as RNA.
 message Transcript {
  // A single exon of a transcript: a range on the reference sequence plus an
  // optional reading frame offset.
  message Exon {
    // The start position of the exon on this annotation's reference sequence,
    // 0-based inclusive. Note that this is relative to the reference start, and
    // *not* the containing annotation start.
    int64 start = 1;
    // The end position of the exon on this annotation's reference sequence,
    // 0-based exclusive. Note that this is relative to the reference start, and
    // *not* the containing annotation start.
    int64 end = 2;
    // The frame of this exon. Contains a value of 0, 1, or 2, which indicates
    // the offset of the first coding base of the exon within the reading frame
    // of the coding DNA sequence, if any. This field is dependent on the
    // strandedness of this annotation (see
    // [Annotation.reverse_strand][google.genomics.v1.Annotation.reverse_strand]).
    // For forward stranded annotations, this offset is relative to the
    // [exon.start][google.genomics.v1.Transcript.Exon.start]. For reverse
    // strand annotations, this offset is relative to the
    // [exon.end][google.genomics.v1.Transcript.Exon.end] `- 1`.
    //
    // Unset if this exon does not intersect the coding sequence. Upon creation
    // of a transcript, the frame must be populated for all or none of the
    // coding exons.
    //
    // A wrapper message type is used so that an unset frame can be told apart
    // from a frame of 0.
    google.protobuf.Int32Value frame = 3;
  }
  // The range on the reference sequence covered by a transcript's coding
  // sequence.
  message CodingSequence {
    // The start of the coding sequence on this annotation's reference sequence,
    // 0-based inclusive. Note that this position is relative to the reference
    // start, and *not* the containing annotation start.
    int64 start = 1;
    // The end of the coding sequence on this annotation's reference sequence,
    // 0-based exclusive. Note that this position is relative to the reference
    // start, and *not* the containing annotation start.
    int64 end = 2;
  }
  // The annotation ID of the gene from which this transcript is transcribed.
  string gene_id = 1;
  // The <a href="http://en.wikipedia.org/wiki/Exon">exons</a> that compose
  // this transcript. This field should be unset for genomes where transcript
  // splicing does not occur, for example prokaryotes.
  //
  // Introns are regions of the transcript that are not included in the
  // spliced RNA product. Though not explicitly modeled here, intron ranges can
  // be deduced; all regions of this transcript that are not exons are introns.
  //
  // Exonic sequences do not necessarily code for a translational product
  // (amino acids). Only the regions of exons bounded by the
  // [codingSequence][google.genomics.v1.Transcript.coding_sequence] correspond
  // to coding DNA sequence.
  //
  // Exons are ordered by start position and may not overlap.
  repeated Exon exons = 2;
  // The range of the coding sequence for this transcript, if any. To determine
  // the exact ranges of coding sequence, intersect this range with those of the
  // [exons][google.genomics.v1.Transcript.exons], if any. If there are any
  // [exons][google.genomics.v1.Transcript.exons], the
  // [codingSequence][google.genomics.v1.Transcript.coding_sequence] must start
  // and end within them.
  //
  // Note that in some cases, the reference genome will not exactly match the
  // observed mRNA transcript e.g. due to variance in the source genome from
  // reference. In these cases,
  // [exon.frame][google.genomics.v1.Transcript.Exon.frame] will not necessarily
  // match the expected reference reading frame and coding exon reference bases
  // cannot necessarily be concatenated to produce the original transcript mRNA.
  CodingSequence coding_sequence = 3;
 }
 // An identifier assigned by an external data source, together with the name
 // of that source.
 message ExternalId {
  // The name of the source of this data.
  string source_name = 1;
  // The id used by the source of this data.
  string id = 2;
 }
 // Request message for
 // [CreateAnnotationSet][google.genomics.v1.AnnotationServiceV1.CreateAnnotationSet].
 message CreateAnnotationSetRequest {
  // The annotation set to create. Sent as the HTTP request body.
  AnnotationSet annotation_set = 1;
 }
 // Request message for
 // [GetAnnotationSet][google.genomics.v1.AnnotationServiceV1.GetAnnotationSet].
 message GetAnnotationSetRequest {
  // The ID of the annotation set to be retrieved. Taken from the URL path in
  // the HTTP mapping.
  string annotation_set_id = 1;
 }
 // Request message for
 // [UpdateAnnotationSet][google.genomics.v1.AnnotationServiceV1.UpdateAnnotationSet].
 message UpdateAnnotationSetRequest {
  // The ID of the annotation set to be updated. Taken from the URL path in
  // the HTTP mapping.
  string annotation_set_id = 1;
  // The new annotation set. Sent as the HTTP request body.
  AnnotationSet annotation_set = 2;
  // An optional mask specifying which fields to update. Mutable fields are
  // [name][google.genomics.v1.AnnotationSet.name],
  // [source_uri][google.genomics.v1.AnnotationSet.source_uri], and
  // [info][google.genomics.v1.AnnotationSet.info]. If unspecified, all
  // mutable fields will be updated.
  google.protobuf.FieldMask update_mask = 3;
 }
 // Request message for
 // [DeleteAnnotationSet][google.genomics.v1.AnnotationServiceV1.DeleteAnnotationSet].
 message DeleteAnnotationSetRequest {
  // The ID of the annotation set to be deleted. Taken from the URL path in
  // the HTTP mapping.
  string annotation_set_id = 1;
 }
 // Request message for
 // [SearchAnnotationSets][google.genomics.v1.AnnotationServiceV1.SearchAnnotationSets].
 message SearchAnnotationSetsRequest {
  // Required. The dataset IDs to search within. Caller must have `READ` access
  // to these datasets.
  repeated string dataset_ids = 1;
  // If specified, only annotation sets associated with the given reference set
  // are returned.
  string reference_set_id = 2;
  // Only return annotation sets for which a substring of the name matches this
  // string (case insensitive).
  string name = 3;
  // If specified, only annotation sets that have any of these types are
  // returned.
  repeated AnnotationType types = 4;
  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 5;
  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 128. The maximum value is 1024.
  int32 page_size = 6;
 }
 // Response message for
 // [SearchAnnotationSets][google.genomics.v1.AnnotationServiceV1.SearchAnnotationSets].
 message SearchAnnotationSetsResponse {
  // The matching annotation sets.
  repeated AnnotationSet annotation_sets = 1;
  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
 }
 // Request message for
 // [CreateAnnotation][google.genomics.v1.AnnotationServiceV1.CreateAnnotation].
 message CreateAnnotationRequest {
  // The annotation to be created. Sent as the HTTP request body.
  Annotation annotation = 1;
 }
 // Request message for
 // [BatchCreateAnnotations][google.genomics.v1.AnnotationServiceV1.BatchCreateAnnotations].
 message BatchCreateAnnotationsRequest {
  // The annotations to be created. At most 4096 can be specified in a single
  // request.
  repeated Annotation annotations = 1;
 }
 // Response message for
 // [BatchCreateAnnotations][google.genomics.v1.AnnotationServiceV1.BatchCreateAnnotations].
 message BatchCreateAnnotationsResponse {
  // The outcome of creating one annotation of the batch.
  message Entry {
    // The creation status.
    google.rpc.Status status = 1;
    // The created annotation, if creation was successful.
    Annotation annotation = 2;
  }
  // The resulting per-annotation entries, ordered consistently with the
  // original request.
  repeated Entry entries = 1;
 }
 // Request message for
 // [GetAnnotation][google.genomics.v1.AnnotationServiceV1.GetAnnotation].
 message GetAnnotationRequest {
  // The ID of the annotation to be retrieved. Taken from the URL path in the
  // HTTP mapping.
  string annotation_id = 1;
 }
 // Request message for
 // [UpdateAnnotation][google.genomics.v1.AnnotationServiceV1.UpdateAnnotation].
 message UpdateAnnotationRequest {
  // The ID of the annotation to be updated. Taken from the URL path in the
  // HTTP mapping.
  string annotation_id = 1;
  // The new annotation. Sent as the HTTP request body.
  Annotation annotation = 2;
  // An optional mask specifying which fields to update. Mutable fields are
  // [name][google.genomics.v1.Annotation.name],
  // [variant][google.genomics.v1.Annotation.variant],
  // [transcript][google.genomics.v1.Annotation.transcript], and
  // [info][google.genomics.v1.Annotation.info]. If unspecified, all mutable
  // fields will be updated.
  google.protobuf.FieldMask update_mask = 3;
 }
 // Request message for
 // [DeleteAnnotation][google.genomics.v1.AnnotationServiceV1.DeleteAnnotation].
 message DeleteAnnotationRequest {
  // The ID of the annotation to be deleted. Taken from the URL path in the
  // HTTP mapping.
  string annotation_id = 1;
 }
 // Request message for
 // [SearchAnnotations][google.genomics.v1.AnnotationServiceV1.SearchAnnotations].
 message SearchAnnotationsRequest {
  // Required. The annotation sets to search within. The caller must have
  // `READ` access to these annotation sets.
  // All queried annotation sets must have the same type.
  repeated string annotation_set_ids = 1;
  // Required. `reference_id` or `reference_name` must be set.
  oneof reference {
    // The ID of the reference to query.
    string reference_id = 2;
    // The name of the reference to query, within the reference set associated
    // with this query.
    string reference_name = 3;
  }
  // The start position of the range on the reference, 0-based inclusive. If
  // specified,
  // [referenceId][google.genomics.v1.SearchAnnotationsRequest.reference_id] or
  // [referenceName][google.genomics.v1.SearchAnnotationsRequest.reference_name]
  // must be specified. Defaults to 0.
  int64 start = 4;
  // The end position of the range on the reference, 0-based exclusive. If
  // specified,
  // [referenceId][google.genomics.v1.SearchAnnotationsRequest.reference_id] or
  // [referenceName][google.genomics.v1.SearchAnnotationsRequest.reference_name]
  // must be specified. Defaults to the length of the reference.
  int64 end = 5;
  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 6;
  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 256. The maximum value is 2048.
  int32 page_size = 7;
 }
 // Response message for
 // [SearchAnnotations][google.genomics.v1.AnnotationServiceV1.SearchAnnotations].
 message SearchAnnotationsResponse {
  // The matching annotations.
  repeated Annotation annotations = 1;
  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
 }
 // The canonical type of an annotation or annotation set.
 //
 // When an [Annotation][google.genomics.v1.Annotation] or
 // [AnnotationSet][google.genomics.v1.AnnotationSet] is created, if `type` is
 // not specified it will be set to `GENERIC`.
 enum AnnotationType {
  // The zero (default) value; no annotation type was specified.
  ANNOTATION_TYPE_UNSPECIFIED = 0;
  // A `GENERIC` annotation type should be used when no other annotation
  // type will suffice. This represents an untyped annotation of the reference
  // genome.
  GENERIC = 1;
  // A `VARIANT` annotation type.
  VARIANT = 2;
  // A `GENE` annotation type represents the existence of a gene at the
  // associated reference coordinates. The start coordinate is typically the
  // gene's transcription start site and the end is typically the end of the
  // gene's last exon.
  GENE = 3;
  // A `TRANSCRIPT` annotation type represents the assertion that a
  // particular region of the reference genome may be transcribed as RNA.
  TRANSCRIPT = 4;
 }
--- a/google/genomics/v1/cigar.proto
+++ b/google/genomics/v1/cigar.proto
@ -0,0 +1,98 @@
 // Copyright 2016 Google Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 syntax = "proto3";
 package google.genomics.v1;
 import "google/api/annotations.proto";
 option cc_enable_arenas = true;
 option java_multiple_files = true;
 option java_outer_classname = "CigarProto";
 option java_package = "com.google.genomics.v1";
 // A single CIGAR operation: an operation type, the number of bases it runs
 // for, and optionally the reference bases involved.
 message CigarUnit {
  // Describes the different types of CIGAR alignment operations that exist.
  // Used wherever CIGAR alignments are used.
  enum Operation {
    // The zero (default) value; no operation was specified.
    OPERATION_UNSPECIFIED = 0;
    // An alignment match indicates that a sequence can be aligned to the
    // reference without evidence of an INDEL. Unlike the
    // `SEQUENCE_MATCH` and `SEQUENCE_MISMATCH` operators,
    // the `ALIGNMENT_MATCH` operator does not indicate whether the
    // reference and read sequences are an exact match. This operator is
    // equivalent to SAM's `M`.
    ALIGNMENT_MATCH = 1;
    // The insert operator indicates that the read contains evidence of bases
    // being inserted into the reference. This operator is equivalent to SAM's
    // `I`.
    INSERT = 2;
    // The delete operator indicates that the read contains evidence of bases
    // being deleted from the reference. This operator is equivalent to SAM's
    // `D`.
    DELETE = 3;
    // The skip operator indicates that this read skips a long segment of the
    // reference, but the bases have not been deleted. This operator is commonly
    // used when working with RNA-seq data, where reads may skip long segments
    // of the reference between exons. This operator is equivalent to SAM's
    // `N`.
    SKIP = 4;
    // The soft clip operator indicates that bases at the start/end of a read
    // have not been considered during alignment. This may occur if the majority
    // of a read maps, except for low quality bases at the start/end of a read.
    // This operator is equivalent to SAM's `S`. Bases that are soft
    // clipped will still be stored in the read.
    CLIP_SOFT = 5;
    // The hard clip operator indicates that bases at the start/end of a read
    // have been omitted from this alignment. This may occur if this linear
    // alignment is part of a chimeric alignment, or if the read has been
    // trimmed (for example, during error correction or to trim poly-A tails for
    // RNA-seq). This operator is equivalent to SAM's `H`.
    CLIP_HARD = 6;
    // The pad operator indicates that there is padding in an alignment. This
    // operator is equivalent to SAM's `P`.
    PAD = 7;
    // This operator indicates that this portion of the aligned sequence exactly
    // matches the reference. This operator is equivalent to SAM's `=`.
    SEQUENCE_MATCH = 8;
    // This operator indicates that this portion of the aligned sequence is an
    // alignment match to the reference, but a sequence mismatch. This can
    // indicate a SNP or a read error. This operator is equivalent to SAM's
    // `X`.
    SEQUENCE_MISMATCH = 9;
  }
  // The type of this CIGAR operation.
  Operation operation = 1;
  // The number of genomic bases that the operation runs for. Required.
  int64 operation_length = 2;
  // `referenceSequence` is only used at mismatches
  // (`SEQUENCE_MISMATCH`) and deletions (`DELETE`).
  // Filling this field replaces SAM's MD tag. If the relevant information is
  // not available, this field is unset.
  string reference_sequence = 3;
 }
--- a/google/genomics/v1/datasets.proto
+++ b/google/genomics/v1/datasets.proto
@ -0,0 +1,211 @@
 // Copyright 2016 Google Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 syntax = "proto3";
 package google.genomics.v1;
 import "google/api/annotations.proto";
 import "google/iam/v1/iam_policy.proto";
 import "google/iam/v1/policy.proto";
 import "google/protobuf/empty.proto";
 import "google/protobuf/field_mask.proto";
 import "google/protobuf/timestamp.proto";
 option cc_enable_arenas = true;
 option java_multiple_files = true;
 option java_outer_classname = "DatasetsProto";
 option java_package = "com.google.genomics.v1";
 // This service manages datasets, which are collections of genomic data.
 //
 // Besides the dataset lifecycle methods (list, create, get, update, delete,
 // undelete), it exposes the IAM policy methods (`SetIamPolicy`,
 // `GetIamPolicy`, `TestIamPermissions`) for dataset resources.
 service DatasetServiceV1 {
  // Lists datasets within a project.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc ListDatasets(ListDatasetsRequest) returns (ListDatasetsResponse) {
    option (google.api.http) = { get: "/v1/datasets" };
  }
  // Creates a new dataset.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc CreateDataset(CreateDatasetRequest) returns (Dataset) {
    // Only the `dataset` field of the request is carried in the HTTP body.
    option (google.api.http) = { post: "/v1/datasets" body: "dataset" };
  }
  // Gets a dataset by ID.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc GetDataset(GetDatasetRequest) returns (Dataset) {
    option (google.api.http) = { get: "/v1/datasets/{dataset_id}" };
  }
  // Updates a dataset.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // This method supports patch semantics.
  rpc UpdateDataset(UpdateDatasetRequest) returns (Dataset) {
    // `dataset_id` comes from the URL path; the `dataset` field is the body.
    option (google.api.http) = { patch: "/v1/datasets/{dataset_id}" body: "dataset" };
  }
  // Deletes a dataset and all of its contents (all read group sets,
  // reference sets, variant sets, call sets, annotation sets, etc.).
  // This is reversible (up to one week after the deletion) via
  // the
  // [datasets.undelete][google.genomics.v1.DatasetServiceV1.UndeleteDataset]
  // operation.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc DeleteDataset(DeleteDatasetRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = { delete: "/v1/datasets/{dataset_id}" };
  }
  // Undeletes a dataset by restoring a dataset which was deleted via this API.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // This operation is only possible for a week after the deletion occurred.
  rpc UndeleteDataset(UndeleteDatasetRequest) returns (Dataset) {
    option (google.api.http) = { post: "/v1/datasets/{dataset_id}:undelete" body: "*" };
  }
  // Sets the access control policy on the specified dataset. Replaces any
  // existing policy.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // See <a href="/iam/docs/managing-policies#setting_a_policy">Setting a
  // Policy</a> for more information.
  rpc SetIamPolicy(google.iam.v1.SetIamPolicyRequest) returns (google.iam.v1.Policy) {
    // The `resource` path variable must match the pattern `datasets/*`.
    option (google.api.http) = { post: "/v1/{resource=datasets/*}:setIamPolicy" body: "*" };
  }
  // Gets the access control policy for the dataset. This is empty if the
  // policy or resource does not exist.
  //
  // See <a href="/iam/docs/managing-policies#getting_a_policy">Getting a
  // Policy</a> for more information.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc GetIamPolicy(google.iam.v1.GetIamPolicyRequest) returns (google.iam.v1.Policy) {
    // Note: although this is a read, it is bound to POST with the whole
    // request as the body, not to GET.
    option (google.api.http) = { post: "/v1/{resource=datasets/*}:getIamPolicy" body: "*" };
  }
  // Returns permissions that a caller has on the specified resource.
  // See <a href="/iam/docs/managing-policies#testing_permissions">Testing
  // Permissions</a> for more information.
  //
  // For the definitions of datasets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc TestIamPermissions(google.iam.v1.TestIamPermissionsRequest) returns (google.iam.v1.TestIamPermissionsResponse) {
    option (google.api.http) = { post: "/v1/{resource=datasets/*}:testIamPermissions" body: "*" };
  }
 }
 // A Dataset is a collection of genomic data.
 //
 // For more genomics resource definitions, see [Fundamentals of Google
 // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
 message Dataset {
  // The server-generated dataset ID, unique across all datasets.
  string id = 1;
  // The Google Developers Console project ID that this dataset belongs to.
  string project_id = 2;
  // The dataset name.
  string name = 3;
  // The time this dataset was created. This is a structured
  // `google.protobuf.Timestamp`, not a raw integer count of seconds.
  google.protobuf.Timestamp create_time = 4;
 }
 // The dataset list request. Used as the request message of
 // `DatasetServiceV1.ListDatasets`.
 message ListDatasetsRequest {
  // Required. The project to list datasets for.
  string project_id = 1;
  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 50. The maximum value is 1024.
  int32 page_size = 2;
  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 3;
 }
 // The dataset list response. Returned by `DatasetServiceV1.ListDatasets`.
 message ListDatasetsResponse {
  // The list of matching Datasets.
  repeated Dataset datasets = 1;
  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
 }
 // The dataset create request. Used as the request message of
 // `DatasetServiceV1.CreateDataset`.
 message CreateDatasetRequest {
  // The dataset to be created. Must contain projectId and name.
  Dataset dataset = 1;
 }
 // The dataset update request. Used as the request message of
 // `DatasetServiceV1.UpdateDataset`.
 message UpdateDatasetRequest {
  // The ID of the dataset to be updated.
  string dataset_id = 1;
  // The new dataset data.
  Dataset dataset = 2;
  // An optional mask specifying which fields to update. At this time, the only
  // mutable field is [name][google.genomics.v1.Dataset.name]. The only
  // acceptable value is "name". If unspecified, all mutable fields will be
  // updated.
  google.protobuf.FieldMask update_mask = 3;
 }
 // The dataset delete request. Used as the request message of
 // `DatasetServiceV1.DeleteDataset`.
 message DeleteDatasetRequest {
  // The ID of the dataset to be deleted.
  string dataset_id = 1;
 }
 // The dataset undelete request. Used as the request message of
 // `DatasetServiceV1.UndeleteDataset`.
 message UndeleteDatasetRequest {
  // The ID of the dataset to be undeleted.
  string dataset_id = 1;
 }
 // The dataset get request. Used as the request message of
 // `DatasetServiceV1.GetDataset`.
 message GetDatasetRequest {
  // The ID of the dataset.
  string dataset_id = 1;
 }
--- a/google/genomics/v1/operations.proto
+++ b/google/genomics/v1/operations.proto
@ -0,0 +1,58 @@
 // Copyright 2016 Google Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 syntax = "proto3";
 package google.genomics.v1;
 import "google/api/annotations.proto";
 import "google/protobuf/any.proto";
 import "google/protobuf/timestamp.proto";
 option cc_enable_arenas = true;
 option java_multiple_files = true;
 option java_outer_classname = "OperationsProto";
 option java_package = "com.google.genomics.v1";
 // Metadata describing an [Operation][google.longrunning.Operation].
 //
 // NOTE(review): field numbers 3 and 7 are not assigned in this message.
 // Confirm whether they are deliberately skipped (or held for other fields)
 // before using them.
 message OperationMetadata {
  // The Google Cloud Project in which the job is scoped.
  string project_id = 1;
  // The time at which the job was submitted to the Genomics service.
  google.protobuf.Timestamp create_time = 2;
  // The time at which the job stopped running.
  google.protobuf.Timestamp end_time = 4;
  // The original request that started the operation. Note that this will be in
  // the current version of the API. If the operation was started with the
  // v1beta2 API and a GetOperation is performed on the v1 API, a v1 request
  // will be returned.
  google.protobuf.Any request = 5;
  // Optional event messages that were generated during the job's execution.
  // This also contains any warnings that were generated during import
  // or export.
  repeated OperationEvent events = 6;
  // Runtime metadata on this Operation.
  google.protobuf.Any runtime_metadata = 8;
 }
 // An event that occurred during an [Operation][google.longrunning.Operation].
 //
 // NOTE(review): the only field defined here uses number 3; numbers 1 and 2
 // are not assigned. Confirm whether they are deliberately skipped before
 // using them.
 message OperationEvent {
  // Required description of event.
  string description = 3;
 }
--- a/google/genomics/v1/position.proto
+++ b/google/genomics/v1/position.proto
@ -0,0 +1,41 @@
 // Copyright 2016 Google Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 syntax = "proto3";
 package google.genomics.v1;
 import "google/api/annotations.proto";
 option cc_enable_arenas = true;
 option java_multiple_files = true;
 option java_outer_classname = "PositionProto";
 option java_package = "com.google.genomics.v1";
 // An abstraction for referring to a genomic position, in relation to some
 // already known reference. For now, represents a genomic position as a
 // reference name, a base number on that reference (0-based), and a
 // determination of forward or reverse strand.
 message Position {
  // The name of the reference in whatever reference set is being used.
  string reference_name = 1;
  // The 0-based offset from the start of the forward strand for that reference.
  int64 position = 2;
  // True if this position is on the reverse strand; false if it is on the
  // forward strand.
  bool reverse_strand = 3;
 }
--- a/google/genomics/v1/range.proto
+++ b/google/genomics/v1/range.proto
@ -0,0 +1,38 @@
 // Copyright 2016 Google Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 syntax = "proto3";
 package google.genomics.v1;
 import "google/api/annotations.proto";
 option cc_enable_arenas = true;
 option java_multiple_files = true;
 option java_outer_classname = "RangeProto";
 option java_package = "com.google.genomics.v1";
 // A 0-based half-open genomic coordinate range for search requests, that is,
 // the interval `[start, end)` on the named reference.
 message Range {
  // The reference sequence name, for example `chr1`,
  // `1`, or `chrX`.
  string reference_name = 1;
  // The start position of the range on the reference, 0-based inclusive.
  int64 start = 2;
  // The end position of the range on the reference, 0-based exclusive.
  int64 end = 3;
 }
--- a/google/genomics/v1/readalignment.proto
+++ b/google/genomics/v1/readalignment.proto
@ -0,0 +1,220 @@
 // Copyright 2016 Google Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 syntax = "proto3";
 package google.genomics.v1;
 import "google/api/annotations.proto";
 import "google/genomics/v1/cigar.proto";
 import "google/genomics/v1/position.proto";
 import "google/protobuf/struct.proto";
 option cc_enable_arenas = true;
 option java_multiple_files = true;
 option java_outer_classname = "ReadAlignmentProto";
 option java_package = "com.google.genomics.v1";
 // A linear alignment can be represented by one CIGAR string. Describes the
 // mapped position and local alignment of the read to the reference.
 message LinearAlignment {
  // The position of this alignment.
  Position position = 1;
  // The mapping quality of this alignment. Represents how likely
  // the read maps to this position as opposed to other locations.
  //
  // Specifically, this is `-10 * log10(Pr(mapping position is wrong))`,
  // rounded to the nearest integer.
  int32 mapping_quality = 2;
  // Represents the local alignment of this sequence (alignment matches, indels,
  // etc.) against the reference.
  repeated CigarUnit cigar = 3;
 }
 // A read alignment describes a linear alignment of a string of DNA to a
 // [reference sequence][google.genomics.v1.Reference], in addition to metadata
 // about the fragment (the molecule of DNA sequenced) and the read (the bases
 // which were read by the sequencer). A read is equivalent to a line in a SAM
 // file. A read belongs to exactly one read group and exactly one
 // [read group set][google.genomics.v1.ReadGroupSet].
 //
 // For more genomics resource definitions, see [Fundamentals of Google
 // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
 //
 // ### Reverse-stranded reads
 //
 // Mapped reads (reads having a non-null `alignment`) can be aligned to either
 // the forward or the reverse strand of their associated reference. Strandedness
 // of a mapped read is encoded by `alignment.position.reverseStrand`.
 //
 // If we consider the reference to be a forward-stranded coordinate space of
 // `[0, reference.length)` with `0` as the left-most position and
 // `reference.length` as the right-most position, reads are always aligned left
 // to right. That is, `alignment.position.position` always refers to the
 // left-most reference coordinate and `alignment.cigar` describes the alignment
 // of this read to the reference from left to right. All per-base fields such as
 // `alignedSequence` and `alignedQuality` share this same left-to-right
 // orientation; this is true of reads which are aligned to either strand. For
 // reverse-stranded reads, this means that `alignedSequence` is the reverse
 // complement of the bases that were originally reported by the sequencing
 // machine.
 //
 // ### Generating a reference-aligned sequence string
 //
 // When interacting with mapped reads, it's often useful to produce a string
 // representing the local alignment of the read to reference. The following
 // pseudocode demonstrates one way of doing this:
 //
 //     out = ""
 //     offset = 0
 //     for c in read.alignment.cigar {
 //       switch c.operation {
 //       case "ALIGNMENT_MATCH", "SEQUENCE_MATCH", "SEQUENCE_MISMATCH":
 //         out += read.alignedSequence[offset:offset+c.operationLength]
 //         offset += c.operationLength
 //         break
 //       case "CLIP_SOFT", "INSERT":
 //         offset += c.operationLength
 //         break
 //       case "PAD":
 //         out += repeat("*", c.operationLength)
 //         break
 //       case "DELETE":
 //         out += repeat("-", c.operationLength)
 //         break
 //       case "SKIP":
 //         out += repeat(" ", c.operationLength)
 //         break
 //       case "CLIP_HARD":
 //         break
 //       }
 //     }
 //     return out
 //
 // ### Converting to SAM's CIGAR string
 //
 // The following pseudocode generates a SAM CIGAR string from the
 // `cigar` field. Note that this is a lossy conversion
 // (`cigar.referenceSequence` is lost).
 //
 //     cigarMap = {
 //       "ALIGNMENT_MATCH": "M",
 //       "INSERT": "I",
 //       "DELETE": "D",
 //       "SKIP": "N",
 //       "CLIP_SOFT": "S",
 //       "CLIP_HARD": "H",
 //       "PAD": "P",
 //       "SEQUENCE_MATCH": "=",
 //       "SEQUENCE_MISMATCH": "X",
 //     }
 //     cigarStr = ""
 //     for c in read.alignment.cigar {
 //       cigarStr += c.operationLength + cigarMap[c.operation]
 //     }
 //     return cigarStr
 message Read {
  // The server-generated read ID, unique across all reads. This is different
  // from the `fragmentName`.
  string id = 1;
  // The ID of the read group this read belongs to. A read belongs to exactly
  // one read group. This is a server-generated ID which is distinct from SAM's
  // RG tag (for that value, see
  // [ReadGroup.name][google.genomics.v1.ReadGroup.name]).
  string read_group_id = 2;
  // The ID of the read group set this read belongs to. A read belongs to
  // exactly one read group set.
  string read_group_set_id = 3;
  // The fragment name. Equivalent to QNAME (query template name) in SAM.
  string fragment_name = 4;
  // The orientation and the distance between reads from the fragment are
  // consistent with the sequencing protocol (SAM flag 0x2).
  bool proper_placement = 5;
  // The fragment is a PCR or optical duplicate (SAM flag 0x400).
  bool duplicate_fragment = 6;
  // The observed length of the fragment, equivalent to TLEN in SAM.
  int32 fragment_length = 7;
  // The read number in sequencing. 0-based and less than `numberReads`. This
  // field replaces SAM flag 0x40 and 0x80.
  int32 read_number = 8;
  // The number of reads in the fragment (extension to SAM flag 0x1).
  int32 number_reads = 9;
  // Whether this read did not pass filters, such as platform or vendor quality
  // controls (SAM flag 0x200).
  bool failed_vendor_quality_checks = 10;
  // The linear alignment for this alignment record. This field is null for
  // unmapped reads.
  LinearAlignment alignment = 11;
  // Whether this alignment is secondary. Equivalent to SAM flag 0x100.
  // A secondary alignment represents an alternative to the primary alignment
  // for this read. Aligners may return secondary alignments if a read can map
  // ambiguously to multiple coordinates in the genome. By convention, each read
  // has one and only one alignment where both `secondaryAlignment`
  // and `supplementaryAlignment` are false.
  bool secondary_alignment = 12;
  // Whether this alignment is supplementary. Equivalent to SAM flag 0x800.
  // Supplementary alignments are used in the representation of a chimeric
  // alignment. In a chimeric alignment, a read is split into multiple
  // linear alignments that map to different reference contigs. The first
  // linear alignment in the read will be designated as the representative
  // alignment; the remaining linear alignments will be designated as
  // supplementary alignments. These alignments may have different mapping
  // quality scores. In each linear alignment in a chimeric alignment, the read
  // will be hard clipped. The `alignedSequence` and
  // `alignedQuality` fields in the alignment record will only
  // represent the bases for its respective linear alignment.
  bool supplementary_alignment = 13;
  // The bases of the read sequence contained in this alignment record,
  // **without CIGAR operations applied** (equivalent to SEQ in SAM).
  // `alignedSequence` and `alignedQuality` may be
  // shorter than the full read sequence and quality. This will occur if the
  // alignment is part of a chimeric alignment, or if the read was trimmed. When
  // this occurs, the CIGAR for this read will begin/end with a hard clip
  // operator that will indicate the length of the excised sequence.
  string aligned_sequence = 14;
  // The quality of the read sequence contained in this alignment record
  // (equivalent to QUAL in SAM).
  // `alignedSequence` and `alignedQuality` may be shorter than the full read
  // sequence and quality. This will occur if the alignment is part of a
  // chimeric alignment, or if the read was trimmed. When this occurs, the CIGAR
  // for this read will begin/end with a hard clip operator that will indicate
  // the length of the excised sequence.
  repeated int32 aligned_quality = 15;
  // The mapping of the primary alignment of the
  // `(readNumber+1)%numberReads` read in the fragment. It replaces
  // mate position and mate strand in SAM.
  Position next_mate_position = 16;
  // A map of additional read alignment information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  // Note that the declared value type is the untyped
  // `google.protobuf.ListValue`, so the "list of strings" requirement is not
  // expressed by the schema itself.
  map<string, google.protobuf.ListValue> info = 17;
 }
--- a/google/genomics/v1/readgroup.proto
+++ b/google/genomics/v1/readgroup.proto
@ -0,0 +1,105 @@
 // Copyright 2016 Google Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 syntax = "proto3";
 package google.genomics.v1;
 import "google/api/annotations.proto";
 import "google/protobuf/struct.proto";
 option cc_enable_arenas = true;
 option java_multiple_files = true;
 option java_outer_classname = "ReadGroupProto";
 option java_package = "com.google.genomics.v1";
 // A read group is all the data that's processed the same way by the sequencer.
 //
 // NOTE(review): field numbers 8 and 9 are not assigned in this message.
 // Confirm whether they are deliberately skipped before using them.
 message ReadGroup {
  // Describes the experiment that produced a read group: the library, platform
  // unit, sequencing center and instrument model.
  message Experiment {
    // A client-supplied library identifier; a library is a collection of DNA
    // fragments which have been prepared for sequencing from a sample. This
    // field is important for quality control as error or bias can be introduced
    // during sample preparation.
    string library_id = 1;
    // The platform unit used as part of this experiment, for example
    // flowcell-barcode.lane for Illumina or slide for SOLiD. Corresponds to the
    // @RG PU field in the SAM spec.
    string platform_unit = 2;
    // The sequencing center used as part of this experiment.
    string sequencing_center = 3;
    // The instrument model used as part of this experiment. This maps to
    // sequencing technology in the SAM spec.
    string instrument_model = 4;
  }
  // Describes one program that was run to generate a read group. Programs are
  // ordered relative to each other through `id` and `prevProgramId`.
  message Program {
    // The command line used to run this program.
    string command_line = 1;
    // The user specified locally unique ID of the program. Used along with
    // `prevProgramId` to define an ordering between programs.
    string id = 2;
    // The display name of the program. This is typically the colloquial name of
    // the tool used, for example 'bwa' or 'picard'.
    string name = 3;
    // The ID of the program run before this one.
    string prev_program_id = 4;
    // The version of the program run.
    string version = 5;
  }
  // The server-generated read group ID, unique for all read groups.
  // Note: This is different from the @RG ID field in the SAM spec. For that
  // value, see [name][google.genomics.v1.ReadGroup.name].
  string id = 1;
  // The dataset to which this read group belongs.
  string dataset_id = 2;
  // The read group name. This corresponds to the @RG ID field in the SAM spec.
  string name = 3;
  // A free-form text description of this read group.
  string description = 4;
  // A client-supplied sample identifier for the reads in this read group.
  string sample_id = 5;
  // The experiment used to generate this read group.
  Experiment experiment = 6;
  // The predicted insert size of this read group. The insert size is the length
  // of the sequenced DNA fragment from end-to-end, not including the adapters.
  int32 predicted_insert_size = 7;
  // The programs used to generate this read group. Programs are always
  // identical for all read groups within a read group set. For this reason,
  // only the first read group in a returned set will have this field
  // populated.
  repeated Program programs = 10;
  // The reference set the reads in this read group are aligned to.
  string reference_set_id = 11;
  // A map of additional read group information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 12;
 }
--- a/google/genomics/v1/readgroupset.proto
+++ b/google/genomics/v1/readgroupset.proto
@ -0,0 +1,63 @@
 // Copyright 2016 Google Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 syntax = "proto3";
 package google.genomics.v1;
 import "google/api/annotations.proto";
 import "google/genomics/v1/readgroup.proto";
 import "google/protobuf/struct.proto";
 option cc_enable_arenas = true;
 option java_multiple_files = true;
 option java_outer_classname = "ReadGroupSetProto";
 option java_package = "com.google.genomics.v1";
 // A read group set is a logical collection of read groups, which are
 // collections of reads produced by a sequencer. A read group set typically
 // models reads corresponding to one sample, sequenced one way, and aligned one
 // way.
 //
 // * A read group set belongs to one dataset.
 // * A read group belongs to one read group set.
 // * A read belongs to one read group.
 //
 // For more genomics resource definitions, see [Fundamentals of Google
 // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
 message ReadGroupSet {
  // The server-generated read group set ID, unique for all read group sets.
  string id = 1;
  // The dataset to which this read group set belongs.
  string dataset_id = 2;
  // The reference set to which the reads in this read group set are aligned.
  string reference_set_id = 3;
  // The read group set name. By default this will be initialized to the sample
  // name of the sequenced data contained in this set.
  string name = 4;
  // The filename of the original source file for this read group set, if any.
  string filename = 5;
  // The read groups in this set. There are typically 1-10 read groups in a read
  // group set.
  repeated ReadGroup read_groups = 6;
  // A map of additional read group set information. Keys are strings and each
  // value is a `google.protobuf.ListValue`.
  map<string, google.protobuf.ListValue> info = 7;
 }
--- a/google/genomics/v1/reads.proto
+++ b/google/genomics/v1/reads.proto
@ -0,0 +1,461 @@
 // Copyright 2016 Google Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 syntax = "proto3";
 package google.genomics.v1;
 import "google/api/annotations.proto";
 import "google/genomics/v1/range.proto";
 import "google/genomics/v1/readalignment.proto";
 import "google/genomics/v1/readgroupset.proto";
 import "google/longrunning/operations.proto";
 import "google/protobuf/empty.proto";
 import "google/protobuf/field_mask.proto";
 option cc_enable_arenas = true;
 option java_multiple_files = true;
 option java_outer_classname = "ReadsProto";
 option java_package = "com.google.genomics.v1";
 // Server-streaming access to reads.
 service StreamingReadService {
  // Streams every read that matches the request. Results are ordered by
  // reference name, then position, then ID.
  rpc StreamReads(StreamReadsRequest) returns (stream StreamReadsResponse) {
    option (google.api.http) = {
      post: "/v1/reads:stream"
      body: "*"
    };
  }
 }
 // The Readstore. A data store for DNA sequencing Reads.
 service ReadServiceV1 {
  // Creates read group sets by asynchronously importing the provided
  // information.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // The caller must have WRITE permissions to the dataset.
  //
  // ## Notes on [BAM](https://samtools.github.io/hts-specs/SAMv1.pdf) import
  //
  // - Tags will be converted to strings - tag types are not preserved
  // - Comments (`@CO`) in the input file header will not be preserved
  // - Original header order of references (`@SQ`) will not be preserved
  // - Any reverse stranded unmapped reads will be reverse complemented, and
  //   their qualities (also the "BQ" and "OQ" tags, if any) will be reversed
  // - Unmapped reads will be stripped of positional information (reference name
  //   and position)
  rpc ImportReadGroupSets(ImportReadGroupSetsRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/readgroupsets:import"
      body: "*"
    };
  }
  // Exports a read group set to a BAM file in Google Cloud Storage.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Note that currently there may be some differences between exported BAM
  // files and the original BAM file at the time of import. See
  // [ImportReadGroupSets](google.genomics.v1.ReadServiceV1.ImportReadGroupSets)
  // for caveats.
  rpc ExportReadGroupSet(ExportReadGroupSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/readgroupsets/{read_group_set_id}:export"
      body: "*"
    };
  }
  // Searches for read group sets matching the criteria.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.searchReadGroupSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L135).
  rpc SearchReadGroupSets(SearchReadGroupSetsRequest)
      returns (SearchReadGroupSetsResponse) {
    option (google.api.http) = {
      post: "/v1/readgroupsets/search"
      body: "*"
    };
  }
  // Updates a read group set.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // This method supports patch semantics.
  rpc UpdateReadGroupSet(UpdateReadGroupSetRequest) returns (ReadGroupSet) {
    option (google.api.http) = {
      patch: "/v1/readgroupsets/{read_group_set_id}"
      body: "read_group_set"
    };
  }
  // Deletes a read group set.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc DeleteReadGroupSet(DeleteReadGroupSetRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/readgroupsets/{read_group_set_id}"
    };
  }
  // Gets a read group set by ID.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc GetReadGroupSet(GetReadGroupSetRequest) returns (ReadGroupSet) {
    option (google.api.http) = {
      get: "/v1/readgroupsets/{read_group_set_id}"
    };
  }
  // Lists fixed width coverage buckets for a read group set, each of which
  // corresponds to a range of a reference sequence. Each bucket summarizes
  // coverage information across its corresponding genomic range.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Coverage is defined as the number of reads which are aligned to a given
  // base in the reference sequence. Coverage buckets are available at several
  // precomputed bucket widths, enabling retrieval of various coverage 'zoom
  // levels'. The caller must have READ permissions for the target read group
  // set.
  rpc ListCoverageBuckets(ListCoverageBucketsRequest)
      returns (ListCoverageBucketsResponse) {
    option (google.api.http) = {
      get: "/v1/readgroupsets/{read_group_set_id}/coveragebuckets"
    };
  }
  // Gets a list of reads for one or more read group sets.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Reads search operates over a genomic coordinate space of reference sequence
  // & position defined over the reference sequences to which the requested
  // read group sets are aligned.
  //
  // If a target positional range is specified, search returns all reads whose
  // alignment to the reference genome overlaps the range. A query which
  // specifies only read group set IDs yields all reads in those read group
  // sets, including unmapped reads.
  //
  // All reads returned (including reads on subsequent pages) are ordered by
  // genomic coordinate (by reference sequence, then position). Reads with
  // equivalent genomic coordinates are returned in an unspecified order. This
  // order is consistent, such that two queries for the same content (regardless
  // of page size) yield reads in the same order across their respective streams
  // of paginated responses.
  //
  // Implements
  // [GlobalAllianceApi.searchReads](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L85).
  rpc SearchReads(SearchReadsRequest) returns (SearchReadsResponse) {
    option (google.api.http) = {
      post: "/v1/reads/search"
      body: "*"
    };
  }
 }
 // The read group set search request.
 //
 // NOTE(review): fields are declared out of field-number order (1, 3, 2, 4).
 // The numbers identify the fields on the wire, so they must not be renumbered
 // to match the declaration order.
 message SearchReadGroupSetsRequest {
  // Restricts this query to read group sets within the given datasets. At least
  // one ID must be provided.
  repeated string dataset_ids = 1;
  // Only return read group sets for which a substring of the name matches this
  // string.
  string name = 3;
  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 2;
  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 256. The maximum value is 1024.
  int32 page_size = 4;
 }
 // The read group set search response, returned by `SearchReadGroupSets`.
 message SearchReadGroupSetsResponse {
  // The list of matching read group sets.
  repeated ReadGroupSet read_group_sets = 1;
  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
 }
 // The read group set import request.
 //
 // NOTE(review): field number 3 is not used by this message. If it belonged to
 // a removed field it should be declared `reserved` so it cannot be reused;
 // confirm against the schema history.
 message ImportReadGroupSetsRequest {
  // Describes how imported read groups are partitioned into read group sets.
  enum PartitionStrategy {
    // No strategy was specified; this is the enum's zero value.
    PARTITION_STRATEGY_UNSPECIFIED = 0;
    // In most cases, this strategy yields one read group set per file. This is
    // the default behavior.
    //
    // Allocate one read group set per file per sample. For BAM files, read
    // groups are considered to share a sample if they have identical sample
    // names. Furthermore, all reads for each file which do not belong to a read
    // group, if any, will be grouped into a single read group set per-file.
    PER_FILE_PER_SAMPLE = 1;
    // Includes all read groups in all imported files into a single read group
    // set. Requires that the headers for all imported files are equivalent. All
    // reads which do not belong to a read group, if any, will be grouped into a
    // separate read group set.
    MERGE_ALL = 2;
  }
  // Required. The ID of the dataset these read group sets will belong to. The
  // caller must have WRITE permissions to this dataset.
  string dataset_id = 1;
  // The reference set to which the imported read group sets are aligned, if
  // any. The reference names of this reference set must be a superset of those
  // found in the imported file headers. If no reference set id is provided, a
  // best effort is made to associate with a matching reference set.
  string reference_set_id = 4;
  // A list of URIs pointing at [BAM
  // files](https://samtools.github.io/hts-specs/SAMv1.pdf)
  // in Google Cloud Storage.
  repeated string source_uris = 2;
  // The partition strategy describes how read groups are partitioned into read
  // group sets.
  PartitionStrategy partition_strategy = 5;
 }
 // The read group set import response.
 //
 // NOTE(review): `ImportReadGroupSets` returns a
 // `google.longrunning.Operation`, not this message directly; presumably this
 // message is delivered as the result of that operation — confirm.
 message ImportReadGroupSetsResponse {
  // IDs of the read group sets that were created.
  repeated string read_group_set_ids = 1;
 }
 // The read group set export request, sent to `ExportReadGroupSet`.
 message ExportReadGroupSetRequest {
  // Required. The Google Developers Console project ID that owns this
  // export. The caller must have WRITE access to this project.
  string project_id = 1;
  // Required. A Google Cloud Storage URI for the exported BAM file.
  // The currently authenticated user must have write access to the new file.
  // An error will be returned if the URI already contains data.
  string export_uri = 2;
  // Required. The ID of the read group set to export. The caller must have
  // READ access to this read group set. In the HTTP mapping this value is
  // taken from the `{read_group_set_id}` segment of the request path.
  string read_group_set_id = 3;
  // The reference names to export. If this is not specified, all reference
  // sequences, including unmapped reads, are exported.
  // Use `*` to export only unmapped reads.
  repeated string reference_names = 4;
 }
 // The read group set update request, sent to `UpdateReadGroupSet`.
 message UpdateReadGroupSetRequest {
  // The ID of the read group set to be updated. The caller must have WRITE
  // permissions to the dataset associated with this read group set. In the
  // HTTP mapping this value is taken from the `{read_group_set_id}` segment of
  // the request path.
  string read_group_set_id = 1;
  // The new read group set data. See `updateMask` for details on mutability of
  // fields. In the HTTP mapping this field is the request body.
  ReadGroupSet read_group_set = 2;
  // An optional mask specifying which fields to update. Supported fields:
  //
  // * [name][google.genomics.v1.ReadGroupSet.name].
  // * [referenceSetId][google.genomics.v1.ReadGroupSet.reference_set_id].
  //
  // Leaving `updateMask` unset is equivalent to specifying all mutable
  // fields.
  google.protobuf.FieldMask update_mask = 3;
 }
 // The read group set delete request, sent to `DeleteReadGroupSet`.
 message DeleteReadGroupSetRequest {
  // The ID of the read group set to be deleted. The caller must have WRITE
  // permissions to the dataset associated with this read group set. In the
  // HTTP mapping this value is taken from the `{read_group_set_id}` segment of
  // the request path.
  string read_group_set_id = 1;
 }
 // The read group set get request, sent to `GetReadGroupSet`.
 message GetReadGroupSetRequest {
  // The ID of the read group set. In the HTTP mapping this value is taken from
  // the `{read_group_set_id}` segment of the request path.
  string read_group_set_id = 1;
 }
 // The coverage bucket list request, sent to `ListCoverageBuckets`.
 //
 // NOTE(review): field number 2 is not used by this message. If it belonged to
 // a removed field it should be declared `reserved` so it cannot be reused;
 // confirm against the schema history.
 message ListCoverageBucketsRequest {
  // Required. The ID of the read group set over which coverage is requested.
  // In the HTTP mapping this value is taken from the `{read_group_set_id}`
  // segment of the request path.
  string read_group_set_id = 1;
  // The name of the reference to query, within the reference set associated
  // with this query. Optional.
  string reference_name = 3;
  // The start position of the range on the reference, 0-based inclusive. If
  // specified, `referenceName` must also be specified. Defaults to 0.
  int64 start = 4;
  // The end position of the range on the reference, 0-based exclusive. If
  // specified, `referenceName` must also be specified. If unset or 0, defaults
  // to the length of the reference.
  int64 end = 5;
  // The desired width of each reported coverage bucket in base pairs. This
  // will be rounded down to the nearest precomputed bucket width; the value
  // of which is returned as `bucketWidth` in the response. Defaults
  // to infinity (each bucket spans an entire reference sequence) or the length
  // of the target range, if specified. The smallest precomputed
  // `bucketWidth` is currently 2048 base pairs; this is subject to
  // change.
  int64 target_bucket_width = 6;
  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 7;
  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 1024. The maximum value is 2048.
  int32 page_size = 8;
 }
 // A bucket over which read coverage has been precomputed. A bucket corresponds
 // to a specific range of the reference sequence. Buckets are returned in
 // `ListCoverageBucketsResponse.coverage_buckets`.
 message CoverageBucket {
  // The genomic coordinate range spanned by this bucket.
  Range range = 1;
  // The average number of reads which are aligned to each individual
  // reference base in this bucket.
  float mean_coverage = 2;
 }
 // The coverage bucket list response, returned by `ListCoverageBuckets`.
 message ListCoverageBucketsResponse {
  // The length of each coverage bucket in base pairs. Note that buckets at the
  // end of a reference sequence may be shorter. This value is omitted if the
  // bucket width is infinity (the default behaviour, with no range or
  // `targetBucketWidth`).
  int64 bucket_width = 1;
  // The coverage buckets. The list of buckets is sparse; a bucket with 0
  // overlapping reads is not returned. A bucket never crosses more than one
  // reference sequence. Each bucket has width `bucketWidth`, unless
  // its end is the end of the reference sequence.
  repeated CoverageBucket coverage_buckets = 2;
  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 3;
 }
 // The read search request, sent to `SearchReads`.
 //
 // NOTE(review): fields are declared out of field-number order and numbers 2
 // and 6 are not used. The numbers identify the fields on the wire and must not
 // be renumbered; if 2 or 6 belonged to removed fields they should be declared
 // `reserved` — confirm against the schema history.
 message SearchReadsRequest {
  // The IDs of the read group sets within which to search for reads. All
  // specified read group sets must be aligned against a common set of reference
  // sequences; this defines the genomic coordinates for the query. Must specify
  // one of `readGroupSetIds` or `readGroupIds`.
  repeated string read_group_set_ids = 1;
  // The IDs of the read groups within which to search for reads. All specified
  // read groups must belong to the same read group sets. Must specify one of
  // `readGroupSetIds` or `readGroupIds`.
  repeated string read_group_ids = 5;
  // The reference sequence name, for example `chr1`, `1`, or `chrX`. If set to
  // `*`, only unmapped reads are returned. If unspecified, all reads (mapped
  // and unmapped) are returned.
  string reference_name = 7;
  // The start position of the range on the reference, 0-based inclusive. If
  // specified, `referenceName` must also be specified.
  int64 start = 8;
  // The end position of the range on the reference, 0-based exclusive. If
  // specified, `referenceName` must also be specified.
  int64 end = 9;
  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 3;
  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 256. The maximum value is 2048.
  int32 page_size = 4;
 }
 // The read search response, returned by `SearchReads`.
 message SearchReadsResponse {
  // The list of matching alignments sorted by mapped genomic coordinate,
  // if any, ascending in position within the same reference. Unmapped reads,
  // which have no position, are returned contiguously and are sorted in
  // ascending lexicographic order by fragment name.
  repeated Read alignments = 1;
  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
 }
 // The stream reads request, sent to `StreamingReadService.StreamReads`.
 message StreamReadsRequest {
  // The Google Developers Console project ID or number which will be billed
  // for this access. The caller must have WRITE access to this project.
  // Required.
  string project_id = 1;
  // The ID of the read group set from which to stream reads.
  string read_group_set_id = 2;
  // The reference sequence name, for example `chr1`,
  // `1`, or `chrX`. If set to `*`, only unmapped reads are
  // returned.
  string reference_name = 3;
  // The start position of the range on the reference, 0-based inclusive. If
  // specified, `referenceName` must also be specified.
  int64 start = 4;
  // The end position of the range on the reference, 0-based exclusive. If
  // specified, `referenceName` must also be specified.
  int64 end = 5;
  // Restricts results to a shard containing approximately `1/totalShards`
  // of the normal response payload for this query. Results from a sharded
  // request are disjoint from those returned by all queries which differ only
  // in their shard parameter. A shard may yield 0 results; this is especially
  // likely for large values of `totalShards`.
  //
  // Valid values are `[0, totalShards)`.
  int32 shard = 6;
  // Specifying `totalShards` causes a disjoint subset of the normal response
  // payload to be returned for each query with a unique `shard` parameter
  // specified. A best effort is made to yield equally sized shards. Sharding
  // can be used to distribute processing amongst workers, where each worker is
  // assigned a unique `shard` number and all workers specify the same
  // `totalShards` number. The union of reads returned for all sharded queries
  // `[0, totalShards)` is equal to those returned by a single unsharded query.
  //
  // Queries for different values of `totalShards` with common divisors will
  // share shard boundaries. For example, streaming `shard` 2 of 5
  // `totalShards` yields the same results as streaming `shard`s 4 and 5 of 10
  // `totalShards`. This property can be leveraged for adaptive retries.
  int32 total_shards = 7;
 }
 // One message in the response stream of `StreamingReadService.StreamReads`.
 message StreamReadsResponse {
  // The reads carried by this message of the stream.
  repeated Read alignments = 1;
 }
--- a/google/genomics/v1/references.proto
+++ b/google/genomics/v1/references.proto
@ -0,0 +1,281 @@
 // Copyright 2016 Google Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 syntax = "proto3";
 package google.genomics.v1;
 import "google/api/annotations.proto";
 option cc_enable_arenas = true;
 option java_multiple_files = true;
 option java_outer_classname = "ReferencesProto";
 option java_package = "com.google.genomics.v1";
 // Service for searching and retrieving reference sets, references, and the
 // bases of a reference.
 service ReferenceServiceV1 {
  // Searches for reference sets which match the given criteria.
  //
  // For the definitions of references and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.searchReferenceSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L71).
  rpc SearchReferenceSets(SearchReferenceSetsRequest)
      returns (SearchReferenceSetsResponse) {
    option (google.api.http) = {
      post: "/v1/referencesets/search"
      body: "*"
    };
  }
  // Gets a reference set.
  //
  // For the definitions of references and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.getReferenceSet](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L83).
  rpc GetReferenceSet(GetReferenceSetRequest) returns (ReferenceSet) {
    option (google.api.http) = {
      get: "/v1/referencesets/{reference_set_id}"
    };
  }
  // Searches for references which match the given criteria.
  //
  // For the definitions of references and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.searchReferences](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L146).
  rpc SearchReferences(SearchReferencesRequest)
      returns (SearchReferencesResponse) {
    option (google.api.http) = {
      post: "/v1/references/search"
      body: "*"
    };
  }
  // Gets a reference.
  //
  // For the definitions of references and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.getReference](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L158).
  rpc GetReference(GetReferenceRequest) returns (Reference) {
    option (google.api.http) = {
      get: "/v1/references/{reference_id}"
    };
  }
  // Lists the bases in a reference, optionally restricted to a range.
  //
  // For the definitions of references and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.getReferenceBases](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L221).
  rpc ListBases(ListBasesRequest) returns (ListBasesResponse) {
    option (google.api.http) = {
      get: "/v1/references/{reference_id}/bases"
    };
  }
 }
 // A reference is a canonical assembled DNA sequence, intended to act as a
 // reference coordinate space for other genomic annotations. A single reference
 // might represent the human chromosome 1 or mitochondrial DNA, for instance. A
 // reference belongs to one or more reference sets.
 //
 // For more genomics resource definitions, see [Fundamentals of Google
 // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics).
 message Reference {
  // The server-generated reference ID, unique across all references.
  string id = 1;
  // The length of this reference's sequence.
  int64 length = 2;
  // MD5 of the upper-case sequence excluding all whitespace characters (this
  // is equivalent to SQ:M5 in SAM). This value is represented in lower case
  // hexadecimal format.
  string md5checksum = 3;
  // The name of this reference, for example `22`.
  string name = 4;
  // The URI from which the sequence was obtained. Typically specifies a FASTA
  // format file.
  string source_uri = 5;
  // All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ) ideally
  // with a version number, for example `GCF_000001405.26`.
  repeated string source_accessions = 6;
  // ID from http://www.ncbi.nlm.nih.gov/taxonomy. For example, 9606 for human.
  int32 ncbi_taxon_id = 7;
 }
 // A reference set is a set of references which typically comprise a reference
 // assembly for a species, such as `GRCh38` which is representative
 // of the human genome. A reference set defines a common coordinate space for
 // comparing reference-aligned experimental data. A reference set contains 1 or
 // more references.
 //
 // For more genomics resource definitions, see [Fundamentals of Google
 // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics).
 message ReferenceSet {
  // The server-generated reference set ID, unique across all reference sets.
  string id = 1;
  // The IDs of the reference objects that are part of this set.
  // `Reference.md5checksum` must be unique within this set.
  repeated string reference_ids = 2;
  // Order-independent MD5 checksum which identifies this reference set. The
  // checksum is computed by sorting all lower case hexadecimal string
  // `reference.md5checksum` values (for all references in this set) in
  // ascending lexicographic order, concatenating, and taking the MD5 of that
  // value. The resulting value is represented in lower case hexadecimal format.
  string md5checksum = 3;
  // ID from http://www.ncbi.nlm.nih.gov/taxonomy (for example, 9606 for human)
  // indicating the species which this reference set is intended to model. Note
  // that contained references may specify a different `ncbiTaxonId`, as
  // assemblies may contain reference sequences which do not belong to the
  // modeled species, for example EBV in a human reference genome.
  int32 ncbi_taxon_id = 4;
  // Free text description of this reference set.
  string description = 5;
  // Public id of this reference set, such as `GRCh37`.
  string assembly_id = 6;
  // The URI from which the references were obtained.
  string source_uri = 7;
  // All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ) ideally
  // with a version number, for example `NC_000001.11`.
  repeated string source_accessions = 8;
 }
 // The reference set search request, sent to `SearchReferenceSets`.
 message SearchReferenceSetsRequest {
  // If present, return reference sets for which the
  // [md5checksum][google.genomics.v1.ReferenceSet.md5checksum] matches exactly.
  repeated string md5checksums = 1;
  // If present, return reference sets for which a prefix of any of
  // [sourceAccessions][google.genomics.v1.ReferenceSet.source_accessions]
  // match any of these strings. Accession numbers typically have a main number
  // and a version, for example `NC_000001.11`.
  repeated string accessions = 2;
  // If present, return reference sets for which a substring of their
  // `assemblyId` matches this string (case insensitive).
  string assembly_id = 3;
  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 4;
  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 1024. The maximum value is 4096.
  int32 page_size = 5;
 }
 // The reference set search response, returned by `SearchReferenceSets`.
 message SearchReferenceSetsResponse {
  // The matching reference sets.
  repeated ReferenceSet reference_sets = 1;
  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
 }
 // The reference set get request, sent to `GetReferenceSet`.
 message GetReferenceSetRequest {
  // The ID of the reference set. In the HTTP mapping this value is taken from
  // the `{reference_set_id}` segment of the request path.
  string reference_set_id = 1;
 }
 // The reference search request, sent to `SearchReferences`.
 message SearchReferencesRequest {
  // If present, return references for which the
  // [md5checksum][google.genomics.v1.Reference.md5checksum] matches exactly.
  repeated string md5checksums = 1;
  // If present, return references for which a prefix of any of
  // [sourceAccessions][google.genomics.v1.Reference.source_accessions] match
  // any of these strings. Accession numbers typically have a main number and a
  // version, for example `GCF_000001405.26`.
  repeated string accessions = 2;
  // If present, return only references which belong to this reference set.
  string reference_set_id = 3;
  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 4;
  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 1024. The maximum value is 4096.
  int32 page_size = 5;
 }
 // The reference search response, returned by `SearchReferences`.
 message SearchReferencesResponse {
  // The matching references.
  repeated Reference references = 1;
  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
 }
 // The reference get request, sent to `GetReference`.
 message GetReferenceRequest {
  // The ID of the reference. In the HTTP mapping this value is taken from the
  // `{reference_id}` segment of the request path.
  string reference_id = 1;
 }
 // The request for listing the bases of a reference, sent to `ListBases`.
 message ListBasesRequest {
  // The ID of the reference. In the HTTP mapping this value is taken from the
  // `{reference_id}` segment of the request path.
  string reference_id = 1;
  // The start position (0-based) of this query. Defaults to 0.
  int64 start = 2;
  // The end position (0-based, exclusive) of this query. Defaults to the length
  // of this reference.
  int64 end = 3;
  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 4;
  // The maximum number of bases to return in a single page. If unspecified,
  // defaults to 200Kbp (kilo base pairs). The maximum value is 10Mbp (mega base
  // pairs).
  int32 page_size = 5;
 }
 // The response carrying one page of bases, returned by `ListBases`.
 message ListBasesResponse {
  // The offset position (0-based) of the given `sequence` from the
  // start of this `Reference`. This value will differ for each page
  // in a paginated request.
  int64 offset = 1;
  // A substring of the bases that make up this reference.
  string sequence = 2;
  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 3;
 }
--- a/google/genomics/v1/variants.proto
+++ b/google/genomics/v1/variants.proto
@ -0,0 +1,903 @@
 // Copyright 2016 Google Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 syntax = "proto3";
 package google.genomics.v1;
 import "google/api/annotations.proto";
 import "google/longrunning/operations.proto";
 import "google/protobuf/empty.proto";
 import "google/protobuf/field_mask.proto";
 import "google/protobuf/struct.proto";
 // File-level code generation options:
 // - cc_enable_arenas: enable arena allocation for the generated C++ classes.
 // - java_multiple_files: emit a separate .java file for each top-level
 //   message, enum and service instead of nesting them in the outer class.
 // - java_outer_classname / java_package: name of the Java wrapper class for
 //   this file and the Java package of the generated classes.
 option cc_enable_arenas = true;
 option java_multiple_files = true;
 option java_outer_classname = "VariantsProto";
 option java_package = "com.google.genomics.v1";
 // Streaming read access to variants. Results are delivered as a server-side
 // stream of responses rather than as pages addressed by a page token.
 service StreamingVariantService {
  // Returns a stream of all the variants matching the search request, ordered
  // by reference name, position, and ID.
  //
  // Mapped to HTTP as `POST /v1/variants:stream`, with the request message as
  // the body.
  rpc StreamVariants(StreamVariantsRequest) returns (stream StreamVariantsResponse) {
    option (google.api.http) = { post: "/v1/variants:stream" body: "*" };
  }
 }
 // Service for managing variant data. It covers three resource types:
 //
 // * variant sets: create, get, search, update (patch), delete, export;
 // * variants: import, create, get, search, update (patch), delete, merge;
 // * call sets: create, get, search, update (patch), delete.
 //
 // Import and export return a `google.longrunning.Operation`; all other
 // methods return their result directly.
 service VariantServiceV1 {
  // Creates variant data by asynchronously importing the provided information.
  //
  // For the definitions of variant sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // The variants for import will be merged with any existing variant that
  // matches its reference sequence, start, end, reference bases, and
  // alternative bases. If no such variant exists, a new one will be created.
  //
  // When variants are merged, the call information from the new variant
  // is added to the existing variant, and Variant info fields are merged
  // as specified in
  // [infoMergeConfig][google.genomics.v1.ImportVariantsRequest.info_merge_config].
  // As a special case, for single-sample VCF files, QUAL and FILTER fields will
  // be moved to the call level; these are sometimes interpreted in a
  // call-specific context.
  // Imported VCF headers are appended to the metadata already in a variant set.
  rpc ImportVariants(ImportVariantsRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = { post: "/v1/variants:import" body: "*" };
  }
  // Creates a new variant set.
  //
  // For the definitions of variant sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // The provided variant set must have a valid `datasetId` set - all other
  // fields are optional. Note that the `id` field will be ignored, as this is
  // assigned by the server.
  rpc CreateVariantSet(CreateVariantSetRequest) returns (VariantSet) {
    option (google.api.http) = { post: "/v1/variantsets" body: "variant_set" };
  }
  // Exports variant set data to an external destination.
  //
  // For the definitions of variant sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc ExportVariantSet(ExportVariantSetRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = { post: "/v1/variantsets/{variant_set_id}:export" body: "*" };
  }
  // Gets a variant set by ID.
  //
  // For the definitions of variant sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc GetVariantSet(GetVariantSetRequest) returns (VariantSet) {
    option (google.api.http) = { get: "/v1/variantsets/{variant_set_id}" };
  }
  // Returns a list of all variant sets matching search criteria.
  //
  // For the definitions of variant sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.searchVariantSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variantmethods.avdl#L49).
  rpc SearchVariantSets(SearchVariantSetsRequest) returns (SearchVariantSetsResponse) {
    option (google.api.http) = { post: "/v1/variantsets/search" body: "*" };
  }
  // Deletes a variant set including all variants, call sets, and calls within.
  // This is not reversible.
  //
  // For the definitions of variant sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc DeleteVariantSet(DeleteVariantSetRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = { delete: "/v1/variantsets/{variant_set_id}" };
  }
  // Updates a variant set using patch semantics.
  //
  // For the definitions of variant sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc UpdateVariantSet(UpdateVariantSetRequest) returns (VariantSet) {
    option (google.api.http) = { patch: "/v1/variantsets/{variant_set_id}" body: "variant_set" };
  }
  // Gets a list of variants matching the criteria.
  //
  // For the definitions of variants and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.searchVariants](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variantmethods.avdl#L126).
  rpc SearchVariants(SearchVariantsRequest) returns (SearchVariantsResponse) {
    option (google.api.http) = { post: "/v1/variants/search" body: "*" };
  }
  // Creates a new variant.
  //
  // For the definitions of variants and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc CreateVariant(CreateVariantRequest) returns (Variant) {
    option (google.api.http) = { post: "/v1/variants" body: "variant" };
  }
  // Updates a variant.
  //
  // For the definitions of variants and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // This method supports patch semantics. Returns the modified variant without
  // its calls.
  rpc UpdateVariant(UpdateVariantRequest) returns (Variant) {
    option (google.api.http) = { patch: "/v1/variants/{variant_id}" body: "variant" };
  }
  // Deletes a variant.
  //
  // For the definitions of variants and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc DeleteVariant(DeleteVariantRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = { delete: "/v1/variants/{variant_id}" };
  }
  // Gets a variant by ID.
  //
  // For the definitions of variants and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc GetVariant(GetVariantRequest) returns (Variant) {
    option (google.api.http) = { get: "/v1/variants/{variant_id}" };
  }
  // Merges the given variants with existing variants.
  //
  // For the definitions of variants and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Each variant will be
  // merged with an existing variant that matches its reference sequence,
  // start, end, reference bases, and alternative bases. If no such variant
  // exists, a new one will be created.
  //
  // When variants are merged, the call information from the new variant
  // is added to the existing variant. Variant info fields are merged as
  // specified in the
  // [infoMergeConfig][google.genomics.v1.MergeVariantsRequest.info_merge_config]
  // field of the MergeVariantsRequest.
  //
  // Please exercise caution when using this method! It is easy to introduce
  // mistakes in existing variants and difficult to back out of them. For
  // example, suppose you were trying to merge a new variant with an existing
  // one and both variants contain calls that belong to callsets with the same
  // callset ID.
  //
  //     // Existing variant - irrelevant fields trimmed for clarity
  //     {
  //         "variantSetId": "10473108253681171589",
  //         "referenceName": "1",
  //         "start": "10582",
  //         "referenceBases": "G",
  //         "alternateBases": [
  //             "A"
  //         ],
  //         "calls": [
  //             {
  //                 "callSetId": "10473108253681171589-0",
  //                 "callSetName": "CALLSET0",
  //                 "genotype": [
  //                     0,
  //                     1
  //                 ]
  //             }
  //         ]
  //     }
  //
  //     // New variant with conflicting call information
  //     {
  //         "variantSetId": "10473108253681171589",
  //         "referenceName": "1",
  //         "start": "10582",
  //         "referenceBases": "G",
  //         "alternateBases": [
  //             "A"
  //         ],
  //         "calls": [
  //             {
  //                 "callSetId": "10473108253681171589-0",
  //                 "callSetName": "CALLSET0",
  //                 "genotype": [
  //                     1,
  //                     1
  //                 ]
  //             }
  //         ]
  //     }
  //
  // The resulting merged variant would overwrite the existing calls with those
  // from the new variant:
  //
  //     {
  //         "variantSetId": "10473108253681171589",
  //         "referenceName": "1",
  //         "start": "10582",
  //         "referenceBases": "G",
  //         "alternateBases": [
  //             "A"
  //         ],
  //         "calls": [
  //             {
  //                 "callSetId": "10473108253681171589-0",
  //                 "callSetName": "CALLSET0",
  //                 "genotype": [
  //                     1,
  //                     1
  //                 ]
  //             }
  //         ]
  //     }
  //
  // This may be the desired outcome, but it is up to the user to determine
  // if that is indeed the case.
  rpc MergeVariants(MergeVariantsRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = { post: "/v1/variants:merge" body: "*" };
  }
  // Gets a list of call sets matching the criteria.
  //
  // For the definitions of call sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.searchCallSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variantmethods.avdl#L178).
  rpc SearchCallSets(SearchCallSetsRequest) returns (SearchCallSetsResponse) {
    option (google.api.http) = { post: "/v1/callsets/search" body: "*" };
  }
  // Creates a new call set.
  //
  // For the definitions of call sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc CreateCallSet(CreateCallSetRequest) returns (CallSet) {
    option (google.api.http) = { post: "/v1/callsets" body: "call_set" };
  }
  // Updates a call set.
  //
  // For the definitions of call sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // This method supports patch semantics.
  rpc UpdateCallSet(UpdateCallSetRequest) returns (CallSet) {
    option (google.api.http) = { patch: "/v1/callsets/{call_set_id}" body: "call_set" };
  }
  // Deletes a call set.
  //
  // For the definitions of call sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc DeleteCallSet(DeleteCallSetRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = { delete: "/v1/callsets/{call_set_id}" };
  }
  // Gets a call set by ID.
  //
  // For the definitions of call sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc GetCallSet(GetCallSetRequest) returns (CallSet) {
    option (google.api.http) = { get: "/v1/callsets/{call_set_id}" };
  }
 }
 // Metadata describes a single piece of variant call metadata.
 // These data include a top level key and either a single value string (`value`)
 // or a list of key-value pairs (`info`).
 // `value` and `info` are mutually exclusive.
 message VariantSetMetadata {
  // The data types a piece of metadata can declare for the values it
  // describes (see the `type` field).
  enum Type {
    TYPE_UNSPECIFIED = 0;
    INTEGER = 1;
    FLOAT = 2;
    FLAG = 3;
    CHARACTER = 4;
    STRING = 5;
  }
  // The top-level key.
  string key = 1;
  // The value field for simple metadata. Mutually exclusive with `info`.
  string value = 2;
  // User-provided ID field, not enforced by this API.
  // Two or more pieces of structured metadata with identical
  // id and key fields are considered equivalent.
  string id = 4;
  // The type of data. Possible types include: Integer, Float,
  // Flag, Character, and String.
  Type type = 5;
  // The number of values that can be included in a field described by this
  // metadata.
  // NOTE(review): this is a string rather than an integer, presumably so it
  // can carry non-numeric VCF `Number` values such as `A`, `G` or `.` -
  // confirm.
  string number = 8;
  // A textual description of this metadata.
  string description = 7;
  // Remaining structured metadata key-value pairs. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  // Mutually exclusive with `value`.
  map<string, google.protobuf.ListValue> info = 3;
 }
 // A variant set is a collection of call sets and variants. It contains summary
 // statistics of those contents. A variant set belongs to a dataset.
 //
 // For more genomics resource definitions, see [Fundamentals of Google
 // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
 message VariantSet {
  // The dataset to which this variant set belongs. Must be set to a valid
  // dataset ID when creating a variant set.
  string dataset_id = 1;
  // The server-generated variant set ID, unique across all variant sets.
  // Ignored when creating a variant set, because the server assigns it.
  string id = 2;
  // The reference set to which the variant set is mapped. The reference set
  // describes the alignment provenance of the variant set, while the
  // `referenceBounds` describe the shape of the actual variant data. The
  // reference set's reference names are a superset of those found in the
  // `referenceBounds`.
  //
  // For example, given a variant set that is mapped to the GRCh38 reference set
  // and contains a single variant on reference 'X', `referenceBounds` would
  // contain only an entry for 'X', while the associated reference set
  // enumerates all possible references: '1', '2', 'X', 'Y', 'MT', etc.
  string reference_set_id = 6;
  // A list of all references used by the variants in a variant set
  // with associated coordinate upper bounds for each one.
  repeated ReferenceBound reference_bounds = 5;
  // The metadata associated with this variant set. Can be changed with
  // UpdateVariantSet.
  repeated VariantSetMetadata metadata = 4;
  // User-specified, mutable name. Can be changed with UpdateVariantSet.
  string name = 7;
  // A textual description of this variant set. Can be changed with
  // UpdateVariantSet.
  string description = 8;
 }
 // A variant represents a change in DNA sequence relative to a reference
 // sequence. For example, a variant could represent a SNP or an insertion.
 // Variants belong to a variant set.
 //
 // For more genomics resource definitions, see [Fundamentals of Google
 // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
 //
 // Each of the calls on a variant represents a determination of genotype with
 // respect to that variant. For example, a call might assign probability of 0.32
 // to the occurrence of a SNP named rs1234 in a sample named NA12345. A call
 // belongs to a call set, which contains related calls typically from one
 // sample.
 message Variant {
  // The ID of the variant set this variant belongs to.
  string variant_set_id = 15;
  // The server-generated variant ID, unique across all variants.
  string id = 2;
  // Names for the variant, for example a RefSNP ID. Can be changed with
  // UpdateVariant.
  repeated string names = 3;
  // The date this variant was created, in milliseconds from the epoch.
  int64 created = 12;
  // The reference on which this variant occurs.
  // (such as `chr20` or `X`)
  string reference_name = 14;
  // The position at which this variant occurs (0-based).
  // This corresponds to the first base of the string of reference bases.
  int64 start = 16;
  // The end position (0-based) of this variant. This corresponds to the first
  // base after the last base in the reference allele. So, the length of
  // the reference allele is (end - start). This is useful for variants
  // that don't explicitly give alternate bases, for example large deletions.
  int64 end = 13;
  // The reference bases for this variant. They start at the given
  // position.
  string reference_bases = 6;
  // The bases that appear instead of the reference bases.
  repeated string alternate_bases = 7;
  // A measure of how likely this variant is to be real.
  // A higher value is better.
  double quality = 8;
  // A list of filters (normally quality filters) this variant has failed.
  // `PASS` indicates this variant has passed all filters.
  repeated string filter = 9;
  // A map of additional variant information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  // Can be changed with UpdateVariant.
  map<string, google.protobuf.ListValue> info = 10;
  // The variant calls for this particular variant. Each one represents the
  // determination of genotype with respect to this variant.
  repeated VariantCall calls = 11;
 }
 // A call represents the determination of genotype with respect to a particular
 // variant. It may include associated information such as quality and phasing.
 // For example, a call might assign a probability of 0.32 to the occurrence of
 // a SNP named rs1234 in a call set with the name NA12345.
 message VariantCall {
  // The ID of the call set this variant call belongs to.
  string call_set_id = 8;
  // The name of the call set this variant call belongs to.
  string call_set_name = 9;
  // The genotype of this variant call. Each value is either 0, which stands
  // for the value of the `referenceBases` field, or a 1-based index into
  // `alternateBases`. If a variant had a `referenceBases`
  // value of `T` and an `alternateBases`
  // value of `["A", "C"]`, and the `genotype` was
  // `[2, 1]`, that would mean the call
  // represented the heterozygous value `CA` for this variant.
  // If the `genotype` was instead `[0, 1]`, the
  // represented value would be `TA`. Ordering of the
  // genotype values is important if the `phaseset` is present.
  // If a genotype is not called (that is, a `.` is present in the
  // GT string), -1 is returned.
  repeated int32 genotype = 7;
  // If this field is present, this variant call's genotype ordering implies
  // the phase of the bases and is consistent with any other variant calls in
  // the same reference sequence which have the same phaseset value.
  // When importing data from VCF, if the genotype data was phased but no
  // phase set was specified, this field will be set to `*`.
  string phaseset = 5;
  // The genotype likelihoods for this variant call. Each array entry
  // represents how likely a specific genotype is for this call. The value
  // ordering is defined by the GL tag in the VCF spec.
  // If Phred-scaled genotype likelihood scores (PL) are available and
  // log10(P) genotype likelihood scores (GL) are not, PL scores are converted
  // to GL scores. If both are available, PL scores are stored in `info`.
  repeated double genotype_likelihood = 6;
  // A map of additional variant call information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 2;
 }
 // A call set is a collection of variant calls, typically for one sample. It
 // belongs to a variant set.
 //
 // For more genomics resource definitions, see [Fundamentals of Google
 // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
 message CallSet {
  // The server-generated call set ID, unique across all call sets.
  string id = 1;
  // The call set name. Can be changed with UpdateCallSet.
  string name = 2;
  // The sample ID this call set corresponds to.
  string sample_id = 7;
  // The IDs of the variant sets this call set belongs to. This field must
  // have exactly length one, as a call set belongs to a single variant set.
  // This field is repeated for compatibility with the
  // [GA4GH 0.5.1
  // API](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variants.avdl#L76).
  repeated string variant_set_ids = 6;
  // The date this call set was created, in milliseconds from the epoch.
  int64 created = 5;
  // A map of additional call set information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 4;
 }
 // ReferenceBound records an upper bound for the starting coordinate of
 // variants in a particular reference. A variant set lists one of these for
 // each reference its variants use (see `VariantSet.reference_bounds`).
 message ReferenceBound {
  // The name of the reference associated with this reference bound.
  string reference_name = 1;
  // An upper bound (inclusive) on the starting coordinate of any
  // variant in the reference sequence.
  int64 upper_bound = 2;
 }
 // The variant data import request (request message of
 // VariantServiceV1.ImportVariants).
 message ImportVariantsRequest {
  // The file formats from which variant data can be imported.
  enum Format {
    FORMAT_UNSPECIFIED = 0;
    // VCF (Variant Call Format). The VCF files should be uncompressed. gVCF is
    // also supported.
    FORMAT_VCF = 1;
    // Complete Genomics masterVarBeta format. The masterVarBeta files should
    // be bzip2 compressed.
    FORMAT_COMPLETE_GENOMICS = 2;
  }
  // Required. The variant set to which variant data should be imported.
  string variant_set_id = 1;
  // A list of URIs referencing variant files in Google Cloud Storage. URIs can
  // include wildcards [as described
  // here](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames).
  // Note that recursive wildcards ('**') are not supported.
  repeated string source_uris = 2;
  // The format of the variant data being imported. If unspecified, defaults
  // to `VCF`.
  Format format = 3;
  // Convert reference names to the canonical representation.
  // hg19 haplotypes (those reference names containing "_hap")
  // are not modified in any way.
  // All other reference names are modified according to the following rules:
  // The reference name is capitalized.
  // The "chr" prefix is dropped for all autosomes and sex chromosomes.
  // For example "chr17" becomes "17" and "chrX" becomes "X".
  // All mitochondrial chromosomes ("chrM", "chrMT", etc) become "MT".
  bool normalize_reference_names = 5;
  // A mapping between info field keys and the InfoMergeOperations to
  // be performed on them. This is plumbed down to the MergeVariantsRequests
  // generated by the resulting import job.
  map<string, InfoMergeOperation> info_merge_config = 6;
 }
 // The variant data import response.
 // NOTE(review): ImportVariants itself returns a google.longrunning.Operation;
 // presumably this message is delivered as that operation's result - confirm.
 message ImportVariantsResponse {
  // IDs of the call sets created during the import.
  repeated string call_set_ids = 1;
 }
 // The CreateVariantSet request (request message of
 // VariantServiceV1.CreateVariantSet).
 message CreateVariantSetRequest {
  // Required. The variant set to be created. Must have a valid `datasetId`.
  VariantSet variant_set = 1;
 }
 // The variant data export request (request message of
 // VariantServiceV1.ExportVariantSet).
 message ExportVariantSetRequest {
  // The destinations to which variant data can be exported.
  enum Format {
    FORMAT_UNSPECIFIED = 0;
    // Export the data to Google BigQuery.
    FORMAT_BIGQUERY = 1;
  }
  // Required. The ID of the variant set that contains variant data which
  // should be exported. The caller must have READ access to this variant set.
  string variant_set_id = 1;
  // If provided, only variant call information from the specified call sets
  // will be exported. By default all variant calls are exported.
  repeated string call_set_ids = 2;
  // Required. The Google Cloud project ID that owns the destination
  // BigQuery dataset. The caller must have WRITE access to this project. This
  // project will also own the resulting export job.
  string project_id = 3;
  // The format for the exported data.
  Format format = 4;
  // Required. The BigQuery dataset to export data to. This dataset must already
  // exist. Note that this is distinct from the Genomics concept of "dataset".
  string bigquery_dataset = 5;
  // Required. The BigQuery table to export data to.
  // If the table doesn't exist, it will be created. If it already exists, it
  // will be overwritten.
  string bigquery_table = 6;
 }
 // The get variant set request (request message of
 // VariantServiceV1.GetVariantSet).
 message GetVariantSetRequest {
  // Required. The ID of the variant set.
  string variant_set_id = 1;
 }
 // The search variant sets request (request message of
 // VariantServiceV1.SearchVariantSets).
 message SearchVariantSetsRequest {
  // Exactly one dataset ID must be provided here. Only variant sets which
  // belong to this dataset will be returned.
  repeated string dataset_ids = 1;
  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 2;
  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 1024.
  int32 page_size = 3;
 }
 // The search variant sets response: one page of matching variant sets plus
 // the token for the next page.
 message SearchVariantSetsResponse {
  // The variant sets belonging to the requested dataset.
  repeated VariantSet variant_sets = 1;
  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
 }
 // The delete variant set request (request message of
 // VariantServiceV1.DeleteVariantSet).
 message DeleteVariantSetRequest {
  // The ID of the variant set to be deleted.
  string variant_set_id = 1;
 }
 // The update variant set request (request message of
 // VariantServiceV1.UpdateVariantSet, which uses patch semantics).
 message UpdateVariantSetRequest {
  // The ID of the variant set to be updated (must already exist).
  string variant_set_id = 1;
  // The new variant set data. Only the variant_set.metadata will be considered
  // for update.
  // NOTE(review): the sentence above conflicts with `update_mask`, which also
  // lists `name` and `description` as supported fields - confirm which of the
  // two descriptions is accurate.
  VariantSet variant_set = 2;
  // An optional mask specifying which fields to update. Supported fields:
  //
  // * [metadata][google.genomics.v1.VariantSet.metadata].
  // * [name][google.genomics.v1.VariantSet.name].
  // * [description][google.genomics.v1.VariantSet.description].
  //
  // Leaving `updateMask` unset is equivalent to specifying all mutable
  // fields.
  google.protobuf.FieldMask update_mask = 5;
 }
 // The variant search request (request message of
 // VariantServiceV1.SearchVariants).
 message SearchVariantsRequest {
  // At most one variant set ID may be provided. Only variants from this
  // variant set will be returned. If omitted, a call set ID must be included in
  // the request.
  repeated string variant_set_ids = 1;
  // Only return variants which have exactly this name.
  string variant_name = 2;
  // Only return variant calls which belong to call sets with these IDs.
  // Leaving this blank returns all variant calls. If a variant has no
  // calls belonging to any of these call sets, it won't be returned at all.
  // Currently, variants with no calls from any call set will never be returned.
  repeated string call_set_ids = 3;
  // Required. Only return variants in this reference sequence.
  string reference_name = 4;
  // The beginning of the window (0-based, inclusive) for which
  // overlapping variants should be returned. If unspecified, defaults to 0.
  int64 start = 5;
  // The end of the window (0-based, exclusive). If unspecified or 0, defaults
  // to the length of the reference.
  int64 end = 6;
  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 7;
  // The maximum number of variants to return in a single page. If unspecified,
  // defaults to 5000. The maximum value is 10000.
  int32 page_size = 8;
  // The maximum number of calls to return in a single page. Note that this
  // limit may be exceeded in the event that a matching variant contains more
  // calls than the requested maximum. If unspecified, defaults to 5000. The
  // maximum value is 10000.
  int32 max_calls = 9;
 }
 // The variant search response: one page of matching variants plus the token
 // for the next page.
 message SearchVariantsResponse {
  // The list of matching Variants.
  repeated Variant variants = 1;
  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
 }
 // The create variant request (request message of
 // VariantServiceV1.CreateVariant).
 message CreateVariantRequest {
  // The variant to be created.
  Variant variant = 1;
 }
 // The update variant request (request message of
 // VariantServiceV1.UpdateVariant, which supports patch semantics).
 message UpdateVariantRequest {
  // The ID of the variant to be updated.
  string variant_id = 1;
  // The new variant data.
  Variant variant = 2;
  // An optional mask specifying which fields to update. At this time, mutable
  // fields are [names][google.genomics.v1.Variant.names] and
  // [info][google.genomics.v1.Variant.info]. Acceptable values are "names" and
  // "info". If unspecified, all mutable fields will be updated.
  google.protobuf.FieldMask update_mask = 3;
 }
 // The delete variant request (request message of
 // VariantServiceV1.DeleteVariant).
 message DeleteVariantRequest {
  // The ID of the variant to be deleted.
  string variant_id = 1;
 }
 // The get variant request (request message of VariantServiceV1.GetVariant).
 message GetVariantRequest {
  // The ID of the variant.
  string variant_id = 1;
 }
 // The merge variants request (request message of
 // VariantServiceV1.MergeVariants).
 message MergeVariantsRequest {
  // The destination variant set.
  string variant_set_id = 1;
  // The variants to be merged with existing variants.
  repeated Variant variants = 2;
  // A mapping between info field keys and the InfoMergeOperations to
  // be performed on them.
  map<string, InfoMergeOperation> info_merge_config = 3;
 }
 // The call set search request (request message of
 // VariantServiceV1.SearchCallSets).
 message SearchCallSetsRequest {
  // Restrict the query to call sets within the given variant sets. At least one
  // ID must be provided.
  repeated string variant_set_ids = 1;
  // Only return call sets for which a substring of the name matches this
  // string.
  string name = 2;
  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 3;
  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 1024.
  int32 page_size = 4;
 }
 // The call set search response: one page of matching call sets plus the
 // token for the next page.
 message SearchCallSetsResponse {
  // The list of matching call sets.
  repeated CallSet call_sets = 1;
  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
 }
 // The create call set request (request message of
 // VariantServiceV1.CreateCallSet).
 message CreateCallSetRequest {
  // The call set to be created.
  CallSet call_set = 1;
 }
 // The update call set request (request message of
 // VariantServiceV1.UpdateCallSet, which supports patch semantics).
 message UpdateCallSetRequest {
  // The ID of the call set to be updated.
  string call_set_id = 1;
  // The new call set data.
  CallSet call_set = 2;
  // An optional mask specifying which fields to update. At this time, the only
  // mutable field is [name][google.genomics.v1.CallSet.name]. The only
  // acceptable value is "name". If unspecified, all mutable fields will be
  // updated.
  google.protobuf.FieldMask update_mask = 3;
 }
 // The delete call set request (request message of
 // VariantServiceV1.DeleteCallSet).
 message DeleteCallSetRequest {
  // The ID of the call set to be deleted.
  string call_set_id = 1;
 }
 // The get call set request (request message of VariantServiceV1.GetCallSet).
 message GetCallSetRequest {
  // The ID of the call set.
  string call_set_id = 1;
 }
 // The stream variants request (request message of
 // StreamingVariantService.StreamVariants).
 message StreamVariantsRequest {
  // Required. The Google Developers Console project ID or number which will
  // be billed for this access. The caller must have WRITE access to this
  // project.
  string project_id = 1;
  // The variant set ID from which to stream variants.
  string variant_set_id = 2;
  // Only return variant calls which belong to call sets with these IDs.
  // Leaving this blank returns all variant calls.
  repeated string call_set_ids = 3;
  // Required. Only return variants in this reference sequence.
  string reference_name = 4;
  // The beginning of the window (0-based, inclusive) for which
  // overlapping variants should be returned.
  int64 start = 5;
  // The end of the window (0-based, exclusive) for which overlapping
  // variants should be returned.
  int64 end = 6;
 }
 // One message of the response stream produced by
 // StreamingVariantService.StreamVariants.
 message StreamVariantsResponse {
  // The variants carried by this message of the stream.
  repeated Variant variants = 1;
 }
 // Operations to be performed during import on Variant info fields.
 // These operations are set for each info field in the info_merge_config
 // map of ImportVariantsRequest, which is plumbed down to the
 // MergeVariantsRequests generated by the import job.
 enum InfoMergeOperation {
  // NOTE(review): the IGNORE_NEW description below reads as the default
  // behavior; presumably an unspecified operation is handled the same way -
  // confirm.
  INFO_MERGE_OPERATION_UNSPECIFIED = 0;
  // By default, Variant info fields are persisted if the Variant doesn't
  // already exist in the variantset. If the Variant is equivalent to a
  // Variant already in the variantset, the incoming Variant's info field
  // is ignored in favor of that of the already persisted Variant.
  IGNORE_NEW = 1;
  // This operation removes an info field from the incoming Variant
  // and persists this info field in each of the incoming Variant's Calls.
  MOVE_TO_CALLS = 2;
 }
--- a/google/genomics/v1alpha2/pipelines.proto
+++ b/google/genomics/v1alpha2/pipelines.proto
@ -0,0 +1,586 @@
 // Copyright 2016 Google Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 syntax = "proto3";
 package google.genomics.v1alpha2;
 import "google/api/annotations.proto";
 import "google/longrunning/operations.proto";
 import "google/protobuf/empty.proto";
 import "google/protobuf/timestamp.proto";
 import "google/rpc/code.proto";
 option cc_enable_arenas = true;
 option java_multiple_files = true;
 option java_outer_classname = "PipelinesProto";
 option java_package = "com.google.genomics.v1a";
 // A service for creating, running, and managing genomics pipelines.
 service PipelinesV1Alpha2 {
  // Creates a pipeline that can be run later. Create takes a Pipeline that
  // has all fields other than `pipelineId` populated, and then returns
  // the same pipeline with `pipelineId` populated. This ID can be used
  // to run the pipeline.
  //
  // The HTTP request body is mapped to the `pipeline` field of the request.
  //
  // The caller must have WRITE permission to the project.
  rpc CreatePipeline(CreatePipelineRequest) returns (Pipeline) {
    option (google.api.http) = { post: "/v1alpha2/pipelines" body: "pipeline" };
  }
  // Runs a pipeline. If `pipelineId` is specified in the request, then
  // run a saved pipeline. If `ephemeralPipeline` is specified, then run
  // that pipeline once without saving a copy.
  //
  // Returns a long-running operation for the run.
  //
  // The caller must have READ permission to the project where the pipeline
  // is stored and WRITE permission to the project where the pipeline will be
  // run, as VMs will be created and storage will be used.
  rpc RunPipeline(RunPipelineRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = { post: "/v1alpha2/pipelines:run" body: "*" };
  }
  // Retrieves a pipeline based on ID. The ID is taken from the
  // `{pipeline_id}` segment of the URL path.
  //
  // The caller must have READ permission to the project.
  rpc GetPipeline(GetPipelineRequest) returns (Pipeline) {
    option (google.api.http) = { get: "/v1alpha2/pipelines/{pipeline_id}" };
  }
  // Lists pipelines.
  //
  // The caller must have READ permission to the project.
  rpc ListPipelines(ListPipelinesRequest) returns (ListPipelinesResponse) {
    option (google.api.http) = { get: "/v1alpha2/pipelines" };
  }
  // Deletes a pipeline based on ID. The ID is taken from the
  // `{pipeline_id}` segment of the URL path.
  //
  // The caller must have WRITE permission to the project.
  rpc DeletePipeline(DeletePipelineRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = { delete: "/v1alpha2/pipelines/{pipeline_id}" };
  }
  // Gets controller configuration information. Should only be called
  // by VMs created by the Pipelines Service and not by end users.
  rpc GetControllerConfig(GetControllerConfigRequest) returns (ControllerConfig) {
    option (google.api.http) = { get: "/v1alpha2/pipelines:getControllerConfig" };
  }
  // Sets the status of a given operation. All timestamps are sent on each
  // call, and the whole series of events is replaced, in case
  // intermediate calls are lost. Should only be called by VMs created
  // by the Pipelines Service and not by end users.
  rpc SetOperationStatus(SetOperationStatusRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = { put: "/v1alpha2/pipelines:setOperationStatus" body: "*" };
  }
 }
 // Describes a GCE (Google Compute Engine) resource that is being managed by
 // a running [pipeline][google.genomics.v1alpha2.Pipeline].
 message GCE {
  // The instance on which the operation is running.
  string instance_name = 1;
  // The availability zone in which the instance resides.
  string zone = 2;
  // The machine type of the instance.
  string machine_type = 3;
  // The names of the disks that were created for this pipeline.
  repeated string disk_names = 4;
 }
 // Runtime metadata that will be populated in the
 // [runtimeMetadata][google.genomics.v1.OperationMetadata.runtime_metadata]
 // field of the Operation associated with a RunPipeline execution.
 message RuntimeMetadata {
  // Execution information specific to Google Compute Engine.
  GCE gce = 1;
 }
 // The pipeline object. Represents a transformation from a set of input
 // parameters to a set of output parameters. The transformation is defined
 // as a docker image and command to run within that image. Each pipeline
 // is run on a Google Compute Engine VM. A pipeline can be created with the
 // `create` method and then later run with the `run` method, or a pipeline can
 // be defined and run all at once with the `run` method.
 message Pipeline {
  // Required. The project in which to create the pipeline. The caller must have
  // WRITE access.
  string project_id = 1;
  // Required. A user-specified pipeline name that does not have to be unique.
  // This name can be used for filtering Pipelines in ListPipelines.
  string name = 2;
  // User-specified description.
  string description = 3;
  // Input parameters of the pipeline.
  repeated PipelineParameter input_parameters = 8;
  // Output parameters of the pipeline.
  repeated PipelineParameter output_parameters = 9;
  // Required. The executor indicates in which environment the pipeline runs.
  // `docker` is the only executor defined in this oneof.
  oneof executor {
    // Specifies the docker run information.
    DockerExecutor docker = 5;
  }
  // Required. Specifies resource requirements for the pipeline run.
  // Required fields:
  //
  // *
  // [minimumCpuCores][google.genomics.v1alpha2.PipelineResources.minimum_cpu_cores]
  //
  // *
  // [minimumRamGb][google.genomics.v1alpha2.PipelineResources.minimum_ram_gb]
  PipelineResources resources = 6;
  // Unique pipeline ID that is generated by the service when CreatePipeline
  // is called. Cannot be specified in the Pipeline used in the
  // CreatePipelineRequest, and will be populated in the response to
  // CreatePipeline and all subsequent Get and List calls. Indicates that the
  // service has registered this pipeline.
  string pipeline_id = 7;
 }
 // The request to create a pipeline. The pipeline field here should not have
 // `pipelineId` populated, as that will be populated by the server.
 message CreatePipelineRequest {
  // The pipeline to create. Should not have `pipelineId` populated.
  Pipeline pipeline = 1;
 }
 // The pipeline run arguments.
 message RunPipelineArgs {
  // Required. The project in which to run the pipeline. The caller must have
  // WRITER access to all Google Cloud services and resources (e.g. Google
  // Compute Engine) that will be used.
  string project_id = 1;
  // Pipeline input arguments; keys are defined in the pipeline documentation.
  // All input parameters that do not have default values must be specified.
  // If parameters with defaults are specified here, the defaults will be
  // overridden.
  map<string, string> inputs = 2;
  // Pipeline output arguments; keys are defined in the pipeline
  // documentation. All output parameters without default values
  // must be specified. If parameters with defaults are specified
  // here, the defaults will be overridden.
  map<string, string> outputs = 3;
  // The Google Cloud Service Account that will be used to access data and
  // services. By default, the compute service account associated with
  // `projectId` is used.
  ServiceAccount service_account = 4;
  // Client-specified pipeline operation identifier.
  string client_id = 5;
  // Specifies resource requirements/overrides for the pipeline run.
  PipelineResources resources = 6;
  // Required. Logging options. Used by the service to communicate results
  // to the user.
  LoggingOptions logging = 7;
 }
 // The request to run a pipeline. If `pipelineId` is specified, it
 // refers to a saved pipeline created with CreatePipeline and set as
 // the `pipelineId` of the returned Pipeline object. If
 // `ephemeralPipeline` is specified, that pipeline is run once
 // with the given args and not saved. It is an error to specify both
 // `pipelineId` and `ephemeralPipeline`. `pipelineArgs`
 // must be specified.
 message RunPipelineRequest {
  // The pipeline to run. Because these fields share a oneof, at most one of
  // them can be set on a given request.
  oneof pipeline {
    // The already created pipeline to run.
    string pipeline_id = 1;
    // A new pipeline object to run once and then delete.
    Pipeline ephemeral_pipeline = 2;
  }
  // Required. The arguments to use when running this pipeline.
  RunPipelineArgs pipeline_args = 3;
 }
 // A request to get a saved pipeline by ID.
 message GetPipelineRequest {
  // The ID of the saved pipeline to get. The caller must have READ access to
  // the project in which this pipeline is defined.
  string pipeline_id = 1;
 }
 // A request to list pipelines in a given project. Pipelines can be
 // filtered by name using `namePrefix`: all pipelines with names that
 // begin with `namePrefix` will be returned. Uses standard pagination:
 // `pageSize` indicates how many pipelines to return, and
 // `pageToken` comes from a previous ListPipelinesResponse to
 // indicate offset.
 message ListPipelinesRequest {
  // Required. The name of the project to search for pipelines. The caller
  // must have READ access to this project.
  string project_id = 1;
  // Pipelines with names that match this prefix should be
  // returned. If unspecified, all pipelines in the project, up to
  // `pageSize`, will be returned.
  string name_prefix = 2;
  // Number of pipelines to return at once. Defaults to 256, and max
  // is 2048.
  int32 page_size = 3;
  // Token to use to indicate where to start getting results.
  // If unspecified, returns the first page of results.
  string page_token = 4;
 }
 // The response of ListPipelines. Contains at most `pageSize`
 // pipelines. If it contains `pageSize` pipelines, and more pipelines
 // exist, then `nextPageToken` will be populated and should be
 // used as the `pageToken` argument to a subsequent ListPipelines
 // request.
 message ListPipelinesResponse {
  // The matched pipelines.
  repeated Pipeline pipelines = 1;
  // The token to use to get the next page of results.
  string next_page_token = 2;
 }
 // The request to delete a saved pipeline by ID.
 message DeletePipelineRequest {
  // The ID of the saved pipeline to delete. The caller must have WRITE access
  // to the project in which this pipeline is defined.
  string pipeline_id = 1;
 }
 // Request to get controller configuration. Should only be used
 // by VMs created by the Pipelines Service and not by end users.
 message GetControllerConfigRequest {
  // The operation to retrieve controller configuration for.
  string operation_id = 1;
  // NOTE(review): undocumented in the original. Presumably a token the
  // service uses to validate that the caller is the VM that owns the
  // operation; confirm with the service owners.
  uint64 validation_token = 2;
 }
 // Stores the information that the controller will fetch from the
 // server in order to run. Should only be used by VMs created by the
 // Pipelines Service and not by end users.
 //
 // NOTE(review): none of the fields below are documented in the original;
 // the per-field notes are inferred from field names and sibling messages
 // and must be confirmed with the service owners.
 message ControllerConfig {
  // A list of strings, wrapped in a message so that it can be used as a map
  // value (map values cannot be `repeated` directly).
  message RepeatedString {
    // The wrapped string values.
    repeated string values = 1;
  }
  // Presumably the Docker image to run (cf. DockerExecutor.image_name);
  // confirm.
  string image = 1;
  // Presumably the command string to run (cf. DockerExecutor.cmd); confirm.
  string cmd = 2;
  // Presumably the Google Cloud Storage path for logs
  // (cf. LoggingOptions.gcs_path); confirm.
  string gcs_log_path = 3;
  // Presumably the Compute Engine machine type of the VM
  // (cf. GCE.machine_type); confirm.
  string machine_type = 4;
  // String-to-string map; presumably variables passed to the pipeline;
  // confirm key and value meaning.
  map<string, string> vars = 5;
  // String-to-string map describing disks; key and value meaning are not
  // shown here; confirm.
  map<string, string> disks = 6;
  // Map from a string key to a list of strings; presumably Google Cloud
  // Storage locations to copy inputs from; confirm.
  map<string, RepeatedString> gcs_sources = 7;
  // Map from a string key to a list of strings; presumably Google Cloud
  // Storage locations to copy outputs to; confirm.
  map<string, RepeatedString> gcs_sinks = 8;
 }
 // Stores a major event in job execution together with the time at which
 // it occurred.
 message TimestampEvent {
  // String indicating the type of event.
  string description = 1;
  // The time this event occurred.
  google.protobuf.Timestamp timestamp = 2;
 }
 // Request to set operation status. Should only be used by VMs
 // created by the Pipelines Service and not by end users.
 //
 // NOTE(review): the fields below are undocumented in the original; the
 // per-field notes are inferred from names and types and must be confirmed
 // with the service owners.
 message SetOperationStatusRequest {
  // Presumably the ID of the operation whose status is being set; confirm.
  string operation_id = 1;
  // Timestamped events for the operation. The SetOperationStatus RPC
  // documentation states that all timestamps are sent on each call and the
  // whole series of events is replaced.
  repeated TimestampEvent timestamp_events = 2;
  // A canonical `google.rpc.Code` value; presumably the error status of the
  // operation; confirm.
  google.rpc.Code error_code = 3;
  // Presumably a message accompanying `error_code`; confirm.
  string error_message = 4;
  // Presumably a token the service uses to validate the caller
  // (cf. GetControllerConfigRequest.validation_token); confirm.
  uint64 validation_token = 5;
 }
 // A Google Cloud Service Account.
 message ServiceAccount {
  // Email address of the service account. Defaults to `default`,
  // which uses the compute service account associated with the project.
  string email = 1;
  // List of scopes to be enabled for this service account on the
  // pipeline virtual machine.
  // The following scopes are automatically included:
  //
  // * https://www.googleapis.com/auth/genomics
  // * https://www.googleapis.com/auth/compute
  // * https://www.googleapis.com/auth/devstorage.full_control
  repeated string scopes = 2;
 }
 // The logging options for the pipeline run.
 message LoggingOptions {
  // The location in Google Cloud Storage to which the pipeline logs
  // will be copied. Can be specified either as:
  //
  // * a fully qualified directory path, in which case logs will be output
  //   with a unique identifier as the filename in that directory; or
  // * a fully specified path, which must end in `.log`, in which case that
  //   path will be used, and the user must ensure that logs are not
  //   overwritten.
  //
  // Stdout and stderr logs from the run are also generated and output as
  // `-stdout.log` and `-stderr.log`.
  string gcs_path = 1;
 }
 // The system resources for the pipeline run.
 message PipelineResources {
  // A Google Compute Engine disk resource specification.
  message Disk {
    // The types of disks that may be attached to VMs.
    enum Type {
      // Default disk type. Use one of the other options below.
      TYPE_UNSPECIFIED = 0;
      // Specifies a Google Compute Engine persistent hard disk. See
      // https://cloud.google.com/compute/docs/disks/persistent-disks#typeofdisks
      // for details.
      PERSISTENT_HDD = 1;
      // Specifies a Google Compute Engine persistent solid-state disk. See
      // https://cloud.google.com/compute/docs/disks/persistent-disks#typeofdisks
      // for details.
      PERSISTENT_SSD = 2;
      // Specifies a Google Compute Engine local SSD.
      // See https://cloud.google.com/compute/docs/disks/local-ssd for details.
      LOCAL_SSD = 3;
    }
    // Required. The name of the disk that can be used in the pipeline
    // parameters. Must be 1 - 63 characters.
    // The name "boot" is reserved for system use.
    string name = 1;
    // Required. The type of the disk to create.
    Type type = 2;
    // The size of the disk, in GB. Defaults to 500 (GB).
    // This field is not applicable for local SSD.
    int32 size_gb = 3;
    // The full or partial URL of the persistent disk to attach. See
    // https://cloud.google.com/compute/docs/reference/latest/instances#resource
    // and
    // https://cloud.google.com/compute/docs/disks/persistent-disks#snapshots
    // for more details.
    string source = 4;
    // Specifies whether or not to delete the disk when the pipeline
    // completes. This field is applicable only for newly created disks. See
    // https://cloud.google.com/compute/docs/reference/latest/instances#resource
    // for more details.
    // By default, `autoDelete` is `false`. `autoDelete` will be enabled if set
    // to `true` at create time or run time.
    bool auto_delete = 6;
    // Specifies how a source-based persistent disk will be mounted. See
    // https://cloud.google.com/compute/docs/disks/persistent-disks#use_multi_instances
    // for more details.
    // Can only be set at create time.
    bool read_only = 7;
    // Required at create time and cannot be overridden at run time.
    // Specifies the path in the docker container where files on
    // this disk should be located. For example, if `mountPoint`
    // is `/mnt/disk`, and the parameter has `localPath`
    // `inputs/file.txt`, the docker container can access the data at
    // `/mnt/disk/inputs/file.txt`.
    string mount_point = 8;
  }
  // The minimum number of cores to use. Defaults to 1.
  int32 minimum_cpu_cores = 1;
  // Whether preemptible machines are used. At create time, `true` means
  // that preemptible machines may be used for the run. At run time, `true`
  // means they should be used. Cannot be true at run time if false at
  // create time.
  // Defaults to `false`.
  bool preemptible = 2;
  // The minimum amount of RAM to use, in GB. Defaults to 3.75 (GB).
  double minimum_ram_gb = 3;
  // Disks to attach.
  repeated Disk disks = 4;
  // List of Google Compute Engine availability zones to which resource
  // creation will be restricted. If empty, any zone may be chosen.
  repeated string zones = 5;
  // The size of the boot disk, in GB. Defaults to 10 (GB).
  int32 boot_disk_size_gb = 6;
 }
 // Parameters facilitate setting and delivering data into the
 // pipeline's execution environment. They are defined at create time,
 // with optional defaults, and can be overridden at run time.
 //
 // If `localCopy` is unset, then the parameter specifies a string that
 // is passed as-is into the pipeline, as the value of the environment
 // variable with the given name. A default value can be optionally
 // specified at create time. The default can be overridden at run time
 // using the inputs map. If no default is given, a value must be
 // supplied at runtime.
 //
 // If `localCopy` is defined, then the parameter specifies a data
 // source or sink, both in Google Cloud Storage and on the Docker container
 // where the pipeline computation is run. The [service account associated with
 // the Pipeline][google.genomics.v1alpha2.RunPipelineArgs.service_account] (by
 // default the project's Compute Engine service account) must have access to the
 // Google Cloud Storage paths.
 //
 // At run time, the Google Cloud Storage paths can be overridden if a default
 // was provided at create time, or must be set otherwise. The pipeline runner
 // should add a key/value pair to either the inputs or outputs map. The
 // indicated data copies will be carried out before/after pipeline execution,
 // just as if the corresponding arguments were provided to `gsutil cp`.
 //
 // For example: Given the following `PipelineParameter`, specified
 // in the `inputParameters` list:
 //
 // ```
 // {name: "input_file", localCopy: {path: "file.txt", disk: "pd1"}}
 // ```
 //
 // where `disk` is defined in the `PipelineResources` object as:
 //
 // ```
 // {name: "pd1", mountPoint: "/mnt/disk/"}
 // ```
 //
 // We create a disk named `pd1`, mount it on the host VM, and map
 // `/mnt/pd1` to `/mnt/disk` in the docker container. At
 // runtime, an entry for `input_file` would be required in the inputs
 // map, such as:
 //
 // ```
 //   inputs["input_file"] = "gs://my-bucket/bar.txt"
 // ```
 //
 // This would generate the following gsutil call:
 //
 // ```
 //   gsutil cp gs://my-bucket/bar.txt /mnt/pd1/file.txt
 // ```
 //
 // The file `/mnt/pd1/file.txt` maps to `/mnt/disk/file.txt` in the
 // Docker container. Acceptable paths are:
 //
 // <table>
 //   <thead>
 //     <tr><th>Google Cloud Storage path</th><th>Local path</th></tr>
 //   </thead>
 //   <tbody>
 //     <tr><td>file</td><td>file</td></tr>
 //     <tr><td>glob</td><td>directory</td></tr>
 //   </tbody>
 // </table>
 //
 // For outputs, the direction of the copy is reversed:
 //
 // ```
 //   gsutil cp /mnt/disk/file.txt gs://my-bucket/bar.txt
 // ```
 //
 // Acceptable paths are:
 //
 // <table>
 //   <thead>
 //     <tr><th>Local path</th><th>Google Cloud Storage path</th></tr>
 //   </thead>
 //   <tbody>
 //     <tr><td>file</td><td>file</td></tr>
 //     <tr>
 //       <td>file</td>
 //       <td>directory - directory must already exist</td>
 //     </tr>
 //     <tr>
 //       <td>glob</td>
 //       <td>directory - directory will be created if it doesn't exist</td></tr>
 //   </tbody>
 // </table>
 //
 // One restriction, due to Docker limitations, is that for outputs that are
 // found on the boot disk, the local path cannot be a glob and must be a file.
 message PipelineParameter {
  // LocalCopy defines how a remote file should be copied to and from the VM.
  message LocalCopy {
    // Required. The path within the user's docker container where
    // this input should be localized to and from, relative to the specified
    // disk's mount point. For example: `file.txt`.
    string path = 1;
    // Required. The name of the disk where this parameter is
    // located. Can be the name of one of the disks specified in the
    // Resources field, or "boot", which represents the Docker
    // instance's boot disk and has a mount point of `/`.
    string disk = 2;
  }
  // Required. Name of the parameter - the pipeline runner uses this string
  // as the key to the input and output maps in RunPipeline.
  string name = 1;
  // Human-readable description.
  string description = 2;
  // The default value for this parameter. Can be overridden at runtime.
  // If `localCopy` is present, then this must be a Google Cloud Storage path
  // beginning with `gs://`.
  string default_value = 5;
  // If present, this parameter is marked for copying to and from the VM.
  // `LocalCopy` indicates where on the VM the file should be. The value
  // given to this parameter (either at runtime or using `defaultValue`)
  // must be the remote path where the file should be.
  LocalCopy local_copy = 6;
 }
 // The Docker executor specification.
 message DockerExecutor {
  // Required. Image name from either Docker Hub or Google Container Repository.
  // Users that run pipelines must have READ access to the image.
  string image_name = 1;
  // Required. The command string to run. Parameters that do not have
  // `localCopy` specified should be used as environment variables, while
  // those that do can be accessed at the defined paths.
  string cmd = 2;
 }