import protos for google genomics API.

9 years ago · 7e973fab0e
parent b042947963
commit 7e973fab0e
14 changed files with 3741 additions and 0 deletions
--- a/google/genomics/README.md
+++ b/google/genomics/README.md
@ -0,0 +1,14 @@
+Stores, processes, explores and shares genomic data. This API implements
+the Global Alliance for Genomics and Health (GA4GH) v0.5.1 API as well as
+several extensions.
+
+The Google Genomics API supports access via both
+[JSON/REST](https://cloud.google.com/genomics/reference/rest) and
+[gRPC](https://cloud.google.com/genomics/reference/rpc). JSON/REST is more
+broadly available and is easier for getting started with Google Genomics; it
+works well for small metadata resources (datasets, variant sets, read group
+sets) and for browsing small genomic regions for datasets of any size. For
+performant bulk data access (reads and variants), use gRPC.
+
+See also an [overview of genomic resources](https://cloud.google.com/genomics/v1/users-guide)
+and an overview of [Genomics on Google Cloud](https://cloud.google.com/genomics/overview).
--- a/google/genomics/v1/annotations.proto
+++ b/google/genomics/v1/annotations.proto
@ -0,0 +1,662 @@
+// Copyright 2016 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.genomics.v1;
+
+import "google/api/annotations.proto";
+import "google/protobuf/empty.proto";
+import "google/protobuf/field_mask.proto";
+import "google/protobuf/struct.proto";
+import "google/protobuf/wrappers.proto";
+import "google/rpc/status.proto";
+
+option cc_enable_arenas = true;
+option java_multiple_files = true;
+option java_outer_classname = "AnnotationsProto";
+option java_package = "com.google.genomics.v1";
+
+
+// Stores genomic reference annotations, including variant annotations, and
+// retrieves them by position.
+service AnnotationServiceV1 {
+  // Creates a new annotation set. The caller must have WRITE permission for
+  // the associated dataset.
+  //
+  // The following fields are required:
+  //
+  // * [datasetId][google.genomics.v1.AnnotationSet.dataset_id]
+  // * [referenceSetId][google.genomics.v1.AnnotationSet.reference_set_id]
+  //
+  // All other fields may be optionally specified, unless documented as being
+  // server-generated (for example, the `id` field).
+  rpc CreateAnnotationSet(CreateAnnotationSetRequest) returns (AnnotationSet) {
+    option (google.api.http) = {
+      post: "/v1/annotationsets"
+      body: "annotation_set"
+    };
+  }
+
+  // Gets an annotation set. The caller must have READ permission for the
+  // associated dataset.
+  rpc GetAnnotationSet(GetAnnotationSetRequest) returns (AnnotationSet) {
+    option (google.api.http) = {
+      get: "/v1/annotationsets/{annotation_set_id}"
+    };
+  }
+
+  // Updates an annotation set. The update must respect all mutability
+  // restrictions and other invariants described on the annotation set
+  // resource. The caller must have WRITE permission for the associated
+  // dataset.
+  rpc UpdateAnnotationSet(UpdateAnnotationSetRequest) returns (AnnotationSet) {
+    option (google.api.http) = {
+      put: "/v1/annotationsets/{annotation_set_id}"
+      body: "annotation_set"
+    };
+  }
+
+  // Deletes an annotation set. The caller must have WRITE permission for the
+  // associated annotation set.
+  rpc DeleteAnnotationSet(DeleteAnnotationSetRequest)
+      returns (google.protobuf.Empty) {
+    option (google.api.http) = {
+      delete: "/v1/annotationsets/{annotation_set_id}"
+    };
+  }
+
+  // Searches for annotation sets that match the given criteria. Annotation
+  // sets are returned in an unspecified order. This order is consistent, such
+  // that two queries for the same content (regardless of page size) yield
+  // annotation sets in the same order across their respective streams of
+  // paginated responses. The caller must have READ permission for the queried
+  // datasets.
+  rpc SearchAnnotationSets(SearchAnnotationSetsRequest)
+      returns (SearchAnnotationSetsResponse) {
+    option (google.api.http) = {
+      post: "/v1/annotationsets/search"
+      body: "*"
+    };
+  }
+
+  // Creates a new annotation. The caller must have WRITE permission for the
+  // associated annotation set.
+  //
+  // The following fields are required:
+  //
+  // * [annotationSetId][google.genomics.v1.Annotation.annotation_set_id]
+  // * [referenceName][google.genomics.v1.Annotation.reference_name] or
+  //   [referenceId][google.genomics.v1.Annotation.reference_id]
+  //
+  // ### Transcripts
+  //
+  // For annotations of type TRANSCRIPT, the following fields of
+  // [transcript][google.genomics.v1.Annotation.transcript] must be provided:
+  //
+  // * [exons.start][google.genomics.v1.Transcript.Exon.start]
+  // * [exons.end][google.genomics.v1.Transcript.Exon.end]
+  //
+  // All other fields may be optionally specified, unless documented as being
+  // server-generated (for example, the `id` field). The annotated range must
+  // be no longer than 100Mbp (mega base pairs). See the
+  // [Annotation resource][google.genomics.v1.Annotation]
+  // for additional restrictions on each field.
+  rpc CreateAnnotation(CreateAnnotationRequest) returns (Annotation) {
+    option (google.api.http) = {
+      post: "/v1/annotations"
+      body: "annotation"
+    };
+  }
+
+  // Creates one or more new annotations atomically. All annotations must
+  // belong to the same annotation set. The caller must have WRITE permission
+  // for this annotation set. For optimal performance, batch positionally
+  // adjacent annotations together.
+  //
+  // If the request has a systemic issue, such as an attempt to write to
+  // an inaccessible annotation set, the entire RPC will fail accordingly. For
+  // lesser data issues, when possible an error will be isolated to the
+  // corresponding batch entry in the response; the remaining well formed
+  // annotations will be created normally.
+  //
+  // For details on the requirements for each individual annotation resource,
+  // see
+  // [CreateAnnotation][google.genomics.v1.AnnotationServiceV1.CreateAnnotation].
+  rpc BatchCreateAnnotations(BatchCreateAnnotationsRequest)
+      returns (BatchCreateAnnotationsResponse) {
+    option (google.api.http) = {
+      post: "/v1/annotations:batchCreate"
+      body: "*"
+    };
+  }
+
+  // Gets an annotation. The caller must have READ permission for the
+  // associated annotation set.
+  rpc GetAnnotation(GetAnnotationRequest) returns (Annotation) {
+    option (google.api.http) = {
+      get: "/v1/annotations/{annotation_id}"
+    };
+  }
+
+  // Updates an annotation. The caller must have WRITE permission for the
+  // associated dataset.
+  rpc UpdateAnnotation(UpdateAnnotationRequest) returns (Annotation) {
+    option (google.api.http) = {
+      put: "/v1/annotations/{annotation_id}"
+      body: "annotation"
+    };
+  }
+
+  // Deletes an annotation. The caller must have WRITE permission for the
+  // associated annotation set.
+  rpc DeleteAnnotation(DeleteAnnotationRequest)
+      returns (google.protobuf.Empty) {
+    option (google.api.http) = {
+      delete: "/v1/annotations/{annotation_id}"
+    };
+  }
+
+  // Searches for annotations that match the given criteria. Results are
+  // ordered by genomic coordinate (by reference sequence, then position).
+  // Annotations with equivalent genomic coordinates are returned in an
+  // unspecified order. This order is consistent, such that two queries for the
+  // same content (regardless of page size) yield annotations in the same order
+  // across their respective streams of paginated responses. The caller must
+  // have READ permission for the queried annotation sets.
+  rpc SearchAnnotations(SearchAnnotationsRequest)
+      returns (SearchAnnotationsResponse) {
+    option (google.api.http) = {
+      post: "/v1/annotations/search"
+      body: "*"
+    };
+  }
+}
+
+// An annotation set is a logical grouping of annotations that share consistent
+// type information and provenance. Examples of annotation sets include 'all
+// genes from refseq', and 'all variant annotations from ClinVar'.
+message AnnotationSet {
+  // The server-generated annotation set ID, unique across all annotation sets.
+  string id = 1;
+
+  // The dataset to which this annotation set belongs. Required when creating
+  // an annotation set.
+  string dataset_id = 2;
+
+  // The ID of the reference set that defines the coordinate space for this
+  // set's annotations. Required when creating an annotation set.
+  string reference_set_id = 3;
+
+  // The display name for this annotation set.
+  string name = 4;
+
+  // The source URI describing the file from which this annotation set was
+  // generated, if any.
+  string source_uri = 5;
+
+  // The type of annotations contained within this set. If not specified at
+  // creation, it is set to `GENERIC`.
+  AnnotationType type = 6;
+
+  // A map of additional information about this annotation set. This must be of
+  // the form map<string, string[]> (string key mapping to a list of string
+  // values).
+  map<string, google.protobuf.ListValue> info = 17;
+}
+
+// An annotation describes a region of a reference genome. The value of an
+// annotation may be one of several canonical types, supplemented by arbitrary
+// info tags. An annotation is not inherently associated with a specific
+// sample or individual (though a client could choose to use annotations in
+// this way). Example canonical annotation types are `GENE` and
+// `VARIANT`.
+message Annotation {
+  // The server-generated annotation ID, unique across all annotations.
+  string id = 1;
+
+  // The annotation set to which this annotation belongs. Required when
+  // creating an annotation.
+  string annotation_set_id = 2;
+
+  // The display name of this annotation.
+  string name = 3;
+
+  // The ID of the Google Genomics reference associated with this range.
+  // When creating an annotation, either this field or `referenceName` is
+  // required.
+  string reference_id = 4;
+
+  // The display name corresponding to the reference specified by
+  // `referenceId`, for example `chr1`, `1`, or `chrX`. When creating an
+  // annotation, either this field or `referenceId` is required.
+  string reference_name = 5;
+
+  // The start position of the range on the reference, 0-based inclusive.
+  int64 start = 6;
+
+  // The end position of the range on the reference, 0-based exclusive. The
+  // annotated range must be no longer than 100Mbp (mega base pairs).
+  int64 end = 7;
+
+  // Whether this range refers to the reverse strand, as opposed to the forward
+  // strand. Note that regardless of this field, the start/end position of the
+  // range always refer to the forward strand.
+  bool reverse_strand = 8;
+
+  // The data type for this annotation. Must match the containing annotation
+  // set's type.
+  AnnotationType type = 9;
+
+  // The type-specific value of this annotation. At most one of the members
+  // can be set; which one applies depends on `type`.
+  oneof value {
+    // A variant annotation, which describes the effect of a variant on the
+    // genome, the coding sequence, and/or higher level consequences at the
+    // organism level e.g. pathogenicity. This field is only set for annotations
+    // of type `VARIANT`.
+    VariantAnnotation variant = 10;
+
+    // A transcript value represents the assertion that a particular region of
+    // the reference genome may be transcribed as RNA. An alternative splicing
+    // pattern would be represented as a separate transcript object. This field
+    // is only set for annotations of type `TRANSCRIPT`.
+    Transcript transcript = 11;
+  }
+
+  // A map of additional information about this annotation. This must be of the
+  // form map<string, string[]> (string key mapping to a list of string
+  // values).
+  map<string, google.protobuf.ListValue> info = 12;
+}
+
+// A variant annotation describes the effect of a variant on the genome, the
+// coding sequence, and/or higher level consequences at the organism level
+// e.g. pathogenicity.
+message VariantAnnotation {
+  // A condition describes the way a variant influences human health.
+  message ClinicalCondition {
+    // A set of names for the condition.
+    repeated string names = 1;
+
+    // The set of external IDs for this condition.
+    repeated ExternalId external_ids = 2;
+
+    // The MedGen concept id associated with this condition.
+    // Search for these IDs at http://www.ncbi.nlm.nih.gov/medgen/
+    string concept_id = 3;
+
+    // The OMIM id for this condition.
+    // Search for these IDs at http://omim.org/
+    string omim_id = 4;
+  }
+
+  // The type of a variant, adapted from ClinVar's list of variant types.
+  enum Type {
+    TYPE_UNSPECIFIED = 0;
+
+    // `TYPE_OTHER` should be used when no other Type will suffice.
+    // Further explanation of the variant type may be included in the
+    // [info][google.genomics.v1.Annotation.info] field.
+    TYPE_OTHER = 1;
+
+    // `INSERTION` indicates an insertion.
+    INSERTION = 2;
+
+    // `DELETION` indicates a deletion.
+    DELETION = 3;
+
+    // `SUBSTITUTION` indicates a block substitution of
+    // two or more nucleotides.
+    SUBSTITUTION = 4;
+
+    // `SNP` indicates a single nucleotide polymorphism.
+    SNP = 5;
+
+    // `STRUCTURAL` indicates a large structural variant,
+    // including chromosomal fusions, inversions, etc.
+    STRUCTURAL = 6;
+
+    // `CNV` indicates a variation in copy number.
+    CNV = 7;
+  }
+
+  // The effect of a variant on the coding sequence.
+  enum Effect {
+    EFFECT_UNSPECIFIED = 0;
+
+    // `EFFECT_OTHER` should be used when no other Effect
+    // will suffice.
+    EFFECT_OTHER = 1;
+
+    // `FRAMESHIFT` indicates a mutation in which the insertion or
+    // deletion of nucleotides resulted in a frameshift change.
+    FRAMESHIFT = 2;
+
+    // `FRAME_PRESERVING_INDEL` indicates a mutation in which a
+    // multiple of three nucleotides has been inserted or deleted, resulting
+    // in no change to the reading frame of the coding sequence.
+    FRAME_PRESERVING_INDEL = 3;
+
+    // `SYNONYMOUS_SNP` indicates a single nucleotide polymorphism
+    // mutation that results in no amino acid change.
+    SYNONYMOUS_SNP = 4;
+
+    // `NONSYNONYMOUS_SNP` indicates a single nucleotide
+    // polymorphism mutation that results in an amino acid change.
+    NONSYNONYMOUS_SNP = 5;
+
+    // `STOP_GAIN` indicates a mutation that leads to the creation
+    // of a stop codon at the variant site. Frameshift mutations creating
+    // downstream stop codons do not count as `STOP_GAIN`.
+    STOP_GAIN = 6;
+
+    // `STOP_LOSS` indicates a mutation that eliminates a
+    // stop codon at the variant site.
+    STOP_LOSS = 7;
+
+    // `SPLICE_SITE_DISRUPTION` indicates that this variant is
+    // found in a splice site for the associated transcript, and alters the
+    // normal splicing pattern.
+    SPLICE_SITE_DISRUPTION = 8;
+  }
+
+  // The clinical significance of a variant, adapted from the ClinVar
+  // controlled vocabulary for clinical significance described at:
+  // http://www.ncbi.nlm.nih.gov/clinvar/docs/clinsig/
+  enum ClinicalSignificance {
+    CLINICAL_SIGNIFICANCE_UNSPECIFIED = 0;
+
+    // `CLINICAL_SIGNIFICANCE_OTHER` should be used when no other clinical
+    // significance value will suffice.
+    CLINICAL_SIGNIFICANCE_OTHER = 1;
+
+    UNCERTAIN = 2;
+
+    BENIGN = 3;
+
+    LIKELY_BENIGN = 4;
+
+    LIKELY_PATHOGENIC = 5;
+
+    PATHOGENIC = 6;
+
+    DRUG_RESPONSE = 7;
+
+    HISTOCOMPATIBILITY = 8;
+
+    CONFERS_SENSITIVITY = 9;
+
+    RISK_FACTOR = 10;
+
+    ASSOCIATION = 11;
+
+    PROTECTIVE = 12;
+
+    // `MULTIPLE_REPORTED` should be used when multiple clinical
+    // significances are reported for a variant. The original clinical
+    // significance values may be provided in the `info` field.
+    MULTIPLE_REPORTED = 13;
+  }
+
+  // Type has been adapted from ClinVar's list of variant types.
+  Type type = 1;
+
+  // Effect of the variant on the coding sequence.
+  Effect effect = 2;
+
+  // The alternate allele for this variant. If multiple alternate alleles
+  // exist at this location, create a separate variant for each one, as they
+  // may represent distinct conditions.
+  string alternate_bases = 3;
+
+  // Google annotation ID of the gene affected by this variant. This should
+  // be provided when the variant is created.
+  string gene_id = 4;
+
+  // Google annotation IDs of the transcripts affected by this variant. These
+  // should be provided when the variant is created.
+  repeated string transcript_ids = 5;
+
+  // The set of conditions associated with this variant.
+  // A condition describes the way a variant influences human health.
+  repeated ClinicalCondition conditions = 6;
+
+  // Describes the clinical significance of a variant.
+  // It is adapted from the ClinVar controlled vocabulary for clinical
+  // significance described at:
+  // http://www.ncbi.nlm.nih.gov/clinvar/docs/clinsig/
+  ClinicalSignificance clinical_significance = 7;
+}
+
+// A transcript represents the assertion that a particular region of the
+// reference genome may be transcribed as RNA.
+message Transcript {
+  // One exon of a transcript, given as a range on the annotation's reference
+  // sequence plus an optional reading frame.
+  message Exon {
+    // The start position of the exon on this annotation's reference sequence,
+    // 0-based inclusive. Note that this is relative to the reference start, and
+    // *not* the containing annotation start.
+    int64 start = 1;
+
+    // The end position of the exon on this annotation's reference sequence,
+    // 0-based exclusive. Note that this is relative to the reference start, and
+    // *not* the containing annotation start.
+    int64 end = 2;
+
+    // The frame of this exon. Contains a value of 0, 1, or 2, which indicates
+    // the offset of the first coding base of the exon within the reading frame
+    // of the coding DNA sequence, if any. This field is dependent on the
+    // strandedness of this annotation (see
+    // [Annotation.reverse_strand][google.genomics.v1.Annotation.reverse_strand]).
+    // For forward stranded annotations, this offset is relative to the
+    // [exon.start][google.genomics.v1.Transcript.Exon.start]. For reverse
+    // strand annotations, this offset is relative to the
+    // [exon.end][google.genomics.v1.Transcript.Exon.end] `- 1`.
+    //
+    // Unset if this exon does not intersect the coding sequence. Upon creation
+    // of a transcript, the frame must be populated for all or none of the
+    // coding exons.
+    google.protobuf.Int32Value frame = 3;
+  }
+
+  // The range of the coding sequence of a transcript on the annotation's
+  // reference sequence.
+  message CodingSequence {
+    // The start of the coding sequence on this annotation's reference sequence,
+    // 0-based inclusive. Note that this position is relative to the reference
+    // start, and *not* the containing annotation start.
+    int64 start = 1;
+
+    // The end of the coding sequence on this annotation's reference sequence,
+    // 0-based exclusive. Note that this position is relative to the reference
+    // start, and *not* the containing annotation start.
+    int64 end = 2;
+  }
+
+  // The annotation ID of the gene from which this transcript is transcribed.
+  string gene_id = 1;
+
+  // The <a href="http://en.wikipedia.org/wiki/Exon">exons</a> that compose
+  // this transcript. This field should be unset for genomes where transcript
+  // splicing does not occur, for example prokaryotes.
+  //
+  // Introns are regions of the transcript that are not included in the
+  // spliced RNA product. Though not explicitly modeled here, intron ranges can
+  // be deduced; all regions of this transcript that are not exons are introns.
+  //
+  // Exonic sequences do not necessarily code for a translational product
+  // (amino acids). Only the regions of exons bounded by the
+  // [codingSequence][google.genomics.v1.Transcript.coding_sequence] correspond
+  // to coding DNA sequence.
+  //
+  // Exons are ordered by start position and may not overlap.
+  repeated Exon exons = 2;
+
+  // The range of the coding sequence for this transcript, if any. To determine
+  // the exact ranges of coding sequence, intersect this range with those of the
+  // [exons][google.genomics.v1.Transcript.exons], if any. If there are any
+  // [exons][google.genomics.v1.Transcript.exons], the
+  // [codingSequence][google.genomics.v1.Transcript.coding_sequence] must start
+  // and end within them.
+  //
+  // Note that in some cases, the reference genome will not exactly match the
+  // observed mRNA transcript e.g. due to variance in the source genome from
+  // reference. In these cases,
+  // [exon.frame][google.genomics.v1.Transcript.Exon.frame] will not necessarily
+  // match the expected reference reading frame and coding exon reference bases
+  // cannot necessarily be concatenated to produce the original transcript mRNA.
+  CodingSequence coding_sequence = 3;
+}
+
+// An identifier from an external data source: the name of the source together
+// with the id that source uses.
+message ExternalId {
+  // The name of the source of this data.
+  string source_name = 1;
+
+  // The id used by the source of this data.
+  string id = 2;
+}
+
+// Request message for
+// [CreateAnnotationSet][google.genomics.v1.AnnotationServiceV1.CreateAnnotationSet].
+message CreateAnnotationSetRequest {
+  // The annotation set to create.
+  AnnotationSet annotation_set = 1;
+}
+
+// Request message for
+// [GetAnnotationSet][google.genomics.v1.AnnotationServiceV1.GetAnnotationSet].
+message GetAnnotationSetRequest {
+  // The ID of the annotation set to be retrieved.
+  string annotation_set_id = 1;
+}
+
+// Request message for
+// [UpdateAnnotationSet][google.genomics.v1.AnnotationServiceV1.UpdateAnnotationSet].
+message UpdateAnnotationSetRequest {
+  // The ID of the annotation set to be updated.
+  string annotation_set_id = 1;
+
+  // The new annotation set.
+  AnnotationSet annotation_set = 2;
+
+  // An optional mask specifying which fields to update. Mutable fields are
+  // [name][google.genomics.v1.AnnotationSet.name],
+  // [sourceUri][google.genomics.v1.AnnotationSet.source_uri], and
+  // [info][google.genomics.v1.AnnotationSet.info]. If unspecified, all
+  // mutable fields will be updated.
+  google.protobuf.FieldMask update_mask = 3;
+}
+
+// Request message for
+// [DeleteAnnotationSet][google.genomics.v1.AnnotationServiceV1.DeleteAnnotationSet].
+message DeleteAnnotationSetRequest {
+  // The ID of the annotation set to be deleted.
+  string annotation_set_id = 1;
+}
+
+// Request message for
+// [SearchAnnotationSets][google.genomics.v1.AnnotationServiceV1.SearchAnnotationSets].
+message SearchAnnotationSetsRequest {
+  // Required. The dataset IDs to search within. Caller must have `READ` access
+  // to these datasets.
+  repeated string dataset_ids = 1;
+
+  // If specified, only annotation sets associated with the given reference set
+  // are returned.
+  string reference_set_id = 2;
+
+  // Only return annotation sets for which a substring of the name matches this
+  // string (case insensitive).
+  string name = 3;
+
+  // If specified, only annotation sets that have any of these types are
+  // returned.
+  repeated AnnotationType types = 4;
+
+  // The continuation token, which is used to page through large result sets.
+  // To get the next page of results, set this parameter to the value of
+  // `nextPageToken` from the previous response.
+  string page_token = 5;
+
+  // The maximum number of results to return in a single page. If unspecified,
+  // defaults to 128. The maximum value is 1024.
+  int32 page_size = 6;
+}
+
+// Response message for
+// [SearchAnnotationSets][google.genomics.v1.AnnotationServiceV1.SearchAnnotationSets].
+message SearchAnnotationSetsResponse {
+  // The matching annotation sets.
+  repeated AnnotationSet annotation_sets = 1;
+
+  // The continuation token, which is used to page through large result sets.
+  // Provide this value in a subsequent request to return the next page of
+  // results. This field will be empty if there aren't any additional results.
+  string next_page_token = 2;
+}
+
+// Request message for
+// [CreateAnnotation][google.genomics.v1.AnnotationServiceV1.CreateAnnotation].
+message CreateAnnotationRequest {
+  // The annotation to be created.
+  Annotation annotation = 1;
+}
+
+// Request message for
+// [BatchCreateAnnotations][google.genomics.v1.AnnotationServiceV1.BatchCreateAnnotations].
+message BatchCreateAnnotationsRequest {
+  // The annotations to be created. At most 4096 can be specified in a single
+  // request. All annotations must belong to the same annotation set.
+  repeated Annotation annotations = 1;
+}
+
+// Response message for
+// [BatchCreateAnnotations][google.genomics.v1.AnnotationServiceV1.BatchCreateAnnotations].
+message BatchCreateAnnotationsResponse {
+  // The outcome of creating a single annotation from the batch.
+  message Entry {
+    // The creation status.
+    google.rpc.Status status = 1;
+
+    // The created annotation, if creation was successful.
+    Annotation annotation = 2;
+  }
+
+  // The resulting per-annotation entries, ordered consistently with the
+  // original request.
+  repeated Entry entries = 1;
+}
+
+// Request message for
+// [GetAnnotation][google.genomics.v1.AnnotationServiceV1.GetAnnotation].
+message GetAnnotationRequest {
+  // The ID of the annotation to be retrieved.
+  string annotation_id = 1;
+}
+
+// Request message for
+// [UpdateAnnotation][google.genomics.v1.AnnotationServiceV1.UpdateAnnotation].
+message UpdateAnnotationRequest {
+  // The ID of the annotation to be updated.
+  string annotation_id = 1;
+
+  // The new annotation.
+  Annotation annotation = 2;
+
+  // An optional mask specifying which fields to update. Mutable fields are
+  // [name][google.genomics.v1.Annotation.name],
+  // [variant][google.genomics.v1.Annotation.variant],
+  // [transcript][google.genomics.v1.Annotation.transcript], and
+  // [info][google.genomics.v1.Annotation.info]. If unspecified, all mutable
+  // fields will be updated.
+  google.protobuf.FieldMask update_mask = 3;
+}
+
+// Request message for
+// [DeleteAnnotation][google.genomics.v1.AnnotationServiceV1.DeleteAnnotation].
+message DeleteAnnotationRequest {
+  // The ID of the annotation to be deleted.
+  string annotation_id = 1;
+}
+
+// Request message for
+// [SearchAnnotations][google.genomics.v1.AnnotationServiceV1.SearchAnnotations].
+message SearchAnnotationsRequest {
+  // Required. The annotation sets to search within. The caller must have
+  // `READ` access to these annotation sets.
+  // All queried annotation sets must have the same type.
+  repeated string annotation_set_ids = 1;
+
+  // Required. `reference_id` or `reference_name` must be set.
+  oneof reference {
+    // The ID of the reference to query.
+    string reference_id = 2;
+
+    // The name of the reference to query, within the reference set associated
+    // with this query.
+    string reference_name = 3;
+  }
+
+  // The start position of the range on the reference, 0-based inclusive. If
+  // specified,
+  // [referenceId][google.genomics.v1.SearchAnnotationsRequest.reference_id] or
+  // [referenceName][google.genomics.v1.SearchAnnotationsRequest.reference_name]
+  // must be specified. Defaults to 0.
+  int64 start = 4;
+
+  // The end position of the range on the reference, 0-based exclusive. If
+  // specified,
+  // [referenceId][google.genomics.v1.SearchAnnotationsRequest.reference_id] or
+  // [referenceName][google.genomics.v1.SearchAnnotationsRequest.reference_name]
+  // must be specified. Defaults to the length of the reference.
+  int64 end = 5;
+
+  // The continuation token, which is used to page through large result sets.
+  // To get the next page of results, set this parameter to the value of
+  // `nextPageToken` from the previous response.
+  string page_token = 6;
+
+  // The maximum number of results to return in a single page. If unspecified,
+  // defaults to 256. The maximum value is 2048.
+  int32 page_size = 7;
+}
+
+// Response message for
+// [SearchAnnotations][google.genomics.v1.AnnotationServiceV1.SearchAnnotations].
+message SearchAnnotationsResponse {
+  // The matching annotations.
+  repeated Annotation annotations = 1;
+
+  // The continuation token, which is used to page through large result sets.
+  // Provide this value in a subsequent request to return the next page of
+  // results. This field will be empty if there aren't any additional results.
+  string next_page_token = 2;
+}
+
+// The type of data held by an annotation or contained in an annotation set.
+//
+// When an [Annotation][google.genomics.v1.Annotation] or
+// [AnnotationSet][google.genomics.v1.AnnotationSet] is created, if `type` is
+// not specified it will be set to `GENERIC`.
+enum AnnotationType {
+  ANNOTATION_TYPE_UNSPECIFIED = 0;
+
+  // A `GENERIC` annotation type should be used when no other annotation
+  // type will suffice. This represents an untyped annotation of the reference
+  // genome.
+  GENERIC = 1;
+
+  // A `VARIANT` annotation type. The
+  // [variant][google.genomics.v1.Annotation.variant] field is only set for
+  // annotations of this type.
+  VARIANT = 2;
+
+  // A `GENE` annotation type represents the existence of a gene at the
+  // associated reference coordinates. The start coordinate is typically the
+  // gene's transcription start site and the end is typically the end of the
+  // gene's last exon.
+  GENE = 3;
+
+  // A `TRANSCRIPT` annotation type represents the assertion that a
+  // particular region of the reference genome may be transcribed as RNA. The
+  // [transcript][google.genomics.v1.Annotation.transcript] field is only set
+  // for annotations of this type.
+  TRANSCRIPT = 4;
+}
--- a/google/genomics/v1/cigar.proto
+++ b/google/genomics/v1/cigar.proto
@ -0,0 +1,98 @@
+// Copyright 2016 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.genomics.v1;
+
+import "google/api/annotations.proto";
+
+option cc_enable_arenas = true;
+option java_multiple_files = true;
+option java_outer_classname = "CigarProto";
+option java_package = "com.google.genomics.v1";
+
+
+// A single CIGAR operation.
+message CigarUnit {
+  // Describes the different types of CIGAR alignment operations that exist.
+  // Used wherever CIGAR alignments are used.
+  enum Operation {
+    OPERATION_UNSPECIFIED = 0;
+
+    // An alignment match indicates that a sequence can be aligned to the
+    // reference without evidence of an INDEL. Unlike the
+    // `SEQUENCE_MATCH` and `SEQUENCE_MISMATCH` operators,
+    // the `ALIGNMENT_MATCH` operator does not indicate whether the
+    // reference and read sequences are an exact match. This operator is
+    // equivalent to SAM's `M`.
+    ALIGNMENT_MATCH = 1;
+
+    // The insert operator indicates that the read contains evidence of bases
+    // being inserted into the reference. This operator is equivalent to SAM's
+    // `I`.
+    INSERT = 2;
+
+    // The delete operator indicates that the read contains evidence of bases
+    // being deleted from the reference. This operator is equivalent to SAM's
+    // `D`.
+    DELETE = 3;
+
+    // The skip operator indicates that this read skips a long segment of the
+    // reference, but the bases have not been deleted. This operator is commonly
+    // used when working with RNA-seq data, where reads may skip long segments
+    // of the reference between exons. This operator is equivalent to SAM's
+    // `N`.
+    SKIP = 4;
+
+    // The soft clip operator indicates that bases at the start/end of a read
+    // have not been considered during alignment. This may occur if the majority
+    // of a read maps, except for low quality bases at the start/end of a read.
+    // This operator is equivalent to SAM's `S`. Bases that are soft
+    // clipped will still be stored in the read.
+    CLIP_SOFT = 5;
+
+    // The hard clip operator indicates that bases at the start/end of a read
+    // have been omitted from this alignment. This may occur if this linear
+    // alignment is part of a chimeric alignment, or if the read has been
+    // trimmed (for example, during error correction or to trim poly-A tails for
+    // RNA-seq). This operator is equivalent to SAM's `H`.
+    CLIP_HARD = 6;
+
+    // The pad operator indicates that there is padding in an alignment. This
+    // operator is equivalent to SAM's `P`.
+    PAD = 7;
+
+    // This operator indicates that this portion of the aligned sequence exactly
+    // matches the reference. This operator is equivalent to SAM's `=`.
+    SEQUENCE_MATCH = 8;
+
+    // This operator indicates that this portion of the aligned sequence is an
+    // alignment match to the reference, but a sequence mismatch. This can
+    // indicate a SNP or a read error. This operator is equivalent to SAM's
+    // `X`.
+    SEQUENCE_MISMATCH = 9;
+  }
+
+  // The type of CIGAR alignment operation this unit represents.
+  Operation operation = 1;
+
+  // The number of genomic bases that the operation runs for. Required.
+  int64 operation_length = 2;
+
+  // `referenceSequence` is only used at mismatches
+  // (`SEQUENCE_MISMATCH`) and deletions (`DELETE`).
+  // Filling this field replaces SAM's MD tag. If the relevant information is
+  // not available, this field is unset.
+  string reference_sequence = 3;
+}
--- a/google/genomics/v1/datasets.proto
+++ b/google/genomics/v1/datasets.proto
@ -0,0 +1,211 @@
+// Copyright 2016 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.genomics.v1;
+
+import "google/api/annotations.proto";
+import "google/iam/v1/iam_policy.proto";
+import "google/iam/v1/policy.proto";
+import "google/protobuf/empty.proto";
+import "google/protobuf/field_mask.proto";
+import "google/protobuf/timestamp.proto";
+
+option cc_enable_arenas = true;
+option java_multiple_files = true;
+option java_outer_classname = "DatasetsProto";
+option java_package = "com.google.genomics.v1";
+
+
+// Manages datasets. A dataset is a collection of genomic data.
+service DatasetServiceV1 {
+  // Lists the datasets within a project.
+  //
+  // Datasets and the other genomics resources are defined in
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc ListDatasets(ListDatasetsRequest) returns (ListDatasetsResponse) {
+    option (google.api.http) = {
+      get: "/v1/datasets"
+    };
+  }
+
+  // Creates a new dataset.
+  //
+  // Datasets and the other genomics resources are defined in
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc CreateDataset(CreateDatasetRequest) returns (Dataset) {
+    option (google.api.http) = {
+      post: "/v1/datasets"
+      body: "dataset"
+    };
+  }
+
+  // Gets a dataset by ID.
+  //
+  // Datasets and the other genomics resources are defined in
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc GetDataset(GetDatasetRequest) returns (Dataset) {
+    option (google.api.http) = {
+      get: "/v1/datasets/{dataset_id}"
+    };
+  }
+
+  // Updates a dataset. This method supports patch semantics.
+  //
+  // Datasets and the other genomics resources are defined in
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc UpdateDataset(UpdateDatasetRequest) returns (Dataset) {
+    option (google.api.http) = {
+      patch: "/v1/datasets/{dataset_id}"
+      body: "dataset"
+    };
+  }
+
+  // Deletes a dataset and all of its contents (all read group sets,
+  // reference sets, variant sets, call sets, annotation sets, etc.).
+  // The deletion can be reversed, for up to one week after it occurred, via
+  // the
+  // [datasets.undelete][google.genomics.v1.DatasetServiceV1.UndeleteDataset]
+  // operation.
+  //
+  // Datasets and the other genomics resources are defined in
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc DeleteDataset(DeleteDatasetRequest) returns (google.protobuf.Empty) {
+    option (google.api.http) = {
+      delete: "/v1/datasets/{dataset_id}"
+    };
+  }
+
+  // Undeletes a dataset by restoring a dataset which was deleted via this API.
+  // This operation is only possible for a week after the deletion occurred.
+  //
+  // Datasets and the other genomics resources are defined in
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc UndeleteDataset(UndeleteDatasetRequest) returns (Dataset) {
+    option (google.api.http) = {
+      post: "/v1/datasets/{dataset_id}:undelete"
+      body: "*"
+    };
+  }
+
+  // Sets the access control policy on the specified dataset. Replaces any
+  // existing policy.
+  //
+  // See <a href="/iam/docs/managing-policies#setting_a_policy">Setting a
+  // Policy</a> for more information.
+  //
+  // Datasets and the other genomics resources are defined in
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc SetIamPolicy(google.iam.v1.SetIamPolicyRequest)
+      returns (google.iam.v1.Policy) {
+    option (google.api.http) = {
+      post: "/v1/{resource=datasets/*}:setIamPolicy"
+      body: "*"
+    };
+  }
+
+  // Gets the access control policy for the dataset. This is empty if the
+  // policy or resource does not exist.
+  //
+  // See <a href="/iam/docs/managing-policies#getting_a_policy">Getting a
+  // Policy</a> for more information.
+  //
+  // Datasets and the other genomics resources are defined in
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc GetIamPolicy(google.iam.v1.GetIamPolicyRequest)
+      returns (google.iam.v1.Policy) {
+    option (google.api.http) = {
+      post: "/v1/{resource=datasets/*}:getIamPolicy"
+      body: "*"
+    };
+  }
+
+  // Returns permissions that a caller has on the specified resource.
+  //
+  // See <a href="/iam/docs/managing-policies#testing_permissions">Testing
+  // Permissions</a> for more information.
+  //
+  // Datasets and the other genomics resources are defined in
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc TestIamPermissions(google.iam.v1.TestIamPermissionsRequest)
+      returns (google.iam.v1.TestIamPermissionsResponse) {
+    option (google.api.http) = {
+      post: "/v1/{resource=datasets/*}:testIamPermissions"
+      body: "*"
+    };
+  }
+}
+
+// A Dataset is a collection of genomic data.
+//
+// For more genomics resource definitions, see [Fundamentals of Google
+// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+message Dataset {
+  // The server-generated dataset ID, unique across all datasets.
+  string id = 1;
+
+  // The Google Developers Console project ID that this dataset belongs to.
+  string project_id = 2;
+
+  // The dataset name.
+  string name = 3;
+
+  // The time at which this dataset was created.
+  google.protobuf.Timestamp create_time = 4;
+}
+
+// The dataset list request, sent to DatasetServiceV1.ListDatasets.
+message ListDatasetsRequest {
+  // Required. The project to list datasets for.
+  string project_id = 1;
+
+  // The maximum number of results to return in a single page. If unspecified,
+  // defaults to 50. The maximum value is 1024.
+  int32 page_size = 2;
+
+  // The continuation token, which is used to page through large result sets.
+  // To get the next page of results, set this parameter to the value of
+  // `nextPageToken` from the previous response.
+  string page_token = 3;
+}
+
+// The dataset list response, returned by DatasetServiceV1.ListDatasets.
+message ListDatasetsResponse {
+  // The list of matching Datasets.
+  repeated Dataset datasets = 1;
+
+  // The continuation token, which is used to page through large result sets.
+  // Provide this value in a subsequent request to return the next page of
+  // results. This field will be empty if there aren't any additional results.
+  string next_page_token = 2;
+}
+
+// The dataset create request, sent to DatasetServiceV1.CreateDataset.
+message CreateDatasetRequest {
+  // The dataset to be created. Must contain projectId and name.
+  Dataset dataset = 1;
+}
+
+// The dataset update request, sent to DatasetServiceV1.UpdateDataset. In the
+// HTTP mapping, `dataset_id` comes from the URL path and `dataset` is the
+// request body.
+message UpdateDatasetRequest {
+  // The ID of the dataset to be updated.
+  string dataset_id = 1;
+
+  // The new dataset data.
+  Dataset dataset = 2;
+
+  // An optional mask specifying which fields to update. At this time, the only
+  // mutable field is [name][google.genomics.v1.Dataset.name]. The only
+  // acceptable value is "name". If unspecified, all mutable fields will be
+  // updated.
+  google.protobuf.FieldMask update_mask = 3;
+}
+
+// The dataset delete request, sent to DatasetServiceV1.DeleteDataset.
+message DeleteDatasetRequest {
+  // The ID of the dataset to be deleted.
+  string dataset_id = 1;
+}
+
+// The dataset undelete request, sent to DatasetServiceV1.UndeleteDataset.
+message UndeleteDatasetRequest {
+  // The ID of the dataset to be undeleted.
+  string dataset_id = 1;
+}
+
+// The dataset get request, sent to DatasetServiceV1.GetDataset.
+message GetDatasetRequest {
+  // The ID of the dataset.
+  string dataset_id = 1;
+}
--- a/google/genomics/v1/operations.proto
+++ b/google/genomics/v1/operations.proto
@ -0,0 +1,58 @@
+// Copyright 2016 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.genomics.v1;
+
+import "google/api/annotations.proto";
+import "google/protobuf/any.proto";
+import "google/protobuf/timestamp.proto";
+
+option cc_enable_arenas = true;
+option java_multiple_files = true;
+option java_outer_classname = "OperationsProto";
+option java_package = "com.google.genomics.v1";
+
+
+// Metadata describing an [Operation][google.longrunning.Operation].
+message OperationMetadata {
+  // NOTE(review): field numbers 3 and 7 are not assigned in this message.
+  // Confirm whether they are intentionally held back before using them for
+  // new fields.
+
+  // The Google Cloud Project in which the job is scoped.
+  string project_id = 1;
+
+  // The time at which the job was submitted to the Genomics service.
+  google.protobuf.Timestamp create_time = 2;
+
+  // The time at which the job stopped running.
+  google.protobuf.Timestamp end_time = 4;
+
+  // The original request that started the operation. Note that this will be in
+  // the current version of the API. If the operation was started with the
+  // v1beta2 API and a GetOperation is performed on the v1 API, a v1 request
+  // will be returned.
+  google.protobuf.Any request = 5;
+
+  // Optional event messages that were generated during the job's execution.
+  // This also contains any warnings that were generated during import
+  // or export.
+  repeated OperationEvent events = 6;
+
+  // Runtime metadata on this Operation.
+  google.protobuf.Any runtime_metadata = 8;
+}
+
+// An event that occurred during an [Operation][google.longrunning.Operation].
+message OperationEvent {
+  // NOTE(review): the only field uses number 3; numbers 1 and 2 are not
+  // assigned here. Confirm whether they are intentionally held back.
+
+  // Required description of event.
+  string description = 3;
+}
--- a/google/genomics/v1/position.proto
+++ b/google/genomics/v1/position.proto
@ -0,0 +1,41 @@
+// Copyright 2016 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.genomics.v1;
+
+import "google/api/annotations.proto";
+
+option cc_enable_arenas = true;
+option java_multiple_files = true;
+option java_outer_classname = "PositionProto";
+option java_package = "com.google.genomics.v1";
+
+
+// An abstraction for referring to a genomic position, in relation to some
+// already known reference. For now, represents a genomic position as a
+// reference name, a base number on that reference (0-based), and a
+// determination of forward or reverse strand.
+message Position {
+  // The name of the reference in whatever reference set is being used.
+  string reference_name = 1;
+
+  // The 0-based offset from the start of the forward strand for that reference.
+  int64 position = 2;
+
+  // Whether this position is on the reverse strand, as opposed to the forward
+  // strand. As a proto3 bool, an unset value is indistinguishable from
+  // `false`, i.e. the forward strand.
+  bool reverse_strand = 3;
+}
--- a/google/genomics/v1/range.proto
+++ b/google/genomics/v1/range.proto
@ -0,0 +1,38 @@
+// Copyright 2016 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.genomics.v1;
+
+import "google/api/annotations.proto";
+
+option cc_enable_arenas = true;
+option java_multiple_files = true;
+option java_outer_classname = "RangeProto";
+option java_package = "com.google.genomics.v1";
+
+
+// A 0-based half-open genomic coordinate range for search requests, i.e. the
+// interval `[start, end)` on the named reference.
+message Range {
+  // The reference sequence name, for example `chr1`,
+  // `1`, or `chrX`.
+  string reference_name = 1;
+
+  // The start position of the range on the reference, 0-based inclusive.
+  int64 start = 2;
+
+  // The end position of the range on the reference, 0-based exclusive.
+  int64 end = 3;
+}
--- a/google/genomics/v1/readalignment.proto
+++ b/google/genomics/v1/readalignment.proto
@ -0,0 +1,220 @@
+// Copyright 2016 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.genomics.v1;
+
+import "google/api/annotations.proto";
+import "google/genomics/v1/cigar.proto";
+import "google/genomics/v1/position.proto";
+import "google/protobuf/struct.proto";
+
+option cc_enable_arenas = true;
+option java_multiple_files = true;
+option java_outer_classname = "ReadAlignmentProto";
+option java_package = "com.google.genomics.v1";
+
+
+// A linear alignment can be represented by one CIGAR string. Describes the
+// mapped position and local alignment of the read to the reference.
+message LinearAlignment {
+  // The position of this alignment.
+  Position position = 1;
+
+  // The mapping quality of this alignment. Represents how likely
+  // the read maps to this position as opposed to other locations.
+  //
+  // Specifically, this is -10 log10 Pr(mapping position is wrong), rounded to
+  // the nearest integer.
+  int32 mapping_quality = 2;
+
+  // Represents the local alignment of this sequence (alignment matches,
+  // indels, etc.) against the reference.
+  repeated CigarUnit cigar = 3;
+}
+
+// A read alignment describes a linear alignment of a string of DNA to a
+// [reference sequence][google.genomics.v1.Reference], in addition to metadata
+// about the fragment (the molecule of DNA sequenced) and the read (the bases
+// which were read by the sequencer). A read is equivalent to a line in a SAM
+// file. A read belongs to exactly one read group and exactly one
+// [read group set][google.genomics.v1.ReadGroupSet].
+//
+// For more genomics resource definitions, see [Fundamentals of Google
+// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+//
+// ### Reverse-stranded reads
+//
+// Mapped reads (reads having a non-null `alignment`) can be aligned to either
+// the forward or the reverse strand of their associated reference. Strandedness
+// of a mapped read is encoded by `alignment.position.reverseStrand`.
+//
+// If we consider the reference to be a forward-stranded coordinate space of
+// `[0, reference.length)` with `0` as the left-most position and
+// `reference.length` as the right-most position, reads are always aligned left
+// to right. That is, `alignment.position.position` always refers to the
+// left-most reference coordinate and `alignment.cigar` describes the alignment
+// of this read to the reference from left to right. All per-base fields such as
+// `alignedSequence` and `alignedQuality` share this same left-to-right
+// orientation; this is true of reads which are aligned to either strand. For
+// reverse-stranded reads, this means that `alignedSequence` is the reverse
+// complement of the bases that were originally reported by the sequencing
+// machine.
+//
+// ### Generating a reference-aligned sequence string
+//
+// When interacting with mapped reads, it's often useful to produce a string
+// representing the local alignment of the read to reference. The following
+// pseudocode demonstrates one way of doing this:
+//
+//     out = ""
+//     offset = 0
+//     for c in read.alignment.cigar {
+//       switch c.operation {
+//       case "ALIGNMENT_MATCH", "SEQUENCE_MATCH", "SEQUENCE_MISMATCH":
+//         out += read.alignedSequence[offset:offset+c.operationLength]
+//         offset += c.operationLength
+//         break
+//       case "CLIP_SOFT", "INSERT":
+//         offset += c.operationLength
+//         break
+//       case "PAD":
+//         out += repeat("*", c.operationLength)
+//         break
+//       case "DELETE":
+//         out += repeat("-", c.operationLength)
+//         break
+//       case "SKIP":
+//         out += repeat(" ", c.operationLength)
+//         break
+//       case "CLIP_HARD":
+//         break
+//       }
+//     }
+//     return out
+//
+// ### Converting to SAM's CIGAR string
+//
+// The following pseudocode generates a SAM CIGAR string from the
+// `cigar` field. Note that this is a lossy conversion
+// (`cigar.referenceSequence` is lost).
+//
+//     cigarMap = {
+//       "ALIGNMENT_MATCH": "M",
+//       "INSERT": "I",
+//       "DELETE": "D",
+//       "SKIP": "N",
+//       "CLIP_SOFT": "S",
+//       "CLIP_HARD": "H",
+//       "PAD": "P",
+//       "SEQUENCE_MATCH": "=",
+//       "SEQUENCE_MISMATCH": "X",
+//     }
+//     cigarStr = ""
+//     for c in read.alignment.cigar {
+//       cigarStr += c.operationLength + cigarMap[c.operation]
+//     }
+//     return cigarStr
+message Read {
+  // The server-generated read ID, unique across all reads. This is different
+  // from the `fragmentName`.
+  string id = 1;
+
+  // The ID of the read group this read belongs to. A read belongs to exactly
+  // one read group. This is a server-generated ID which is distinct from SAM's
+  // RG tag (for that value, see
+  // [ReadGroup.name][google.genomics.v1.ReadGroup.name]).
+  string read_group_id = 2;
+
+  // The ID of the read group set this read belongs to. A read belongs to
+  // exactly one read group set.
+  string read_group_set_id = 3;
+
+  // The fragment name. Equivalent to QNAME (query template name) in SAM.
+  string fragment_name = 4;
+
+  // The orientation and the distance between reads from the fragment are
+  // consistent with the sequencing protocol (SAM flag 0x2).
+  bool proper_placement = 5;
+
+  // The fragment is a PCR or optical duplicate (SAM flag 0x400).
+  bool duplicate_fragment = 6;
+
+  // The observed length of the fragment, equivalent to TLEN in SAM.
+  int32 fragment_length = 7;
+
+  // The read number in sequencing. 0-based and less than `numberReads`. This
+  // field replaces SAM flag 0x40 and 0x80.
+  int32 read_number = 8;
+
+  // The number of reads in the fragment (extension to SAM flag 0x1).
+  int32 number_reads = 9;
+
+  // Whether this read did not pass filters, such as platform or vendor quality
+  // controls (SAM flag 0x200).
+  bool failed_vendor_quality_checks = 10;
+
+  // The linear alignment for this alignment record. This field is null for
+  // unmapped reads.
+  LinearAlignment alignment = 11;
+
+  // Whether this alignment is secondary. Equivalent to SAM flag 0x100.
+  // A secondary alignment represents an alternative to the primary alignment
+  // for this read. Aligners may return secondary alignments if a read can map
+  // ambiguously to multiple coordinates in the genome. By convention, each read
+  // has one and only one alignment where both `secondaryAlignment`
+  // and `supplementaryAlignment` are false.
+  bool secondary_alignment = 12;
+
+  // Whether this alignment is supplementary. Equivalent to SAM flag 0x800.
+  // Supplementary alignments are used in the representation of a chimeric
+  // alignment. In a chimeric alignment, a read is split into multiple
+  // linear alignments that map to different reference contigs. The first
+  // linear alignment in the read will be designated as the representative
+  // alignment; the remaining linear alignments will be designated as
+  // supplementary alignments. These alignments may have different mapping
+  // quality scores. In each linear alignment in a chimeric alignment, the read
+  // will be hard clipped. The `alignedSequence` and
+  // `alignedQuality` fields in the alignment record will only
+  // represent the bases for its respective linear alignment.
+  bool supplementary_alignment = 13;
+
+  // The bases of the read sequence contained in this alignment record,
+  // **without CIGAR operations applied** (equivalent to SEQ in SAM).
+  // `alignedSequence` and `alignedQuality` may be
+  // shorter than the full read sequence and quality. This will occur if the
+  // alignment is part of a chimeric alignment, or if the read was trimmed. When
+  // this occurs, the CIGAR for this read will begin/end with a hard clip
+  // operator that will indicate the length of the excised sequence.
+  string aligned_sequence = 14;
+
+  // The quality of the read sequence contained in this alignment record
+  // (equivalent to QUAL in SAM). Like the other per-base fields, the values
+  // follow the same left-to-right orientation as `alignedSequence`.
+  // `alignedSequence` and `alignedQuality` may be shorter than the full read
+  // sequence and quality. This will occur if the alignment is part of a
+  // chimeric alignment, or if the read was trimmed. When this occurs, the CIGAR
+  // for this read will begin/end with a hard clip operator that will indicate
+  // the length of the excised sequence.
+  repeated int32 aligned_quality = 15;
+
+  // The mapping of the primary alignment of the
+  // `(readNumber+1)%numberReads` read in the fragment. It replaces
+  // mate position and mate strand in SAM.
+  Position next_mate_position = 16;
+
+  // A map of additional read alignment information. This must be of the form
+  // map<string, string[]> (string key mapping to a list of string values).
+  map<string, google.protobuf.ListValue> info = 17;
+}
--- a/google/genomics/v1/readgroup.proto
+++ b/google/genomics/v1/readgroup.proto
@ -0,0 +1,105 @@
+// Copyright 2016 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.genomics.v1;
+
+import "google/api/annotations.proto";
+import "google/protobuf/struct.proto";
+
+option cc_enable_arenas = true;
+option java_multiple_files = true;
+option java_outer_classname = "ReadGroupProto";
+option java_package = "com.google.genomics.v1";
+
+
+// A read group is all the data that's processed the same way by the sequencer.
+message ReadGroup {
+  // Describes the experiment that generated a read group: the library,
+  // platform unit, sequencing center and instrument model involved.
+  message Experiment {
+    // A client-supplied library identifier; a library is a collection of DNA
+    // fragments which have been prepared for sequencing from a sample. This
+    // field is important for quality control as error or bias can be introduced
+    // during sample preparation.
+    string library_id = 1;
+
+    // The platform unit used as part of this experiment, for example
+    // flowcell-barcode.lane for Illumina or slide for SOLiD. Corresponds to the
+    // @RG PU field in the SAM spec.
+    string platform_unit = 2;
+
+    // The sequencing center used as part of this experiment.
+    string sequencing_center = 3;
+
+    // The instrument model used as part of this experiment. This maps to
+    // sequencing technology in the SAM spec.
+    string instrument_model = 4;
+  }
+
+  // Describes one program that was used to generate a read group. Programs
+  // can be ordered relative to each other through `id` and `prevProgramId`.
+  message Program {
+    // The command line used to run this program.
+    string command_line = 1;
+
+    // The user specified locally unique ID of the program. Used along with
+    // `prevProgramId` to define an ordering between programs.
+    string id = 2;
+
+    // The display name of the program. This is typically the colloquial name of
+    // the tool used, for example 'bwa' or 'picard'.
+    string name = 3;
+
+    // The ID of the program run before this one.
+    string prev_program_id = 4;
+
+    // The version of the program run.
+    string version = 5;
+  }
+
+  // NOTE(review): field numbers 8 and 9 are not assigned in this message.
+  // Confirm whether they are intentionally held back before using them for
+  // new fields.
+
+  // The server-generated read group ID, unique for all read groups.
+  // Note: This is different than the @RG ID field in the SAM spec. For that
+  // value, see [name][google.genomics.v1.ReadGroup.name].
+  string id = 1;
+
+  // The dataset to which this read group belongs.
+  string dataset_id = 2;
+
+  // The read group name. This corresponds to the @RG ID field in the SAM spec.
+  string name = 3;
+
+  // A free-form text description of this read group.
+  string description = 4;
+
+  // A client-supplied sample identifier for the reads in this read group.
+  string sample_id = 5;
+
+  // The experiment used to generate this read group.
+  Experiment experiment = 6;
+
+  // The predicted insert size of this read group. The insert size is the length
+  // of the sequenced DNA fragment from end-to-end, not including the adapters.
+  int32 predicted_insert_size = 7;
+
+  // The programs used to generate this read group. Programs are always
+  // identical for all read groups within a read group set. For this reason,
+  // only the first read group in a returned set will have this field
+  // populated.
+  repeated Program programs = 10;
+
+  // The reference set the reads in this read group are aligned to.
+  string reference_set_id = 11;
+
+  // A map of additional read group information. This must be of the form
+  // map<string, string[]> (string key mapping to a list of string values).
+  map<string, google.protobuf.ListValue> info = 12;
+}
--- a/google/genomics/v1/readgroupset.proto
+++ b/google/genomics/v1/readgroupset.proto
@ -0,0 +1,63 @@
+// Copyright 2016 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.genomics.v1;
+
+import "google/api/annotations.proto";
+import "google/genomics/v1/readgroup.proto";
+import "google/protobuf/struct.proto";
+
+option cc_enable_arenas = true;
+option java_multiple_files = true;
+option java_outer_classname = "ReadGroupSetProto";
+option java_package = "com.google.genomics.v1";
+
+
+// A read group set is a logical collection of read groups, which are
+// collections of reads produced by a sequencer. A read group set typically
+// models reads corresponding to one sample, sequenced one way, and aligned one
+// way.
+//
+// * A read group set belongs to one dataset.
+// * A read group belongs to one read group set.
+// * A read belongs to one read group.
+//
+// For more genomics resource definitions, see [Fundamentals of Google
+// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+message ReadGroupSet {
+  // The server-generated read group set ID, unique for all read group sets.
+  string id = 1;
+
+  // The dataset to which this read group set belongs.
+  string dataset_id = 2;
+
+  // The reference set to which the reads in this read group set are aligned.
+  string reference_set_id = 3;
+
+  // The read group set name. By default this will be initialized to the sample
+  // name of the sequenced data contained in this set.
+  string name = 4;
+
+  // The filename of the original source file for this read group set, if any.
+  string filename = 5;
+
+  // The read groups in this set. There are typically 1-10 read groups in a read
+  // group set.
+  repeated ReadGroup read_groups = 6;
+
+  // A map of additional read group set information.
+  // NOTE(review): presumably string keys mapping to lists of string values,
+  // as documented for `ReadGroup.info` - confirm.
+  map<string, google.protobuf.ListValue> info = 7;
+}
--- a/google/genomics/v1/reads.proto
+++ b/google/genomics/v1/reads.proto
@ -0,0 +1,461 @@
+// Copyright 2016 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.genomics.v1;
+
+import "google/api/annotations.proto";
+import "google/genomics/v1/range.proto";
+import "google/genomics/v1/readalignment.proto";
+import "google/genomics/v1/readgroupset.proto";
+import "google/longrunning/operations.proto";
+import "google/protobuf/empty.proto";
+import "google/protobuf/field_mask.proto";
+
+option cc_enable_arenas = true;
+option java_multiple_files = true;
+option java_outer_classname = "ReadsProto";
+option java_package = "com.google.genomics.v1";
+
+
+// Streams the reads that match a request.
+service StreamingReadService {
+  // Streams every read that matches the request. Reads are returned ordered
+  // by reference name, then position, then ID.
+  rpc StreamReads(StreamReadsRequest)
+      returns (stream StreamReadsResponse) {
+    option (google.api.http) = {
+      post: "/v1/reads:stream"
+      body: "*"
+    };
+  }
+}
+
+// The Readstore: a data store for DNA sequencing reads.
+service ReadServiceV1 {
+  // Asynchronously imports the provided information to create read group
+  // sets.
+  //
+  // See [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  // for the definitions of read group sets and other genomics resources.
+  //
+  // The caller must have WRITE permissions to the dataset.
+  //
+  // ## Notes on [BAM](https://samtools.github.io/hts-specs/SAMv1.pdf) import
+  //
+  // - Tags will be converted to strings - tag types are not preserved
+  // - Comments (`@CO`) in the input file header will not be preserved
+  // - Original header order of references (`@SQ`) will not be preserved
+  // - Any reverse stranded unmapped reads will be reverse complemented, and
+  //   their qualities (also the "BQ" and "OQ" tags, if any) will be reversed
+  // - Unmapped reads will be stripped of positional information (reference
+  //   name and position)
+  rpc ImportReadGroupSets(ImportReadGroupSetsRequest)
+      returns (google.longrunning.Operation) {
+    option (google.api.http) = {
+      post: "/v1/readgroupsets:import"
+      body: "*"
+    };
+  }
+
+  // Exports a read group set to a BAM file in Google Cloud Storage.
+  //
+  // See [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  // for the definitions of read group sets and other genomics resources.
+  //
+  // An exported BAM file may currently differ in some ways from the BAM file
+  // that was originally imported. See
+  // [ImportReadGroupSets](google.genomics.v1.ReadServiceV1.ImportReadGroupSets)
+  // for caveats.
+  rpc ExportReadGroupSet(ExportReadGroupSetRequest)
+      returns (google.longrunning.Operation) {
+    option (google.api.http) = {
+      post: "/v1/readgroupsets/{read_group_set_id}:export"
+      body: "*"
+    };
+  }
+
+  // Searches for read group sets that match the given criteria.
+  //
+  // See [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  // for the definitions of read group sets and other genomics resources.
+  //
+  // Implements
+  // [GlobalAllianceApi.searchReadGroupSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L135).
+  rpc SearchReadGroupSets(SearchReadGroupSetsRequest)
+      returns (SearchReadGroupSetsResponse) {
+    option (google.api.http) = {
+      post: "/v1/readgroupsets/search"
+      body: "*"
+    };
+  }
+
+  // Updates a read group set. This method supports patch semantics.
+  //
+  // See [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  // for the definitions of read group sets and other genomics resources.
+  rpc UpdateReadGroupSet(UpdateReadGroupSetRequest)
+      returns (ReadGroupSet) {
+    option (google.api.http) = {
+      patch: "/v1/readgroupsets/{read_group_set_id}"
+      body: "read_group_set"
+    };
+  }
+
+  // Deletes a read group set.
+  //
+  // See [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  // for the definitions of read group sets and other genomics resources.
+  rpc DeleteReadGroupSet(DeleteReadGroupSetRequest)
+      returns (google.protobuf.Empty) {
+    option (google.api.http) = {
+      delete: "/v1/readgroupsets/{read_group_set_id}"
+    };
+  }
+
+  // Gets a read group set by ID.
+  //
+  // See [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  // for the definitions of read group sets and other genomics resources.
+  rpc GetReadGroupSet(GetReadGroupSetRequest)
+      returns (ReadGroupSet) {
+    option (google.api.http) = {
+      get: "/v1/readgroupsets/{read_group_set_id}"
+    };
+  }
+
+  // Lists fixed width coverage buckets for a read group set, each of which
+  // corresponds to a range of a reference sequence. Each bucket summarizes
+  // coverage information across its corresponding genomic range.
+  //
+  // See [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  // for the definitions of read group sets and other genomics resources.
+  //
+  // Coverage is defined as the number of reads which are aligned to a given
+  // base in the reference sequence. Coverage buckets are available at several
+  // precomputed bucket widths, enabling retrieval of various coverage 'zoom
+  // levels'. The caller must have READ permissions for the target read group
+  // set.
+  rpc ListCoverageBuckets(ListCoverageBucketsRequest)
+      returns (ListCoverageBucketsResponse) {
+    option (google.api.http) = {
+      get: "/v1/readgroupsets/{read_group_set_id}/coveragebuckets"
+    };
+  }
+
+  // Gets a list of reads for one or more read group sets.
+  //
+  // See [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  // for the definitions of read group sets and other genomics resources.
+  //
+  // Reads search operates over a genomic coordinate space of reference sequence
+  // & position defined over the reference sequences to which the requested
+  // read group sets are aligned.
+  //
+  // If a target positional range is specified, search returns all reads whose
+  // alignment to the reference genome overlaps the range. A query which
+  // specifies only read group set IDs yields all reads in those read group
+  // sets, including unmapped reads.
+  //
+  // All reads returned (including reads on subsequent pages) are ordered by
+  // genomic coordinate (by reference sequence, then position). Reads with
+  // equivalent genomic coordinates are returned in an unspecified order. This
+  // order is consistent, such that two queries for the same content (regardless
+  // of page size) yield reads in the same order across their respective streams
+  // of paginated responses.
+  //
+  // Implements
+  // [GlobalAllianceApi.searchReads](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L85).
+  rpc SearchReads(SearchReadsRequest)
+      returns (SearchReadsResponse) {
+    option (google.api.http) = {
+      post: "/v1/reads/search"
+      body: "*"
+    };
+  }
+}
+
+// The read group set search request, sent to
+// `ReadServiceV1.SearchReadGroupSets`.
+message SearchReadGroupSetsRequest {
+  // Restricts this query to read group sets within the given datasets. At least
+  // one ID must be provided.
+  repeated string dataset_ids = 1;
+
+  // Only return read group sets for which a substring of the name matches this
+  // string.
+  string name = 3;
+
+  // The continuation token, which is used to page through large result sets.
+  // To get the next page of results, set this parameter to the value of
+  // `nextPageToken` from the previous response.
+  string page_token = 2;
+
+  // The maximum number of results to return in a single page. If unspecified,
+  // defaults to 256. The maximum value is 1024.
+  int32 page_size = 4;
+}
+
+// The read group set search response, returned by
+// `ReadServiceV1.SearchReadGroupSets`.
+message SearchReadGroupSetsResponse {
+  // The list of matching read group sets.
+  repeated ReadGroupSet read_group_sets = 1;
+
+  // The continuation token, which is used to page through large result sets.
+  // Provide this value in a subsequent request to return the next page of
+  // results. This field will be empty if there aren't any additional results.
+  string next_page_token = 2;
+}
+
+// The read group set import request, sent to
+// `ReadServiceV1.ImportReadGroupSets`.
+message ImportReadGroupSetsRequest {
+  // Strategies for partitioning imported read groups into read group sets.
+  enum PartitionStrategy {
+    // No partition strategy was specified.
+    PARTITION_STRATEGY_UNSPECIFIED = 0;
+
+    // In most cases, this strategy yields one read group set per file. This is
+    // the default behavior.
+    //
+    // Allocate one read group set per file per sample. For BAM files, read
+    // groups are considered to share a sample if they have identical sample
+    // names. Furthermore, all reads for each file which do not belong to a read
+    // group, if any, will be grouped into a single read group set per-file.
+    PER_FILE_PER_SAMPLE = 1;
+
+    // Includes all read groups in all imported files into a single read group
+    // set. Requires that the headers for all imported files are equivalent. All
+    // reads which do not belong to a read group, if any, will be grouped into a
+    // separate read group set.
+    MERGE_ALL = 2;
+  }
+
+  // Required. The ID of the dataset these read group sets will belong to. The
+  // caller must have WRITE permissions to this dataset.
+  string dataset_id = 1;
+
+  // The reference set to which the imported read group sets are aligned, if
+  // any. The reference names of this reference set must be a superset of those
+  // found in the imported file headers. If no reference set id is provided, a
+  // best effort is made to associate with a matching reference set.
+  string reference_set_id = 4;
+
+  // A list of URIs pointing at [BAM
+  // files](https://samtools.github.io/hts-specs/SAMv1.pdf)
+  // in Google Cloud Storage.
+  repeated string source_uris = 2;
+
+  // The partition strategy describes how read groups are partitioned into read
+  // group sets.
+  PartitionStrategy partition_strategy = 5;
+}
+
+// The read group set import response.
+//
+// NOTE(review): `ReadServiceV1.ImportReadGroupSets` returns a
+// `google.longrunning.Operation`; presumably this message is delivered as the
+// result of that operation - confirm.
+message ImportReadGroupSetsResponse {
+  // IDs of the read group sets that were created.
+  repeated string read_group_set_ids = 1;
+}
+
+// The read group set export request, sent to
+// `ReadServiceV1.ExportReadGroupSet`.
+message ExportReadGroupSetRequest {
+  // Required. The Google Developers Console project ID that owns this
+  // export. The caller must have WRITE access to this project.
+  string project_id = 1;
+
+  // Required. A Google Cloud Storage URI for the exported BAM file.
+  // The currently authenticated user must have write access to the new file.
+  // An error will be returned if the URI already contains data.
+  string export_uri = 2;
+
+  // Required. The ID of the read group set to export. The caller must have
+  // READ access to this read group set.
+  string read_group_set_id = 3;
+
+  // The reference names to export. If this is not specified, all reference
+  // sequences, including unmapped reads, are exported.
+  // Use `*` to export only unmapped reads.
+  repeated string reference_names = 4;
+}
+
+// The read group set update request, sent to
+// `ReadServiceV1.UpdateReadGroupSet`.
+message UpdateReadGroupSetRequest {
+  // The ID of the read group set to be updated. The caller must have WRITE
+  // permissions to the dataset associated with this read group set.
+  string read_group_set_id = 1;
+
+  // The new read group set data. See `updateMask` for details on mutability of
+  // fields.
+  ReadGroupSet read_group_set = 2;
+
+  // An optional mask specifying which fields to update. Supported fields:
+  //
+  // * [name][google.genomics.v1.ReadGroupSet.name].
+  // * [referenceSetId][google.genomics.v1.ReadGroupSet.reference_set_id].
+  //
+  // Leaving `updateMask` unset is equivalent to specifying all mutable
+  // fields.
+  google.protobuf.FieldMask update_mask = 3;
+}
+
+// The read group set delete request, sent to
+// `ReadServiceV1.DeleteReadGroupSet`.
+message DeleteReadGroupSetRequest {
+  // The ID of the read group set to be deleted. The caller must have WRITE
+  // permissions to the dataset associated with this read group set.
+  string read_group_set_id = 1;
+}
+
+// The read group set get request, sent to `ReadServiceV1.GetReadGroupSet`.
+message GetReadGroupSetRequest {
+  // The ID of the read group set.
+  string read_group_set_id = 1;
+}
+
+// The coverage bucket list request, sent to
+// `ReadServiceV1.ListCoverageBuckets`.
+message ListCoverageBucketsRequest {
+  // Required. The ID of the read group set over which coverage is requested.
+  string read_group_set_id = 1;
+
+  // The name of the reference to query, within the reference set associated
+  // with this query. Optional.
+  string reference_name = 3;
+
+  // The start position of the range on the reference, 0-based inclusive. If
+  // specified, `referenceName` must also be specified. Defaults to 0.
+  int64 start = 4;
+
+  // The end position of the range on the reference, 0-based exclusive. If
+  // specified, `referenceName` must also be specified. If unset or 0, defaults
+  // to the length of the reference.
+  int64 end = 5;
+
+  // The desired width of each reported coverage bucket in base pairs. This
+  // will be rounded down to the nearest precomputed bucket width; the value
+  // of which is returned as `bucketWidth` in the response. Defaults
+  // to infinity (each bucket spans an entire reference sequence) or the length
+  // of the target range, if specified. The smallest precomputed
+  // `bucketWidth` is currently 2048 base pairs; this is subject to
+  // change.
+  int64 target_bucket_width = 6;
+
+  // The continuation token, which is used to page through large result sets.
+  // To get the next page of results, set this parameter to the value of
+  // `nextPageToken` from the previous response.
+  string page_token = 7;
+
+  // The maximum number of results to return in a single page. If unspecified,
+  // defaults to 1024. The maximum value is 2048.
+  int32 page_size = 8;
+}
+
+// A bucket over which read coverage has been precomputed. A bucket corresponds
+// to a specific range of the reference sequence. Buckets are returned in
+// `ListCoverageBucketsResponse.coverage_buckets`.
+message CoverageBucket {
+  // The genomic coordinate range spanned by this bucket.
+  Range range = 1;
+
+  // The average number of reads which are aligned to each individual
+  // reference base in this bucket.
+  float mean_coverage = 2;
+}
+
+// The coverage bucket list response, returned by
+// `ReadServiceV1.ListCoverageBuckets`.
+message ListCoverageBucketsResponse {
+  // The length of each coverage bucket in base pairs. Note that buckets at the
+  // end of a reference sequence may be shorter. This value is omitted if the
+  // bucket width is infinity (the default behavior, with no range or
+  // `targetBucketWidth`).
+  int64 bucket_width = 1;
+
+  // The coverage buckets. The list of buckets is sparse; a bucket with 0
+  // overlapping reads is not returned. A bucket never crosses more than one
+  // reference sequence. Each bucket has width `bucketWidth`, unless
+  // its end is the end of the reference sequence.
+  repeated CoverageBucket coverage_buckets = 2;
+
+  // The continuation token, which is used to page through large result sets.
+  // Provide this value in a subsequent request to return the next page of
+  // results. This field will be empty if there aren't any additional results.
+  string next_page_token = 3;
+}
+
+// The read search request, sent to `ReadServiceV1.SearchReads`.
+message SearchReadsRequest {
+  // The IDs of the read group sets within which to search for reads. All
+  // specified read group sets must be aligned against a common set of reference
+  // sequences; this defines the genomic coordinates for the query. Must specify
+  // one of `readGroupSetIds` or `readGroupIds`.
+  repeated string read_group_set_ids = 1;
+
+  // The IDs of the read groups within which to search for reads. All specified
+  // read groups must belong to the same read group sets. Must specify one of
+  // `readGroupSetIds` or `readGroupIds`.
+  repeated string read_group_ids = 5;
+
+  // The reference sequence name, for example `chr1`, `1`, or `chrX`. If set to
+  // `*`, only unmapped reads are returned. If unspecified, all reads (mapped
+  // and unmapped) are returned.
+  string reference_name = 7;
+
+  // The start position of the range on the reference, 0-based inclusive. If
+  // specified, `referenceName` must also be specified.
+  int64 start = 8;
+
+  // The end position of the range on the reference, 0-based exclusive. If
+  // specified, `referenceName` must also be specified.
+  int64 end = 9;
+
+  // The continuation token, which is used to page through large result sets.
+  // To get the next page of results, set this parameter to the value of
+  // `nextPageToken` from the previous response.
+  string page_token = 3;
+
+  // The maximum number of results to return in a single page. If unspecified,
+  // defaults to 256. The maximum value is 2048.
+  int32 page_size = 4;
+}
+
+// The read search response, returned by `ReadServiceV1.SearchReads`.
+message SearchReadsResponse {
+  // The list of matching alignments sorted by mapped genomic coordinate,
+  // if any, ascending in position within the same reference. Unmapped reads,
+  // which have no position, are returned contiguously and are sorted in
+  // ascending lexicographic order by fragment name.
+  repeated Read alignments = 1;
+
+  // The continuation token, which is used to page through large result sets.
+  // Provide this value in a subsequent request to return the next page of
+  // results. This field will be empty if there aren't any additional results.
+  string next_page_token = 2;
+}
+
+// The stream reads request, sent to `StreamingReadService.StreamReads`.
+message StreamReadsRequest {
+  // The Google Developers Console project ID or number which will be billed
+  // for this access. The caller must have WRITE access to this project.
+  // Required.
+  string project_id = 1;
+
+  // The ID of the read group set from which to stream reads.
+  string read_group_set_id = 2;
+
+  // The reference sequence name, for example `chr1`,
+  // `1`, or `chrX`. If set to `*`, only unmapped reads are
+  // returned.
+  string reference_name = 3;
+
+  // The start position of the range on the reference, 0-based inclusive. If
+  // specified, `referenceName` must also be specified.
+  int64 start = 4;
+
+  // The end position of the range on the reference, 0-based exclusive. If
+  // specified, `referenceName` must also be specified.
+  int64 end = 5;
+
+  // Restricts results to a shard containing approximately `1/totalShards`
+  // of the normal response payload for this query. Results from a sharded
+  // request are disjoint from those returned by all queries which differ only
+  // in their shard parameter. A shard may yield 0 results; this is especially
+  // likely for large values of `totalShards`.
+  //
+  // Valid values are `[0, totalShards)`.
+  int32 shard = 6;
+
+  // Specifying `totalShards` causes a disjoint subset of the normal response
+  // payload to be returned for each query with a unique `shard` parameter
+  // specified. A best effort is made to yield equally sized shards. Sharding
+  // can be used to distribute processing amongst workers, where each worker is
+  // assigned a unique `shard` number and all workers specify the same
+  // `totalShards` number. The union of reads returned for all sharded queries
+  // `[0, totalShards)` is equal to those returned by a single unsharded query.
+  //
+  // Queries for different values of `totalShards` with common divisors will
+  // share shard boundaries. For example, streaming `shard` 2 of 5
+  // `totalShards` yields the same results as streaming `shard`s 4 and 5 of 10
+  // `totalShards`. This property can be leveraged for adaptive retries.
+  int32 total_shards = 7;
+}
+
+// The stream reads response. `StreamingReadService.StreamReads` returns a
+// stream of these messages.
+message StreamReadsResponse {
+  // The reads carried by this message of the response stream.
+  repeated Read alignments = 1;
+}
--- a/google/genomics/v1/references.proto
+++ b/google/genomics/v1/references.proto
@ -0,0 +1,281 @@
+// Copyright 2016 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.genomics.v1;
+
+import "google/api/annotations.proto";
+
+option cc_enable_arenas = true;
+option java_multiple_files = true;
+option java_outer_classname = "ReferencesProto";
+option java_package = "com.google.genomics.v1";
+
+
+// Looks up reference sets, references, and the bases of a reference.
+service ReferenceServiceV1 {
+  // Searches for reference sets that match the given criteria.
+  //
+  // See [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  // for the definitions of references and other genomics resources.
+  //
+  // Implements
+  // [GlobalAllianceApi.searchReferenceSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L71).
+  rpc SearchReferenceSets(SearchReferenceSetsRequest)
+      returns (SearchReferenceSetsResponse) {
+    option (google.api.http) = {
+      post: "/v1/referencesets/search"
+      body: "*"
+    };
+  }
+
+  // Gets a reference set.
+  //
+  // See [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  // for the definitions of references and other genomics resources.
+  //
+  // Implements
+  // [GlobalAllianceApi.getReferenceSet](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L83).
+  rpc GetReferenceSet(GetReferenceSetRequest)
+      returns (ReferenceSet) {
+    option (google.api.http) = {
+      get: "/v1/referencesets/{reference_set_id}"
+    };
+  }
+
+  // Searches for references that match the given criteria.
+  //
+  // See [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  // for the definitions of references and other genomics resources.
+  //
+  // Implements
+  // [GlobalAllianceApi.searchReferences](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L146).
+  rpc SearchReferences(SearchReferencesRequest)
+      returns (SearchReferencesResponse) {
+    option (google.api.http) = {
+      post: "/v1/references/search"
+      body: "*"
+    };
+  }
+
+  // Gets a reference.
+  //
+  // See [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  // for the definitions of references and other genomics resources.
+  //
+  // Implements
+  // [GlobalAllianceApi.getReference](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L158).
+  rpc GetReference(GetReferenceRequest)
+      returns (Reference) {
+    option (google.api.http) = {
+      get: "/v1/references/{reference_id}"
+    };
+  }
+
+  // Lists the bases in a reference, optionally restricted to a range.
+  //
+  // See [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  // for the definitions of references and other genomics resources.
+  //
+  // Implements
+  // [GlobalAllianceApi.getReferenceBases](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L221).
+  rpc ListBases(ListBasesRequest)
+      returns (ListBasesResponse) {
+    option (google.api.http) = {
+      get: "/v1/references/{reference_id}/bases"
+    };
+  }
+}
+
+// A reference is a canonical assembled DNA sequence, intended to act as a
+// reference coordinate space for other genomic annotations. A single reference
+// might represent the human chromosome 1 or mitochondrial DNA, for instance. A
+// reference belongs to one or more reference sets.
+//
+// For more genomics resource definitions, see [Fundamentals of Google
+// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+message Reference {
+  // The server-generated reference ID, unique across all references.
+  string id = 1;
+
+  // The length of this reference's sequence.
+  int64 length = 2;
+
+  // MD5 of the upper-case sequence excluding all whitespace characters (this
+  // is equivalent to SQ:M5 in SAM). This value is represented in lower case
+  // hexadecimal format.
+  string md5checksum = 3;
+
+  // The name of this reference, for example `22`.
+  string name = 4;
+
+  // The URI from which the sequence was obtained. Typically specifies a FASTA
+  // format file.
+  string source_uri = 5;
+
+  // All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ) ideally
+  // with a version number, for example `GCF_000001405.26`.
+  repeated string source_accessions = 6;
+
+  // ID from http://www.ncbi.nlm.nih.gov/taxonomy. For example, 9606 for human.
+  int32 ncbi_taxon_id = 7;
+}
+
+// A reference set is a set of references which typically comprise a reference
+// assembly for a species, such as `GRCh38` which is representative
+// of the human genome. A reference set defines a common coordinate space for
+// comparing reference-aligned experimental data. A reference set contains 1 or
+// more references.
+//
+// For more genomics resource definitions, see [Fundamentals of Google
+// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+message ReferenceSet {
+  // The server-generated reference set ID, unique across all reference sets.
+  string id = 1;
+
+  // The IDs of the reference objects that are part of this set.
+  // `Reference.md5checksum` must be unique within this set.
+  repeated string reference_ids = 2;
+
+  // Order-independent MD5 checksum which identifies this reference set. The
+  // checksum is computed by sorting the lower case hexadecimal
+  // `reference.md5checksum` strings (for all references in this set) in
+  // ascending lexicographic order, concatenating, and taking the MD5 of that
+  // value. The resulting value is represented in lower case hexadecimal format.
+  string md5checksum = 3;
+
+  // ID from http://www.ncbi.nlm.nih.gov/taxonomy (for example, 9606 for human)
+  // indicating the species which this reference set is intended to model. Note
+  // that contained references may specify a different `ncbiTaxonId`, as
+  // assemblies may contain reference sequences which do not belong to the
+  // modeled species, for example EBV in a human reference genome.
+  int32 ncbi_taxon_id = 4;
+
+  // Free text description of this reference set.
+  string description = 5;
+
+  // Public id of this reference set, such as `GRCh37`.
+  string assembly_id = 6;
+
+  // The URI from which the references were obtained.
+  string source_uri = 7;
+
+  // All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ) ideally
+  // with a version number, for example `NC_000001.11`.
+  repeated string source_accessions = 8;
+}
+
+// The reference set search request, sent to
+// `ReferenceServiceV1.SearchReferenceSets`.
+message SearchReferenceSetsRequest {
+  // If present, return reference sets for which the
+  // [md5checksum][google.genomics.v1.ReferenceSet.md5checksum] matches exactly.
+  repeated string md5checksums = 1;
+
+  // If present, return reference sets for which a prefix of any of
+  // [sourceAccessions][google.genomics.v1.ReferenceSet.source_accessions]
+  // match any of these strings. Accession numbers typically have a main number
+  // and a version, for example `NC_000001.11`.
+  repeated string accessions = 2;
+
+  // If present, return reference sets for which a substring of their
+  // `assemblyId` matches this string (case insensitive).
+  string assembly_id = 3;
+
+  // The continuation token, which is used to page through large result sets.
+  // To get the next page of results, set this parameter to the value of
+  // `nextPageToken` from the previous response.
+  string page_token = 4;
+
+  // The maximum number of results to return in a single page. If unspecified,
+  // defaults to 1024. The maximum value is 4096.
+  int32 page_size = 5;
+}
+
+// The reference set search response, returned by
+// `ReferenceServiceV1.SearchReferenceSets`.
+message SearchReferenceSetsResponse {
+  // The matching reference sets.
+  repeated ReferenceSet reference_sets = 1;
+
+  // The continuation token, which is used to page through large result sets.
+  // Provide this value in a subsequent request to return the next page of
+  // results. This field will be empty if there aren't any additional results.
+  string next_page_token = 2;
+}
+
+// The reference set get request, sent to
+// `ReferenceServiceV1.GetReferenceSet`.
+message GetReferenceSetRequest {
+  // The ID of the reference set.
+  string reference_set_id = 1;
+}
+
+// The reference search request, sent to
+// `ReferenceServiceV1.SearchReferences`.
+message SearchReferencesRequest {
+  // If present, return references for which the
+  // [md5checksum][google.genomics.v1.Reference.md5checksum] matches exactly.
+  repeated string md5checksums = 1;
+
+  // If present, return references for which a prefix of any of
+  // [sourceAccessions][google.genomics.v1.Reference.source_accessions] match
+  // any of these strings. Accession numbers typically have a main number and a
+  // version, for example `GCF_000001405.26`.
+  repeated string accessions = 2;
+
+  // If present, return only references which belong to this reference set.
+  string reference_set_id = 3;
+
+  // The continuation token, which is used to page through large result sets.
+  // To get the next page of results, set this parameter to the value of
+  // `nextPageToken` from the previous response.
+  string page_token = 4;
+
+  // The maximum number of results to return in a single page. If unspecified,
+  // defaults to 1024. The maximum value is 4096.
+  int32 page_size = 5;
+}
+
+// The reference search response, returned by
+// `ReferenceServiceV1.SearchReferences`.
+message SearchReferencesResponse {
+  // The matching references.
+  repeated Reference references = 1;
+
+  // The continuation token, which is used to page through large result sets.
+  // Provide this value in a subsequent request to return the next page of
+  // results. This field will be empty if there aren't any additional results.
+  string next_page_token = 2;
+}
+
+// The reference get request, sent to `ReferenceServiceV1.GetReference`.
+message GetReferenceRequest {
+  // The ID of the reference.
+  string reference_id = 1;
+}
+
+// The reference bases list request, sent to `ReferenceServiceV1.ListBases`.
+message ListBasesRequest {
+  // The ID of the reference.
+  string reference_id = 1;
+
+  // The start position (0-based) of this query. Defaults to 0.
+  int64 start = 2;
+
+  // The end position (0-based, exclusive) of this query. Defaults to the length
+  // of this reference.
+  int64 end = 3;
+
+  // The continuation token, which is used to page through large result sets.
+  // To get the next page of results, set this parameter to the value of
+  // `nextPageToken` from the previous response.
+  string page_token = 4;
+
+  // The maximum number of bases to return in a single page. If unspecified,
+  // defaults to 200Kbp (kilo base pairs). The maximum value is 10Mbp (mega base
+  // pairs).
+  int32 page_size = 5;
+}
+
+// The reference bases list response, returned by
+// `ReferenceServiceV1.ListBases`.
+message ListBasesResponse {
+  // The offset position (0-based) of the given `sequence` from the
+  // start of this `Reference`. This value will differ for each page
+  // in a paginated request.
+  int64 offset = 1;
+
+  // A substring of the bases that make up this reference.
+  string sequence = 2;
+
+  // The continuation token, which is used to page through large result sets.
+  // Provide this value in a subsequent request to return the next page of
+  // results. This field will be empty if there aren't any additional results.
+  string next_page_token = 3;
+}
--- a/google/genomics/v1/variants.proto
+++ b/google/genomics/v1/variants.proto
@ -0,0 +1,903 @@
+// Copyright 2016 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.genomics.v1;
+
+import "google/api/annotations.proto";
+import "google/longrunning/operations.proto";
+import "google/protobuf/empty.proto";
+import "google/protobuf/field_mask.proto";
+import "google/protobuf/struct.proto";
+
+option cc_enable_arenas = true;
+option java_multiple_files = true;
+option java_outer_classname = "VariantsProto";
+option java_package = "com.google.genomics.v1";
+
+
+// A service that returns variants to the caller as a server-side stream of
+// responses rather than as paginated result pages.
+service StreamingVariantService {
+  // Returns a stream of all the variants matching the search request, ordered
+  // by reference name, position, and ID.
+  rpc StreamVariants(StreamVariantsRequest) returns (stream StreamVariantsResponse) {
+    option (google.api.http) = { post: "/v1/variants:stream" body: "*" };
+  }
+}
+
+// Manages variant sets, variants and call sets: creation, retrieval, update,
+// deletion and search, plus bulk import, export and merge of variant data.
+service VariantServiceV1 {
+  // Creates variant data by asynchronously importing the provided information.
+  //
+  // For the definitions of variant sets and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  //
+  // The variants for import will be merged with any existing variant that
+  // matches its reference sequence, start, end, reference bases, and
+  // alternative bases. If no such variant exists, a new one will be created.
+  //
+  // When variants are merged, the call information from the new variant
+  // is added to the existing variant, and Variant info fields are merged
+  // as specified in
+  // [infoMergeConfig][google.genomics.v1.ImportVariantsRequest.info_merge_config].
+  // As a special case, for single-sample VCF files, QUAL and FILTER fields will
+  // be moved to the call level; these are sometimes interpreted in a
+  // call-specific context.
+  // Imported VCF headers are appended to the metadata already in a variant set.
+  rpc ImportVariants(ImportVariantsRequest) returns (google.longrunning.Operation) {
+    option (google.api.http) = { post: "/v1/variants:import" body: "*" };
+  }
+
+  // Creates a new variant set.
+  //
+  // For the definitions of variant sets and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  //
+  // The provided variant set must have a valid `datasetId` set - all other
+  // fields are optional. Note that the `id` field will be ignored, as this is
+  // assigned by the server.
+  rpc CreateVariantSet(CreateVariantSetRequest) returns (VariantSet) {
+    option (google.api.http) = { post: "/v1/variantsets" body: "variant_set" };
+  }
+
+  // Exports variant set data to an external destination.
+  //
+  // For the definitions of variant sets and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc ExportVariantSet(ExportVariantSetRequest) returns (google.longrunning.Operation) {
+    option (google.api.http) = { post: "/v1/variantsets/{variant_set_id}:export" body: "*" };
+  }
+
+  // Gets a variant set by ID.
+  //
+  // For the definitions of variant sets and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc GetVariantSet(GetVariantSetRequest) returns (VariantSet) {
+    option (google.api.http) = { get: "/v1/variantsets/{variant_set_id}" };
+  }
+
+  // Returns a list of all variant sets matching search criteria.
+  //
+  // For the definitions of variant sets and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  //
+  // Implements
+  // [GlobalAllianceApi.searchVariantSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variantmethods.avdl#L49).
+  rpc SearchVariantSets(SearchVariantSetsRequest) returns (SearchVariantSetsResponse) {
+    option (google.api.http) = { post: "/v1/variantsets/search" body: "*" };
+  }
+
+  // Deletes a variant set including all variants, call sets, and calls within.
+  // This is not reversible.
+  //
+  // For the definitions of variant sets and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc DeleteVariantSet(DeleteVariantSetRequest) returns (google.protobuf.Empty) {
+    option (google.api.http) = { delete: "/v1/variantsets/{variant_set_id}" };
+  }
+
+  // Updates a variant set using patch semantics.
+  //
+  // For the definitions of variant sets and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc UpdateVariantSet(UpdateVariantSetRequest) returns (VariantSet) {
+    option (google.api.http) = { patch: "/v1/variantsets/{variant_set_id}" body: "variant_set" };
+  }
+
+  // Gets a list of variants matching the criteria.
+  //
+  // For the definitions of variants and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  //
+  // Implements
+  // [GlobalAllianceApi.searchVariants](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variantmethods.avdl#L126).
+  rpc SearchVariants(SearchVariantsRequest) returns (SearchVariantsResponse) {
+    option (google.api.http) = { post: "/v1/variants/search" body: "*" };
+  }
+
+  // Creates a new variant.
+  //
+  // For the definitions of variants and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc CreateVariant(CreateVariantRequest) returns (Variant) {
+    option (google.api.http) = { post: "/v1/variants" body: "variant" };
+  }
+
+  // Updates a variant.
+  //
+  // For the definitions of variants and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  //
+  // This method supports patch semantics. Returns the modified variant without
+  // its calls.
+  rpc UpdateVariant(UpdateVariantRequest) returns (Variant) {
+    option (google.api.http) = { patch: "/v1/variants/{variant_id}" body: "variant" };
+  }
+
+  // Deletes a variant.
+  //
+  // For the definitions of variants and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc DeleteVariant(DeleteVariantRequest) returns (google.protobuf.Empty) {
+    option (google.api.http) = { delete: "/v1/variants/{variant_id}" };
+  }
+
+  // Gets a variant by ID.
+  //
+  // For the definitions of variants and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc GetVariant(GetVariantRequest) returns (Variant) {
+    option (google.api.http) = { get: "/v1/variants/{variant_id}" };
+  }
+
+  // Merges the given variants with existing variants.
+  //
+  // For the definitions of variants and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  //
+  // Each variant will be merged with an existing variant that matches its
+  // reference sequence, start, end, reference bases, and alternative bases.
+  // If no such variant exists, a new one will be created.
+  //
+  // When variants are merged, the call information from the new variant
+  // is added to the existing variant. Variant info fields are merged as
+  // specified in the
+  // [infoMergeConfig][google.genomics.v1.MergeVariantsRequest.info_merge_config]
+  // field of the MergeVariantsRequest.
+  //
+  // Please exercise caution when using this method! It is easy to introduce
+  // mistakes in existing variants and difficult to back out of them. For
+  // example, suppose you were trying to merge a new variant with an existing
+  // one and both variants contain calls that belong to callsets with the same
+  // callset ID.
+  //
+  //     // Existing variant - irrelevant fields trimmed for clarity
+  //     {
+  //         "variantSetId": "10473108253681171589",
+  //         "referenceName": "1",
+  //         "start": "10582",
+  //         "referenceBases": "G",
+  //         "alternateBases": [
+  //             "A"
+  //         ],
+  //         "calls": [
+  //             {
+  //                 "callSetId": "10473108253681171589-0",
+  //                 "callSetName": "CALLSET0",
+  //                 "genotype": [
+  //                     0,
+  //                     1
+  //                 ]
+  //             }
+  //         ]
+  //     }
+  //
+  //     // New variant with conflicting call information
+  //     {
+  //         "variantSetId": "10473108253681171589",
+  //         "referenceName": "1",
+  //         "start": "10582",
+  //         "referenceBases": "G",
+  //         "alternateBases": [
+  //             "A"
+  //         ],
+  //         "calls": [
+  //             {
+  //                 "callSetId": "10473108253681171589-0",
+  //                 "callSetName": "CALLSET0",
+  //                 "genotype": [
+  //                     1,
+  //                     1
+  //                 ]
+  //             }
+  //         ]
+  //     }
+  //
+  // The resulting merged variant would overwrite the existing calls with those
+  // from the new variant:
+  //
+  //     {
+  //         "variantSetId": "10473108253681171589",
+  //         "referenceName": "1",
+  //         "start": "10582",
+  //         "referenceBases": "G",
+  //         "alternateBases": [
+  //             "A"
+  //         ],
+  //         "calls": [
+  //             {
+  //                 "callSetId": "10473108253681171589-0",
+  //                 "callSetName": "CALLSET0",
+  //                 "genotype": [
+  //                     1,
+  //                     1
+  //                 ]
+  //             }
+  //         ]
+  //     }
+  //
+  // This may be the desired outcome, but it is up to the user to determine if
+  // that is indeed the case.
+  rpc MergeVariants(MergeVariantsRequest) returns (google.protobuf.Empty) {
+    option (google.api.http) = { post: "/v1/variants:merge" body: "*" };
+  }
+
+  // Gets a list of call sets matching the criteria.
+  //
+  // For the definitions of call sets and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  //
+  // Implements
+  // [GlobalAllianceApi.searchCallSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variantmethods.avdl#L178).
+  rpc SearchCallSets(SearchCallSetsRequest) returns (SearchCallSetsResponse) {
+    option (google.api.http) = { post: "/v1/callsets/search" body: "*" };
+  }
+
+  // Creates a new call set.
+  //
+  // For the definitions of call sets and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc CreateCallSet(CreateCallSetRequest) returns (CallSet) {
+    option (google.api.http) = { post: "/v1/callsets" body: "call_set" };
+  }
+
+  // Updates a call set.
+  //
+  // For the definitions of call sets and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  //
+  // This method supports patch semantics.
+  rpc UpdateCallSet(UpdateCallSetRequest) returns (CallSet) {
+    option (google.api.http) = { patch: "/v1/callsets/{call_set_id}" body: "call_set" };
+  }
+
+  // Deletes a call set.
+  //
+  // For the definitions of call sets and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc DeleteCallSet(DeleteCallSetRequest) returns (google.protobuf.Empty) {
+    option (google.api.http) = { delete: "/v1/callsets/{call_set_id}" };
+  }
+
+  // Gets a call set by ID.
+  //
+  // For the definitions of call sets and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc GetCallSet(GetCallSetRequest) returns (CallSet) {
+    option (google.api.http) = { get: "/v1/callsets/{call_set_id}" };
+  }
+}
+
+// VariantSetMetadata describes a single piece of variant call metadata.
+// Each entry has a top-level key and either a single string value (`value`)
+// or a list of key-value pairs (`info`). `value` and `info` are mutually
+// exclusive.
+message VariantSetMetadata {
+  // The data types that a metadata entry can declare; see the `type` field.
+  enum Type {
+    TYPE_UNSPECIFIED = 0;
+
+    INTEGER = 1;
+
+    FLOAT = 2;
+
+    FLAG = 3;
+
+    CHARACTER = 4;
+
+    STRING = 5;
+  }
+
+  // The top-level key.
+  string key = 1;
+
+  // The value field for simple metadata. Mutually exclusive with `info`.
+  string value = 2;
+
+  // User-provided ID field, not enforced by this API.
+  // Two or more pieces of structured metadata with identical
+  // id and key fields are considered equivalent.
+  string id = 4;
+
+  // The type of data. Possible types include: Integer, Float,
+  // Flag, Character, and String.
+  Type type = 5;
+
+  // The number of values that can be included in a field described by this
+  // metadata.
+  // NOTE(review): typed as a string rather than an integer, presumably so
+  // that non-numeric counts (as in the VCF `Number` header attribute) can be
+  // represented - confirm.
+  string number = 8;
+
+  // A textual description of this metadata.
+  string description = 7;
+
+  // Remaining structured metadata key-value pairs. This must be of the form
+  // map<string, string[]> (string key mapping to a list of string values).
+  // Mutually exclusive with `value`.
+  map<string, google.protobuf.ListValue> info = 3;
+}
+
+// A variant set is a collection of call sets and variants. It contains summary
+// statistics of those contents. A variant set belongs to a dataset.
+//
+// For more genomics resource definitions, see [Fundamentals of Google
+// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+message VariantSet {
+  // The dataset to which this variant set belongs. Must be set to a valid
+  // dataset ID when creating a variant set.
+  string dataset_id = 1;
+
+  // The server-generated variant set ID, unique across all variant sets.
+  // Ignored on creation, because the server assigns it.
+  string id = 2;
+
+  // The reference set to which the variant set is mapped. The reference set
+  // describes the alignment provenance of the variant set, while the
+  // `referenceBounds` describe the shape of the actual variant data. The
+  // reference set's reference names are a superset of those found in the
+  // `referenceBounds`.
+  //
+  // For example, given a variant set that is mapped to the GRCh38 reference set
+  // and contains a single variant on reference 'X', `referenceBounds` would
+  // contain only an entry for 'X', while the associated reference set
+  // enumerates all possible references: '1', '2', 'X', 'Y', 'MT', etc.
+  string reference_set_id = 6;
+
+  // A list of all references used by the variants in a variant set
+  // with associated coordinate upper bounds for each one.
+  repeated ReferenceBound reference_bounds = 5;
+
+  // The metadata associated with this variant set. Listed as an updatable
+  // field by `UpdateVariantSetRequest.update_mask`.
+  repeated VariantSetMetadata metadata = 4;
+
+  // User-specified, mutable name.
+  string name = 7;
+
+  // A textual description of this variant set. Listed as an updatable field
+  // by `UpdateVariantSetRequest.update_mask`.
+  string description = 8;
+}
+
+// A variant represents a change in DNA sequence relative to a reference
+// sequence. For example, a variant could represent a SNP or an insertion.
+// Variants belong to a variant set.
+//
+// For more genomics resource definitions, see [Fundamentals of Google
+// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+//
+// Each of the calls on a variant represents a determination of genotype with
+// respect to that variant. For example, a call might assign probability of 0.32
+// to the occurrence of a SNP named rs1234 in a sample named NA12345. A call
+// belongs to a call set, which contains related calls typically from one
+// sample.
+message Variant {
+  // The ID of the variant set this variant belongs to.
+  string variant_set_id = 15;
+
+  // The server-generated variant ID, unique across all variants.
+  string id = 2;
+
+  // Names for the variant, for example a RefSNP ID. One of the two fields
+  // that `UpdateVariantRequest.update_mask` describes as mutable.
+  repeated string names = 3;
+
+  // The date this variant was created, in milliseconds from the epoch.
+  int64 created = 12;
+
+  // The reference on which this variant occurs (such as `chr20` or `X`).
+  string reference_name = 14;
+
+  // The position at which this variant occurs (0-based).
+  // This corresponds to the first base of the string of reference bases.
+  int64 start = 16;
+
+  // The end position (0-based) of this variant. This corresponds to the first
+  // base after the last base in the reference allele. So, the length of
+  // the reference allele is (end - start). This is useful for variants
+  // that don't explicitly give alternate bases, for example large deletions.
+  int64 end = 13;
+
+  // The reference bases for this variant. They start at the given
+  // position.
+  string reference_bases = 6;
+
+  // The bases that appear instead of the reference bases.
+  repeated string alternate_bases = 7;
+
+  // A measure of how likely this variant is to be real.
+  // A higher value is better.
+  double quality = 8;
+
+  // A list of filters (normally quality filters) this variant has failed.
+  // `PASS` indicates this variant has passed all filters.
+  repeated string filter = 9;
+
+  // A map of additional variant information. This must be of the form
+  // map<string, string[]> (string key mapping to a list of string values).
+  // One of the two fields that `UpdateVariantRequest.update_mask` describes
+  // as mutable.
+  map<string, google.protobuf.ListValue> info = 10;
+
+  // The variant calls for this particular variant. Each one represents the
+  // determination of genotype with respect to this variant.
+  repeated VariantCall calls = 11;
+}
+
+// A call represents the determination of genotype with respect to a particular
+// variant. It may include associated information such as quality and phasing.
+// For example, a call might assign a probability of 0.32 to the occurrence of
+// a SNP named rs1234 in a call set with the name NA12345.
+message VariantCall {
+  // The ID of the call set this variant call belongs to.
+  string call_set_id = 8;
+
+  // The name of the call set this variant call belongs to.
+  string call_set_name = 9;
+
+  // The genotype of this variant call. Each value represents either the value
+  // of the `referenceBases` field (0) or a 1-based index into
+  // `alternateBases`.
+  //
+  // If a variant had a `referenceBases` value of `T` and an `alternateBases`
+  // value of `["A", "C"]`, and the `genotype` was `[2, 1]`, that would mean
+  // the call represented the heterozygous value `CA` for this variant. If the
+  // `genotype` was instead `[0, 1]`, the represented value would be `TA`.
+  //
+  // Ordering of the genotype values is important if the `phaseset` is
+  // present. If a genotype is not called (that is, a `.` is present in the
+  // GT string) -1 is returned.
+  repeated int32 genotype = 7;
+
+  // If this field is present, this variant call's genotype ordering implies
+  // the phase of the bases and is consistent with any other variant calls in
+  // the same reference sequence which have the same phaseset value.
+  // When importing data from VCF, if the genotype data was phased but no
+  // phase set was specified this field will be set to `*`.
+  string phaseset = 5;
+
+  // The genotype likelihoods for this variant call. Each array entry
+  // represents how likely a specific genotype is for this call. The value
+  // ordering is defined by the GL tag in the VCF spec.
+  // If Phred-scaled genotype likelihood scores (PL) are available and
+  // log10(P) genotype likelihood scores (GL) are not, PL scores are converted
+  // to GL scores. If both are available, PL scores are stored in `info`.
+  repeated double genotype_likelihood = 6;
+
+  // A map of additional variant call information. This must be of the form
+  // map<string, string[]> (string key mapping to a list of string values).
+  map<string, google.protobuf.ListValue> info = 2;
+}
+
+// A call set is a collection of variant calls, typically for one sample. It
+// belongs to a variant set.
+//
+// For more genomics resource definitions, see [Fundamentals of Google
+// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+message CallSet {
+  // The server-generated call set ID, unique across all call sets.
+  string id = 1;
+
+  // The call set name. Described by `UpdateCallSetRequest.update_mask` as the
+  // only mutable field.
+  string name = 2;
+
+  // The sample ID this call set corresponds to.
+  string sample_id = 7;
+
+  // The IDs of the variant sets this call set belongs to. This field must
+  // contain exactly one element, as a call set belongs to a single variant
+  // set. The field is repeated only for compatibility with the
+  // [GA4GH 0.5.1
+  // API](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variants.avdl#L76).
+  repeated string variant_set_ids = 6;
+
+  // The date this call set was created in milliseconds from the epoch.
+  int64 created = 5;
+
+  // A map of additional call set information. This must be of the form
+  // map<string, string[]> (string key mapping to a list of string values).
+  map<string, google.protobuf.ListValue> info = 4;
+}
+
+// ReferenceBound records an upper bound for the starting coordinate of
+// variants in a particular reference. Instances appear in
+// `VariantSet.reference_bounds`, one per reference used by the variant set.
+message ReferenceBound {
+  // The name of the reference associated with this reference bound.
+  string reference_name = 1;
+
+  // An upper bound (inclusive) on the starting coordinate of any
+  // variant in the reference sequence.
+  int64 upper_bound = 2;
+}
+
+// The variant data import request.
+message ImportVariantsRequest {
+  // The file formats from which variant data can be imported.
+  enum Format {
+    FORMAT_UNSPECIFIED = 0;
+
+    // VCF (Variant Call Format). The VCF files should be uncompressed. gVCF is
+    // also supported.
+    FORMAT_VCF = 1;
+
+    // Complete Genomics masterVarBeta format. The masterVarBeta files should
+    // be bzip2 compressed.
+    FORMAT_COMPLETE_GENOMICS = 2;
+  }
+
+  // Required. The variant set to which variant data should be imported.
+  string variant_set_id = 1;
+
+  // A list of URIs referencing variant files in Google Cloud Storage. URIs can
+  // include wildcards [as described
+  // here](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames).
+  // Note that recursive wildcards ('**') are not supported.
+  repeated string source_uris = 2;
+
+  // The format of the variant data being imported. If unspecified, defaults
+  // to `VCF`.
+  Format format = 3;
+
+  // Convert reference names to the canonical representation.
+  // hg19 haplotypes (those reference names containing "_hap")
+  // are not modified in any way.
+  // All other reference names are modified according to the following rules:
+  // The reference name is capitalized.
+  // The "chr" prefix is dropped for all autosomes and sex chromosomes.
+  // For example "chr17" becomes "17" and "chrX" becomes "X".
+  // All mitochondrial chromosomes ("chrM", "chrMT", etc) become "MT".
+  bool normalize_reference_names = 5;
+
+  // A mapping between info field keys and the InfoMergeOperations to
+  // be performed on them. This is plumbed down to the MergeVariantsRequests
+  // generated by the resulting import job.
+  map<string, InfoMergeOperation> info_merge_config = 6;
+}
+
+// The variant data import response.
+// NOTE(review): the ImportVariants RPC itself returns a
+// google.longrunning.Operation; this message is presumably delivered as that
+// operation's response payload - confirm.
+message ImportVariantsResponse {
+  // IDs of the call sets created during the import.
+  repeated string call_set_ids = 1;
+}
+
+// The CreateVariantSet request.
+message CreateVariantSetRequest {
+  // Required. The variant set to be created. Must have a valid `datasetId`.
+  // Any `id` supplied here is ignored, because the server assigns the ID.
+  VariantSet variant_set = 1;
+}
+
+// The variant data export request.
+message ExportVariantSetRequest {
+  // The destinations to which variant data can be exported.
+  enum Format {
+    FORMAT_UNSPECIFIED = 0;
+
+    // Export the data to Google BigQuery.
+    FORMAT_BIGQUERY = 1;
+  }
+
+  // Required. The ID of the variant set that contains variant data which
+  // should be exported. The caller must have READ access to this variant set.
+  string variant_set_id = 1;
+
+  // If provided, only variant call information from the specified call sets
+  // will be exported. By default all variant calls are exported.
+  repeated string call_set_ids = 2;
+
+  // Required. The Google Cloud project ID that owns the destination
+  // BigQuery dataset. The caller must have WRITE access to this project. This
+  // project will also own the resulting export job.
+  string project_id = 3;
+
+  // The format for the exported data.
+  Format format = 4;
+
+  // Required. The BigQuery dataset to export data to. This dataset must already
+  // exist. Note that this is distinct from the Genomics concept of "dataset".
+  string bigquery_dataset = 5;
+
+  // Required. The BigQuery table to export data to.
+  // If the table doesn't exist, it will be created. If it already exists, it
+  // will be overwritten.
+  string bigquery_table = 6;
+}
+
+// The get variant set request.
+message GetVariantSetRequest {
+  // Required. The ID of the variant set.
+  string variant_set_id = 1;
+}
+
+// The search variant sets request. Results are returned one page at a time;
+// see `page_token` and `page_size`.
+message SearchVariantSetsRequest {
+  // Exactly one dataset ID must be provided here. Only variant sets which
+  // belong to this dataset will be returned.
+  repeated string dataset_ids = 1;
+
+  // The continuation token, which is used to page through large result sets.
+  // To get the next page of results, set this parameter to the value of
+  // `nextPageToken` from the previous response.
+  string page_token = 2;
+
+  // The maximum number of results to return in a single page. If unspecified,
+  // defaults to 1024.
+  int32 page_size = 3;
+}
+
+// The search variant sets response: one page of matching variant sets.
+message SearchVariantSetsResponse {
+  // The variant sets belonging to the requested dataset.
+  repeated VariantSet variant_sets = 1;
+
+  // The continuation token, which is used to page through large result sets.
+  // Provide this value in a subsequent request to return the next page of
+  // results. This field will be empty if there aren't any additional results.
+  string next_page_token = 2;
+}
+
+// The delete variant set request. Deleting a variant set also deletes all
+// variants, call sets, and calls within it, and is not reversible.
+message DeleteVariantSetRequest {
+  // The ID of the variant set to be deleted.
+  string variant_set_id = 1;
+}
+
+// The update variant set request. The update is applied with patch semantics.
+message UpdateVariantSetRequest {
+  // The ID of the variant set to be updated (must already exist).
+  string variant_set_id = 1;
+
+  // The new variant set data. Only the variant_set.metadata will be considered
+  // for update.
+  // NOTE(review): `update_mask` in this message also lists `name` and
+  // `description` as supported fields, so the sentence above may be out of
+  // date - confirm which fields are actually updatable.
+  VariantSet variant_set = 2;
+
+  // An optional mask specifying which fields to update. Supported fields:
+  //
+  // * [metadata][google.genomics.v1.VariantSet.metadata].
+  // * [name][google.genomics.v1.VariantSet.name].
+  // * [description][google.genomics.v1.VariantSet.description].
+  //
+  // Leaving `updateMask` unset is equivalent to specifying all mutable
+  // fields.
+  google.protobuf.FieldMask update_mask = 5;
+}
+
+// The variant search request.
+message SearchVariantsRequest {
+  // At most one variant set ID may be provided. Only variants from this
+  // variant set will be returned. If omitted, a call set ID must be included
+  // in the request.
+  repeated string variant_set_ids = 1;
+
+  // Only return variants which have exactly this name.
+  string variant_name = 2;
+
+  // Only return variant calls which belong to call sets with these IDs.
+  // Leaving this blank returns all variant calls. If a variant has no
+  // calls belonging to any of these call sets, it won't be returned at all.
+  // Currently, variants with no calls from any call set will never be returned.
+  repeated string call_set_ids = 3;
+
+  // Required. Only return variants in this reference sequence.
+  string reference_name = 4;
+
+  // The beginning of the window (0-based, inclusive) for which
+  // overlapping variants should be returned. If unspecified, defaults to 0.
+  int64 start = 5;
+
+  // The end of the window (0-based, exclusive). If unspecified or 0, defaults
+  // to the length of the reference.
+  int64 end = 6;
+
+  // The continuation token, which is used to page through large result sets.
+  // To get the next page of results, set this parameter to the value of
+  // `nextPageToken` from the previous response.
+  string page_token = 7;
+
+  // The maximum number of variants to return in a single page. If unspecified,
+  // defaults to 5000. The maximum value is 10000.
+  int32 page_size = 8;
+
+  // The maximum number of calls to return in a single page. Note that this
+  // limit may be exceeded in the event that a matching variant contains more
+  // calls than the requested maximum. If unspecified, defaults to 5000. The
+  // maximum value is 10000.
+  int32 max_calls = 9;
+}
+
+// The variant search response: one page of matching variants.
+message SearchVariantsResponse {
+  // The list of matching Variants.
+  repeated Variant variants = 1;
+
+  // The continuation token, which is used to page through large result sets.
+  // Provide this value in a subsequent request to return the next page of
+  // results. This field will be empty if there aren't any additional results.
+  string next_page_token = 2;
+}
+
+// The create variant request.
+message CreateVariantRequest {
+  // The variant to be created.
+  Variant variant = 1;
+}
+
+// The update variant request. The update is applied with patch semantics, and
+// the modified variant is returned without its calls.
+message UpdateVariantRequest {
+  // The ID of the variant to be updated.
+  string variant_id = 1;
+
+  // The new variant data.
+  Variant variant = 2;
+
+  // An optional mask specifying which fields to update. At this time, mutable
+  // fields are [names][google.genomics.v1.Variant.names] and
+  // [info][google.genomics.v1.Variant.info]. Acceptable values are "names" and
+  // "info". If unspecified, all mutable fields will be updated.
+  google.protobuf.FieldMask update_mask = 3;
+}
+
+// The delete variant request.
+message DeleteVariantRequest {
+  // The ID of the variant to be deleted.
+  string variant_id = 1;
+}
+
+// The get variant request.
+message GetVariantRequest {
+  // The ID of the variant.
+  string variant_id = 1;
+}
+
+// The merge variants request. Each supplied variant is merged with an
+// existing variant that matches its reference sequence, start, end, reference
+// bases, and alternative bases; if none matches, a new variant is created.
+message MergeVariantsRequest {
+  // The destination variant set.
+  string variant_set_id = 1;
+
+  // The variants to be merged with existing variants.
+  repeated Variant variants = 2;
+
+  // A mapping between info field keys and the InfoMergeOperations to
+  // be performed on them.
+  map<string, InfoMergeOperation> info_merge_config = 3;
+}
+
+// The call set search request. Results are returned one page at a time; see
+// `page_token` and `page_size`.
+message SearchCallSetsRequest {
+  // Restrict the query to call sets within the given variant sets. At least one
+  // ID must be provided.
+  repeated string variant_set_ids = 1;
+
+  // Only return call sets for which a substring of the name matches this
+  // string.
+  string name = 2;
+
+  // The continuation token, which is used to page through large result sets.
+  // To get the next page of results, set this parameter to the value of
+  // `nextPageToken` from the previous response.
+  string page_token = 3;
+
+  // The maximum number of results to return in a single page. If unspecified,
+  // defaults to 1024.
+  int32 page_size = 4;
+}
+
+// The call set search response: one page of matching call sets.
+message SearchCallSetsResponse {
+  // The list of matching call sets.
+  repeated CallSet call_sets = 1;
+
+  // The continuation token, which is used to page through large result sets.
+  // Provide this value in a subsequent request to return the next page of
+  // results. This field will be empty if there aren't any additional results.
+  string next_page_token = 2;
+}
+
+// The create call set request.
+message CreateCallSetRequest {
+  // The call set to be created.
+  CallSet call_set = 1;
+}
+
+// The update call set request. The update is applied with patch semantics.
+message UpdateCallSetRequest {
+  // The ID of the call set to be updated.
+  string call_set_id = 1;
+
+  // The new call set data.
+  CallSet call_set = 2;
+
+  // An optional mask specifying which fields to update. At this time, the only
+  // mutable field is [name][google.genomics.v1.CallSet.name]. The only
+  // acceptable value is "name". If unspecified, all mutable fields will be
+  // updated.
+  google.protobuf.FieldMask update_mask = 3;
+}
+
+// The delete call set request.
+message DeleteCallSetRequest {
+  // The ID of the call set to be deleted.
+  string call_set_id = 1;
+}
+
+// The get call set request.
+message GetCallSetRequest {
+  // The ID of the call set.
+  string call_set_id = 1;
+}
+
+// The stream variants request.
+message StreamVariantsRequest {
+  // Required. The Google Developers Console project ID or number which will
+  // be billed for this access. The caller must have WRITE access to this
+  // project.
+  string project_id = 1;
+
+  // The variant set ID from which to stream variants.
+  string variant_set_id = 2;
+
+  // Only return variant calls which belong to call sets with these IDs.
+  // Leaving this blank returns all variant calls.
+  repeated string call_set_ids = 3;
+
+  // Required. Only return variants in this reference sequence.
+  string reference_name = 4;
+
+  // The beginning of the window (0-based, inclusive) for which
+  // overlapping variants should be returned.
+  int64 start = 5;
+
+  // The end of the window (0-based, exclusive) for which overlapping
+  // variants should be returned.
+  int64 end = 6;
+}
+
+// One message in the response stream of StreamVariants.
+message StreamVariantsResponse {
+  // The variants carried by this message of the stream.
+  repeated Variant variants = 1;
+}
+
+// Operations to be performed during import on Variant info fields.
+// These operations are set for each info field in the info_merge_config
+// map of ImportVariantsRequest, which is plumbed down to the
+// MergeVariantsRequests generated by the import job.
+enum InfoMergeOperation {
+  // NOTE(review): presumably treated the same as IGNORE_NEW, which is
+  // described as the default behavior - confirm.
+  INFO_MERGE_OPERATION_UNSPECIFIED = 0;
+
+  // By default, Variant info fields are persisted if the Variant doesn't
+  // already exist in the variantset. If the Variant is equivalent to a
+  // Variant already in the variantset, the incoming Variant's info field
+  // is ignored in favor of that of the already persisted Variant.
+  IGNORE_NEW = 1;
+
+  // This operation removes an info field from the incoming Variant
+  // and persists this info field in each of the incoming Variant's Calls.
+  MOVE_TO_CALLS = 2;
+}
--- a/google/genomics/v1alpha2/pipelines.proto
+++ b/google/genomics/v1alpha2/pipelines.proto
@ -0,0 +1,586 @@
+// Copyright 2016 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.genomics.v1alpha2;
+
+import "google/api/annotations.proto";
+import "google/longrunning/operations.proto";
+import "google/protobuf/empty.proto";
+import "google/protobuf/timestamp.proto";
+import "google/rpc/code.proto";
+
+option cc_enable_arenas = true;
+option java_multiple_files = true;
+option java_outer_classname = "PipelinesProto";
+option java_package = "com.google.genomics.v1a";
+
+
+// A service for running genomics pipelines.
+service PipelinesV1Alpha2 {
+  // Creates a pipeline that can be run later. Create takes a Pipeline that
+  // has all fields other than `pipelineId` populated, and then returns
+  // the same pipeline with `pipelineId` populated. This ID can be used
+  // to run the pipeline.
+  //
+  // The caller must have WRITE permission to the project.
+  rpc CreatePipeline(CreatePipelineRequest) returns (Pipeline) {
+    option (google.api.http) = { post: "/v1alpha2/pipelines" body: "pipeline" };
+  }
+
+  // Runs a pipeline. If `pipelineId` is specified in the request, then
+  // run a saved pipeline. If `ephemeralPipeline` is specified, then run
+  // that pipeline once without saving a copy.
+  //
+  // The caller must have READ permission to the project where the pipeline
+  // is stored and WRITE permission to the project where the pipeline will be
+  // run, as VMs will be created and storage will be used.
+  rpc RunPipeline(RunPipelineRequest) returns (google.longrunning.Operation) {
+    option (google.api.http) = { post: "/v1alpha2/pipelines:run" body: "*" };
+  }
+
+  // Retrieves a pipeline based on ID.
+  //
+  // The caller must have READ permission to the project.
+  rpc GetPipeline(GetPipelineRequest) returns (Pipeline) {
+    option (google.api.http) = { get: "/v1alpha2/pipelines/{pipeline_id}" };
+  }
+
+  // Lists pipelines.
+  //
+  // The caller must have READ permission to the project.
+  rpc ListPipelines(ListPipelinesRequest) returns (ListPipelinesResponse) {
+    option (google.api.http) = { get: "/v1alpha2/pipelines" };
+  }
+
+  // Deletes a pipeline based on ID.
+  //
+  // The caller must have WRITE permission to the project.
+  rpc DeletePipeline(DeletePipelineRequest) returns (google.protobuf.Empty) {
+    option (google.api.http) = { delete: "/v1alpha2/pipelines/{pipeline_id}" };
+  }
+
+  // Gets controller configuration information. Should only be called
+  // by VMs created by the Pipelines Service and not by end users.
+  rpc GetControllerConfig(GetControllerConfigRequest) returns (ControllerConfig) {
+    option (google.api.http) = { get: "/v1alpha2/pipelines:getControllerConfig" };
+  }
+
+  // Sets status of a given operation. All timestamps are sent on each
+  // call, and the whole series of events is replaced, in case
+  // intermediate calls are lost. Should only be called by VMs created
+  // by the Pipelines Service and not by end users.
+  rpc SetOperationStatus(SetOperationStatusRequest) returns (google.protobuf.Empty) {
+    option (google.api.http) = { put: "/v1alpha2/pipelines:setOperationStatus" body: "*" };
+  }
+}
+
+// Describes a Google Compute Engine (GCE) resource that is being managed by
+// a running [pipeline][google.genomics.v1alpha2.Pipeline].
+message GCE {
+  // The instance on which the operation is running.
+  string instance_name = 1;
+
+  // The availability zone in which the instance resides.
+  string zone = 2;
+
+  // The machine type of the instance.
+  string machine_type = 3;
+
+  // The names of the disks that were created for this pipeline.
+  repeated string disk_names = 4;
+}
+
+// Runtime metadata that will be populated in the
+// [runtimeMetadata][google.genomics.v1.OperationMetadata.runtime_metadata]
+// field of the Operation associated with a RunPipeline execution.
+message RuntimeMetadata {
+  // Execution information specific to Google Compute Engine (GCE).
+  GCE gce = 1;
+}
+
+// The pipeline object. Represents a transformation from a set of input
+// parameters to a set of output parameters. The transformation is defined
+// as a Docker image and command to run within that image. Each pipeline
+// is run on a Google Compute Engine VM. A pipeline can be created with the
+// `create` method and then later run with the `run` method, or a pipeline can
+// be defined and run all at once with the `run` method.
+message Pipeline {
+  // Required. The project in which to create the pipeline. The caller must have
+  // WRITE access.
+  string project_id = 1;
+
+  // Required. A user specified pipeline name that does not have to be unique.
+  // This name can be used for filtering Pipelines in ListPipelines.
+  string name = 2;
+
+  // User-specified description.
+  string description = 3;
+
+  // Input parameters of the pipeline.
+  repeated PipelineParameter input_parameters = 8;
+
+  // Output parameters of the pipeline.
+  repeated PipelineParameter output_parameters = 9;
+
+  // Required. The executor indicates in which environment the pipeline runs.
+  oneof executor {
+    // Specifies the Docker run information.
+    DockerExecutor docker = 5;
+  }
+
+  // Required. Specifies resource requirements for the pipeline run.
+  // Required fields:
+  //
+  // *
+  // [minimumCpuCores][google.genomics.v1alpha2.PipelineResources.minimum_cpu_cores]
+  //
+  // *
+  // [minimumRamGb][google.genomics.v1alpha2.PipelineResources.minimum_ram_gb]
+  PipelineResources resources = 6;
+
+  // Unique pipeline ID that is generated by the service when CreatePipeline
+  // is called. Cannot be specified in the Pipeline used in the
+  // CreatePipelineRequest, and will be populated in the response to
+  // CreatePipeline and all subsequent Get and List calls. Indicates that the
+  // service has registered this pipeline.
+  string pipeline_id = 7;
+}
+
+// The request to create a pipeline. The pipeline field here should not have
+// `pipelineId` populated, as that will be populated by the server.
+message CreatePipelineRequest {
+  // The pipeline to create. `pipelineId` cannot be specified here.
+  Pipeline pipeline = 1;
+}
+
+// The pipeline run arguments.
+message RunPipelineArgs {
+  // Required. The project in which to run the pipeline. The caller must have
+  // WRITER access to all Google Cloud services and resources (e.g. Google
+  // Compute Engine) that will be used.
+  string project_id = 1;
+
+  // Pipeline input arguments; keys are defined in the pipeline documentation.
+  // All input parameters that do not have default values must be specified.
+  // If parameters with defaults are specified here, the defaults will be
+  // overridden.
+  map<string, string> inputs = 2;
+
+  // Pipeline output arguments; keys are defined in the pipeline
+  // documentation. All output parameters without default values
+  // must be specified. If parameters with defaults are specified
+  // here, the defaults will be overridden.
+  map<string, string> outputs = 3;
+
+  // The Google Cloud Service Account that will be used to access data and
+  // services. By default, the compute service account associated with
+  // `projectId` is used.
+  ServiceAccount service_account = 4;
+
+  // Client-specified pipeline operation identifier.
+  string client_id = 5;
+
+  // Specifies resource requirements/overrides for the pipeline run.
+  PipelineResources resources = 6;
+
+  // Required. Logging options. Used by the service to communicate results
+  // to the user.
+  LoggingOptions logging = 7;
+}
+
+// The request to run a pipeline. If `pipelineId` is specified, it
+// refers to a saved pipeline created with CreatePipeline and set as
+// the `pipelineId` of the returned Pipeline object. If
+// `ephemeralPipeline` is specified, that pipeline is run once
+// with the given args and not saved. It is an error to specify both
+// `pipelineId` and `ephemeralPipeline`. `pipelineArgs`
+// must be specified.
+message RunPipelineRequest {
+  oneof pipeline {
+    // The ID of the already created (saved) pipeline to run.
+    string pipeline_id = 1;
+
+    // A new pipeline object to run once without saving it.
+    Pipeline ephemeral_pipeline = 2;
+  }
+
+  // Required. The arguments to use when running this pipeline.
+  RunPipelineArgs pipeline_args = 3;
+}
+
+// A request to get a saved pipeline by ID.
+message GetPipelineRequest {
+  // The ID of the pipeline to get. Caller must have READ access to the
+  // project in which this pipeline is defined.
+  string pipeline_id = 1;
+}
+
+// A request to list pipelines in a given project. Pipelines can be
+// filtered by name using `namePrefix`: all pipelines with names that
+// begin with `namePrefix` will be returned. Uses standard pagination:
+// `pageSize` indicates how many pipelines to return, and
+// `pageToken` comes from a previous ListPipelinesResponse to
+// indicate offset.
+message ListPipelinesRequest {
+  // Required. The name of the project to search for pipelines. Caller
+  // must have READ access to this project.
+  string project_id = 1;
+
+  // Only pipelines with names that begin with this prefix are
+  // returned. If unspecified, all pipelines in the project, up to
+  // `pageSize`, will be returned.
+  string name_prefix = 2;
+
+  // Number of pipelines to return at once. Defaults to 256, and max
+  // is 2048.
+  int32 page_size = 3;
+
+  // Token to use to indicate where to start getting results.
+  // If unspecified, returns the first page of results.
+  string page_token = 4;
+}
+
+// The response of ListPipelines. Contains at most `pageSize`
+// pipelines. If it contains `pageSize` pipelines, and more pipelines
+// exist, then `nextPageToken` will be populated and should be
+// used as the `pageToken` argument to a subsequent ListPipelines
+// request.
+message ListPipelinesResponse {
+  // The matched pipelines.
+  repeated Pipeline pipelines = 1;
+
+  // The token to pass as `pageToken` to get the next page of results.
+  string next_page_token = 2;
+}
+
+// The request to delete a saved pipeline by ID.
+message DeletePipelineRequest {
+  // The ID of the pipeline to delete. Caller must have WRITE access to the
+  // project in which this pipeline is defined.
+  string pipeline_id = 1;
+}
+
+// Request to get controller configuration. Should only be used
+// by VMs created by the Pipelines Service and not by end users.
+message GetControllerConfigRequest {
+  // The operation to retrieve controller configuration for.
+  string operation_id = 1;
+
+  uint64 validation_token = 2;  // NOTE(review): undocumented; confirm.
+}
+
+// Stores the information that the controller will fetch from the
+// server in order to run. Should only be used by VMs created by the
+// Pipelines Service and not by end users.
+message ControllerConfig {
+  message RepeatedString {
+    repeated string values = 1;  // Lets a map value hold a list of strings.
+  }
+
+  string image = 1;  // Presumably the Docker image to run; TODO confirm.
+
+  string cmd = 2;  // Presumably the command to run; TODO confirm.
+
+  string gcs_log_path = 3;  // Presumably a Cloud Storage log path; confirm.
+
+  string machine_type = 4;  // Presumably the VM machine type; TODO confirm.
+
+  map<string, string> vars = 5;  // NOTE(review): undocumented.
+
+  map<string, string> disks = 6;  // NOTE(review): undocumented.
+
+  map<string, RepeatedString> gcs_sources = 7;  // NOTE(review): undocumented.
+
+  map<string, RepeatedString> gcs_sinks = 8;  // NOTE(review): undocumented.
+}
+
+// Stores the list of events and times they occurred for major events in job
+// execution.
+message TimestampEvent {
+  // String indicating the type of event.
+  string description = 1;
+
+  // The time this event occurred.
+  google.protobuf.Timestamp timestamp = 2;
+}
+
+// Request to set operation status. Should only be used by VMs
+// created by the Pipelines Service and not by end users.
+message SetOperationStatusRequest {
+  string operation_id = 1;  // NOTE(review): presumably the target operation.
+
+  repeated TimestampEvent timestamp_events = 2;  // Events with their times.
+
+  google.rpc.Code error_code = 3;  // NOTE(review): undocumented.
+
+  string error_message = 4;  // NOTE(review): undocumented.
+
+  uint64 validation_token = 5;  // NOTE(review): undocumented.
+}
+
+// A Google Cloud Service Account used by a pipeline run.
+message ServiceAccount {
+  // Email address of the service account. Defaults to `default`,
+  // which uses the compute service account associated with the project.
+  string email = 1;
+
+  // List of scopes to be enabled for this service account on the
+  // pipeline virtual machine.
+  // The following scopes are automatically included:
+  // * https://www.googleapis.com/auth/genomics
+  // * https://www.googleapis.com/auth/compute
+  // * https://www.googleapis.com/auth/devstorage.full_control
+  repeated string scopes = 2;
+}
+
+// The logging options for the pipeline run (see `RunPipelineArgs.logging`).
+message LoggingOptions {
+  // The location in Google Cloud Storage to which the pipeline logs
+  // will be copied. Can be specified as a fully qualified directory
+  // path, in which case logs will be output with a unique identifier
+  // as the filename in that directory, or as a fully specified path,
+  // which must end in `.log`, in which case that path will be
+  // used, and the user must ensure that logs are not
+  // overwritten. Stdout and stderr logs from the run are also
+  // generated and output as `-stdout.log` and `-stderr.log`.
+  string gcs_path = 1;
+}
+
+// The system resources for the pipeline run.
+message PipelineResources {
+  // A Google Compute Engine disk resource specification.
+  message Disk {
+    // The types of disks that may be attached to VMs.
+    enum Type {
+      // Default disk type. Use one of the other options below.
+      TYPE_UNSPECIFIED = 0;
+
+      // Specifies a Google Compute Engine persistent hard disk. See
+      // https://cloud.google.com/compute/docs/disks/persistent-disks#typeofdisks
+      // for details.
+      PERSISTENT_HDD = 1;
+
+      // Specifies a Google Compute Engine persistent solid-state disk. See
+      // https://cloud.google.com/compute/docs/disks/persistent-disks#typeofdisks
+      // for details.
+      PERSISTENT_SSD = 2;
+
+      // Specifies a Google Compute Engine local SSD.
+      // See https://cloud.google.com/compute/docs/disks/local-ssd for details.
+      LOCAL_SSD = 3;
+    }
+
+    // Required. The name of the disk that can be used in the pipeline
+    // parameters. Must be 1 - 63 characters.
+    // The name "boot" is reserved for system use.
+    string name = 1;
+
+    // Required. The type of the disk to create.
+    Type type = 2;
+
+    // The size of the disk. Defaults to 500 (GB).
+    // This field is not applicable for local SSD.
+    int32 size_gb = 3;
+
+    // The full or partial URL of the persistent disk to attach. See
+    // https://cloud.google.com/compute/docs/reference/latest/instances#resource
+    // and
+    // https://cloud.google.com/compute/docs/disks/persistent-disks#snapshots
+    // for more details.
+    string source = 4;
+
+    // Specifies whether or not to delete the disk when the pipeline
+    // completes. This field is applicable only for newly created disks. See
+    // https://cloud.google.com/compute/docs/reference/latest/instances#resource
+    // for more details.
+    // By default, `autoDelete` is `false`. `autoDelete` will be enabled if set
+    // to `true` at create time or run time.
+    bool auto_delete = 6;
+
+    // Specifies how a source-based persistent disk will be mounted. See
+    // https://cloud.google.com/compute/docs/disks/persistent-disks#use_multi_instances
+    // for more details.
+    // Can only be set at create time.
+    bool read_only = 7;
+
+    // Required at create time and cannot be overridden at run time.
+    // Specifies the path in the Docker container where files on
+    // this disk should be located. For example, if `mountPoint`
+    // is `/mnt/disk`, and the parameter has `localPath`
+    // `inputs/file.txt`, the Docker container can access the data at
+    // `/mnt/disk/inputs/file.txt`.
+    string mount_point = 8;
+  }
+
+  // The minimum number of cores to use. Defaults to 1.
+  int32 minimum_cpu_cores = 1;
+
+  // At create time means that preemptible machines may be
+  // used for the run. At run time, means they should be used. Cannot
+  // be true at run time if false at create time.
+  // Defaults to `false`.
+  bool preemptible = 2;
+
+  // The minimum amount of RAM to use. Defaults to 3.75 (GB).
+  double minimum_ram_gb = 3;
+
+  // Disks to attach.
+  repeated Disk disks = 4;
+
+  // List of Google Compute Engine availability zones to which resource
+  // creation will be restricted. If empty, any zone may be chosen.
+  repeated string zones = 5;
+
+  // The size of the boot disk. Defaults to 10 (GB).
+  int32 boot_disk_size_gb = 6;
+}
+
+// Parameters facilitate setting and delivering data into the
+// pipeline's execution environment. They are defined at create time,
+// with optional defaults, and can be overridden at run time.
+//
+// If `localCopy` is unset, then the parameter specifies a string that
+// is passed as-is into the pipeline, as the value of the environment
+// variable with the given name. A default value can be optionally
+// specified at create time. The default can be overridden at run time
+// using the inputs map. If no default is given, a value must be
+// supplied at runtime.
+//
+// If `localCopy` is defined, then the parameter specifies a data
+// source or sink, both in Google Cloud Storage and on the Docker container
+// where the pipeline computation is run. The [service account associated with
+// the Pipeline][google.genomics.v1alpha2.RunPipelineArgs.service_account] (by
+// default the project's Compute Engine service account) must have access to the
+// Google Cloud Storage paths.
+//
+// At run time, the Google Cloud Storage paths can be overridden if a default
+// was provided at create time, or must be set otherwise. The pipeline runner
+// should add a key/value pair to either the inputs or outputs map. The
+// indicated data copies will be carried out before/after pipeline execution,
+// just as if the corresponding arguments were provided to `gsutil cp`.
+//
+// For example: Given the following `PipelineParameter`, specified
+// in the `inputParameters` list:
+//
+// ```
+// {name: "input_file", localCopy: {path: "file.txt", disk: "pd1"}}
+// ```
+//
+// where `disk` is defined in the `PipelineResources` object as:
+//
+// ```
+// {name: "pd1", mountPoint: "/mnt/disk/"}
+// ```
+//
+// We create a disk named `pd1`, mount it on the host VM, and map
+// `/mnt/pd1` to `/mnt/disk` in the Docker container. At
+// runtime, an entry for `input_file` would be required in the inputs
+// map, such as:
+//
+// ```
+//   inputs["input_file"] = "gs://my-bucket/bar.txt"
+// ```
+//
+// This would generate the following gsutil call:
+//
+// ```
+//   gsutil cp gs://my-bucket/bar.txt /mnt/pd1/file.txt
+// ```
+//
+// The file `/mnt/pd1/file.txt` maps to `/mnt/disk/file.txt` in the
+// Docker container. Acceptable paths are:
+//
+// <table>
+//   <thead>
+//     <tr><th>Google Cloud Storage path</th><th>Local path</th></tr>
+//   </thead>
+//   <tbody>
+//     <tr><td>file</td><td>file</td></tr>
+//     <tr><td>glob</td><td>directory</td></tr>
+//   </tbody>
+// </table>
+//
+// For outputs, the direction of the copy is reversed:
+//
+// ```
+//   gsutil cp /mnt/disk/file.txt gs://my-bucket/bar.txt
+// ```
+//
+// Acceptable paths are:
+//
+// <table>
+//   <thead>
+//     <tr><th>Local path</th><th>Google Cloud Storage path</th></tr>
+//   </thead>
+//   <tbody>
+//     <tr><td>file</td><td>file</td></tr>
+//     <tr>
+//       <td>file</td>
+//       <td>directory - directory must already exist</td>
+//     </tr>
+//     <tr>
+//       <td>glob</td>
+//       <td>directory - directory will be created if it doesn't exist</td></tr>
+//   </tbody>
+// </table>
+//
+// One restriction, due to Docker limitations, is that for outputs found on
+// the boot disk, the local path cannot be a glob and must be a file.
+message PipelineParameter {
+  // LocalCopy defines how a remote file should be copied to and from the VM.
+  message LocalCopy {
+    // Required. The path within the user's Docker container where
+    // this input should be localized to and from, relative to the specified
+    // disk's mount point. For example: `file.txt`.
+    string path = 1;
+
+    // Required. The name of the disk where this parameter is
+    // located. Can be the name of one of the disks specified in the
+    // Resources field, or "boot", which represents the Docker
+    // instance's boot disk and has a mount point of `/`.
+    string disk = 2;
+  }
+
+  // Required. Name of the parameter - the pipeline runner uses this string
+  // as the key to the input and output maps in RunPipeline.
+  string name = 1;
+
+  // Human-readable description.
+  string description = 2;
+
+  // The default value for this parameter. Can be overridden at runtime.
+  // If `localCopy` is present, then this must be a Google Cloud Storage path
+  // beginning with `gs://`.
+  string default_value = 5;
+
+  // If present, this parameter is marked for copying to and from the VM.
+  // `LocalCopy` indicates where on the VM the file should be. The value
+  // given to this parameter (either at runtime or using `defaultValue`)
+  // must be the remote path where the file should be.
+  LocalCopy local_copy = 6;
+}
+
+// The Docker executor specification.
+message DockerExecutor {
+  // Required. Image name from either Docker Hub or Google Container Repository.
+  // Users that run pipelines must have READ access to the image.
+  string image_name = 1;
+
+  // Required. The command string to run. Parameters that do not have
+  // `localCopy` specified should be used as environment variables, while
+  // those that do can be accessed at the defined paths.
+  string cmd = 2;
+}