Merge pull request #8666 from dgquintas/rr_fixall

Fixed wrong connectivity status updates for RR
pull/8760/head
David G. Quintas 8 years ago committed by GitHub
commit a1cf4dd607
  1. 2
      include/grpc/impl/codegen/connectivity_state.h
  2. 2
      src/core/ext/lb_policy/pick_first/pick_first.c
  3. 313
      src/core/ext/lb_policy/round_robin/round_robin.c
  4. 3
      src/core/lib/transport/connectivity_state.c
  5. 325
      test/core/client_channel/lb_policies_test.c

@ -40,6 +40,8 @@ extern "C" {
/** Connectivity state of a channel. */
typedef enum {
/** channel has just been initialized */
GRPC_CHANNEL_INIT = -1,
/** channel is idle */
GRPC_CHANNEL_IDLE,
/** channel is connecting */

@ -292,6 +292,8 @@ static void pf_connectivity_changed(grpc_exec_ctx *exec_ctx, void *arg,
} else {
loop:
switch (p->checking_connectivity) {
case GRPC_CHANNEL_INIT:
GPR_UNREACHABLE_CODE();
case GRPC_CHANNEL_READY:
grpc_connectivity_state_set(exec_ctx, &p->state_tracker,
GRPC_CHANNEL_READY, GRPC_ERROR_NONE,

@ -116,8 +116,13 @@ typedef struct {
grpc_closure connectivity_changed_closure;
/** this subchannels current position in subchannel->ready_list */
ready_list *ready_list_node;
/** last observed connectivity */
grpc_connectivity_state connectivity_state;
/** last observed connectivity. Not updated by
* \a grpc_subchannel_notify_on_state_change. Used to determine the previous
* state while processing the new state in \a rr_connectivity_changed */
grpc_connectivity_state prev_connectivity_state;
/** current connectivity state. Updated by \a
* grpc_subchannel_notify_on_state_change */
grpc_connectivity_state curr_connectivity_state;
/** the subchannel's target user data */
void *user_data;
/** vtable to operate over \a user_data */
@ -127,6 +132,7 @@ typedef struct {
struct round_robin_lb_policy {
/** base policy: must be first */
grpc_lb_policy base;
gpr_mu mu;
/** total number of addresses received at creation time */
size_t num_addresses;
@ -135,8 +141,11 @@ struct round_robin_lb_policy {
size_t num_subchannels;
subchannel_data **subchannels;
/** mutex protecting remaining members */
gpr_mu mu;
/** how many subchannels are in TRANSIENT_FAILURE */
size_t num_transient_failures;
/** how many subchannels are IDLE */
size_t num_idle;
/** have we started picking? */
int started_picking;
/** are we shutting down? */
@ -258,6 +267,10 @@ static void remove_disconnected_sc_locked(round_robin_lb_policy *p,
gpr_free(node);
}
static bool is_ready_list_empty(round_robin_lb_policy *p) {
return p->ready_list.prev == NULL;
}
static void rr_destroy(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) {
round_robin_lb_policy *p = (round_robin_lb_policy *)pol;
ready_list *elem;
@ -268,7 +281,7 @@ static void rr_destroy(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) {
for (size_t i = 0; i < p->num_subchannels; i++) {
subchannel_data *sd = p->subchannels[i];
GRPC_SUBCHANNEL_UNREF(exec_ctx, sd->subchannel, "round_robin_destroy");
GRPC_SUBCHANNEL_UNREF(exec_ctx, sd->subchannel, "rr_destroy");
if (sd->user_data != NULL) {
GPR_ASSERT(sd->user_data_vtable != NULL);
sd->user_data_vtable->destroy(sd->user_data);
@ -381,18 +394,18 @@ static void start_picking(grpc_exec_ctx *exec_ctx, round_robin_lb_policy *p) {
size_t i;
p->started_picking = 1;
if (grpc_lb_round_robin_trace) {
gpr_log(GPR_DEBUG, "LB_POLICY: p=%p num_subchannels=%" PRIuPTR, (void *)p,
p->num_subchannels);
}
for (i = 0; i < p->num_subchannels; i++) {
subchannel_data *sd = p->subchannels[i];
sd->connectivity_state = GRPC_CHANNEL_IDLE;
/* use some sentinel value outside of the range of grpc_connectivity_state
* to signal an undefined previous state. We won't be referring to this
* value again and it'll be overwritten after the first call to
* rr_connectivity_changed */
sd->prev_connectivity_state = GRPC_CHANNEL_INIT;
sd->curr_connectivity_state = GRPC_CHANNEL_IDLE;
GRPC_LB_POLICY_WEAK_REF(&p->base, "rr_connectivity");
grpc_subchannel_notify_on_state_change(
exec_ctx, sd->subchannel, p->base.interested_parties,
&sd->connectivity_state, &sd->connectivity_changed_closure);
GRPC_LB_POLICY_WEAK_REF(&p->base, "round_robin_connectivity");
&sd->curr_connectivity_state, &sd->connectivity_changed_closure);
}
}
@ -422,7 +435,7 @@ static int rr_pick(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
/* readily available, report right away */
*target = GRPC_CONNECTED_SUBCHANNEL_REF(
grpc_subchannel_get_connected_subchannel(selected->subchannel),
"picked");
"rr_picked");
if (user_data != NULL) {
*user_data = selected->user_data;
@ -453,125 +466,184 @@ static int rr_pick(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
}
}
static void update_state_counters(subchannel_data *sd) {
round_robin_lb_policy *p = sd->policy;
/* update p->num_transient_failures (resp. p->num_idle): if the previous
* state was TRANSIENT_FAILURE (resp. IDLE), decrement
* p->num_transient_failures (resp. p->num_idle). */
if (sd->prev_connectivity_state == GRPC_CHANNEL_TRANSIENT_FAILURE) {
GPR_ASSERT(p->num_transient_failures > 0);
--p->num_transient_failures;
} else if (sd->prev_connectivity_state == GRPC_CHANNEL_IDLE) {
GPR_ASSERT(p->num_idle > 0);
--p->num_idle;
}
}
/* sd is the subchannel_data associted with the updated subchannel.
* shutdown_error will only be used upon policy transition to TRANSIENT_FAILURE
* or SHUTDOWN */
static grpc_connectivity_state update_lb_connectivity_status(
grpc_exec_ctx *exec_ctx, subchannel_data *sd, grpc_error *error) {
/* In priority order. The first rule to match terminates the search (ie, if we
* are on rule n, all previous rules were unfulfilled).
*
* 1) RULE: ANY subchannel is READY => policy is READY.
* CHECK: At least one subchannel is ready iff p->ready_list is NOT empty.
*
* 2) RULE: ANY subchannel is CONNECTING => policy is CONNECTING.
* CHECK: sd->curr_connectivity_state == CONNECTING.
*
* 3) RULE: ALL subchannels are SHUTDOWN => policy is SHUTDOWN.
* CHECK: p->num_subchannels = 0.
*
* 4) RULE: ALL subchannels are TRANSIENT_FAILURE => policy is
* TRANSIENT_FAILURE.
* CHECK: p->num_transient_failures == p->num_subchannels.
*
* 5) RULE: ALL subchannels are IDLE => policy is IDLE.
* CHECK: p->num_idle == p->num_subchannels.
*/
round_robin_lb_policy *p = sd->policy;
if (!is_ready_list_empty(p)) { /* 1) READY */
grpc_connectivity_state_set(exec_ctx, &p->state_tracker, GRPC_CHANNEL_READY,
GRPC_ERROR_NONE, "rr_ready");
return GRPC_CHANNEL_READY;
} else if (sd->curr_connectivity_state ==
GRPC_CHANNEL_CONNECTING) { /* 2) CONNECTING */
grpc_connectivity_state_set(exec_ctx, &p->state_tracker,
GRPC_CHANNEL_CONNECTING, GRPC_ERROR_NONE,
"rr_connecting");
return GRPC_CHANNEL_CONNECTING;
} else if (p->num_subchannels == 0) { /* 3) SHUTDOWN */
grpc_connectivity_state_set(exec_ctx, &p->state_tracker,
GRPC_CHANNEL_SHUTDOWN, GRPC_ERROR_REF(error),
"rr_shutdown");
return GRPC_CHANNEL_SHUTDOWN;
} else if (p->num_transient_failures ==
p->num_subchannels) { /* 4) TRANSIENT_FAILURE */
grpc_connectivity_state_set(exec_ctx, &p->state_tracker,
GRPC_CHANNEL_TRANSIENT_FAILURE,
GRPC_ERROR_REF(error), "rr_transient_failure");
return GRPC_CHANNEL_TRANSIENT_FAILURE;
} else if (p->num_idle == p->num_subchannels) { /* 5) IDLE */
grpc_connectivity_state_set(exec_ctx, &p->state_tracker, GRPC_CHANNEL_IDLE,
GRPC_ERROR_NONE, "rr_idle");
return GRPC_CHANNEL_IDLE;
}
/* no change */
return sd->curr_connectivity_state;
}
static void rr_connectivity_changed(grpc_exec_ctx *exec_ctx, void *arg,
grpc_error *error) {
subchannel_data *sd = arg;
round_robin_lb_policy *p = sd->policy;
pending_pick *pp;
int unref = 0;
GRPC_ERROR_REF(error);
gpr_mu_lock(&p->mu);
if (p->shutdown) {
unref = 1;
} else {
switch (sd->connectivity_state) {
case GRPC_CHANNEL_READY:
grpc_connectivity_state_set(exec_ctx, &p->state_tracker,
GRPC_CHANNEL_READY, GRPC_ERROR_REF(error),
"connecting_ready");
/* add the newly connected subchannel to the list of connected ones.
* Note that it goes to the "end of the line". */
sd->ready_list_node = add_connected_sc_locked(p, sd);
/* at this point we know there's at least one suitable subchannel. Go
* ahead and pick one and notify the pending suitors in
* p->pending_picks. This preemtively replicates rr_pick()'s actions. */
ready_list *selected = peek_next_connected_locked(p);
GPR_ASSERT(selected != NULL);
if (p->pending_picks != NULL) {
/* if the selected subchannel is going to be used for the pending
* picks, update the last picked pointer */
advance_last_picked_locked(p);
gpr_mu_unlock(&p->mu);
GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &p->base, "rr_connectivity");
GRPC_ERROR_UNREF(error);
return;
}
switch (sd->curr_connectivity_state) {
case GRPC_CHANNEL_INIT:
GPR_UNREACHABLE_CODE();
case GRPC_CHANNEL_READY:
/* add the newly connected subchannel to the list of connected ones.
* Note that it goes to the "end of the line". */
sd->ready_list_node = add_connected_sc_locked(p, sd);
/* at this point we know there's at least one suitable subchannel. Go
* ahead and pick one and notify the pending suitors in
* p->pending_picks. This preemtively replicates rr_pick()'s actions. */
ready_list *selected = peek_next_connected_locked(p);
GPR_ASSERT(selected != NULL);
if (p->pending_picks != NULL) {
/* if the selected subchannel is going to be used for the pending
* picks, update the last picked pointer */
advance_last_picked_locked(p);
}
while ((pp = p->pending_picks)) {
p->pending_picks = pp->next;
*pp->target = GRPC_CONNECTED_SUBCHANNEL_REF(
grpc_subchannel_get_connected_subchannel(selected->subchannel),
"rr_picked");
if (pp->user_data != NULL) {
*pp->user_data = selected->user_data;
}
if (grpc_lb_round_robin_trace) {
gpr_log(GPR_DEBUG,
"[RR CONN CHANGED] TARGET <-- SUBCHANNEL %p (NODE %p)",
(void *)selected->subchannel, (void *)selected);
}
grpc_exec_ctx_sched(exec_ctx, pp->on_complete, GRPC_ERROR_NONE, NULL);
gpr_free(pp);
}
update_lb_connectivity_status(exec_ctx, sd, error);
sd->prev_connectivity_state = sd->curr_connectivity_state;
/* renew notification: reuses the "rr_connectivity" weak ref */
grpc_subchannel_notify_on_state_change(
exec_ctx, sd->subchannel, p->base.interested_parties,
&sd->curr_connectivity_state, &sd->connectivity_changed_closure);
break;
case GRPC_CHANNEL_IDLE:
++p->num_idle;
/* fallthrough */
case GRPC_CHANNEL_CONNECTING:
update_state_counters(sd);
update_lb_connectivity_status(exec_ctx, sd, error);
sd->prev_connectivity_state = sd->curr_connectivity_state;
/* renew notification: reuses the "rr_connectivity" weak ref */
grpc_subchannel_notify_on_state_change(
exec_ctx, sd->subchannel, p->base.interested_parties,
&sd->curr_connectivity_state, &sd->connectivity_changed_closure);
break;
case GRPC_CHANNEL_TRANSIENT_FAILURE:
++p->num_transient_failures;
/* remove from ready list if still present */
if (sd->ready_list_node != NULL) {
remove_disconnected_sc_locked(p, sd->ready_list_node);
sd->ready_list_node = NULL;
}
update_lb_connectivity_status(exec_ctx, sd, error);
sd->prev_connectivity_state = sd->curr_connectivity_state;
/* renew notification: reuses the "rr_connectivity" weak ref */
grpc_subchannel_notify_on_state_change(
exec_ctx, sd->subchannel, p->base.interested_parties,
&sd->curr_connectivity_state, &sd->connectivity_changed_closure);
break;
case GRPC_CHANNEL_SHUTDOWN:
update_state_counters(sd);
if (sd->ready_list_node != NULL) {
remove_disconnected_sc_locked(p, sd->ready_list_node);
sd->ready_list_node = NULL;
}
--p->num_subchannels;
GPR_SWAP(subchannel_data *, p->subchannels[sd->index],
p->subchannels[p->num_subchannels]);
GRPC_SUBCHANNEL_UNREF(exec_ctx, sd->subchannel, "rr_subchannel_shutdown");
p->subchannels[sd->index]->index = sd->index;
if (update_lb_connectivity_status(exec_ctx, sd, error) ==
GRPC_CHANNEL_SHUTDOWN) {
/* the policy is shutting down. Flush all the pending picks... */
while ((pp = p->pending_picks)) {
p->pending_picks = pp->next;
*pp->target = GRPC_CONNECTED_SUBCHANNEL_REF(
grpc_subchannel_get_connected_subchannel(selected->subchannel),
"picked");
if (pp->user_data != NULL) {
*pp->user_data = selected->user_data;
}
if (grpc_lb_round_robin_trace) {
gpr_log(GPR_DEBUG,
"[RR CONN CHANGED] TARGET <-- SUBCHANNEL %p (NODE %p)",
(void *)selected->subchannel, (void *)selected);
}
*pp->target = NULL;
grpc_exec_ctx_sched(exec_ctx, pp->on_complete, GRPC_ERROR_NONE, NULL);
gpr_free(pp);
}
grpc_subchannel_notify_on_state_change(
exec_ctx, sd->subchannel, p->base.interested_parties,
&sd->connectivity_state, &sd->connectivity_changed_closure);
break;
case GRPC_CHANNEL_CONNECTING:
case GRPC_CHANNEL_IDLE:
grpc_connectivity_state_set(
exec_ctx, &p->state_tracker, sd->connectivity_state,
GRPC_ERROR_REF(error), "connecting_changed");
grpc_subchannel_notify_on_state_change(
exec_ctx, sd->subchannel, p->base.interested_parties,
&sd->connectivity_state, &sd->connectivity_changed_closure);
break;
case GRPC_CHANNEL_TRANSIENT_FAILURE:
/* renew state notification */
grpc_subchannel_notify_on_state_change(
exec_ctx, sd->subchannel, p->base.interested_parties,
&sd->connectivity_state, &sd->connectivity_changed_closure);
/* remove from ready list if still present */
if (sd->ready_list_node != NULL) {
remove_disconnected_sc_locked(p, sd->ready_list_node);
sd->ready_list_node = NULL;
}
grpc_connectivity_state_set(
exec_ctx, &p->state_tracker, GRPC_CHANNEL_TRANSIENT_FAILURE,
GRPC_ERROR_REF(error), "connecting_transient_failure");
break;
case GRPC_CHANNEL_SHUTDOWN:
if (sd->ready_list_node != NULL) {
remove_disconnected_sc_locked(p, sd->ready_list_node);
sd->ready_list_node = NULL;
}
p->num_subchannels--;
GPR_SWAP(subchannel_data *, p->subchannels[sd->index],
p->subchannels[p->num_subchannels]);
GRPC_SUBCHANNEL_UNREF(exec_ctx, sd->subchannel, "round_robin");
p->subchannels[sd->index]->index = sd->index;
gpr_free(sd);
unref = 1;
if (p->num_subchannels == 0) {
grpc_connectivity_state_set(
exec_ctx, &p->state_tracker, GRPC_CHANNEL_SHUTDOWN,
GRPC_ERROR_CREATE_REFERENCING("Round Robin Channels Exhausted",
&error, 1),
"no_more_channels");
while ((pp = p->pending_picks)) {
p->pending_picks = pp->next;
*pp->target = NULL;
grpc_exec_ctx_sched(exec_ctx, pp->on_complete, GRPC_ERROR_NONE,
NULL);
gpr_free(pp);
}
} else {
grpc_connectivity_state_set(
exec_ctx, &p->state_tracker, GRPC_CHANNEL_TRANSIENT_FAILURE,
GRPC_ERROR_REF(error), "subchannel_failed");
}
} /* switch */
} /* !unref */
gpr_mu_unlock(&p->mu);
if (unref) {
GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &p->base, "round_robin_connectivity");
}
gpr_free(sd);
/* unref the "rr_connectivity" weak ref from start_picking */
GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &p->base, "rr_connectivity");
break;
}
gpr_mu_unlock(&p->mu);
GRPC_ERROR_UNREF(error);
}
@ -607,9 +679,9 @@ static void rr_ping_one(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
gpr_mu_unlock(&p->mu);
target = GRPC_CONNECTED_SUBCHANNEL_REF(
grpc_subchannel_get_connected_subchannel(selected->subchannel),
"picked");
"rr_picked");
grpc_connected_subchannel_ping(exec_ctx, target, closure);
GRPC_CONNECTED_SUBCHANNEL_UNREF(exec_ctx, target, "picked");
GRPC_CONNECTED_SUBCHANNEL_UNREF(exec_ctx, target, "rr_picked");
} else {
gpr_mu_unlock(&p->mu);
grpc_exec_ctx_sched(exec_ctx, closure,
@ -705,6 +777,11 @@ static grpc_lb_policy *round_robin_create(grpc_exec_ctx *exec_ctx,
grpc_lb_policy_init(&p->base, &round_robin_lb_policy_vtable);
grpc_connectivity_state_init(&p->state_tracker, GRPC_CHANNEL_IDLE,
"round_robin");
if (grpc_lb_round_robin_trace) {
gpr_log(GPR_DEBUG, "Created RR policy at %p with %lu subchannels",
(void *)p, (unsigned long)p->num_subchannels);
}
gpr_mu_init(&p->mu);
return &p->base;
}

@ -43,6 +43,8 @@ int grpc_connectivity_state_trace = 0;
const char *grpc_connectivity_state_name(grpc_connectivity_state state) {
switch (state) {
case GRPC_CHANNEL_INIT:
return "INIT";
case GRPC_CHANNEL_IDLE:
return "IDLE";
case GRPC_CHANNEL_CONNECTING:
@ -159,6 +161,7 @@ void grpc_connectivity_state_set(grpc_exec_ctx *exec_ctx,
grpc_error_free_string(error_string);
}
switch (state) {
case GRPC_CHANNEL_INIT:
case GRPC_CHANNEL_CONNECTING:
case GRPC_CHANNEL_IDLE:
case GRPC_CHANNEL_READY:

@ -62,8 +62,14 @@ typedef struct servers_fixture {
grpc_metadata_array *request_metadata_recv;
} servers_fixture;
typedef struct request_sequences {
size_t n;
int *connections;
int *connectivity_states;
} request_sequences;
typedef void (*verifier_fn)(const servers_fixture *, grpc_channel *,
const int *, const size_t);
const request_sequences *, const size_t);
typedef struct test_spec {
size_t num_iters;
@ -227,9 +233,24 @@ static void teardown_servers(servers_fixture *f) {
gpr_free(f);
}
static request_sequences request_sequences_create(size_t n) {
request_sequences res;
res.n = n;
res.connections = gpr_malloc(sizeof(*res.connections) * n);
res.connectivity_states = gpr_malloc(sizeof(*res.connectivity_states) * n);
return res;
}
static void request_sequences_destroy(const request_sequences *rseqs) {
gpr_free(rseqs->connections);
gpr_free(rseqs->connectivity_states);
}
/** Returns connection sequence (server indices), which must be freed */
static int *perform_request(servers_fixture *f, grpc_channel *client,
request_data *rdata, const test_spec *spec) {
static request_sequences perform_request(servers_fixture *f,
grpc_channel *client,
request_data *rdata,
const test_spec *spec) {
grpc_call *c;
int s_idx;
int *s_valid;
@ -239,11 +260,10 @@ static int *perform_request(servers_fixture *f, grpc_channel *client,
size_t i, iter_num;
grpc_event ev;
int read_tag;
int *connection_sequence;
int completed_client;
const request_sequences sequences = request_sequences_create(spec->num_iters);
s_valid = gpr_malloc(sizeof(int) * f->num_servers);
connection_sequence = gpr_malloc(sizeof(int) * spec->num_iters);
for (iter_num = 0; iter_num < spec->num_iters; iter_num++) {
cq_verifier *cqv = cq_verifier_create(f->cq);
@ -260,7 +280,7 @@ static int *perform_request(servers_fixture *f, grpc_channel *client,
}
}
connection_sequence[iter_num] = -1;
sequences.connections[iter_num] = -1;
grpc_metadata_array_init(&rdata->initial_metadata_recv);
grpc_metadata_array_init(&rdata->trailing_metadata_recv);
@ -305,12 +325,14 @@ static int *perform_request(servers_fixture *f, grpc_channel *client,
grpc_call_start_batch(c, ops, (size_t)(op - ops), tag(1), NULL));
s_idx = -1;
while (
(ev = grpc_completion_queue_next(
f->cq, GRPC_TIMEOUT_MILLIS_TO_DEADLINE(10 * RETRY_TIMEOUT), NULL))
.type != GRPC_QUEUE_TIMEOUT) {
while ((ev = grpc_completion_queue_next(
f->cq, GRPC_TIMEOUT_MILLIS_TO_DEADLINE(RETRY_TIMEOUT), NULL))
.type != GRPC_QUEUE_TIMEOUT) {
GPR_ASSERT(ev.type == GRPC_OP_COMPLETE);
read_tag = ((int)(intptr_t)ev.tag);
const grpc_connectivity_state conn_state =
grpc_channel_check_connectivity_state(client, 0);
sequences.connectivity_states[iter_num] = conn_state;
gpr_log(GPR_DEBUG, "EVENT: success:%d, type:%d, tag:%d iter:%" PRIuPTR,
ev.success, ev.type, read_tag, iter_num);
if (ev.success && read_tag >= 1000) {
@ -318,7 +340,7 @@ static int *perform_request(servers_fixture *f, grpc_channel *client,
/* only server notifications for non-shutdown events */
s_idx = read_tag - 1000;
s_valid[s_idx] = 1;
connection_sequence[iter_num] = s_idx;
sequences.connections[iter_num] = s_idx;
break;
} else if (read_tag == 1) {
gpr_log(GPR_DEBUG, "client timed out");
@ -381,10 +403,9 @@ static int *perform_request(servers_fixture *f, grpc_channel *client,
}
}
GPR_ASSERT(
grpc_completion_queue_next(
f->cq, GRPC_TIMEOUT_MILLIS_TO_DEADLINE(2 * RETRY_TIMEOUT), NULL)
.type == GRPC_QUEUE_TIMEOUT);
GPR_ASSERT(grpc_completion_queue_next(
f->cq, GRPC_TIMEOUT_MILLIS_TO_DEADLINE(RETRY_TIMEOUT), NULL)
.type == GRPC_QUEUE_TIMEOUT);
grpc_metadata_array_destroy(&rdata->initial_metadata_recv);
grpc_metadata_array_destroy(&rdata->trailing_metadata_recv);
@ -401,7 +422,7 @@ static int *perform_request(servers_fixture *f, grpc_channel *client,
gpr_free(s_valid);
return connection_sequence;
return sequences;
}
static grpc_call **perform_multirequest(servers_fixture *f,
@ -441,62 +462,10 @@ static grpc_call **perform_multirequest(servers_fixture *f,
return calls;
}
static void assert_channel_connectivity(grpc_channel *ch,
size_t num_accepted_conn_states,
int accepted_conn_state, ...) {
size_t i;
grpc_channel_stack *client_stack;
grpc_channel_element *client_channel_filter;
grpc_connectivity_state actual_conn_state;
grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
va_list ap;
client_stack = grpc_channel_get_channel_stack(ch);
client_channel_filter = grpc_channel_stack_last_element(client_stack);
actual_conn_state = grpc_client_channel_check_connectivity_state(
&exec_ctx, client_channel_filter, 0 /* don't try to connect */);
grpc_exec_ctx_finish(&exec_ctx);
va_start(ap, accepted_conn_state);
for (i = 0; i < num_accepted_conn_states; i++) {
if ((int)actual_conn_state == accepted_conn_state) {
break;
}
accepted_conn_state = va_arg(ap, grpc_connectivity_state);
}
va_end(ap);
if (i == num_accepted_conn_states) {
char **accepted_strs =
gpr_malloc(sizeof(char *) * num_accepted_conn_states);
char *accepted_str_joined;
va_start(ap, accepted_conn_state);
for (i = 0; i < num_accepted_conn_states; i++) {
GPR_ASSERT(gpr_asprintf(&accepted_strs[i], "%d", accepted_conn_state) >
0);
accepted_conn_state = va_arg(ap, grpc_connectivity_state);
}
va_end(ap);
accepted_str_joined = gpr_strjoin_sep((const char **)accepted_strs,
num_accepted_conn_states, ", ", NULL);
gpr_log(
GPR_ERROR,
"Channel connectivity assertion failed: expected <one of [%s]>, got %d",
accepted_str_joined, actual_conn_state);
for (i = 0; i < num_accepted_conn_states; i++) {
gpr_free(accepted_strs[i]);
}
gpr_free(accepted_strs);
gpr_free(accepted_str_joined);
abort();
}
}
void run_spec(const test_spec *spec) {
grpc_channel *client;
char *client_hostport;
char *servers_hostports_str;
int *actual_connection_sequence;
request_data rdata;
servers_fixture *f;
grpc_channel_args args;
@ -524,14 +493,14 @@ void run_spec(const test_spec *spec) {
gpr_log(GPR_INFO, "Testing '%s' with servers=%s client=%s", spec->description,
servers_hostports_str, client_hostport);
actual_connection_sequence = perform_request(f, client, &rdata, spec);
const request_sequences sequences = perform_request(f, client, &rdata, spec);
spec->verifier(f, client, actual_connection_sequence, spec->num_iters);
spec->verifier(f, client, &sequences, spec->num_iters);
gpr_free(client_hostport);
gpr_free(servers_hostports_str);
gpr_free(actual_connection_sequence);
gpr_free(rdata.call_details);
request_sequences_destroy(&sequences);
grpc_channel_destroy(client); /* calls the LB's shutdown func */
teardown_servers(f);
@ -684,29 +653,43 @@ static void print_failed_expectations(const int *expected_connection_sequence,
static void verify_vanilla_round_robin(const servers_fixture *f,
grpc_channel *client,
const int *actual_connection_sequence,
const request_sequences *sequences,
const size_t num_iters) {
int *expected_connection_sequence;
size_t i;
const size_t expected_seq_length = f->num_servers;
/* verify conn. seq. expectation */
/* get the first sequence of "num_servers" elements */
expected_connection_sequence = gpr_malloc(sizeof(int) * expected_seq_length);
memcpy(expected_connection_sequence, actual_connection_sequence,
int *expected_connection_sequence =
gpr_malloc(sizeof(int) * expected_seq_length);
memcpy(expected_connection_sequence, sequences->connections,
sizeof(int) * expected_seq_length);
for (i = 0; i < num_iters; i++) {
const int actual = actual_connection_sequence[i];
for (size_t i = 0; i < num_iters; i++) {
const int actual = sequences->connections[i];
const int expected = expected_connection_sequence[i % expected_seq_length];
if (actual != expected) {
print_failed_expectations(expected_connection_sequence,
actual_connection_sequence, expected_seq_length,
num_iters);
gpr_log(
GPR_ERROR,
"CONNECTION SEQUENCE FAILURE: expected %d, got %d at iteration #%d",
expected, actual, (int)i);
abort();
}
}
/* All servers are available, therefore all client subchannels are READY, even
* when we only need one for the client channel state to be READY */
for (size_t i = 0; i < sequences->n; i++) {
const grpc_connectivity_state actual = sequences->connectivity_states[i];
const grpc_connectivity_state expected = GRPC_CHANNEL_READY;
if (actual != expected) {
gpr_log(GPR_ERROR,
"CONNECTIVITY STATUS SEQUENCE FAILURE: expected '%s', got '%s' "
"at iteration #%d",
grpc_connectivity_state_name(expected),
grpc_connectivity_state_name(actual), (int)i);
abort();
}
}
assert_channel_connectivity(client, 1, GRPC_CHANNEL_READY);
gpr_free(expected_connection_sequence);
}
@ -715,7 +698,7 @@ static void verify_vanilla_round_robin(const servers_fixture *f,
* given in "f") are killed */
static void verify_vanishing_floor_round_robin(
const servers_fixture *f, grpc_channel *client,
const int *actual_connection_sequence, const size_t num_iters) {
const request_sequences *sequences, const size_t num_iters) {
int *expected_connection_sequence;
const size_t expected_seq_length = 2;
size_t i;
@ -723,57 +706,83 @@ static void verify_vanishing_floor_round_robin(
/* verify conn. seq. expectation */
/* copy the first full sequence (without -1s) */
expected_connection_sequence = gpr_malloc(sizeof(int) * expected_seq_length);
memcpy(expected_connection_sequence, actual_connection_sequence + 2,
memcpy(expected_connection_sequence, sequences->connections + 2,
expected_seq_length * sizeof(int));
/* first two elements of the sequence should be [0 (1st server), -1 (failure)]
*/
GPR_ASSERT(actual_connection_sequence[0] == 0);
GPR_ASSERT(actual_connection_sequence[1] == -1);
GPR_ASSERT(sequences->connections[0] == 0);
GPR_ASSERT(sequences->connections[1] == -1);
/* the next two element must be [3, 0], repeating from that point: the 3 is
* brought forth by servers 1 and 2 disappearing after the intial pick of 0 */
GPR_ASSERT(actual_connection_sequence[2] == 3);
GPR_ASSERT(actual_connection_sequence[3] == 0);
GPR_ASSERT(sequences->connections[2] == 3);
GPR_ASSERT(sequences->connections[3] == 0);
/* make sure that the expectation obliges */
for (i = 2; i < num_iters; i++) {
const int actual = actual_connection_sequence[i];
const int actual = sequences->connections[i];
const int expected = expected_connection_sequence[i % expected_seq_length];
if (actual != expected) {
print_failed_expectations(expected_connection_sequence,
actual_connection_sequence, expected_seq_length,
sequences->connections, expected_seq_length,
num_iters);
abort();
}
}
/* There's always at least one subchannel READY (connected), therefore the
* overall state of the client channel is READY at all times. */
for (i = 0; i < sequences->n; i++) {
const grpc_connectivity_state actual = sequences->connectivity_states[i];
const grpc_connectivity_state expected = GRPC_CHANNEL_READY;
if (actual != expected) {
gpr_log(GPR_ERROR,
"CONNECTIVITY STATUS SEQUENCE FAILURE: expected '%s', got '%s' "
"at iteration #%d",
grpc_connectivity_state_name(expected),
grpc_connectivity_state_name(actual), (int)i);
abort();
}
}
gpr_free(expected_connection_sequence);
}
static void verify_total_carnage_round_robin(
const servers_fixture *f, grpc_channel *client,
const int *actual_connection_sequence, const size_t num_iters) {
size_t i;
for (i = 0; i < num_iters; i++) {
const int actual = actual_connection_sequence[i];
static void verify_total_carnage_round_robin(const servers_fixture *f,
grpc_channel *client,
const request_sequences *sequences,
const size_t num_iters) {
for (size_t i = 0; i < num_iters; i++) {
const int actual = sequences->connections[i];
const int expected = -1;
if (actual != expected) {
gpr_log(GPR_ERROR, "FAILURE: expected %d, actual %d at iter %" PRIuPTR,
expected, actual, i);
gpr_log(
GPR_ERROR,
"CONNECTION SEQUENCE FAILURE: expected %d, got %d at iteration #%d",
expected, actual, (int)i);
abort();
}
}
/* even though we know all the servers are dead, the client is still trying
* retrying, believing it's in a transient failure situation */
assert_channel_connectivity(client, 2, GRPC_CHANNEL_TRANSIENT_FAILURE,
GRPC_CHANNEL_CONNECTING);
/* no server is ever available. The persistent state is TRANSIENT_FAILURE */
for (size_t i = 0; i < sequences->n; i++) {
const grpc_connectivity_state actual = sequences->connectivity_states[i];
const grpc_connectivity_state expected = GRPC_CHANNEL_TRANSIENT_FAILURE;
if (actual != expected) {
gpr_log(GPR_ERROR,
"CONNECTIVITY STATUS SEQUENCE FAILURE: expected '%s', got '%s' "
"at iteration #%d",
grpc_connectivity_state_name(expected),
grpc_connectivity_state_name(actual), (int)i);
abort();
}
}
}
static void verify_partial_carnage_round_robin(
const servers_fixture *f, grpc_channel *client,
const int *actual_connection_sequence, const size_t num_iters) {
const request_sequences *sequences, const size_t num_iters) {
int *expected_connection_sequence;
size_t i;
const size_t expected_seq_length = f->num_servers;
@ -781,15 +790,15 @@ static void verify_partial_carnage_round_robin(
/* verify conn. seq. expectation */
/* get the first sequence of "num_servers" elements */
expected_connection_sequence = gpr_malloc(sizeof(int) * expected_seq_length);
memcpy(expected_connection_sequence, actual_connection_sequence,
memcpy(expected_connection_sequence, sequences->connections,
sizeof(int) * expected_seq_length);
for (i = 0; i < num_iters / 2; i++) {
const int actual = actual_connection_sequence[i];
const int actual = sequences->connections[i];
const int expected = expected_connection_sequence[i % expected_seq_length];
if (actual != expected) {
print_failed_expectations(expected_connection_sequence,
actual_connection_sequence, expected_seq_length,
sequences->connections, expected_seq_length,
num_iters);
abort();
}
@ -797,13 +806,34 @@ static void verify_partial_carnage_round_robin(
/* second half of the iterations go without response */
for (; i < num_iters; i++) {
GPR_ASSERT(actual_connection_sequence[i] == -1);
GPR_ASSERT(sequences->connections[i] == -1);
}
/* We can assert that the first client channel state should be READY, when all
* servers were available; and that the last one should be TRANSIENT_FAILURE,
* after all servers are gone. */
grpc_connectivity_state actual = sequences->connectivity_states[0];
grpc_connectivity_state expected = GRPC_CHANNEL_READY;
if (actual != expected) {
gpr_log(GPR_ERROR,
"CONNECTIVITY STATUS SEQUENCE FAILURE: expected '%s', got '%s' "
"at iteration #%d",
grpc_connectivity_state_name(expected),
grpc_connectivity_state_name(actual), 0);
abort();
}
actual = sequences->connectivity_states[num_iters - 1];
expected = GRPC_CHANNEL_TRANSIENT_FAILURE;
if (actual != expected) {
gpr_log(GPR_ERROR,
"CONNECTIVITY STATUS SEQUENCE FAILURE: expected '%s', got '%s' "
"at iteration #%d",
grpc_connectivity_state_name(expected),
grpc_connectivity_state_name(actual), (int)num_iters - 1);
abort();
}
/* even though we know all the servers are dead, the client is still trying
* retrying, believing it's in a transient failure situation */
assert_channel_connectivity(client, 2, GRPC_CHANNEL_TRANSIENT_FAILURE,
GRPC_CHANNEL_CONNECTING);
gpr_free(expected_connection_sequence);
}
@ -826,15 +856,14 @@ static void dump_array(const char *desc, const int *data, const size_t count) {
static void verify_rebirth_round_robin(const servers_fixture *f,
grpc_channel *client,
const int *actual_connection_sequence,
const request_sequences *sequences,
const size_t num_iters) {
int *expected_connection_sequence;
size_t i, j, unique_seq_last_idx, unique_seq_first_idx;
const size_t expected_seq_length = f->num_servers;
int *seen_elements;
dump_array("actual_connection_sequence", actual_connection_sequence,
num_iters);
dump_array("actual_connection_sequence", sequences->connections, num_iters);
/* verify conn. seq. expectation */
/* get the first unique run of length "num_servers". */
@ -845,13 +874,13 @@ static void verify_rebirth_round_robin(const servers_fixture *f,
memset(seen_elements, 0, sizeof(int) * expected_seq_length);
for (i = 0; i < num_iters; i++) {
if (actual_connection_sequence[i] < 0 ||
seen_elements[actual_connection_sequence[i]] != 0) {
if (sequences->connections[i] < 0 ||
seen_elements[sequences->connections[i]] != 0) {
/* if anything breaks the uniqueness of the run, back to square zero */
memset(seen_elements, 0, sizeof(int) * expected_seq_length);
continue;
}
seen_elements[actual_connection_sequence[i]] = 1;
seen_elements[sequences->connections[i]] = 1;
for (j = 0; j < expected_seq_length; j++) {
if (seen_elements[j] == 0) break;
}
@ -870,30 +899,72 @@ static void verify_rebirth_round_robin(const servers_fixture *f,
unique_seq_first_idx = (unique_seq_last_idx - expected_seq_length + 1);
memcpy(expected_connection_sequence,
actual_connection_sequence + unique_seq_first_idx,
sequences->connections + unique_seq_first_idx,
sizeof(int) * expected_seq_length);
/* first iteration succeeds */
GPR_ASSERT(actual_connection_sequence[0] != -1);
GPR_ASSERT(sequences->connections[0] != -1);
/* then we fail for a while... */
GPR_ASSERT(actual_connection_sequence[1] == -1);
GPR_ASSERT(sequences->connections[1] == -1);
/* ... but should be up at "unique_seq_first_idx" */
GPR_ASSERT(actual_connection_sequence[unique_seq_first_idx] != -1);
GPR_ASSERT(sequences->connections[unique_seq_first_idx] != -1);
for (j = 0, i = unique_seq_first_idx; i < num_iters; i++) {
const int actual = actual_connection_sequence[i];
const int actual = sequences->connections[i];
const int expected =
expected_connection_sequence[j++ % expected_seq_length];
if (actual != expected) {
print_failed_expectations(expected_connection_sequence,
actual_connection_sequence, expected_seq_length,
sequences->connections, expected_seq_length,
num_iters);
abort();
}
}
/* things are fine once the servers are brought back up */
assert_channel_connectivity(client, 1, GRPC_CHANNEL_READY);
/* We can assert that the first client channel state should be READY, when all
* servers were available; same thing for the last one. In the middle
* somewhere there must exist at least one TRANSIENT_FAILURE */
grpc_connectivity_state actual = sequences->connectivity_states[0];
grpc_connectivity_state expected = GRPC_CHANNEL_READY;
if (actual != expected) {
gpr_log(GPR_ERROR,
"CONNECTIVITY STATUS SEQUENCE FAILURE: expected '%s', got '%s' "
"at iteration #%d",
grpc_connectivity_state_name(expected),
grpc_connectivity_state_name(actual), 0);
abort();
}
actual = sequences->connectivity_states[num_iters - 1];
expected = GRPC_CHANNEL_READY;
if (actual != expected) {
gpr_log(GPR_ERROR,
"CONNECTIVITY STATUS SEQUENCE FAILURE: expected '%s', got '%s' "
"at iteration #%d",
grpc_connectivity_state_name(expected),
grpc_connectivity_state_name(actual), (int)num_iters - 1);
abort();
}
bool found_failure_status = false;
for (i = 1; i < sequences->n - 1; i++) {
if (sequences->connectivity_states[i] == GRPC_CHANNEL_TRANSIENT_FAILURE) {
found_failure_status = true;
break;
}
}
if (!found_failure_status) {
gpr_log(
GPR_ERROR,
"CONNECTIVITY STATUS SEQUENCE FAILURE: "
"GRPC_CHANNEL_TRANSIENT_FAILURE status not found. Got the following "
"instead:");
for (i = 0; i < num_iters; i++) {
gpr_log(GPR_ERROR, "[%d]: %s", (int)i,
grpc_connectivity_state_name(sequences->connectivity_states[i]));
}
}
gpr_free(expected_connection_sequence);
gpr_free(seen_elements);
}
@ -934,7 +1005,7 @@ int main(int argc, char **argv) {
* This should knock down the server bound to be selected next */
test_spec_reset(spec);
spec->verifier = verify_vanishing_floor_round_robin;
spec->description = "test_kill_all_server_at_2nd_iteration";
spec->description = "test_kill_middle_servers_at_2nd_iteration";
for (i = 1; i < NUM_SERVERS - 1; i++) {
spec->kill_at[1][i] = 1;
}

Loading…
Cancel
Save