Fix a TSAN reported race

I think this was the frequent crash in uds_cancel_after_invoke.

The race happens because a channel is deleted concurrently with an address being resolved (and UDS gets the resolution fast enough for this to actually happen).

The fix is to guarantee no callbacks will be made after cancel has been called (which was the original guaranteee that got lost somewhere).
pull/603/head
Craig Tiller 10 years ago
parent fb89ee38d5
commit 43a2b176f1
  1. 29
      src/core/channel/client_setup.c
  2. 6
      src/core/channel/client_setup.h
  3. 5
      src/core/surface/channel_create.c
  4. 5
      src/core/surface/secure_channel_create.c

@ -49,8 +49,11 @@ struct grpc_client_setup {
grpc_alarm backoff_alarm; grpc_alarm backoff_alarm;
gpr_timespec current_backoff_interval; gpr_timespec current_backoff_interval;
int in_alarm; int in_alarm;
int in_cb;
int cancelled;
gpr_mu mu; gpr_mu mu;
gpr_cv cv;
grpc_client_setup_request *active_request; grpc_client_setup_request *active_request;
int refs; int refs;
}; };
@ -67,6 +70,7 @@ gpr_timespec grpc_client_setup_request_deadline(grpc_client_setup_request *r) {
static void destroy_setup(grpc_client_setup *s) { static void destroy_setup(grpc_client_setup *s) {
gpr_mu_destroy(&s->mu); gpr_mu_destroy(&s->mu);
gpr_cv_destroy(&s->cv);
s->done(s->user_data); s->done(s->user_data);
grpc_channel_args_destroy(s->args); grpc_channel_args_destroy(s->args);
gpr_free(s); gpr_free(s);
@ -111,6 +115,10 @@ static void setup_cancel(grpc_transport_setup *sp) {
int cancel_alarm = 0; int cancel_alarm = 0;
gpr_mu_lock(&s->mu); gpr_mu_lock(&s->mu);
s->cancelled = 1;
while (s->in_cb) {
gpr_cv_wait(&s->cv, &s->mu, gpr_inf_future);
}
GPR_ASSERT(s->refs > 0); GPR_ASSERT(s->refs > 0);
/* effectively cancels the current request (if any) */ /* effectively cancels the current request (if any) */
@ -129,6 +137,24 @@ static void setup_cancel(grpc_transport_setup *sp) {
} }
} }
int grpc_client_setup_cb_begin(grpc_client_setup_request *r) {
gpr_mu_lock(&r->setup->mu);
if (r->setup->cancelled) {
gpr_mu_unlock(&r->setup->mu);
return 0;
}
r->setup->in_cb++;
gpr_mu_unlock(&r->setup->mu);
return 1;
}
void grpc_client_setup_cb_end(grpc_client_setup_request *r) {
gpr_mu_lock(&r->setup->mu);
r->setup->in_cb--;
if (r->setup->cancelled) gpr_cv_signal(&r->setup->cv);
gpr_mu_unlock(&r->setup->mu);
}
/* vtable for transport setup */ /* vtable for transport setup */
static const grpc_transport_setup_vtable setup_vtable = {setup_initiate, static const grpc_transport_setup_vtable setup_vtable = {setup_initiate,
setup_cancel}; setup_cancel};
@ -142,6 +168,7 @@ void grpc_client_setup_create_and_attach(
s->base.vtable = &setup_vtable; s->base.vtable = &setup_vtable;
gpr_mu_init(&s->mu); gpr_mu_init(&s->mu);
gpr_cv_init(&s->cv);
s->refs = 1; s->refs = 1;
s->mdctx = mdctx; s->mdctx = mdctx;
s->initiate = initiate; s->initiate = initiate;
@ -151,6 +178,8 @@ void grpc_client_setup_create_and_attach(
s->args = grpc_channel_args_copy(args); s->args = grpc_channel_args_copy(args);
s->current_backoff_interval = gpr_time_from_micros(1000000); s->current_backoff_interval = gpr_time_from_micros(1000000);
s->in_alarm = 0; s->in_alarm = 0;
s->in_cb = 0;
s->cancelled = 0;
grpc_client_channel_set_transport_setup(newly_minted_channel, &s->base); grpc_client_channel_set_transport_setup(newly_minted_channel, &s->base);
} }

@ -58,6 +58,12 @@ void grpc_client_setup_request_finish(grpc_client_setup_request *r,
const grpc_channel_args *grpc_client_setup_get_channel_args( const grpc_channel_args *grpc_client_setup_get_channel_args(
grpc_client_setup_request *r); grpc_client_setup_request *r);
/* Call before calling back into the setup listener, and call only if
this function returns 1. If it returns 1, also promise to call
grpc_client_setup_cb_end */
int grpc_client_setup_cb_begin(grpc_client_setup_request *r);
void grpc_client_setup_cb_end(grpc_client_setup_request *r);
/* Get the deadline for a request passed in to initiate. Implementations should /* Get the deadline for a request passed in to initiate. Implementations should
make a best effort to honor this deadline. */ make a best effort to honor this deadline. */
gpr_timespec grpc_client_setup_request_deadline(grpc_client_setup_request *r); gpr_timespec grpc_client_setup_request_deadline(grpc_client_setup_request *r);

@ -107,13 +107,16 @@ static void on_connect(void *rp, grpc_endpoint *tcp) {
} else { } else {
return; return;
} }
} else { } else if (grpc_client_setup_cb_begin(r->cs_request)) {
grpc_create_chttp2_transport( grpc_create_chttp2_transport(
r->setup->setup_callback, r->setup->setup_user_data, r->setup->setup_callback, r->setup->setup_user_data,
grpc_client_setup_get_channel_args(r->cs_request), tcp, NULL, 0, grpc_client_setup_get_channel_args(r->cs_request), tcp, NULL, 0,
grpc_client_setup_get_mdctx(r->cs_request), 1); grpc_client_setup_get_mdctx(r->cs_request), 1);
grpc_client_setup_cb_end(r->cs_request);
done(r, 1); done(r, 1);
return; return;
} else {
done(r, 0);
} }
} }

@ -97,12 +97,15 @@ static void on_secure_transport_setup_done(void *rp,
if (status != GRPC_SECURITY_OK) { if (status != GRPC_SECURITY_OK) {
gpr_log(GPR_ERROR, "Secure transport setup failed with error %d.", status); gpr_log(GPR_ERROR, "Secure transport setup failed with error %d.", status);
done(r, 0); done(r, 0);
} else { } else if (grpc_client_setup_cb_begin(r->cs_request)) {
grpc_create_chttp2_transport( grpc_create_chttp2_transport(
r->setup->setup_callback, r->setup->setup_user_data, r->setup->setup_callback, r->setup->setup_user_data,
grpc_client_setup_get_channel_args(r->cs_request), secure_endpoint, grpc_client_setup_get_channel_args(r->cs_request), secure_endpoint,
NULL, 0, grpc_client_setup_get_mdctx(r->cs_request), 1); NULL, 0, grpc_client_setup_get_mdctx(r->cs_request), 1);
grpc_client_setup_cb_end(r->cs_request);
done(r, 1); done(r, 1);
} else {
done(r, 0);
} }
} }

Loading…
Cancel
Save