From c546ee6ff78a5e742e6f73e4c64a53e50a63a047 Mon Sep 17 00:00:00 2001 From: nnorwitz Date: Thu, 22 Feb 2018 08:50:07 -0800 Subject: [PATCH] Use the total # of processors instead of # online. Use the total number of processors in the system since the number online can change from time to time. When running this on a Jetson TX-2 which can bring processors online and offline and running a program under valgrind, these errors are reported: ==4895== Invalid read of size 8 ==4895== at 0x13BA8BC: grpc_completion_queue_create_internal(grpc_cq_completion_type, grpc_cq_polling_type) (completion_queue.cc:440) ==4895== by 0x13B9B53: grpc_completion_queue_create (completion_queue_factory.cc:76) ==4895== by 0x1203993: CompletionQueue (completion_queue.h:240) ==4895== by 0x1203993: ServerCompletionQueue (completion_queue.h:378) ==4895== by 0x1203993: grpc::ServerBuilder::BuildAndStart() (server_builder.cc:258) ==4895== Address 0x269c9490 is 16 bytes after a block of size 29,952 alloc'd ==4895== at 0x7900C3C: calloc (vg_replace_malloc.c:711) ==4895== by 0x13E50DB: gpr_zalloc (alloc.cc:67) ==4895== by 0x13CB1D3: grpc_stats_init() (stats.cc:36) ==4895== by 0x1374C07: grpc_init (init.cc:127) ==4895== by 0x1203947: GrpcLibraryCodegen (grpc_library.h:45) ==4895== by 0x1203947: CompletionQueue (completion_queue.h:236) ==4895== by 0x1203947: ServerCompletionQueue (completion_queue.h:378) ==4895== by 0x1203947: grpc::ServerBuilder::BuildAndStart() (server_builder.cc:258) ==4895== ==4895== Invalid write of size 8 ==4895== at 0x13BA8C4: grpc_completion_queue_create_internal(grpc_cq_completion_type, grpc_cq_polling_type) (completion_queue.cc:440) ==4895== by 0x13B9B53: grpc_completion_queue_create (completion_queue_factory.cc:76) ==4895== by 0x1203993: CompletionQueue (completion_queue.h:240) ==4895== by 0x1203993: ServerCompletionQueue (completion_queue.h:378) ==4895== by 0x1203993: grpc::ServerBuilder::BuildAndStart() (server_builder.cc:258) ==4895== Address 0x269c9490 is 16 bytes after a 
block of size 29,952 alloc'd ==4895== at 0x7900C3C: calloc (vg_replace_malloc.c:711) ==4895== by 0x13E50DB: gpr_zalloc (alloc.cc:67) ==4895== by 0x13CB1D3: grpc_stats_init() (stats.cc:36) ==4895== by 0x1374C07: grpc_init (init.cc:127) ==4895== by 0x1203947: GrpcLibraryCodegen (grpc_library.h:45) ==4895== by 0x1203947: CompletionQueue (completion_queue.h:236) ==4895== by 0x1203947: ServerCompletionQueue (completion_queue.h:378) ==4895== by 0x1203947: grpc::ServerBuilder::BuildAndStart() (server_builder.cc:258) After this change, the errors are gone. We also had many crashes on startup that were likely caused by this problem. The Jetson has a user-level program that allows a user to change CPUs to online or offline. So using the max # possible should reduce the potential for errors like these and at worst over-allocate just a few extra bytes. --- src/core/lib/gpr/cpu_linux.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/lib/gpr/cpu_linux.cc b/src/core/lib/gpr/cpu_linux.cc index 4782f9f7420..fda28916f82 100644 --- a/src/core/lib/gpr/cpu_linux.cc +++ b/src/core/lib/gpr/cpu_linux.cc @@ -45,7 +45,7 @@ static void init_num_cpus() { #endif /* This must be signed. sysconf returns -1 when the number cannot be determined */ - ncpus = static_cast(sysconf(_SC_NPROCESSORS_ONLN)); + ncpus = static_cast(sysconf(_SC_NPROCESSORS_CONF)); if (ncpus < 1) { gpr_log(GPR_ERROR, "Cannot determine number of CPUs: assuming 1"); ncpus = 1;