diff --git a/BUILD b/BUILD index e04d0df1a23..241f7c220d5 100644 --- a/BUILD +++ b/BUILD @@ -1230,9 +1230,6 @@ grpc_cc_library( grpc_cc_library( name = "grpc_lb_subchannel_list", - srcs = [ - "src/core/ext/filters/client_channel/lb_policy/subchannel_list.cc", - ], hdrs = [ "src/core/ext/filters/client_channel/lb_policy/subchannel_list.h", ], @@ -1285,6 +1282,20 @@ grpc_cc_library( ], ) +grpc_cc_library( + name = "lb_load_data_store", + srcs = [ + "src/cpp/server/load_reporter/load_data_store.cc", + ], + hdrs = [ + "src/cpp/server/load_reporter/load_data_store.h", + ], + language = "c++", + deps = [ + "grpc++", + ], +) + grpc_cc_library( name = "grpc_resolver_dns_native", srcs = [ diff --git a/CMakeLists.txt b/CMakeLists.txt index 3e0a93aff6b..5e64d2efbc4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -584,6 +584,7 @@ endif() if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) add_dependencies(buildtests_cxx json_run_localhost) endif() +add_dependencies(buildtests_cxx lb_load_data_store_test) add_dependencies(buildtests_cxx memory_test) add_dependencies(buildtests_cxx metrics_client) add_dependencies(buildtests_cxx mock_test) @@ -1193,7 +1194,6 @@ add_library(grpc src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.c src/core/ext/filters/client_channel/resolver/fake/fake_resolver.cc src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc - src/core/ext/filters/client_channel/lb_policy/subchannel_list.cc src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_posix.cc @@ -2499,7 +2499,6 @@ add_library(grpc_unsecure third_party/nanopb/pb_decode.c third_party/nanopb/pb_encode.c src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc - src/core/ext/filters/client_channel/lb_policy/subchannel_list.cc 
src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc src/core/ext/census/grpc_context.cc src/core/ext/filters/max_age/max_age_filter.cc @@ -4972,6 +4971,49 @@ target_link_libraries(interop_server_main ) +endif (gRPC_BUILD_TESTS) +if (gRPC_BUILD_TESTS) + +add_library(lb_load_data_store + src/cpp/server/load_reporter/load_data_store.cc +) + +if(WIN32 AND MSVC) + set_target_properties(lb_load_data_store PROPERTIES COMPILE_PDB_NAME "lb_load_data_store" + COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" + ) + if (gRPC_INSTALL) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/lb_load_data_store.pdb + DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL + ) + endif() +endif() + + +target_include_directories(lb_load_data_store + PUBLIC $<INSTALL_INTERFACE:${gRPC_INSTALL_INCLUDEDIR}> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} + PRIVATE ${_gRPC_SSL_INCLUDE_DIR} + PRIVATE ${_gRPC_PROTOBUF_INCLUDE_DIR} + PRIVATE ${_gRPC_ZLIB_INCLUDE_DIR} + PRIVATE ${_gRPC_BENCHMARK_INCLUDE_DIR} + PRIVATE ${_gRPC_CARES_INCLUDE_DIR} + PRIVATE ${_gRPC_GFLAGS_INCLUDE_DIR} + PRIVATE ${_gRPC_ADDRESS_SORTING_INCLUDE_DIR} + PRIVATE third_party/googletest/googletest/include + PRIVATE third_party/googletest/googletest + PRIVATE third_party/googletest/googlemock/include + PRIVATE third_party/googletest/googlemock + PRIVATE ${_gRPC_PROTO_GENS_DIR} +) + +target_link_libraries(lb_load_data_store + ${_gRPC_PROTOBUF_LIBRARIES} + ${_gRPC_ALLTARGETS_LIBRARIES} + grpc++ +) + + endif (gRPC_BUILD_TESTS) if (gRPC_BUILD_TESTS) @@ -12271,6 +12313,46 @@ endif() endif (gRPC_BUILD_TESTS) if (gRPC_BUILD_TESTS) +add_executable(lb_load_data_store_test + test/cpp/server/load_reporter/load_data_store_test.cc + third_party/googletest/googletest/src/gtest-all.cc + third_party/googletest/googlemock/src/gmock-all.cc +) + + +target_include_directories(lb_load_data_store_test + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include + PRIVATE ${_gRPC_SSL_INCLUDE_DIR} + PRIVATE ${_gRPC_PROTOBUF_INCLUDE_DIR} + PRIVATE ${_gRPC_ZLIB_INCLUDE_DIR} + 
PRIVATE ${_gRPC_BENCHMARK_INCLUDE_DIR} + PRIVATE ${_gRPC_CARES_INCLUDE_DIR} + PRIVATE ${_gRPC_GFLAGS_INCLUDE_DIR} + PRIVATE ${_gRPC_ADDRESS_SORTING_INCLUDE_DIR} + PRIVATE third_party/googletest/googletest/include + PRIVATE third_party/googletest/googletest + PRIVATE third_party/googletest/googlemock/include + PRIVATE third_party/googletest/googlemock + PRIVATE ${_gRPC_PROTO_GENS_DIR} +) + +target_link_libraries(lb_load_data_store_test + ${_gRPC_PROTOBUF_LIBRARIES} + ${_gRPC_ALLTARGETS_LIBRARIES} + lb_load_data_store + grpc++_test_util + grpc_test_util + grpc++ + grpc + gpr_test_util + gpr + ${_gRPC_GFLAGS_LIBRARIES} +) + +endif (gRPC_BUILD_TESTS) +if (gRPC_BUILD_TESTS) + add_executable(memory_test test/core/gprpp/memory_test.cc third_party/googletest/googletest/src/gtest-all.cc diff --git a/Makefile b/Makefile index c6468263398..76fa95b4191 100644 --- a/Makefile +++ b/Makefile @@ -1178,6 +1178,7 @@ interop_client: $(BINDIR)/$(CONFIG)/interop_client interop_server: $(BINDIR)/$(CONFIG)/interop_server interop_test: $(BINDIR)/$(CONFIG)/interop_test json_run_localhost: $(BINDIR)/$(CONFIG)/json_run_localhost +lb_load_data_store_test: $(BINDIR)/$(CONFIG)/lb_load_data_store_test memory_test: $(BINDIR)/$(CONFIG)/memory_test metrics_client: $(BINDIR)/$(CONFIG)/metrics_client mock_test: $(BINDIR)/$(CONFIG)/mock_test @@ -1390,9 +1391,9 @@ pc_cxx: $(LIBDIR)/$(CONFIG)/pkgconfig/grpc++.pc pc_cxx_unsecure: $(LIBDIR)/$(CONFIG)/pkgconfig/grpc++_unsecure.pc ifeq ($(EMBED_OPENSSL),true) -privatelibs_cxx: $(LIBDIR)/$(CONFIG)/libgrpc++_core_stats.a $(LIBDIR)/$(CONFIG)/libgrpc++_proto_reflection_desc_db.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_config.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_util_unsecure.a $(LIBDIR)/$(CONFIG)/libgrpc_cli_libs.a $(LIBDIR)/$(CONFIG)/libhttp2_client_main.a $(LIBDIR)/$(CONFIG)/libinterop_client_helper.a $(LIBDIR)/$(CONFIG)/libinterop_client_main.a $(LIBDIR)/$(CONFIG)/libinterop_server_helper.a 
$(LIBDIR)/$(CONFIG)/libinterop_server_lib.a $(LIBDIR)/$(CONFIG)/libinterop_server_main.a $(LIBDIR)/$(CONFIG)/libqps.a $(LIBDIR)/$(CONFIG)/libboringssl_test_util.a $(LIBDIR)/$(CONFIG)/libboringssl_crypto_test_data_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_asn1_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_base64_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_bio_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_buf_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_bytestring_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_chacha_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_aead_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_cipher_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_cmac_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_compiler_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_constant_time_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_ed25519_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_spake25519_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_x25519_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_dh_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_digest_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_dsa_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_ecdh_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_err_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_evp_extra_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_evp_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_pbkdf_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_scrypt_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_aes_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_bn_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_ec_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_p256-x86_64_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_ecdsa_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_gcm_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_ctrdrbg_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_hkdf_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_hmac_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_lhash_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_obj_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_pkcs7_test_lib.a 
$(LIBDIR)/$(CONFIG)/libboringssl_pkcs12_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_pkcs8_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_poly1305_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_pool_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_refcount_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_rsa_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_file_test_gtest_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_gtest_main_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_thread_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_x509_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_tab_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_v3name_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_span_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_ssl_test_lib.a $(LIBDIR)/$(CONFIG)/libbenchmark.a +privatelibs_cxx: $(LIBDIR)/$(CONFIG)/libgrpc++_core_stats.a $(LIBDIR)/$(CONFIG)/libgrpc++_proto_reflection_desc_db.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_config.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_util_unsecure.a $(LIBDIR)/$(CONFIG)/libgrpc_cli_libs.a $(LIBDIR)/$(CONFIG)/libhttp2_client_main.a $(LIBDIR)/$(CONFIG)/libinterop_client_helper.a $(LIBDIR)/$(CONFIG)/libinterop_client_main.a $(LIBDIR)/$(CONFIG)/libinterop_server_helper.a $(LIBDIR)/$(CONFIG)/libinterop_server_lib.a $(LIBDIR)/$(CONFIG)/libinterop_server_main.a $(LIBDIR)/$(CONFIG)/liblb_load_data_store.a $(LIBDIR)/$(CONFIG)/libqps.a $(LIBDIR)/$(CONFIG)/libboringssl_test_util.a $(LIBDIR)/$(CONFIG)/libboringssl_crypto_test_data_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_asn1_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_base64_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_bio_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_buf_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_bytestring_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_chacha_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_aead_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_cipher_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_cmac_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_compiler_test_lib.a 
$(LIBDIR)/$(CONFIG)/libboringssl_constant_time_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_ed25519_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_spake25519_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_x25519_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_dh_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_digest_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_dsa_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_ecdh_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_err_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_evp_extra_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_evp_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_pbkdf_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_scrypt_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_aes_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_bn_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_ec_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_p256-x86_64_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_ecdsa_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_gcm_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_ctrdrbg_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_hkdf_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_hmac_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_lhash_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_obj_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_pkcs7_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_pkcs12_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_pkcs8_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_poly1305_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_pool_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_refcount_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_rsa_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_file_test_gtest_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_gtest_main_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_thread_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_x509_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_tab_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_v3name_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_span_test_lib.a $(LIBDIR)/$(CONFIG)/libboringssl_ssl_test_lib.a $(LIBDIR)/$(CONFIG)/libbenchmark.a 
else -privatelibs_cxx: $(LIBDIR)/$(CONFIG)/libgrpc++_core_stats.a $(LIBDIR)/$(CONFIG)/libgrpc++_proto_reflection_desc_db.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_config.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_util_unsecure.a $(LIBDIR)/$(CONFIG)/libgrpc_cli_libs.a $(LIBDIR)/$(CONFIG)/libhttp2_client_main.a $(LIBDIR)/$(CONFIG)/libinterop_client_helper.a $(LIBDIR)/$(CONFIG)/libinterop_client_main.a $(LIBDIR)/$(CONFIG)/libinterop_server_helper.a $(LIBDIR)/$(CONFIG)/libinterop_server_lib.a $(LIBDIR)/$(CONFIG)/libinterop_server_main.a $(LIBDIR)/$(CONFIG)/libqps.a $(LIBDIR)/$(CONFIG)/libbenchmark.a +privatelibs_cxx: $(LIBDIR)/$(CONFIG)/libgrpc++_core_stats.a $(LIBDIR)/$(CONFIG)/libgrpc++_proto_reflection_desc_db.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_config.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_util_unsecure.a $(LIBDIR)/$(CONFIG)/libgrpc_cli_libs.a $(LIBDIR)/$(CONFIG)/libhttp2_client_main.a $(LIBDIR)/$(CONFIG)/libinterop_client_helper.a $(LIBDIR)/$(CONFIG)/libinterop_client_main.a $(LIBDIR)/$(CONFIG)/libinterop_server_helper.a $(LIBDIR)/$(CONFIG)/libinterop_server_lib.a $(LIBDIR)/$(CONFIG)/libinterop_server_main.a $(LIBDIR)/$(CONFIG)/liblb_load_data_store.a $(LIBDIR)/$(CONFIG)/libqps.a $(LIBDIR)/$(CONFIG)/libbenchmark.a endif @@ -1660,6 +1661,7 @@ buildtests_cxx: privatelibs_cxx \ $(BINDIR)/$(CONFIG)/interop_server \ $(BINDIR)/$(CONFIG)/interop_test \ $(BINDIR)/$(CONFIG)/json_run_localhost \ + $(BINDIR)/$(CONFIG)/lb_load_data_store_test \ $(BINDIR)/$(CONFIG)/memory_test \ $(BINDIR)/$(CONFIG)/metrics_client \ $(BINDIR)/$(CONFIG)/mock_test \ @@ -1831,6 +1833,7 @@ buildtests_cxx: privatelibs_cxx \ $(BINDIR)/$(CONFIG)/interop_server \ $(BINDIR)/$(CONFIG)/interop_test \ $(BINDIR)/$(CONFIG)/json_run_localhost \ + $(BINDIR)/$(CONFIG)/lb_load_data_store_test \ $(BINDIR)/$(CONFIG)/memory_test \ $(BINDIR)/$(CONFIG)/metrics_client \ $(BINDIR)/$(CONFIG)/mock_test \ @@ -2283,6 +2286,8 @@ test_cxx: buildtests_cxx 
$(Q) $(BINDIR)/$(CONFIG)/inproc_sync_unary_ping_pong_test || ( echo test inproc_sync_unary_ping_pong_test failed ; exit 1 ) $(E) "[RUN] Testing interop_test" $(Q) $(BINDIR)/$(CONFIG)/interop_test || ( echo test interop_test failed ; exit 1 ) + $(E) "[RUN] Testing lb_load_data_store_test" + $(Q) $(BINDIR)/$(CONFIG)/lb_load_data_store_test || ( echo test lb_load_data_store_test failed ; exit 1 ) $(E) "[RUN] Testing memory_test" $(Q) $(BINDIR)/$(CONFIG)/memory_test || ( echo test memory_test failed ; exit 1 ) $(E) "[RUN] Testing mock_test" @@ -3575,7 +3580,6 @@ LIBGRPC_SRC = \ src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.c \ src/core/ext/filters/client_channel/resolver/fake/fake_resolver.cc \ src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc \ - src/core/ext/filters/client_channel/lb_policy/subchannel_list.cc \ src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc \ src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc \ src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_posix.cc \ @@ -4851,7 +4855,6 @@ LIBGRPC_UNSECURE_SRC = \ third_party/nanopb/pb_decode.c \ third_party/nanopb/pb_encode.c \ src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc \ - src/core/ext/filters/client_channel/lb_policy/subchannel_list.cc \ src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc \ src/core/ext/census/grpc_context.cc \ src/core/ext/filters/max_age/max_age_filter.cc \ @@ -7207,6 +7210,55 @@ endif endif +LIBLB_LOAD_DATA_STORE_SRC = \ + src/cpp/server/load_reporter/load_data_store.cc \ + +PUBLIC_HEADERS_CXX += \ + +LIBLB_LOAD_DATA_STORE_OBJS = $(addprefix $(OBJDIR)/$(CONFIG)/, $(addsuffix .o, $(basename $(LIBLB_LOAD_DATA_STORE_SRC)))) + + +ifeq ($(NO_SECURE),true) + +# You can't build secure libraries if you don't have OpenSSL. 
+ +$(LIBDIR)/$(CONFIG)/liblb_load_data_store.a: openssl_dep_error + + +else + +ifeq ($(NO_PROTOBUF),true) + +# You can't build a C++ library if you don't have protobuf - a bit overreached, but still okay. + +$(LIBDIR)/$(CONFIG)/liblb_load_data_store.a: protobuf_dep_error + + +else + +$(LIBDIR)/$(CONFIG)/liblb_load_data_store.a: $(ZLIB_DEP) $(OPENSSL_DEP) $(CARES_DEP) $(ADDRESS_SORTING_DEP) $(PROTOBUF_DEP) $(LIBLB_LOAD_DATA_STORE_OBJS) + $(E) "[AR] Creating $@" + $(Q) mkdir -p `dirname $@` + $(Q) rm -f $(LIBDIR)/$(CONFIG)/liblb_load_data_store.a + $(Q) $(AR) $(AROPTS) $(LIBDIR)/$(CONFIG)/liblb_load_data_store.a $(LIBLB_LOAD_DATA_STORE_OBJS) +ifeq ($(SYSTEM),Darwin) + $(Q) ranlib -no_warning_for_no_symbols $(LIBDIR)/$(CONFIG)/liblb_load_data_store.a +endif + + + + +endif + +endif + +ifneq ($(NO_SECURE),true) +ifneq ($(NO_DEPS),true) +-include $(LIBLB_LOAD_DATA_STORE_OBJS:.o=.dep) +endif +endif + + LIBQPS_SRC = \ $(GENDIR)/src/proto/grpc/testing/messages.pb.cc $(GENDIR)/src/proto/grpc/testing/messages.grpc.pb.cc \ $(GENDIR)/src/proto/grpc/testing/payloads.pb.cc $(GENDIR)/src/proto/grpc/testing/payloads.grpc.pb.cc \ @@ -18027,6 +18079,49 @@ endif endif +LB_LOAD_DATA_STORE_TEST_SRC = \ + test/cpp/server/load_reporter/load_data_store_test.cc \ + +LB_LOAD_DATA_STORE_TEST_OBJS = $(addprefix $(OBJDIR)/$(CONFIG)/, $(addsuffix .o, $(basename $(LB_LOAD_DATA_STORE_TEST_SRC)))) +ifeq ($(NO_SECURE),true) + +# You can't build secure targets if you don't have OpenSSL. + +$(BINDIR)/$(CONFIG)/lb_load_data_store_test: openssl_dep_error + +else + + + + +ifeq ($(NO_PROTOBUF),true) + +# You can't build the protoc plugins or protobuf-enabled targets if you don't have protobuf 3.5.0+. 
+ +$(BINDIR)/$(CONFIG)/lb_load_data_store_test: protobuf_dep_error + +else + +$(BINDIR)/$(CONFIG)/lb_load_data_store_test: $(PROTOBUF_DEP) $(LB_LOAD_DATA_STORE_TEST_OBJS) $(LIBDIR)/$(CONFIG)/liblb_load_data_store.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc++.a $(LIBDIR)/$(CONFIG)/libgrpc.a $(LIBDIR)/$(CONFIG)/libgpr_test_util.a $(LIBDIR)/$(CONFIG)/libgpr.a + $(E) "[LD] Linking $@" + $(Q) mkdir -p `dirname $@` + $(Q) $(LDXX) $(LDFLAGS) $(LB_LOAD_DATA_STORE_TEST_OBJS) $(LIBDIR)/$(CONFIG)/liblb_load_data_store.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc++.a $(LIBDIR)/$(CONFIG)/libgrpc.a $(LIBDIR)/$(CONFIG)/libgpr_test_util.a $(LIBDIR)/$(CONFIG)/libgpr.a $(LDLIBSXX) $(LDLIBS_PROTOBUF) $(LDLIBS) $(LDLIBS_SECURE) $(GTEST_LIB) -o $(BINDIR)/$(CONFIG)/lb_load_data_store_test + +endif + +endif + +$(OBJDIR)/$(CONFIG)/test/cpp/server/load_reporter/load_data_store_test.o: $(LIBDIR)/$(CONFIG)/liblb_load_data_store.a $(LIBDIR)/$(CONFIG)/libgrpc++_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc++.a $(LIBDIR)/$(CONFIG)/libgrpc.a $(LIBDIR)/$(CONFIG)/libgpr_test_util.a $(LIBDIR)/$(CONFIG)/libgpr.a + +deps_lb_load_data_store_test: $(LB_LOAD_DATA_STORE_TEST_OBJS:.o=.dep) + +ifneq ($(NO_SECURE),true) +ifneq ($(NO_DEPS),true) +-include $(LB_LOAD_DATA_STORE_TEST_OBJS:.o=.dep) +endif +endif + + MEMORY_TEST_SRC = \ test/core/gprpp/memory_test.cc \ @@ -23895,6 +23990,7 @@ src/cpp/common/secure_channel_arguments.cc: $(OPENSSL_DEP) src/cpp/common/secure_create_auth_context.cc: $(OPENSSL_DEP) src/cpp/ext/proto_server_reflection.cc: $(OPENSSL_DEP) src/cpp/ext/proto_server_reflection_plugin.cc: $(OPENSSL_DEP) +src/cpp/server/load_reporter/load_data_store.cc: $(OPENSSL_DEP) src/cpp/server/secure_server_credentials.cc: $(OPENSSL_DEP) src/cpp/util/core_stats.cc: $(OPENSSL_DEP) src/cpp/util/error_details.cc: $(OPENSSL_DEP) diff --git 
a/build.yaml b/build.yaml index 374d5a03247..c63ca5de5ba 100644 --- a/build.yaml +++ b/build.yaml @@ -685,8 +685,6 @@ filegroups: - name: grpc_lb_subchannel_list headers: - src/core/ext/filters/client_channel/lb_policy/subchannel_list.h - src: - - src/core/ext/filters/client_channel/lb_policy/subchannel_list.cc uses: - grpc_base - grpc_client_channel @@ -1890,6 +1888,15 @@ libs: - test/cpp/interop/interop_server_bootstrap.cc deps: - interop_server_lib +- name: lb_load_data_store + build: private + language: c++ + headers: + - src/cpp/server/load_reporter/load_data_store.h + src: + - src/cpp/server/load_reporter/load_data_store.cc + deps: + - grpc++ - name: qps build: private language: c++ @@ -4766,6 +4773,20 @@ targets: - mac - linux - posix +- name: lb_load_data_store_test + gtest: true + build: test + language: c++ + src: + - test/cpp/server/load_reporter/load_data_store_test.cc + deps: + - lb_load_data_store + - grpc++_test_util + - grpc_test_util + - grpc++ + - grpc + - gpr_test_util + - gpr - name: memory_test gtest: true build: test diff --git a/config.m4 b/config.m4 index 0dc4825191b..df06259606a 100644 --- a/config.m4 +++ b/config.m4 @@ -369,7 +369,6 @@ if test "$PHP_GRPC" != "no"; then src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.c \ src/core/ext/filters/client_channel/resolver/fake/fake_resolver.cc \ src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc \ - src/core/ext/filters/client_channel/lb_policy/subchannel_list.cc \ src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc \ src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc \ src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_posix.cc \ @@ -650,7 +649,6 @@ if test "$PHP_GRPC" != "no"; then PHP_ADD_BUILD_DIR($ext_builddir/src/boringssl) PHP_ADD_BUILD_DIR($ext_builddir/src/core/ext/census) PHP_ADD_BUILD_DIR($ext_builddir/src/core/ext/filters/client_channel) - 
PHP_ADD_BUILD_DIR($ext_builddir/src/core/ext/filters/client_channel/lb_policy) PHP_ADD_BUILD_DIR($ext_builddir/src/core/ext/filters/client_channel/lb_policy/grpclb) PHP_ADD_BUILD_DIR($ext_builddir/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1) PHP_ADD_BUILD_DIR($ext_builddir/src/core/ext/filters/client_channel/lb_policy/pick_first) diff --git a/config.w32 b/config.w32 index 1a09accd8a1..f60a5b746d7 100644 --- a/config.w32 +++ b/config.w32 @@ -345,7 +345,6 @@ if (PHP_GRPC != "no") { "src\\core\\ext\\filters\\client_channel\\lb_policy\\grpclb\\proto\\grpc\\lb\\v1\\load_balancer.pb.c " + "src\\core\\ext\\filters\\client_channel\\resolver\\fake\\fake_resolver.cc " + "src\\core\\ext\\filters\\client_channel\\lb_policy\\pick_first\\pick_first.cc " + - "src\\core\\ext\\filters\\client_channel\\lb_policy\\subchannel_list.cc " + "src\\core\\ext\\filters\\client_channel\\lb_policy\\round_robin\\round_robin.cc " + "src\\core\\ext\\filters\\client_channel\\resolver\\dns\\c_ares\\dns_resolver_ares.cc " + "src\\core\\ext\\filters\\client_channel\\resolver\\dns\\c_ares\\grpc_ares_ev_driver_posix.cc " + diff --git a/gRPC-Core.podspec b/gRPC-Core.podspec index fc6b31fd23c..a1e0fd2ca1a 100644 --- a/gRPC-Core.podspec +++ b/gRPC-Core.podspec @@ -784,7 +784,6 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.c', 'src/core/ext/filters/client_channel/resolver/fake/fake_resolver.cc', 'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc', - 'src/core/ext/filters/client_channel/lb_policy/subchannel_list.cc', 'src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc', 'src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc', 'src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_posix.cc', diff --git a/grpc.gemspec b/grpc.gemspec index 4e309f15a98..16810335292 100644 --- a/grpc.gemspec +++ b/grpc.gemspec @@ -722,7 +722,6 @@ 
Gem::Specification.new do |s| s.files += %w( src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.c ) s.files += %w( src/core/ext/filters/client_channel/resolver/fake/fake_resolver.cc ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc ) - s.files += %w( src/core/ext/filters/client_channel/lb_policy/subchannel_list.cc ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc ) s.files += %w( src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc ) s.files += %w( src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_posix.cc ) diff --git a/grpc.gyp b/grpc.gyp index b36b166175e..0c567d9da41 100644 --- a/grpc.gyp +++ b/grpc.gyp @@ -529,7 +529,6 @@ 'src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.c', 'src/core/ext/filters/client_channel/resolver/fake/fake_resolver.cc', 'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc', - 'src/core/ext/filters/client_channel/lb_policy/subchannel_list.cc', 'src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc', 'src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc', 'src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_posix.cc', @@ -1258,7 +1257,6 @@ 'third_party/nanopb/pb_decode.c', 'third_party/nanopb/pb_encode.c', 'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc', - 'src/core/ext/filters/client_channel/lb_policy/subchannel_list.cc', 'src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc', 'src/core/ext/census/grpc_context.cc', 'src/core/ext/filters/max_age/max_age_filter.cc', @@ -1637,6 +1635,16 @@ 'test/cpp/interop/interop_server_bootstrap.cc', ], }, + { + 'target_name': 'lb_load_data_store', + 'type': 'static_library', + 'dependencies': [ + 'grpc++', + ], + 'sources': [ + 
'src/cpp/server/load_reporter/load_data_store.cc', + ], + }, { 'target_name': 'qps', 'type': 'static_library', diff --git a/package.xml b/package.xml index 0777ee00505..b9cb22b647c 100644 --- a/package.xml +++ b/package.xml @@ -729,7 +729,6 @@ <file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.c" role="src" /> <file baseinstalldir="/" name="src/core/ext/filters/client_channel/resolver/fake/fake_resolver.cc" role="src" /> <file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc" role="src" /> - <file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/subchannel_list.cc" role="src" /> <file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc" role="src" /> <file baseinstalldir="/" name="src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc" role="src" /> <file baseinstalldir="/" name="src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_posix.cc" role="src" /> diff --git a/src/compiler/objective_c_generator.cc b/src/compiler/objective_c_generator.cc index 39f68cb9565..7d4d4d1f5a4 100644 --- a/src/compiler/objective_c_generator.cc +++ b/src/compiler/objective_c_generator.cc @@ -248,7 +248,7 @@ void PrintMethodImplementations(Printer* printer, " */\n"); printer.Print(vars, "@interface $service_class$ :" - " GRPCProtoService<$service_class$>\n"); + " GRPCProtoService<$service_class$, GRPCProtoServiceInit>\n"); printer.Print( "- (instancetype)initWithHost:(NSString *)host" " NS_DESIGNATED_INITIALIZER;\n"); diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index a07e90322c5..76df9766983 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -62,31 +62,65 @@ class PickFirst : public LoadBalancingPolicy { private: ~PickFirst(); + class PickFirstSubchannelList; + + class PickFirstSubchannelData + : public SubchannelData<PickFirstSubchannelList, + PickFirstSubchannelData> { + public: + PickFirstSubchannelData(PickFirstSubchannelList* subchannel_list, + const grpc_lb_user_data_vtable* user_data_vtable, + const grpc_lb_address& address, + grpc_subchannel* subchannel, + grpc_combiner* combiner) + : SubchannelData(subchannel_list, user_data_vtable, address, subchannel, + combiner) {} + + void ProcessConnectivityChangeLocked( + grpc_connectivity_state connectivity_state, grpc_error* error) override; + }; + + class PickFirstSubchannelList + : public SubchannelList<PickFirstSubchannelList, + PickFirstSubchannelData> { + public: + PickFirstSubchannelList(PickFirst* policy, TraceFlag* tracer, + const grpc_lb_addresses* addresses, + grpc_combiner* combiner, + grpc_client_channel_factory* 
client_channel_factory, + const grpc_channel_args& args) + : SubchannelList(policy, tracer, addresses, combiner, + client_channel_factory, args) { + // Need to maintain a ref to the LB policy as long as we maintain + // any references to subchannels, since the subchannels' + // pollset_sets will include the LB policy's pollset_set. + policy->Ref(DEBUG_LOCATION, "subchannel_list").release(); + } + + ~PickFirstSubchannelList() { + PickFirst* p = static_cast<PickFirst*>(policy()); + p->Unref(DEBUG_LOCATION, "subchannel_list"); + } + }; + void ShutdownLocked() override; void StartPickingLocked(); void DestroyUnselectedSubchannelsLocked(); - static void OnConnectivityChangedLocked(void* arg, grpc_error* error); - - void SubchannelListRefForConnectivityWatch( - grpc_lb_subchannel_list* subchannel_list, const char* reason); - void SubchannelListUnrefForConnectivityWatch( - grpc_lb_subchannel_list* subchannel_list, const char* reason); - - /** all our subchannels */ - grpc_lb_subchannel_list* subchannel_list_ = nullptr; - /** latest pending subchannel list */ - grpc_lb_subchannel_list* latest_pending_subchannel_list_ = nullptr; - /** selected subchannel in \a subchannel_list */ - grpc_lb_subchannel_data* selected_ = nullptr; - /** have we started picking? */ + // All our subchannels. + OrphanablePtr<PickFirstSubchannelList> subchannel_list_; + // Latest pending subchannel list. + OrphanablePtr<PickFirstSubchannelList> latest_pending_subchannel_list_; + // Selected subchannel in \a subchannel_list_. + PickFirstSubchannelData* selected_ = nullptr; + // Have we started picking? bool started_picking_ = false; - /** are we shut down? */ + // Are we shut down? bool shutdown_ = false; - /** list of picks that are waiting on connectivity */ + // List of picks that are waiting on connectivity. PickState* pending_picks_ = nullptr; - /** our connectivity state tracker */ + // Our connectivity state tracker. 
grpc_connectivity_state_tracker state_tracker_; }; @@ -137,15 +171,8 @@ void PickFirst::ShutdownLocked() { } grpc_connectivity_state_set(&state_tracker_, GRPC_CHANNEL_SHUTDOWN, GRPC_ERROR_REF(error), "shutdown"); - if (subchannel_list_ != nullptr) { - grpc_lb_subchannel_list_shutdown_and_unref(subchannel_list_, "pf_shutdown"); - subchannel_list_ = nullptr; - } - if (latest_pending_subchannel_list_ != nullptr) { - grpc_lb_subchannel_list_shutdown_and_unref(latest_pending_subchannel_list_, - "pf_shutdown"); - latest_pending_subchannel_list_ = nullptr; - } + subchannel_list_.reset(); + latest_pending_subchannel_list_.reset(); TryReresolutionLocked(&grpc_lb_pick_first_trace, GRPC_ERROR_CANCELLED); GRPC_ERROR_UNREF(error); } @@ -192,14 +219,10 @@ void PickFirst::CancelMatchingPicksLocked(uint32_t initial_metadata_flags_mask, void PickFirst::StartPickingLocked() { started_picking_ = true; - if (subchannel_list_ != nullptr && subchannel_list_->num_subchannels > 0) { - subchannel_list_->checking_subchannel = 0; - for (size_t i = 0; i < subchannel_list_->num_subchannels; ++i) { - if (subchannel_list_->subchannels[i].subchannel != nullptr) { - SubchannelListRefForConnectivityWatch( - subchannel_list_, "connectivity_watch+start_picking"); - grpc_lb_subchannel_data_start_connectivity_watch( - &subchannel_list_->subchannels[i]); + if (subchannel_list_ != nullptr) { + for (size_t i = 0; i < subchannel_list_->num_subchannels(); ++i) { + if (subchannel_list_->subchannel(i)->subchannel() != nullptr) { + subchannel_list_->subchannel(i)->StartConnectivityWatchLocked(); break; } } @@ -215,7 +238,7 @@ void PickFirst::ExitIdleLocked() { bool PickFirst::PickLocked(PickState* pick) { // If we have a selected subchannel already, return synchronously. if (selected_ != nullptr) { - pick->connected_subchannel = selected_->connected_subchannel; + pick->connected_subchannel = selected_->connected_subchannel()->Ref(); return true; } // No subchannel selected yet, so handle asynchronously. 
@@ -228,11 +251,10 @@ bool PickFirst::PickLocked(PickState* pick) { } void PickFirst::DestroyUnselectedSubchannelsLocked() { - for (size_t i = 0; i < subchannel_list_->num_subchannels; ++i) { - grpc_lb_subchannel_data* sd = &subchannel_list_->subchannels[i]; + for (size_t i = 0; i < subchannel_list_->num_subchannels(); ++i) { + PickFirstSubchannelData* sd = subchannel_list_->subchannel(i); if (selected_ != sd) { - grpc_lb_subchannel_data_unref_subchannel(sd, - "selected_different_subchannel"); + sd->UnrefSubchannelLocked("selected_different_subchannel"); } } } @@ -249,7 +271,7 @@ void PickFirst::NotifyOnStateChangeLocked(grpc_connectivity_state* current, void PickFirst::PingOneLocked(grpc_closure* on_initiate, grpc_closure* on_ack) { if (selected_ != nullptr) { - selected_->connected_subchannel->Ping(on_initiate, on_ack); + selected_->connected_subchannel()->Ping(on_initiate, on_ack); } else { GRPC_CLOSURE_SCHED(on_initiate, GRPC_ERROR_CREATE_FROM_STATIC_STRING("Not connected")); @@ -258,24 +280,6 @@ void PickFirst::PingOneLocked(grpc_closure* on_initiate, grpc_closure* on_ack) { } } -void PickFirst::SubchannelListRefForConnectivityWatch( - grpc_lb_subchannel_list* subchannel_list, const char* reason) { - // TODO(roth): We currently track this ref manually. Once the new - // ClosureRef API is ready and the subchannel_list code has been - // converted to a C++ API, find a way to hold the RefCountedPtr<> - // somewhere (maybe in the subchannel_data object) instead of doing - // this manually. 
- auto self = Ref(DEBUG_LOCATION, reason); - self.release(); - grpc_lb_subchannel_list_ref(subchannel_list, reason); -} - -void PickFirst::SubchannelListUnrefForConnectivityWatch( - grpc_lb_subchannel_list* subchannel_list, const char* reason) { - Unref(DEBUG_LOCATION, reason); - grpc_lb_subchannel_list_unref(subchannel_list, reason); -} - void PickFirst::UpdateLocked(const grpc_channel_args& args) { const grpc_arg* arg = grpc_channel_args_find(&args, GRPC_ARG_LB_ADDRESSES); if (arg == nullptr || arg->type != GRPC_ARG_POINTER) { @@ -295,75 +299,67 @@ void PickFirst::UpdateLocked(const grpc_channel_args& args) { return; } const grpc_lb_addresses* addresses = - (const grpc_lb_addresses*)arg->value.pointer.p; + static_cast(arg->value.pointer.p); if (grpc_lb_pick_first_trace.enabled()) { gpr_log(GPR_INFO, "Pick First %p received update with %" PRIuPTR " addresses", this, addresses->num_addresses); } - grpc_lb_subchannel_list* subchannel_list = grpc_lb_subchannel_list_create( + auto subchannel_list = MakeOrphanable( this, &grpc_lb_pick_first_trace, addresses, combiner(), - client_channel_factory(), args, &PickFirst::OnConnectivityChangedLocked); - if (subchannel_list->num_subchannels == 0) { + client_channel_factory(), args); + if (subchannel_list->num_subchannels() == 0) { // Empty update or no valid subchannels. Unsubscribe from all current // subchannels and put the channel in TRANSIENT_FAILURE. grpc_connectivity_state_set( &state_tracker_, GRPC_CHANNEL_TRANSIENT_FAILURE, GRPC_ERROR_CREATE_FROM_STATIC_STRING("Empty update"), "pf_update_empty"); - if (subchannel_list_ != nullptr) { - grpc_lb_subchannel_list_shutdown_and_unref(subchannel_list_, - "sl_shutdown_empty_update"); - } - subchannel_list_ = subchannel_list; // Empty list. + subchannel_list_ = std::move(subchannel_list); // Empty list. selected_ = nullptr; return; } if (selected_ == nullptr) { // We don't yet have a selected subchannel, so replace the current // subchannel list immediately. 
- if (subchannel_list_ != nullptr) { - grpc_lb_subchannel_list_shutdown_and_unref(subchannel_list_, - "pf_update_before_selected"); + subchannel_list_ = std::move(subchannel_list); + // If we've started picking, start trying to connect to the first + // subchannel in the new list. + if (started_picking_) { + subchannel_list_->subchannel(0)->StartConnectivityWatchLocked(); } - subchannel_list_ = subchannel_list; } else { // We do have a selected subchannel. // Check if it's present in the new list. If so, we're done. - for (size_t i = 0; i < subchannel_list->num_subchannels; ++i) { - grpc_lb_subchannel_data* sd = &subchannel_list->subchannels[i]; - if (sd->subchannel == selected_->subchannel) { + for (size_t i = 0; i < subchannel_list->num_subchannels(); ++i) { + PickFirstSubchannelData* sd = subchannel_list->subchannel(i); + if (sd->subchannel() == selected_->subchannel()) { // The currently selected subchannel is in the update: we are done. if (grpc_lb_pick_first_trace.enabled()) { gpr_log(GPR_INFO, "Pick First %p found already selected subchannel %p " "at update index %" PRIuPTR " of %" PRIuPTR "; update done", - this, selected_->subchannel, i, - subchannel_list->num_subchannels); - } - if (selected_->connected_subchannel != nullptr) { - sd->connected_subchannel = selected_->connected_subchannel; - } - selected_ = sd; - if (subchannel_list_ != nullptr) { - grpc_lb_subchannel_list_shutdown_and_unref( - subchannel_list_, "pf_update_includes_selected"); + this, selected_->subchannel(), i, + subchannel_list->num_subchannels()); } - subchannel_list_ = subchannel_list; - DestroyUnselectedSubchannelsLocked(); - SubchannelListRefForConnectivityWatch( - subchannel_list, "connectivity_watch+replace_selected"); - grpc_lb_subchannel_data_start_connectivity_watch(sd); - // If there was a previously pending update (which may or may - // not have contained the currently selected subchannel), drop - // it, so that it doesn't override what we've done here. 
- if (latest_pending_subchannel_list_ != nullptr) { - grpc_lb_subchannel_list_shutdown_and_unref( - latest_pending_subchannel_list_, - "pf_update_includes_selected+outdated"); - latest_pending_subchannel_list_ = nullptr; + // Make sure it's in state READY. It might not be if we grabbed + // the combiner while a connectivity state notification + // informing us otherwise is pending. + // Note that CheckConnectivityStateLocked() also takes a ref to + // the connected subchannel. + grpc_error* error = GRPC_ERROR_NONE; + if (sd->CheckConnectivityStateLocked(&error) == GRPC_CHANNEL_READY) { + selected_ = sd; + subchannel_list_ = std::move(subchannel_list); + DestroyUnselectedSubchannelsLocked(); + sd->StartConnectivityWatchLocked(); + // If there was a previously pending update (which may or may + // not have contained the currently selected subchannel), drop + // it, so that it doesn't override what we've done here. + latest_pending_subchannel_list_.reset(); + return; } - return; + GRPC_ERROR_UNREF(error); } } // Not keeping the previous selected subchannel, so set the latest @@ -375,85 +371,63 @@ void PickFirst::UpdateLocked(const grpc_channel_args& args) { gpr_log(GPR_INFO, "Pick First %p Shutting down latest pending subchannel list " "%p, about to be replaced by newer latest %p", - this, latest_pending_subchannel_list_, subchannel_list); + this, latest_pending_subchannel_list_.get(), + subchannel_list.get()); } - grpc_lb_subchannel_list_shutdown_and_unref( - latest_pending_subchannel_list_, "sl_outdated_dont_smash"); } - latest_pending_subchannel_list_ = subchannel_list; - } - // If we've started picking, start trying to connect to the first - // subchannel in the new list. 
- if (started_picking_) { - SubchannelListRefForConnectivityWatch(subchannel_list, - "connectivity_watch+update"); - grpc_lb_subchannel_data_start_connectivity_watch( - &subchannel_list->subchannels[0]); + latest_pending_subchannel_list_ = std::move(subchannel_list); + // If we've started picking, start trying to connect to the first + // subchannel in the new list. + if (started_picking_) { + latest_pending_subchannel_list_->subchannel(0) + ->StartConnectivityWatchLocked(); + } } } -void PickFirst::OnConnectivityChangedLocked(void* arg, grpc_error* error) { - grpc_lb_subchannel_data* sd = static_cast(arg); - PickFirst* p = static_cast(sd->subchannel_list->policy); - if (grpc_lb_pick_first_trace.enabled()) { - gpr_log(GPR_INFO, - "Pick First %p connectivity changed for subchannel %p (%" PRIuPTR - " of %" PRIuPTR - "), subchannel_list %p: state=%s p->shutdown_=%d " - "sd->subchannel_list->shutting_down=%d error=%s", - p, sd->subchannel, sd->subchannel_list->checking_subchannel, - sd->subchannel_list->num_subchannels, sd->subchannel_list, - grpc_connectivity_state_name(sd->pending_connectivity_state_unsafe), - p->shutdown_, sd->subchannel_list->shutting_down, - grpc_error_string(error)); - } - // If the policy is shutting down, unref and return. - if (p->shutdown_) { - grpc_lb_subchannel_data_stop_connectivity_watch(sd); - grpc_lb_subchannel_data_unref_subchannel(sd, "pf_shutdown"); - p->SubchannelListUnrefForConnectivityWatch(sd->subchannel_list, - "pf_shutdown"); - return; - } - // If the subchannel list is shutting down, stop watching. - if (sd->subchannel_list->shutting_down || error == GRPC_ERROR_CANCELLED) { - grpc_lb_subchannel_data_stop_connectivity_watch(sd); - grpc_lb_subchannel_data_unref_subchannel(sd, "pf_sl_shutdown"); - p->SubchannelListUnrefForConnectivityWatch(sd->subchannel_list, - "pf_sl_shutdown"); - return; - } - // If we're still here, the notification must be for a subchannel in - // either the current or latest pending subchannel lists. 
- GPR_ASSERT(sd->subchannel_list == p->subchannel_list_ || - sd->subchannel_list == p->latest_pending_subchannel_list_); - // Update state. - sd->curr_connectivity_state = sd->pending_connectivity_state_unsafe; +void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked( + grpc_connectivity_state connectivity_state, grpc_error* error) { + PickFirst* p = static_cast(subchannel_list()->policy()); + // The notification must be for a subchannel in either the current or + // latest pending subchannel lists. + GPR_ASSERT(subchannel_list() == p->subchannel_list_.get() || + subchannel_list() == p->latest_pending_subchannel_list_.get()); // Handle updates for the currently selected subchannel. - if (p->selected_ == sd) { + if (p->selected_ == this) { + if (grpc_lb_pick_first_trace.enabled()) { + gpr_log(GPR_INFO, + "Pick First %p connectivity changed for selected subchannel", p); + } // If the new state is anything other than READY and there is a // pending update, switch to the pending update. 
- if (sd->curr_connectivity_state != GRPC_CHANNEL_READY && + if (connectivity_state != GRPC_CHANNEL_READY && p->latest_pending_subchannel_list_ != nullptr) { + if (grpc_lb_pick_first_trace.enabled()) { + gpr_log(GPR_INFO, + "Pick First %p promoting pending subchannel list %p to " + "replace %p", + p, p->latest_pending_subchannel_list_.get(), + p->subchannel_list_.get()); + } p->selected_ = nullptr; - grpc_lb_subchannel_data_stop_connectivity_watch(sd); - p->SubchannelListUnrefForConnectivityWatch( - sd->subchannel_list, "selected_not_ready+switch_to_update"); - grpc_lb_subchannel_list_shutdown_and_unref( - p->subchannel_list_, "selected_not_ready+switch_to_update"); - p->subchannel_list_ = p->latest_pending_subchannel_list_; - p->latest_pending_subchannel_list_ = nullptr; + StopConnectivityWatchLocked(); + p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_); grpc_connectivity_state_set( &p->state_tracker_, GRPC_CHANNEL_TRANSIENT_FAILURE, - GRPC_ERROR_REF(error), "selected_not_ready+switch_to_update"); + error != GRPC_ERROR_NONE + ? GRPC_ERROR_REF(error) + : GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "selected subchannel not ready; switching to pending " + "update"), + "selected_not_ready+switch_to_update"); } else { // TODO(juanlishen): we re-resolve when the selected subchannel goes to // TRANSIENT_FAILURE because we used to shut down in this case before // re-resolution is introduced. But we need to investigate whether we // really want to take any action instead of waiting for the selected // subchannel reconnecting. - GPR_ASSERT(sd->curr_connectivity_state != GRPC_CHANNEL_SHUTDOWN); - if (sd->curr_connectivity_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { + GPR_ASSERT(connectivity_state != GRPC_CHANNEL_SHUTDOWN); + if (connectivity_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { // If the selected channel goes bad, request a re-resolution. 
grpc_connectivity_state_set(&p->state_tracker_, GRPC_CHANNEL_IDLE, GRPC_ERROR_NONE, @@ -462,19 +436,16 @@ void PickFirst::OnConnectivityChangedLocked(void* arg, grpc_error* error) { p->TryReresolutionLocked(&grpc_lb_pick_first_trace, GRPC_ERROR_NONE); // In transient failure. Rely on re-resolution to recover. p->selected_ = nullptr; - grpc_lb_subchannel_data_stop_connectivity_watch(sd); - p->SubchannelListUnrefForConnectivityWatch(sd->subchannel_list, - "pf_selected_shutdown"); - grpc_lb_subchannel_data_unref_subchannel( - sd, "pf_selected_shutdown"); // Unrefs connected subchannel + UnrefSubchannelLocked("pf_selected_shutdown"); + StopConnectivityWatchLocked(); } else { - grpc_connectivity_state_set(&p->state_tracker_, - sd->curr_connectivity_state, + grpc_connectivity_state_set(&p->state_tracker_, connectivity_state, GRPC_ERROR_REF(error), "selected_changed"); // Renew notification. - grpc_lb_subchannel_data_start_connectivity_watch(sd); + RenewConnectivityWatchLocked(); } } + GRPC_ERROR_UNREF(error); return; } // If we get here, there are two possible cases: @@ -486,26 +457,27 @@ void PickFirst::OnConnectivityChangedLocked(void* arg, grpc_error* error) { // for a subchannel in p->latest_pending_subchannel_list_. The // goal here is to find a subchannel from the update that we can // select in place of the current one. - switch (sd->curr_connectivity_state) { + switch (connectivity_state) { case GRPC_CHANNEL_READY: { // Case 2. Promote p->latest_pending_subchannel_list_ to // p->subchannel_list_. 
- sd->connected_subchannel = - grpc_subchannel_get_connected_subchannel(sd->subchannel); - if (sd->subchannel_list == p->latest_pending_subchannel_list_) { - GPR_ASSERT(p->subchannel_list_ != nullptr); - grpc_lb_subchannel_list_shutdown_and_unref(p->subchannel_list_, - "finish_update"); - p->subchannel_list_ = p->latest_pending_subchannel_list_; - p->latest_pending_subchannel_list_ = nullptr; + if (subchannel_list() == p->latest_pending_subchannel_list_.get()) { + if (grpc_lb_pick_first_trace.enabled()) { + gpr_log(GPR_INFO, + "Pick First %p promoting pending subchannel list %p to " + "replace %p", + p, p->latest_pending_subchannel_list_.get(), + p->subchannel_list_.get()); + } + p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_); } // Cases 1 and 2. grpc_connectivity_state_set(&p->state_tracker_, GRPC_CHANNEL_READY, GRPC_ERROR_NONE, "connecting_ready"); - p->selected_ = sd; + p->selected_ = this; if (grpc_lb_pick_first_trace.enabled()) { gpr_log(GPR_INFO, "Pick First %p selected subchannel %p", p, - sd->subchannel); + subchannel()); } // Drop all other subchannels, since we are now connected. p->DestroyUnselectedSubchannelsLocked(); @@ -513,7 +485,8 @@ void PickFirst::OnConnectivityChangedLocked(void* arg, grpc_error* error) { PickState* pick; while ((pick = p->pending_picks_)) { p->pending_picks_ = pick->next; - pick->connected_subchannel = p->selected_->connected_subchannel; + pick->connected_subchannel = + p->selected_->connected_subchannel()->Ref(); if (grpc_lb_pick_first_trace.enabled()) { gpr_log(GPR_INFO, "Servicing pending pick with selected subchannel %p", @@ -522,45 +495,43 @@ void PickFirst::OnConnectivityChangedLocked(void* arg, grpc_error* error) { GRPC_CLOSURE_SCHED(pick->on_complete, GRPC_ERROR_NONE); } // Renew notification. 
- grpc_lb_subchannel_data_start_connectivity_watch(sd); + RenewConnectivityWatchLocked(); break; } case GRPC_CHANNEL_TRANSIENT_FAILURE: { - grpc_lb_subchannel_data_stop_connectivity_watch(sd); + StopConnectivityWatchLocked(); + PickFirstSubchannelData* sd = this; do { - sd->subchannel_list->checking_subchannel = - (sd->subchannel_list->checking_subchannel + 1) % - sd->subchannel_list->num_subchannels; - sd = &sd->subchannel_list - ->subchannels[sd->subchannel_list->checking_subchannel]; - } while (sd->subchannel == nullptr); + size_t next_index = + (sd->Index() + 1) % subchannel_list()->num_subchannels(); + sd = subchannel_list()->subchannel(next_index); + } while (sd->subchannel() == nullptr); // Case 1: Only set state to TRANSIENT_FAILURE if we've tried // all subchannels. - if (sd->subchannel_list->checking_subchannel == 0 && - sd->subchannel_list == p->subchannel_list_) { + if (sd->Index() == 0 && subchannel_list() == p->subchannel_list_.get()) { grpc_connectivity_state_set( &p->state_tracker_, GRPC_CHANNEL_TRANSIENT_FAILURE, GRPC_ERROR_REF(error), "connecting_transient_failure"); } - // Reuses the connectivity refs from the previous watch. - grpc_lb_subchannel_data_start_connectivity_watch(sd); + sd->StartConnectivityWatchLocked(); break; } case GRPC_CHANNEL_CONNECTING: case GRPC_CHANNEL_IDLE: { // Only update connectivity state in case 1. - if (sd->subchannel_list == p->subchannel_list_) { + if (subchannel_list() == p->subchannel_list_.get()) { grpc_connectivity_state_set(&p->state_tracker_, GRPC_CHANNEL_CONNECTING, GRPC_ERROR_REF(error), "connecting_changed"); } // Renew notification. 
- grpc_lb_subchannel_data_start_connectivity_watch(sd); + RenewConnectivityWatchLocked(); break; } case GRPC_CHANNEL_SHUTDOWN: GPR_UNREACHABLE_CODE(break); } + GRPC_ERROR_UNREF(error); } // diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index b470016bd99..79e8ad56633 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -73,23 +73,127 @@ class RoundRobin : public LoadBalancingPolicy { private: ~RoundRobin(); - void ShutdownLocked() override; + // Forward declaration. + class RoundRobinSubchannelList; + + // Data for a particular subchannel in a subchannel list. + // This subclass adds the following functionality: + // - Tracks user_data associated with each address, which will be + // returned along with picks that select the subchannel. + // - Tracks the previous connectivity state of the subchannel, so that + // we know how many subchannels are in each state. + class RoundRobinSubchannelData + : public SubchannelData { + public: + RoundRobinSubchannelData(RoundRobinSubchannelList* subchannel_list, + const grpc_lb_user_data_vtable* user_data_vtable, + const grpc_lb_address& address, + grpc_subchannel* subchannel, + grpc_combiner* combiner) + : SubchannelData(subchannel_list, user_data_vtable, address, subchannel, + combiner), + user_data_vtable_(user_data_vtable), + user_data_(user_data_vtable_ != nullptr + ? 
user_data_vtable_->copy(address.user_data) + : nullptr) {} + + void UnrefSubchannelLocked(const char* reason) override { + SubchannelData::UnrefSubchannelLocked(reason); + if (user_data_ != nullptr) { + GPR_ASSERT(user_data_vtable_ != nullptr); + user_data_vtable_->destroy(user_data_); + user_data_ = nullptr; + } + } - void StartPickingLocked(); - size_t GetNextReadySubchannelIndexLocked(); - void UpdateLastReadySubchannelIndexLocked(size_t last_ready_index); - void UpdateConnectivityStatusLocked(grpc_lb_subchannel_data* sd, - grpc_error* error); + void* user_data() const { return user_data_; } + + grpc_connectivity_state connectivity_state() const { + return last_connectivity_state_; + } + + void UpdateConnectivityStateLocked( + grpc_connectivity_state connectivity_state, grpc_error* error); + + private: + void ProcessConnectivityChangeLocked( + grpc_connectivity_state connectivity_state, grpc_error* error) override; + + const grpc_lb_user_data_vtable* user_data_vtable_; + void* user_data_ = nullptr; + grpc_connectivity_state last_connectivity_state_ = GRPC_CHANNEL_IDLE; + }; + + // A list of subchannels. + class RoundRobinSubchannelList + : public SubchannelList { + public: + RoundRobinSubchannelList( + RoundRobin* policy, TraceFlag* tracer, + const grpc_lb_addresses* addresses, grpc_combiner* combiner, + grpc_client_channel_factory* client_channel_factory, + const grpc_channel_args& args) + : SubchannelList(policy, tracer, addresses, combiner, + client_channel_factory, args) { + // Need to maintain a ref to the LB policy as long as we maintain + // any references to subchannels, since the subchannels' + // pollset_sets will include the LB policy's pollset_set. 
+ policy->Ref(DEBUG_LOCATION, "subchannel_list").release(); + } + + ~RoundRobinSubchannelList() { + GRPC_ERROR_UNREF(last_transient_failure_error_); + RoundRobin* p = static_cast(policy()); + p->Unref(DEBUG_LOCATION, "subchannel_list"); + } - static void OnConnectivityChangedLocked(void* arg, grpc_error* error); + // Starts watching the subchannels in this list. + void StartWatchingLocked(); + + // Updates the counters of subchannels in each state when a + // subchannel transitions from old_state to new_state. + // transient_failure_error is the error that is reported when + // new_state is TRANSIENT_FAILURE. + void UpdateStateCountersLocked(grpc_connectivity_state old_state, + grpc_connectivity_state new_state, + grpc_error* transient_failure_error); + + // If this subchannel list is the RR policy's current subchannel + // list, updates the RR policy's connectivity state based on the + // subchannel list's state counters. + void MaybeUpdateRoundRobinConnectivityStateLocked(); + + // Updates the RR policy's overall state based on the counters of + // subchannels in each state. + void UpdateRoundRobinStateFromSubchannelStateCountsLocked(); + + size_t GetNextReadySubchannelIndexLocked(); + void UpdateLastReadySubchannelIndexLocked(size_t last_ready_index); + + private: + size_t num_ready_ = 0; + size_t num_connecting_ = 0; + size_t num_transient_failure_ = 0; + grpc_error* last_transient_failure_error_ = GRPC_ERROR_NONE; + size_t last_ready_index_ = -1; // Index into list of last pick. 
+ }; - void SubchannelListRefForConnectivityWatch( - grpc_lb_subchannel_list* subchannel_list, const char* reason); - void SubchannelListUnrefForConnectivityWatch( - grpc_lb_subchannel_list* subchannel_list, const char* reason); + void ShutdownLocked() override; + + void StartPickingLocked(); + bool DoPickLocked(PickState* pick); + void DrainPendingPicksLocked(); /** list of subchannels */ - grpc_lb_subchannel_list* subchannel_list_ = nullptr; + OrphanablePtr subchannel_list_; + /** Latest version of the subchannel list. + * Subchannel connectivity callbacks will only promote updated subchannel + * lists if they equal \a latest_pending_subchannel_list. In other words, + * racing callbacks that reference outdated subchannel lists won't perform any + * update. */ + OrphanablePtr latest_pending_subchannel_list_; /** have we started picking? */ bool started_picking_ = false; /** are we shutting down? */ @@ -98,14 +202,6 @@ class RoundRobin : public LoadBalancingPolicy { PickState* pending_picks_ = nullptr; /** our connectivity state tracker */ grpc_connectivity_state_tracker state_tracker_; - /** Index into subchannels for last pick. */ - size_t last_ready_subchannel_index_ = 0; - /** Latest version of the subchannel list. - * Subchannel connectivity callbacks will only promote updated subchannel - * lists if they equal \a latest_pending_subchannel_list. In other words, - * racing callbacks that reference outdated subchannel lists won't perform any - * update. 
*/ - grpc_lb_subchannel_list* latest_pending_subchannel_list_ = nullptr; }; RoundRobin::RoundRobin(const Args& args) : LoadBalancingPolicy(args) { @@ -115,7 +211,7 @@ RoundRobin::RoundRobin(const Args& args) : LoadBalancingPolicy(args) { UpdateLocked(*args.args); if (grpc_lb_round_robin_trace.enabled()) { gpr_log(GPR_INFO, "[RR %p] Created with %" PRIuPTR " subchannels", this, - subchannel_list_->num_subchannels); + subchannel_list_->num_subchannels()); } grpc_subchannel_index_ref(); } @@ -131,68 +227,6 @@ RoundRobin::~RoundRobin() { grpc_subchannel_index_unref(); } -/** Returns the index into p->subchannel_list->subchannels of the next - * subchannel in READY state, or p->subchannel_list->num_subchannels if no - * subchannel is READY. - * - * Note that this function does *not* update p->last_ready_subchannel_index. - * The caller must do that if it returns a pick. */ -size_t RoundRobin::GetNextReadySubchannelIndexLocked() { - GPR_ASSERT(subchannel_list_ != nullptr); - if (grpc_lb_round_robin_trace.enabled()) { - gpr_log(GPR_INFO, - "[RR %p] getting next ready subchannel (out of %" PRIuPTR - "), " - "last_ready_subchannel_index=%" PRIuPTR, - this, subchannel_list_->num_subchannels, - last_ready_subchannel_index_); - } - for (size_t i = 0; i < subchannel_list_->num_subchannels; ++i) { - const size_t index = (i + last_ready_subchannel_index_ + 1) % - subchannel_list_->num_subchannels; - if (grpc_lb_round_robin_trace.enabled()) { - gpr_log( - GPR_INFO, - "[RR %p] checking subchannel %p, subchannel_list %p, index %" PRIuPTR - ": state=%s", - this, subchannel_list_->subchannels[index].subchannel, - subchannel_list_, index, - grpc_connectivity_state_name( - subchannel_list_->subchannels[index].curr_connectivity_state)); - } - if (subchannel_list_->subchannels[index].curr_connectivity_state == - GRPC_CHANNEL_READY) { - if (grpc_lb_round_robin_trace.enabled()) { - gpr_log(GPR_INFO, - "[RR %p] found next ready subchannel (%p) at index %" PRIuPTR - " of subchannel_list %p", 
- this, subchannel_list_->subchannels[index].subchannel, index, - subchannel_list_); - } - return index; - } - } - if (grpc_lb_round_robin_trace.enabled()) { - gpr_log(GPR_INFO, "[RR %p] no subchannels in ready state", this); - } - return subchannel_list_->num_subchannels; -} - -// Sets last_ready_subchannel_index_ to last_ready_index. -void RoundRobin::UpdateLastReadySubchannelIndexLocked(size_t last_ready_index) { - GPR_ASSERT(last_ready_index < subchannel_list_->num_subchannels); - last_ready_subchannel_index_ = last_ready_index; - if (grpc_lb_round_robin_trace.enabled()) { - gpr_log(GPR_INFO, - "[RR %p] setting last_ready_subchannel_index=%" PRIuPTR - " (SC %p, CSC %p)", - this, last_ready_index, - subchannel_list_->subchannels[last_ready_index].subchannel, - subchannel_list_->subchannels[last_ready_index] - .connected_subchannel.get()); - } -} - void RoundRobin::HandOffPendingPicksLocked(LoadBalancingPolicy* new_policy) { PickState* pick; while ((pick = pending_picks_) != nullptr) { @@ -218,16 +252,8 @@ void RoundRobin::ShutdownLocked() { } grpc_connectivity_state_set(&state_tracker_, GRPC_CHANNEL_SHUTDOWN, GRPC_ERROR_REF(error), "rr_shutdown"); - if (subchannel_list_ != nullptr) { - grpc_lb_subchannel_list_shutdown_and_unref(subchannel_list_, - "sl_shutdown_rr_shutdown"); - subchannel_list_ = nullptr; - } - if (latest_pending_subchannel_list_ != nullptr) { - grpc_lb_subchannel_list_shutdown_and_unref( - latest_pending_subchannel_list_, "sl_shutdown_pending_rr_shutdown"); - latest_pending_subchannel_list_ = nullptr; - } + subchannel_list_.reset(); + latest_pending_subchannel_list_.reset(); TryReresolutionLocked(&grpc_lb_round_robin_trace, GRPC_ERROR_CANCELLED); GRPC_ERROR_UNREF(error); } @@ -273,39 +299,49 @@ void RoundRobin::CancelMatchingPicksLocked(uint32_t initial_metadata_flags_mask, GRPC_ERROR_UNREF(error); } -void RoundRobin::SubchannelListRefForConnectivityWatch( - grpc_lb_subchannel_list* subchannel_list, const char* reason) { - // TODO(roth): We 
currently track this ref manually. Once the new - // ClosureRef API is ready and the subchannel_list code has been - // converted to a C++ API, find a way to hold the RefCountedPtr<> - // somewhere (maybe in the subchannel_data object) instead of doing - // this manually. - auto self = Ref(DEBUG_LOCATION, reason); - self.release(); - grpc_lb_subchannel_list_ref(subchannel_list, reason); +void RoundRobin::StartPickingLocked() { + started_picking_ = true; + subchannel_list_->StartWatchingLocked(); } -void RoundRobin::SubchannelListUnrefForConnectivityWatch( - grpc_lb_subchannel_list* subchannel_list, const char* reason) { - Unref(DEBUG_LOCATION, reason); - grpc_lb_subchannel_list_unref(subchannel_list, reason); +void RoundRobin::ExitIdleLocked() { + if (!started_picking_) { + StartPickingLocked(); + } } -void RoundRobin::StartPickingLocked() { - started_picking_ = true; - for (size_t i = 0; i < subchannel_list_->num_subchannels; i++) { - if (subchannel_list_->subchannels[i].subchannel != nullptr) { - SubchannelListRefForConnectivityWatch(subchannel_list_, - "connectivity_watch"); - grpc_lb_subchannel_data_start_connectivity_watch( - &subchannel_list_->subchannels[i]); +bool RoundRobin::DoPickLocked(PickState* pick) { + const size_t next_ready_index = + subchannel_list_->GetNextReadySubchannelIndexLocked(); + if (next_ready_index < subchannel_list_->num_subchannels()) { + /* readily available, report right away */ + RoundRobinSubchannelData* sd = + subchannel_list_->subchannel(next_ready_index); + GPR_ASSERT(sd->connected_subchannel() != nullptr); + pick->connected_subchannel = sd->connected_subchannel()->Ref(); + if (pick->user_data != nullptr) { + *pick->user_data = sd->user_data(); } + if (grpc_lb_round_robin_trace.enabled()) { + gpr_log(GPR_INFO, + "[RR %p] Picked target <-- Subchannel %p (connected %p) (sl %p, " + "index %" PRIuPTR ")", + this, sd->subchannel(), pick->connected_subchannel.get(), + sd->subchannel_list(), next_ready_index); + } + /* only advance 
the last picked pointer if the selection was used */ + subchannel_list_->UpdateLastReadySubchannelIndexLocked(next_ready_index); + return true; } + return false; } -void RoundRobin::ExitIdleLocked() { - if (!started_picking_) { - StartPickingLocked(); +void RoundRobin::DrainPendingPicksLocked() { + PickState* pick; + while ((pick = pending_picks_)) { + pending_picks_ = pick->next; + GPR_ASSERT(DoPickLocked(pick)); + GRPC_CLOSURE_SCHED(pick->on_complete, GRPC_ERROR_NONE); } } @@ -315,27 +351,7 @@ bool RoundRobin::PickLocked(PickState* pick) { } GPR_ASSERT(!shutdown_); if (subchannel_list_ != nullptr) { - const size_t next_ready_index = GetNextReadySubchannelIndexLocked(); - if (next_ready_index < subchannel_list_->num_subchannels) { - /* readily available, report right away */ - grpc_lb_subchannel_data* sd = - &subchannel_list_->subchannels[next_ready_index]; - pick->connected_subchannel = sd->connected_subchannel; - if (pick->user_data != nullptr) { - *pick->user_data = sd->user_data; - } - if (grpc_lb_round_robin_trace.enabled()) { - gpr_log( - GPR_INFO, - "[RR %p] Picked target <-- Subchannel %p (connected %p) (sl %p, " - "index %" PRIuPTR ")", - this, sd->subchannel, pick->connected_subchannel.get(), - sd->subchannel_list, next_ready_index); - } - /* only advance the last picked pointer if the selection was used */ - UpdateLastReadySubchannelIndexLocked(next_ready_index); - return true; - } + if (DoPickLocked(pick)) return true; } /* no pick currently available. 
Save for later in list of pending picks */ if (!started_picking_) { @@ -346,36 +362,62 @@ bool RoundRobin::PickLocked(PickState* pick) { return false; } -void UpdateStateCountersLocked(grpc_lb_subchannel_data* sd) { - grpc_lb_subchannel_list* subchannel_list = sd->subchannel_list; - GPR_ASSERT(sd->prev_connectivity_state != GRPC_CHANNEL_SHUTDOWN); - GPR_ASSERT(sd->curr_connectivity_state != GRPC_CHANNEL_SHUTDOWN); - if (sd->prev_connectivity_state == GRPC_CHANNEL_READY) { - GPR_ASSERT(subchannel_list->num_ready > 0); - --subchannel_list->num_ready; - } else if (sd->prev_connectivity_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { - GPR_ASSERT(subchannel_list->num_transient_failures > 0); - --subchannel_list->num_transient_failures; - } else if (sd->prev_connectivity_state == GRPC_CHANNEL_IDLE) { - GPR_ASSERT(subchannel_list->num_idle > 0); - --subchannel_list->num_idle; +void RoundRobin::RoundRobinSubchannelList::StartWatchingLocked() { + if (num_subchannels() == 0) return; + // Check current state of each subchannel synchronously, since any + // subchannel already used by some other channel may have a non-IDLE + // state. + for (size_t i = 0; i < num_subchannels(); ++i) { + grpc_error* error = GRPC_ERROR_NONE; + grpc_connectivity_state state = + subchannel(i)->CheckConnectivityStateLocked(&error); + if (state != GRPC_CHANNEL_IDLE) { + subchannel(i)->UpdateConnectivityStateLocked(state, error); + } } - sd->prev_connectivity_state = sd->curr_connectivity_state; - if (sd->curr_connectivity_state == GRPC_CHANNEL_READY) { - ++subchannel_list->num_ready; - } else if (sd->curr_connectivity_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { - ++subchannel_list->num_transient_failures; - } else if (sd->curr_connectivity_state == GRPC_CHANNEL_IDLE) { - ++subchannel_list->num_idle; + // Now set the LB policy's state based on the subchannels' states. + UpdateRoundRobinStateFromSubchannelStateCountsLocked(); + // Start connectivity watch for each subchannel. 
+ for (size_t i = 0; i < num_subchannels(); i++) { + if (subchannel(i)->subchannel() != nullptr) { + subchannel(i)->StartConnectivityWatchLocked(); + } } } -/** Sets the policy's connectivity status based on that of the passed-in \a sd - * (the grpc_lb_subchannel_data associated with the updated subchannel) and the - * subchannel list \a sd belongs to (sd->subchannel_list). \a error will be used - * only if the policy transitions to state TRANSIENT_FAILURE. */ -void RoundRobin::UpdateConnectivityStatusLocked(grpc_lb_subchannel_data* sd, - grpc_error* error) { +void RoundRobin::RoundRobinSubchannelList::UpdateStateCountersLocked( + grpc_connectivity_state old_state, grpc_connectivity_state new_state, + grpc_error* transient_failure_error) { + GPR_ASSERT(old_state != GRPC_CHANNEL_SHUTDOWN); + GPR_ASSERT(new_state != GRPC_CHANNEL_SHUTDOWN); + if (old_state == GRPC_CHANNEL_READY) { + GPR_ASSERT(num_ready_ > 0); + --num_ready_; + } else if (old_state == GRPC_CHANNEL_CONNECTING) { + GPR_ASSERT(num_connecting_ > 0); + --num_connecting_; + } else if (old_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { + GPR_ASSERT(num_transient_failure_ > 0); + --num_transient_failure_; + } + if (new_state == GRPC_CHANNEL_READY) { + ++num_ready_; + } else if (new_state == GRPC_CHANNEL_CONNECTING) { + ++num_connecting_; + } else if (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { + ++num_transient_failure_; + } + GRPC_ERROR_UNREF(last_transient_failure_error_); + last_transient_failure_error_ = transient_failure_error; +} + +// Sets the RR policy's connectivity state based on the current +// subchannel list. +void RoundRobin::RoundRobinSubchannelList:: + MaybeUpdateRoundRobinConnectivityStateLocked() { + RoundRobin* p = static_cast(policy()); + // Only set connectivity state if this is the current subchannel list. + if (p->subchannel_list_.get() != this) return; /* In priority order. The first rule to match terminates the search (ie, if we * are on rule n, all previous rules were unfulfilled). 
* @@ -390,155 +432,151 @@ void RoundRobin::UpdateConnectivityStatusLocked(grpc_lb_subchannel_data* sd, * CHECK: subchannel_list->num_transient_failures == * subchannel_list->num_subchannels. */ - grpc_lb_subchannel_list* subchannel_list = sd->subchannel_list; - GPR_ASSERT(sd->curr_connectivity_state != GRPC_CHANNEL_IDLE); - if (subchannel_list->num_ready > 0) { + if (num_ready_ > 0) { /* 1) READY */ - grpc_connectivity_state_set(&state_tracker_, GRPC_CHANNEL_READY, + grpc_connectivity_state_set(&p->state_tracker_, GRPC_CHANNEL_READY, GRPC_ERROR_NONE, "rr_ready"); - } else if (sd->curr_connectivity_state == GRPC_CHANNEL_CONNECTING) { + } else if (num_connecting_ > 0) { /* 2) CONNECTING */ - grpc_connectivity_state_set(&state_tracker_, GRPC_CHANNEL_CONNECTING, + grpc_connectivity_state_set(&p->state_tracker_, GRPC_CHANNEL_CONNECTING, GRPC_ERROR_NONE, "rr_connecting"); - } else if (subchannel_list->num_transient_failures == - subchannel_list->num_subchannels) { + } else if (num_transient_failure_ == num_subchannels()) { /* 3) TRANSIENT_FAILURE */ - grpc_connectivity_state_set(&state_tracker_, GRPC_CHANNEL_TRANSIENT_FAILURE, - GRPC_ERROR_REF(error), + grpc_connectivity_state_set(&p->state_tracker_, + GRPC_CHANNEL_TRANSIENT_FAILURE, + GRPC_ERROR_REF(last_transient_failure_error_), "rr_exhausted_subchannels"); } - GRPC_ERROR_UNREF(error); } -void RoundRobin::OnConnectivityChangedLocked(void* arg, grpc_error* error) { - grpc_lb_subchannel_data* sd = static_cast(arg); - RoundRobin* p = static_cast(sd->subchannel_list->policy); +void RoundRobin::RoundRobinSubchannelList:: + UpdateRoundRobinStateFromSubchannelStateCountsLocked() { + RoundRobin* p = static_cast(policy()); + if (num_ready_ > 0) { + if (p->subchannel_list_.get() != this) { + // Promote this list to p->subchannel_list_. + // This list must be p->latest_pending_subchannel_list_, because + // any previous update would have been shut down already and + // therefore we would not be receiving a notification for them. 
+ GPR_ASSERT(p->latest_pending_subchannel_list_.get() == this); + GPR_ASSERT(!shutting_down()); + if (grpc_lb_round_robin_trace.enabled()) { + const size_t old_num_subchannels = + p->subchannel_list_ != nullptr + ? p->subchannel_list_->num_subchannels() + : 0; + gpr_log(GPR_INFO, + "[RR %p] phasing out subchannel list %p (size %" PRIuPTR + ") in favor of %p (size %" PRIuPTR ")", + p, p->subchannel_list_.get(), old_num_subchannels, this, + num_subchannels()); + } + p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_); + } + // Drain pending picks. + p->DrainPendingPicksLocked(); + } + // Update the RR policy's connectivity state if needed. + MaybeUpdateRoundRobinConnectivityStateLocked(); +} + +void RoundRobin::RoundRobinSubchannelData::UpdateConnectivityStateLocked( + grpc_connectivity_state connectivity_state, grpc_error* error) { + RoundRobin* p = static_cast(subchannel_list()->policy()); if (grpc_lb_round_robin_trace.enabled()) { gpr_log( GPR_INFO, - "[RR %p] connectivity changed for subchannel %p, subchannel_list %p: " - "prev_state=%s new_state=%s p->shutdown=%d " - "sd->subchannel_list->shutting_down=%d error=%s", - p, sd->subchannel, sd->subchannel_list, - grpc_connectivity_state_name(sd->prev_connectivity_state), - grpc_connectivity_state_name(sd->pending_connectivity_state_unsafe), - p->shutdown_, sd->subchannel_list->shutting_down, - grpc_error_string(error)); - } - GPR_ASSERT(sd->subchannel != nullptr); - // If the policy is shutting down, unref and return. 
- if (p->shutdown_) { - grpc_lb_subchannel_data_stop_connectivity_watch(sd); - grpc_lb_subchannel_data_unref_subchannel(sd, "rr_shutdown"); - p->SubchannelListUnrefForConnectivityWatch(sd->subchannel_list, - "rr_shutdown"); - return; + "[RR %p] connectivity changed for subchannel %p, subchannel_list %p " + "(index %" PRIuPTR " of %" PRIuPTR "): prev_state=%s new_state=%s", + p, subchannel(), subchannel_list(), Index(), + subchannel_list()->num_subchannels(), + grpc_connectivity_state_name(last_connectivity_state_), + grpc_connectivity_state_name(connectivity_state)); + } + subchannel_list()->UpdateStateCountersLocked(last_connectivity_state_, + connectivity_state, error); + last_connectivity_state_ = connectivity_state; +} + +void RoundRobin::RoundRobinSubchannelData::ProcessConnectivityChangeLocked( + grpc_connectivity_state connectivity_state, grpc_error* error) { + RoundRobin* p = static_cast(subchannel_list()->policy()); + GPR_ASSERT(subchannel() != nullptr); + // If the new state is TRANSIENT_FAILURE, re-resolve. + // Only do this if we've started watching, not at startup time. + // Otherwise, if the subchannel was already in state TRANSIENT_FAILURE + // when the subchannel list was created, we'd wind up in a constant + // loop of re-resolution. + if (connectivity_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { + if (grpc_lb_round_robin_trace.enabled()) { + gpr_log(GPR_INFO, + "[RR %p] Subchannel %p has gone into TRANSIENT_FAILURE. " + "Requesting re-resolution", + p, subchannel()); + } + p->TryReresolutionLocked(&grpc_lb_round_robin_trace, GRPC_ERROR_NONE); } - // If the subchannel list is shutting down, stop watching. - if (sd->subchannel_list->shutting_down || error == GRPC_ERROR_CANCELLED) { - grpc_lb_subchannel_data_stop_connectivity_watch(sd); - grpc_lb_subchannel_data_unref_subchannel(sd, "rr_sl_shutdown"); - p->SubchannelListUnrefForConnectivityWatch(sd->subchannel_list, - "rr_sl_shutdown"); - return; + // Update state counters. 
+ UpdateConnectivityStateLocked(connectivity_state, error); + // Update overall state and renew notification. + subchannel_list()->UpdateRoundRobinStateFromSubchannelStateCountsLocked(); + RenewConnectivityWatchLocked(); +} + +/** Returns the index into p->subchannel_list->subchannels of the next + * subchannel in READY state, or p->subchannel_list->num_subchannels if no + * subchannel is READY. + * + * Note that this function does *not* update p->last_ready_subchannel_index. + * The caller must do that if it returns a pick. */ +size_t +RoundRobin::RoundRobinSubchannelList::GetNextReadySubchannelIndexLocked() { + if (grpc_lb_round_robin_trace.enabled()) { + gpr_log(GPR_INFO, + "[RR %p] getting next ready subchannel (out of %" PRIuPTR + "), last_ready_index=%" PRIuPTR, + policy(), num_subchannels(), last_ready_index_); } - // If we're still here, the notification must be for a subchannel in - // either the current or latest pending subchannel lists. - GPR_ASSERT(sd->subchannel_list == p->subchannel_list_ || - sd->subchannel_list == p->latest_pending_subchannel_list_); - GPR_ASSERT(sd->pending_connectivity_state_unsafe != GRPC_CHANNEL_SHUTDOWN); - // Now that we're inside the combiner, copy the pending connectivity - // state (which was set by the connectivity state watcher) to - // curr_connectivity_state, which is what we use inside of the combiner. - sd->curr_connectivity_state = sd->pending_connectivity_state_unsafe; - // If the sd's new state is TRANSIENT_FAILURE, unref the *connected* - // subchannel, if any. 
- switch (sd->curr_connectivity_state) { - case GRPC_CHANNEL_TRANSIENT_FAILURE: { - sd->connected_subchannel.reset(); + for (size_t i = 0; i < num_subchannels(); ++i) { + const size_t index = (i + last_ready_index_ + 1) % num_subchannels(); + if (grpc_lb_round_robin_trace.enabled()) { + gpr_log( + GPR_INFO, + "[RR %p] checking subchannel %p, subchannel_list %p, index %" PRIuPTR + ": state=%s", + policy(), subchannel(index)->subchannel(), this, index, + grpc_connectivity_state_name( + subchannel(index)->connectivity_state())); + } + if (subchannel(index)->connectivity_state() == GRPC_CHANNEL_READY) { if (grpc_lb_round_robin_trace.enabled()) { gpr_log(GPR_INFO, - "[RR %p] Subchannel %p has gone into TRANSIENT_FAILURE. " - "Requesting re-resolution", - p, sd->subchannel); - } - p->TryReresolutionLocked(&grpc_lb_round_robin_trace, GRPC_ERROR_NONE); - break; - } - case GRPC_CHANNEL_READY: { - if (sd->connected_subchannel == nullptr) { - sd->connected_subchannel = - grpc_subchannel_get_connected_subchannel(sd->subchannel); - } - if (sd->subchannel_list != p->subchannel_list_) { - // promote sd->subchannel_list to p->subchannel_list_. - // sd->subchannel_list must be equal to - // p->latest_pending_subchannel_list_ because we have already filtered - // for sds belonging to outdated subchannel lists. - GPR_ASSERT(sd->subchannel_list == p->latest_pending_subchannel_list_); - GPR_ASSERT(!sd->subchannel_list->shutting_down); - if (grpc_lb_round_robin_trace.enabled()) { - const size_t num_subchannels = - p->subchannel_list_ != nullptr - ? 
p->subchannel_list_->num_subchannels - : 0; - gpr_log(GPR_INFO, - "[RR %p] phasing out subchannel list %p (size %" PRIuPTR - ") in favor of %p (size %" PRIuPTR ")", - p, p->subchannel_list_, num_subchannels, sd->subchannel_list, - num_subchannels); - } - if (p->subchannel_list_ != nullptr) { - // dispose of the current subchannel_list - grpc_lb_subchannel_list_shutdown_and_unref(p->subchannel_list_, - "sl_phase_out_shutdown"); - } - p->subchannel_list_ = p->latest_pending_subchannel_list_; - p->latest_pending_subchannel_list_ = nullptr; - } - /* at this point we know there's at least one suitable subchannel. Go - * ahead and pick one and notify the pending suitors in - * p->pending_picks. This preemptively replicates rr_pick()'s actions. */ - const size_t next_ready_index = p->GetNextReadySubchannelIndexLocked(); - GPR_ASSERT(next_ready_index < p->subchannel_list_->num_subchannels); - grpc_lb_subchannel_data* selected = - &p->subchannel_list_->subchannels[next_ready_index]; - if (p->pending_picks_ != nullptr) { - // if the selected subchannel is going to be used for the pending - // picks, update the last picked pointer - p->UpdateLastReadySubchannelIndexLocked(next_ready_index); - } - PickState* pick; - while ((pick = p->pending_picks_)) { - p->pending_picks_ = pick->next; - pick->connected_subchannel = selected->connected_subchannel; - if (pick->user_data != nullptr) { - *pick->user_data = selected->user_data; - } - if (grpc_lb_round_robin_trace.enabled()) { - gpr_log(GPR_INFO, - "[RR %p] Fulfilling pending pick. 
Target <-- subchannel %p " - "(subchannel_list %p, index %" PRIuPTR ")", - p, selected->subchannel, p->subchannel_list_, - next_ready_index); - } - GRPC_CLOSURE_SCHED(pick->on_complete, GRPC_ERROR_NONE); + "[RR %p] found next ready subchannel (%p) at index %" PRIuPTR + " of subchannel_list %p", + policy(), subchannel(index)->subchannel(), index, this); } - break; + return index; } - case GRPC_CHANNEL_SHUTDOWN: - GPR_UNREACHABLE_CODE(return ); - case GRPC_CHANNEL_CONNECTING: - case GRPC_CHANNEL_IDLE:; // fallthrough } - // Update state counters. - UpdateStateCountersLocked(sd); - // Only update connectivity based on the selected subchannel list. - if (sd->subchannel_list == p->subchannel_list_) { - p->UpdateConnectivityStatusLocked(sd, GRPC_ERROR_REF(error)); + if (grpc_lb_round_robin_trace.enabled()) { + gpr_log(GPR_INFO, "[RR %p] no subchannels in ready state", this); + } + return num_subchannels(); +} + +// Sets last_ready_index_ to last_ready_index. +void RoundRobin::RoundRobinSubchannelList::UpdateLastReadySubchannelIndexLocked( + size_t last_ready_index) { + GPR_ASSERT(last_ready_index < num_subchannels()); + last_ready_index_ = last_ready_index; + if (grpc_lb_round_robin_trace.enabled()) { + gpr_log(GPR_INFO, + "[RR %p] setting last_ready_subchannel_index=%" PRIuPTR + " (SC %p, CSC %p)", + policy(), last_ready_index, + subchannel(last_ready_index)->subchannel(), + subchannel(last_ready_index)->connected_subchannel()); } - // Renew notification. 
- grpc_lb_subchannel_data_start_connectivity_watch(sd); } grpc_connectivity_state RoundRobin::CheckConnectivityLocked( @@ -554,11 +592,12 @@ void RoundRobin::NotifyOnStateChangeLocked(grpc_connectivity_state* current, void RoundRobin::PingOneLocked(grpc_closure* on_initiate, grpc_closure* on_ack) { - const size_t next_ready_index = GetNextReadySubchannelIndexLocked(); - if (next_ready_index < subchannel_list_->num_subchannels) { - grpc_lb_subchannel_data* selected = - &subchannel_list_->subchannels[next_ready_index]; - selected->connected_subchannel->Ping(on_initiate, on_ack); + const size_t next_ready_index = + subchannel_list_->GetNextReadySubchannelIndexLocked(); + if (next_ready_index < subchannel_list_->num_subchannels()) { + RoundRobinSubchannelData* selected = + subchannel_list_->subchannel(next_ready_index); + selected->connected_subchannel()->Ping(on_initiate, on_ack); } else { GRPC_CLOSURE_SCHED(on_initiate, GRPC_ERROR_CREATE_FROM_STATIC_STRING( "Round Robin not connected")); @@ -581,80 +620,37 @@ void RoundRobin::UpdateLocked(const grpc_channel_args& args) { } return; } - grpc_lb_addresses* addresses = (grpc_lb_addresses*)arg->value.pointer.p; + grpc_lb_addresses* addresses = + static_cast(arg->value.pointer.p); if (grpc_lb_round_robin_trace.enabled()) { gpr_log(GPR_INFO, "[RR %p] received update with %" PRIuPTR " addresses", this, addresses->num_addresses); } - grpc_lb_subchannel_list* subchannel_list = grpc_lb_subchannel_list_create( - this, &grpc_lb_round_robin_trace, addresses, combiner(), - client_channel_factory(), args, &RoundRobin::OnConnectivityChangedLocked); - if (subchannel_list->num_subchannels == 0) { - grpc_connectivity_state_set( - &state_tracker_, GRPC_CHANNEL_TRANSIENT_FAILURE, - GRPC_ERROR_CREATE_FROM_STATIC_STRING("Empty update"), - "rr_update_empty"); - if (subchannel_list_ != nullptr) { - grpc_lb_subchannel_list_shutdown_and_unref(subchannel_list_, - "sl_shutdown_empty_update"); + // Replace latest_pending_subchannel_list_. 
+ if (latest_pending_subchannel_list_ != nullptr) { + if (grpc_lb_round_robin_trace.enabled()) { + gpr_log(GPR_INFO, + "[RR %p] Shutting down previous pending subchannel list %p", this, + latest_pending_subchannel_list_.get()); } - subchannel_list_ = subchannel_list; // empty list - return; } - if (started_picking_) { - for (size_t i = 0; i < subchannel_list->num_subchannels; ++i) { - const grpc_connectivity_state subchannel_state = - grpc_subchannel_check_connectivity( - subchannel_list->subchannels[i].subchannel, nullptr); - // Override the default setting of IDLE for connectivity notification - // purposes if the subchannel is already in transient failure. Otherwise - // we'd be immediately notified of the IDLE-TRANSIENT_FAILURE - // discrepancy, attempt to re-resolve and end up here again. - // TODO(roth): As part of C++-ifying the subchannel_list API, design a - // better API for notifying the LB policy of subchannel states, which can - // be used both for the subchannel's initial state and for subsequent - // state changes. This will allow us to handle this more generally instead - // of special-casing TRANSIENT_FAILURE (e.g., we can also distribute any - // pending picks across all READY subchannels rather than sending them all - // to the first one). 
- if (subchannel_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { - subchannel_list->subchannels[i].pending_connectivity_state_unsafe = - subchannel_list->subchannels[i].curr_connectivity_state = - subchannel_list->subchannels[i].prev_connectivity_state = - subchannel_state; - --subchannel_list->num_idle; - ++subchannel_list->num_transient_failures; - } - } - if (latest_pending_subchannel_list_ != nullptr) { - if (grpc_lb_round_robin_trace.enabled()) { - gpr_log(GPR_INFO, - "[RR %p] Shutting down latest pending subchannel list %p, " - "about to be replaced by newer latest %p", - this, latest_pending_subchannel_list_, subchannel_list); - } - grpc_lb_subchannel_list_shutdown_and_unref( - latest_pending_subchannel_list_, "sl_outdated"); - } - latest_pending_subchannel_list_ = subchannel_list; - for (size_t i = 0; i < subchannel_list->num_subchannels; ++i) { - /* Watch every new subchannel. A subchannel list becomes active the - * moment one of its subchannels is READY. At that moment, we swap - * p->subchannel_list for sd->subchannel_list, provided the subchannel - * list is still valid (ie, isn't shutting down) */ - SubchannelListRefForConnectivityWatch(subchannel_list, - "connectivity_watch"); - grpc_lb_subchannel_data_start_connectivity_watch( - &subchannel_list->subchannels[i]); + latest_pending_subchannel_list_ = MakeOrphanable( + this, &grpc_lb_round_robin_trace, addresses, combiner(), + client_channel_factory(), args); + // If we haven't started picking yet or the new list is empty, + // immediately promote the new list to the current list. + if (!started_picking_ || + latest_pending_subchannel_list_->num_subchannels() == 0) { + if (latest_pending_subchannel_list_->num_subchannels() == 0) { + grpc_connectivity_state_set( + &state_tracker_, GRPC_CHANNEL_TRANSIENT_FAILURE, + GRPC_ERROR_CREATE_FROM_STATIC_STRING("Empty update"), + "rr_update_empty"); } + subchannel_list_ = std::move(latest_pending_subchannel_list_); } else { - // The policy isn't picking yet. 
Save the update for later, disposing of - // previous version if any. - if (subchannel_list_ != nullptr) { - grpc_lb_subchannel_list_shutdown_and_unref( - subchannel_list_, "rr_update_before_started_picking"); - } - subchannel_list_ = subchannel_list; + // If we've started picking, start watching the new list. + latest_pending_subchannel_list_->StartWatchingLocked(); } } diff --git a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.cc b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.cc deleted file mode 100644 index 257db57575f..00000000000 --- a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.cc +++ /dev/null @@ -1,253 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ - -#include - -#include - -#include - -#include "src/core/ext/filters/client_channel/lb_policy/subchannel_list.h" -#include "src/core/lib/channel/channel_args.h" -#include "src/core/lib/debug/trace.h" -#include "src/core/lib/iomgr/closure.h" -#include "src/core/lib/iomgr/combiner.h" -#include "src/core/lib/iomgr/sockaddr_utils.h" -#include "src/core/lib/transport/connectivity_state.h" - -void grpc_lb_subchannel_data_unref_subchannel(grpc_lb_subchannel_data* sd, - const char* reason) { - if (sd->subchannel != nullptr) { - if (sd->subchannel_list->tracer->enabled()) { - gpr_log(GPR_INFO, - "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR - " (subchannel %p): unreffing subchannel", - sd->subchannel_list->tracer->name(), sd->subchannel_list->policy, - sd->subchannel_list, - static_cast(sd - sd->subchannel_list->subchannels), - sd->subchannel_list->num_subchannels, sd->subchannel); - } - GRPC_SUBCHANNEL_UNREF(sd->subchannel, reason); - sd->subchannel = nullptr; - sd->connected_subchannel.reset(); - if (sd->user_data != nullptr) { - GPR_ASSERT(sd->user_data_vtable != nullptr); - sd->user_data_vtable->destroy(sd->user_data); - sd->user_data = nullptr; - } - } -} - -void grpc_lb_subchannel_data_start_connectivity_watch( - grpc_lb_subchannel_data* sd) { - if (sd->subchannel_list->tracer->enabled()) { - gpr_log( - GPR_INFO, - "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR - " (subchannel %p): requesting connectivity change " - "notification (from %s)", - sd->subchannel_list->tracer->name(), sd->subchannel_list->policy, - sd->subchannel_list, - static_cast(sd - sd->subchannel_list->subchannels), - sd->subchannel_list->num_subchannels, sd->subchannel, - grpc_connectivity_state_name(sd->pending_connectivity_state_unsafe)); - } - sd->connectivity_notification_pending = true; - grpc_subchannel_notify_on_state_change( - sd->subchannel, sd->subchannel_list->policy->interested_parties(), - &sd->pending_connectivity_state_unsafe, - 
&sd->connectivity_changed_closure); -} - -void grpc_lb_subchannel_data_stop_connectivity_watch( - grpc_lb_subchannel_data* sd) { - if (sd->subchannel_list->tracer->enabled()) { - gpr_log(GPR_INFO, - "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR - " (subchannel %p): stopping connectivity watch", - sd->subchannel_list->tracer->name(), sd->subchannel_list->policy, - sd->subchannel_list, - static_cast(sd - sd->subchannel_list->subchannels), - sd->subchannel_list->num_subchannels, sd->subchannel); - } - GPR_ASSERT(sd->connectivity_notification_pending); - sd->connectivity_notification_pending = false; -} - -grpc_lb_subchannel_list* grpc_lb_subchannel_list_create( - grpc_core::LoadBalancingPolicy* p, grpc_core::TraceFlag* tracer, - const grpc_lb_addresses* addresses, grpc_combiner* combiner, - grpc_client_channel_factory* client_channel_factory, - const grpc_channel_args& args, grpc_iomgr_cb_func connectivity_changed_cb) { - grpc_lb_subchannel_list* subchannel_list = - static_cast( - gpr_zalloc(sizeof(*subchannel_list))); - if (tracer->enabled()) { - gpr_log(GPR_INFO, - "[%s %p] Creating subchannel list %p for %" PRIuPTR " subchannels", - tracer->name(), p, subchannel_list, addresses->num_addresses); - } - subchannel_list->policy = p; - subchannel_list->tracer = tracer; - gpr_ref_init(&subchannel_list->refcount, 1); - subchannel_list->subchannels = static_cast( - gpr_zalloc(sizeof(grpc_lb_subchannel_data) * addresses->num_addresses)); - // We need to remove the LB addresses in order to be able to compare the - // subchannel keys of subchannels from a different batch of addresses. - static const char* keys_to_remove[] = {GRPC_ARG_SUBCHANNEL_ADDRESS, - GRPC_ARG_LB_ADDRESSES}; - // Create a subchannel for each address. - grpc_subchannel_args sc_args; - size_t subchannel_index = 0; - for (size_t i = 0; i < addresses->num_addresses; i++) { - // If there were any balancer, we would have chosen grpclb policy instead. 
- GPR_ASSERT(!addresses->addresses[i].is_balancer); - memset(&sc_args, 0, sizeof(grpc_subchannel_args)); - grpc_arg addr_arg = - grpc_create_subchannel_address_arg(&addresses->addresses[i].address); - grpc_channel_args* new_args = grpc_channel_args_copy_and_add_and_remove( - &args, keys_to_remove, GPR_ARRAY_SIZE(keys_to_remove), &addr_arg, 1); - gpr_free(addr_arg.value.string); - sc_args.args = new_args; - grpc_subchannel* subchannel = grpc_client_channel_factory_create_subchannel( - client_channel_factory, &sc_args); - grpc_channel_args_destroy(new_args); - if (subchannel == nullptr) { - // Subchannel could not be created. - if (tracer->enabled()) { - char* address_uri = - grpc_sockaddr_to_uri(&addresses->addresses[i].address); - gpr_log(GPR_INFO, - "[%s %p] could not create subchannel for address uri %s, " - "ignoring", - tracer->name(), subchannel_list->policy, address_uri); - gpr_free(address_uri); - } - continue; - } - if (tracer->enabled()) { - char* address_uri = - grpc_sockaddr_to_uri(&addresses->addresses[i].address); - gpr_log(GPR_INFO, - "[%s %p] subchannel list %p index %" PRIuPTR - ": Created subchannel %p for address uri %s", - tracer->name(), p, subchannel_list, subchannel_index, subchannel, - address_uri); - gpr_free(address_uri); - } - grpc_lb_subchannel_data* sd = - &subchannel_list->subchannels[subchannel_index++]; - sd->subchannel_list = subchannel_list; - sd->subchannel = subchannel; - GRPC_CLOSURE_INIT(&sd->connectivity_changed_closure, - connectivity_changed_cb, sd, - grpc_combiner_scheduler(combiner)); - // We assume that the current state is IDLE. If not, we'll get a - // callback telling us that. 
- sd->prev_connectivity_state = GRPC_CHANNEL_IDLE; - sd->curr_connectivity_state = GRPC_CHANNEL_IDLE; - sd->pending_connectivity_state_unsafe = GRPC_CHANNEL_IDLE; - sd->user_data_vtable = addresses->user_data_vtable; - if (sd->user_data_vtable != nullptr) { - sd->user_data = - sd->user_data_vtable->copy(addresses->addresses[i].user_data); - } - } - subchannel_list->num_subchannels = subchannel_index; - subchannel_list->num_idle = subchannel_index; - return subchannel_list; -} - -static void subchannel_list_destroy(grpc_lb_subchannel_list* subchannel_list) { - if (subchannel_list->tracer->enabled()) { - gpr_log(GPR_INFO, "[%s %p] Destroying subchannel_list %p", - subchannel_list->tracer->name(), subchannel_list->policy, - subchannel_list); - } - for (size_t i = 0; i < subchannel_list->num_subchannels; i++) { - grpc_lb_subchannel_data* sd = &subchannel_list->subchannels[i]; - grpc_lb_subchannel_data_unref_subchannel(sd, "subchannel_list_destroy"); - } - gpr_free(subchannel_list->subchannels); - gpr_free(subchannel_list); -} - -void grpc_lb_subchannel_list_ref(grpc_lb_subchannel_list* subchannel_list, - const char* reason) { - gpr_ref_non_zero(&subchannel_list->refcount); - if (subchannel_list->tracer->enabled()) { - const gpr_atm count = gpr_atm_acq_load(&subchannel_list->refcount.count); - gpr_log(GPR_INFO, "[%s %p] subchannel_list %p REF %lu->%lu (%s)", - subchannel_list->tracer->name(), subchannel_list->policy, - subchannel_list, static_cast(count - 1), - static_cast(count), reason); - } -} - -void grpc_lb_subchannel_list_unref(grpc_lb_subchannel_list* subchannel_list, - const char* reason) { - const bool done = gpr_unref(&subchannel_list->refcount); - if (subchannel_list->tracer->enabled()) { - const gpr_atm count = gpr_atm_acq_load(&subchannel_list->refcount.count); - gpr_log(GPR_INFO, "[%s %p] subchannel_list %p UNREF %lu->%lu (%s)", - subchannel_list->tracer->name(), subchannel_list->policy, - subchannel_list, static_cast(count + 1), - static_cast(count), 
reason); - } - if (done) { - subchannel_list_destroy(subchannel_list); - } -} - -static void subchannel_data_cancel_connectivity_watch( - grpc_lb_subchannel_data* sd, const char* reason) { - if (sd->subchannel_list->tracer->enabled()) { - gpr_log(GPR_INFO, - "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR - " (subchannel %p): canceling connectivity watch (%s)", - sd->subchannel_list->tracer->name(), sd->subchannel_list->policy, - sd->subchannel_list, - static_cast(sd - sd->subchannel_list->subchannels), - sd->subchannel_list->num_subchannels, sd->subchannel, reason); - } - grpc_subchannel_notify_on_state_change(sd->subchannel, nullptr, nullptr, - &sd->connectivity_changed_closure); -} - -void grpc_lb_subchannel_list_shutdown_and_unref( - grpc_lb_subchannel_list* subchannel_list, const char* reason) { - if (subchannel_list->tracer->enabled()) { - gpr_log(GPR_INFO, "[%s %p] Shutting down subchannel_list %p (%s)", - subchannel_list->tracer->name(), subchannel_list->policy, - subchannel_list, reason); - } - GPR_ASSERT(!subchannel_list->shutting_down); - subchannel_list->shutting_down = true; - for (size_t i = 0; i < subchannel_list->num_subchannels; i++) { - grpc_lb_subchannel_data* sd = &subchannel_list->subchannels[i]; - // If there's a pending notification for this subchannel, cancel it; - // the callback is responsible for unreffing the subchannel. - // Otherwise, unref the subchannel directly. 
- if (sd->connectivity_notification_pending) { - subchannel_data_cancel_connectivity_watch(sd, reason); - } else if (sd->subchannel != nullptr) { - grpc_lb_subchannel_data_unref_subchannel(sd, reason); - } - } - grpc_lb_subchannel_list_unref(subchannel_list, reason); -} diff --git a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h index 6889d596ac9..7e2046bcdc0 100644 --- a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h +++ b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h @@ -21,116 +21,516 @@ #include +#include + +#include + #include "src/core/ext/filters/client_channel/lb_policy_registry.h" #include "src/core/ext/filters/client_channel/subchannel.h" +#include "src/core/lib/channel/channel_args.h" #include "src/core/lib/debug/trace.h" +#include "src/core/lib/gprpp/abstract.h" +#include "src/core/lib/gprpp/inlined_vector.h" +#include "src/core/lib/gprpp/orphanable.h" +#include "src/core/lib/gprpp/ref_counted.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" +#include "src/core/lib/iomgr/closure.h" +#include "src/core/lib/iomgr/combiner.h" +#include "src/core/lib/iomgr/sockaddr_utils.h" #include "src/core/lib/transport/connectivity_state.h" -// TODO(roth): This code is intended to be shared between pick_first and -// round_robin. However, the interface needs more work to provide clean -// encapsulation. For example, the structs here have some fields that are -// only used in one of the two (e.g., the state counters in -// grpc_lb_subchannel_list and the prev_connectivity_state field in -// grpc_lb_subchannel_data are only used in round_robin, and the -// checking_subchannel field in grpc_lb_subchannel_list is only used by -// pick_first). Also, there is probably some code duplication between the -// connectivity state notification callback code in both pick_first and -// round_robin that could be refactored and moved here. 
In a future PR, -// need to clean this up. - -typedef struct grpc_lb_subchannel_list grpc_lb_subchannel_list; - -typedef struct { - /** backpointer to owning subchannel list */ - grpc_lb_subchannel_list* subchannel_list; - /** subchannel itself */ - grpc_subchannel* subchannel; - grpc_core::RefCountedPtr connected_subchannel; - /** Is a connectivity notification pending? */ - bool connectivity_notification_pending; - /** notification that connectivity has changed on subchannel */ - grpc_closure connectivity_changed_closure; - /** previous and current connectivity states. Updated by \a - * \a connectivity_changed_closure based on - * \a pending_connectivity_state_unsafe. */ - grpc_connectivity_state prev_connectivity_state; - grpc_connectivity_state curr_connectivity_state; - /** connectivity state to be updated by - * grpc_subchannel_notify_on_state_change(), not guarded by - * the combiner. To be copied to \a curr_connectivity_state by - * \a connectivity_changed_closure. */ - grpc_connectivity_state pending_connectivity_state_unsafe; - /** the subchannel's target user data */ - void* user_data; - /** vtable to operate over \a user_data */ - const grpc_lb_user_data_vtable* user_data_vtable; -} grpc_lb_subchannel_data; - -/// Unrefs the subchannel contained in sd. -void grpc_lb_subchannel_data_unref_subchannel(grpc_lb_subchannel_data* sd, - const char* reason); - -/// Starts watching the connectivity state of the subchannel. -/// The connectivity_changed_cb callback must invoke either -/// grpc_lb_subchannel_data_stop_connectivity_watch() or again call -/// grpc_lb_subchannel_data_start_connectivity_watch(). -void grpc_lb_subchannel_data_start_connectivity_watch( - grpc_lb_subchannel_data* sd); - -/// Stops watching the connectivity state of the subchannel. 
-void grpc_lb_subchannel_data_stop_connectivity_watch( - grpc_lb_subchannel_data* sd); - -struct grpc_lb_subchannel_list { - /** backpointer to owning policy */ - grpc_core::LoadBalancingPolicy* policy; - - grpc_core::TraceFlag* tracer; - - /** all our subchannels */ - size_t num_subchannels; - grpc_lb_subchannel_data* subchannels; - - /** Index into subchannels of the one we're currently checking. - * Used when connecting to subchannels serially instead of in parallel. */ - // TODO(roth): When we have time, we can probably make this go away - // and compute the index dynamically by subtracting - // subchannel_list->subchannels from the subchannel_data pointer. - size_t checking_subchannel; - - /** how many subchannels are in state READY */ - size_t num_ready; - /** how many subchannels are in state TRANSIENT_FAILURE */ - size_t num_transient_failures; - /** how many subchannels are in state IDLE */ - size_t num_idle; - - /** There will be one ref for each entry in subchannels for which there is a - * pending connectivity state watcher callback. */ - gpr_refcount refcount; - - /** Is this list shutting down? This may be true due to the shutdown of the - * policy itself or because a newer update has arrived while this one hadn't - * finished processing. */ - bool shutting_down; +// Code for maintaining a list of subchannels within an LB policy. +// +// To use this, callers must create their own subclasses, like so: +/* + +class MySubchannelList; // Forward declaration. + +class MySubchannelData + : public SubchannelData<MySubchannelList, MySubchannelData> { + public: + void ProcessConnectivityChangeLocked( + grpc_connectivity_state connectivity_state, grpc_error* error) override { + // ...code to handle connectivity changes...
+ } +}; + +class MySubchannelList + : public SubchannelList<MySubchannelList, MySubchannelData> { }; -grpc_lb_subchannel_list* grpc_lb_subchannel_list_create( - grpc_core::LoadBalancingPolicy* p, grpc_core::TraceFlag* tracer, +*/ +// All methods with a Locked() suffix must be called from within the +// client_channel combiner. + +namespace grpc_core { + +// Stores data for a particular subchannel in a subchannel list. +// Callers must create a subclass that implements the +// ProcessConnectivityChangeLocked() method. +template +class SubchannelData { + public: + // Returns a pointer to the subchannel list containing this object. + SubchannelListType* subchannel_list() const { return subchannel_list_; } + + // Returns the index into the subchannel list of this object. + size_t Index() const { + return static_cast(static_cast(this) - + subchannel_list_->subchannel(0)); + } + + // Returns a pointer to the subchannel. + grpc_subchannel* subchannel() const { return subchannel_; } + + // Returns the connected subchannel. Will be null if the subchannel + // is not connected. + ConnectedSubchannel* connected_subchannel() const { + return connected_subchannel_.get(); + } + + // Synchronously checks the subchannel's connectivity state. + // Must not be called while there is a connectivity notification + // pending (i.e., between calling StartConnectivityWatchLocked() or + // RenewConnectivityWatchLocked() and the resulting invocation of + // ProcessConnectivityChangeLocked()). + grpc_connectivity_state CheckConnectivityStateLocked(grpc_error** error) { + GPR_ASSERT(!connectivity_notification_pending_); + pending_connectivity_state_unsafe_ = + grpc_subchannel_check_connectivity(subchannel(), error); + UpdateConnectedSubchannelLocked(); + return pending_connectivity_state_unsafe_; + } + + // Unrefs the subchannel. May be used if an individual subchannel is + // no longer needed even though the subchannel list as a whole is not + // being unreffed.
+ virtual void UnrefSubchannelLocked(const char* reason); + + // Starts watching the connectivity state of the subchannel. + // ProcessConnectivityChangeLocked() will be called when the + // connectivity state changes. + void StartConnectivityWatchLocked(); + + // Renews watching the connectivity state of the subchannel. + void RenewConnectivityWatchLocked(); + + // Stops watching the connectivity state of the subchannel. + void StopConnectivityWatchLocked(); + + // Cancels watching the connectivity state of the subchannel. + // Must be called only while there is a connectivity notification + // pending (i.e., between calling StartConnectivityWatchLocked() or + // RenewConnectivityWatchLocked() and the resulting invocation of + // ProcessConnectivityChangeLocked()). + // From within ProcessConnectivityChangeLocked(), use + // StopConnectivityWatchLocked() instead. + void CancelConnectivityWatchLocked(const char* reason); + + // Cancels any pending connectivity watch and unrefs the subchannel. + void ShutdownLocked(); + + GRPC_ABSTRACT_BASE_CLASS + + protected: + SubchannelData(SubchannelListType* subchannel_list, + const grpc_lb_user_data_vtable* user_data_vtable, + const grpc_lb_address& address, grpc_subchannel* subchannel, + grpc_combiner* combiner); + + virtual ~SubchannelData(); + + // After StartConnectivityWatchLocked() or RenewConnectivityWatchLocked() + // is called, this method will be invoked when the subchannel's connectivity + // state changes. + // Implementations must invoke either RenewConnectivityWatchLocked() or + // StopConnectivityWatchLocked() before returning. + virtual void ProcessConnectivityChangeLocked( + grpc_connectivity_state connectivity_state, + grpc_error* error) GRPC_ABSTRACT; + + private: + // Updates connected_subchannel_ based on pending_connectivity_state_unsafe_. + // Returns true if the connectivity state should be reported. 
+ bool UpdateConnectedSubchannelLocked(); + + static void OnConnectivityChangedLocked(void* arg, grpc_error* error); + + // Backpointer to owning subchannel list. Not owned. + SubchannelListType* subchannel_list_; + + // The subchannel and connected subchannel. + grpc_subchannel* subchannel_; + RefCountedPtr connected_subchannel_; + + // Notification that connectivity has changed on subchannel. + grpc_closure connectivity_changed_closure_; + // Is a connectivity notification pending? + bool connectivity_notification_pending_ = false; + // Connectivity state to be updated by + // grpc_subchannel_notify_on_state_change(), not guarded by + // the combiner. + grpc_connectivity_state pending_connectivity_state_unsafe_; +}; + +// A list of subchannels. +template +class SubchannelList + : public InternallyRefCountedWithTracing { + public: + typedef InlinedVector SubchannelVector; + + // The number of subchannels in the list. + size_t num_subchannels() const { return subchannels_.size(); } + + // The data for the subchannel at a particular index. + SubchannelDataType* subchannel(size_t index) { return &subchannels_[index]; } + + // Returns true if the subchannel list is shutting down. + bool shutting_down() const { return shutting_down_; } + + // Accessors. + LoadBalancingPolicy* policy() const { return policy_; } + TraceFlag* tracer() const { return tracer_; } + + // Note: Caller must ensure that this is invoked inside of the combiner. + void Orphan() override { + ShutdownLocked(); + InternallyRefCountedWithTracing::Unref(DEBUG_LOCATION, + "shutdown"); + } + + GRPC_ABSTRACT_BASE_CLASS + + protected: + SubchannelList(LoadBalancingPolicy* policy, TraceFlag* tracer, + const grpc_lb_addresses* addresses, grpc_combiner* combiner, + grpc_client_channel_factory* client_channel_factory, + const grpc_channel_args& args); + + virtual ~SubchannelList(); + + private: + // So New() can call our private ctor. + template + friend T* New(Args&&... 
args); + + // For accessing Ref() and Unref(). + friend class SubchannelData; + + void ShutdownLocked(); + + // Backpointer to owning policy. + LoadBalancingPolicy* policy_; + + TraceFlag* tracer_; + + grpc_combiner* combiner_; + + // The list of subchannels. + SubchannelVector subchannels_; + + // Is this list shutting down? This may be true due to the shutdown of the + // policy itself or because a newer update has arrived while this one hadn't + // finished processing. + bool shutting_down_ = false; +}; + +// +// implementation -- no user-serviceable parts below +// + +// +// SubchannelData +// + +template +SubchannelData::SubchannelData( + SubchannelListType* subchannel_list, + const grpc_lb_user_data_vtable* user_data_vtable, + const grpc_lb_address& address, grpc_subchannel* subchannel, + grpc_combiner* combiner) + : subchannel_list_(subchannel_list), + subchannel_(subchannel), + // We assume that the current state is IDLE. If not, we'll get a + // callback telling us that. + pending_connectivity_state_unsafe_(GRPC_CHANNEL_IDLE) { + GRPC_CLOSURE_INIT( + &connectivity_changed_closure_, + (&SubchannelData::OnConnectivityChangedLocked), + this, grpc_combiner_scheduler(combiner)); +} + +template +SubchannelData::~SubchannelData() { + UnrefSubchannelLocked("subchannel_data_destroy"); +} + +template +void SubchannelData:: + UnrefSubchannelLocked(const char* reason) { + if (subchannel_ != nullptr) { + if (subchannel_list_->tracer()->enabled()) { + gpr_log(GPR_INFO, + "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR + " (subchannel %p): unreffing subchannel", + subchannel_list_->tracer()->name(), subchannel_list_->policy(), + subchannel_list_, Index(), subchannel_list_->num_subchannels(), + subchannel_); + } + GRPC_SUBCHANNEL_UNREF(subchannel_, reason); + subchannel_ = nullptr; + connected_subchannel_.reset(); + } +} + +template +void SubchannelData::StartConnectivityWatchLocked() { + if (subchannel_list_->tracer()->enabled()) { + gpr_log(GPR_INFO, + "[%s
%p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR + " (subchannel %p): starting watch: requesting connectivity change " + "notification (from %s)", + subchannel_list_->tracer()->name(), subchannel_list_->policy(), + subchannel_list_, Index(), subchannel_list_->num_subchannels(), + subchannel_, + grpc_connectivity_state_name(pending_connectivity_state_unsafe_)); + } + GPR_ASSERT(!connectivity_notification_pending_); + connectivity_notification_pending_ = true; + subchannel_list()->Ref(DEBUG_LOCATION, "connectivity_watch").release(); + grpc_subchannel_notify_on_state_change( + subchannel_, subchannel_list_->policy()->interested_parties(), + &pending_connectivity_state_unsafe_, &connectivity_changed_closure_); +} + +template +void SubchannelData::RenewConnectivityWatchLocked() { + if (subchannel_list_->tracer()->enabled()) { + gpr_log(GPR_INFO, + "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR + " (subchannel %p): renewing watch: requesting connectivity change " + "notification (from %s)", + subchannel_list_->tracer()->name(), subchannel_list_->policy(), + subchannel_list_, Index(), subchannel_list_->num_subchannels(), + subchannel_, + grpc_connectivity_state_name(pending_connectivity_state_unsafe_)); + } + GPR_ASSERT(connectivity_notification_pending_); + grpc_subchannel_notify_on_state_change( + subchannel_, subchannel_list_->policy()->interested_parties(), + &pending_connectivity_state_unsafe_, &connectivity_changed_closure_); +} + +template +void SubchannelData::StopConnectivityWatchLocked() { + if (subchannel_list_->tracer()->enabled()) { + gpr_log(GPR_INFO, + "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR + " (subchannel %p): stopping connectivity watch", + subchannel_list_->tracer()->name(), subchannel_list_->policy(), + subchannel_list_, Index(), subchannel_list_->num_subchannels(), + subchannel_); + } + GPR_ASSERT(connectivity_notification_pending_); + connectivity_notification_pending_ = false; + 
subchannel_list()->Unref(DEBUG_LOCATION, "connectivity_watch"); +} + +template +void SubchannelData:: + CancelConnectivityWatchLocked(const char* reason) { + if (subchannel_list_->tracer()->enabled()) { + gpr_log(GPR_INFO, + "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR + " (subchannel %p): canceling connectivity watch (%s)", + subchannel_list_->tracer()->name(), subchannel_list_->policy(), + subchannel_list_, Index(), subchannel_list_->num_subchannels(), + subchannel_, reason); + } + GPR_ASSERT(connectivity_notification_pending_); + grpc_subchannel_notify_on_state_change(subchannel_, nullptr, nullptr, + &connectivity_changed_closure_); +} + +template +bool SubchannelData::UpdateConnectedSubchannelLocked() { + // If the subchannel is READY, take a ref to the connected subchannel. + if (pending_connectivity_state_unsafe_ == GRPC_CHANNEL_READY) { + connected_subchannel_ = + grpc_subchannel_get_connected_subchannel(subchannel_); + // If the subchannel became disconnected between the time that READY + // was reported and the time we got here (e.g., between when a + // notification callback is scheduled and when it was actually run in + // the combiner), then the connected subchannel may have disappeared out + // from under us. In that case, we don't actually want to consider the + // subchannel to be in state READY. Instead, we use IDLE as the + // basis for any future connectivity watch; this is the one state that + // the subchannel will never transition back into, so this ensures + // that we will get a notification for the next state, even if that state + // is READY again (e.g., if the subchannel has transitioned back to + // READY before the next watch gets requested). 
+ if (connected_subchannel_ == nullptr) { + if (subchannel_list_->tracer()->enabled()) { + gpr_log(GPR_INFO, + "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR + " (subchannel %p): state is READY but connected subchannel is " + "null; moving to state IDLE", + subchannel_list_->tracer()->name(), subchannel_list_->policy(), + subchannel_list_, Index(), subchannel_list_->num_subchannels(), + subchannel_); + } + pending_connectivity_state_unsafe_ = GRPC_CHANNEL_IDLE; + return false; + } + } else { + // For any state other than READY, unref the connected subchannel. + connected_subchannel_.reset(); + } + return true; +} + +template +void SubchannelData:: + OnConnectivityChangedLocked(void* arg, grpc_error* error) { + SubchannelData* sd = static_cast(arg); + if (sd->subchannel_list_->tracer()->enabled()) { + gpr_log( + GPR_INFO, + "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR + " (subchannel %p): connectivity changed: state=%s, error=%s, " + "shutting_down=%d", + sd->subchannel_list_->tracer()->name(), sd->subchannel_list_->policy(), + sd->subchannel_list_, sd->Index(), + sd->subchannel_list_->num_subchannels(), sd->subchannel_, + grpc_connectivity_state_name(sd->pending_connectivity_state_unsafe_), + grpc_error_string(error), sd->subchannel_list_->shutting_down()); + } + // If shutting down, unref subchannel and stop watching. + if (sd->subchannel_list_->shutting_down() || error == GRPC_ERROR_CANCELLED) { + sd->UnrefSubchannelLocked("connectivity_shutdown"); + sd->StopConnectivityWatchLocked(); + return; + } + // Get or release ref to connected subchannel. + if (!sd->UpdateConnectedSubchannelLocked()) { + // We don't want to report this connectivity state, so renew the watch. + sd->RenewConnectivityWatchLocked(); + return; + } + // Call the subclass's ProcessConnectivityChangeLocked() method. 
+ sd->ProcessConnectivityChangeLocked(sd->pending_connectivity_state_unsafe_, + GRPC_ERROR_REF(error)); +} + +template +void SubchannelData::ShutdownLocked() { + // If there's a pending notification for this subchannel, cancel it; + // the callback is responsible for unreffing the subchannel. + // Otherwise, unref the subchannel directly. + if (connectivity_notification_pending_) { + CancelConnectivityWatchLocked("shutdown"); + } else if (subchannel_ != nullptr) { + UnrefSubchannelLocked("shutdown"); + } +} + +// +// SubchannelList +// + +template +SubchannelList::SubchannelList( + LoadBalancingPolicy* policy, TraceFlag* tracer, const grpc_lb_addresses* addresses, grpc_combiner* combiner, grpc_client_channel_factory* client_channel_factory, - const grpc_channel_args& args, grpc_iomgr_cb_func connectivity_changed_cb); + const grpc_channel_args& args) + : InternallyRefCountedWithTracing(tracer), + policy_(policy), + tracer_(tracer), + combiner_(GRPC_COMBINER_REF(combiner, "subchannel_list")) { + if (tracer_->enabled()) { + gpr_log(GPR_INFO, + "[%s %p] Creating subchannel list %p for %" PRIuPTR " subchannels", + tracer_->name(), policy, this, addresses->num_addresses); + } + subchannels_.reserve(addresses->num_addresses); + // We need to remove the LB addresses in order to be able to compare the + // subchannel keys of subchannels from a different batch of addresses. + static const char* keys_to_remove[] = {GRPC_ARG_SUBCHANNEL_ADDRESS, + GRPC_ARG_LB_ADDRESSES}; + // Create a subchannel for each address. + grpc_subchannel_args sc_args; + for (size_t i = 0; i < addresses->num_addresses; i++) { + // If there were any balancer, we would have chosen grpclb policy instead. 
+ GPR_ASSERT(!addresses->addresses[i].is_balancer); + memset(&sc_args, 0, sizeof(grpc_subchannel_args)); + grpc_arg addr_arg = + grpc_create_subchannel_address_arg(&addresses->addresses[i].address); + grpc_channel_args* new_args = grpc_channel_args_copy_and_add_and_remove( + &args, keys_to_remove, GPR_ARRAY_SIZE(keys_to_remove), &addr_arg, 1); + gpr_free(addr_arg.value.string); + sc_args.args = new_args; + grpc_subchannel* subchannel = grpc_client_channel_factory_create_subchannel( + client_channel_factory, &sc_args); + grpc_channel_args_destroy(new_args); + if (subchannel == nullptr) { + // Subchannel could not be created. + if (tracer_->enabled()) { + char* address_uri = + grpc_sockaddr_to_uri(&addresses->addresses[i].address); + gpr_log(GPR_INFO, + "[%s %p] could not create subchannel for address uri %s, " + "ignoring", + tracer_->name(), policy_, address_uri); + gpr_free(address_uri); + } + continue; + } + if (tracer_->enabled()) { + char* address_uri = + grpc_sockaddr_to_uri(&addresses->addresses[i].address); + gpr_log(GPR_INFO, + "[%s %p] subchannel list %p index %" PRIuPTR + ": Created subchannel %p for address uri %s", + tracer_->name(), policy_, this, subchannels_.size(), subchannel, + address_uri); + gpr_free(address_uri); + } + subchannels_.emplace_back(static_cast(this), + addresses->user_data_vtable, + addresses->addresses[i], subchannel, combiner); + } +} -void grpc_lb_subchannel_list_ref(grpc_lb_subchannel_list* subchannel_list, - const char* reason); +template +SubchannelList::~SubchannelList() { + if (tracer_->enabled()) { + gpr_log(GPR_INFO, "[%s %p] Destroying subchannel_list %p", tracer_->name(), + policy_, this); + } + GRPC_COMBINER_UNREF(combiner_, "subchannel_list"); +} -void grpc_lb_subchannel_list_unref(grpc_lb_subchannel_list* subchannel_list, - const char* reason); +template +void SubchannelList::ShutdownLocked() { + if (tracer_->enabled()) { + gpr_log(GPR_INFO, "[%s %p] Shutting down subchannel_list %p", + tracer_->name(), policy_, 
this); + } + GPR_ASSERT(!shutting_down_); + shutting_down_ = true; + for (size_t i = 0; i < subchannels_.size(); i++) { + SubchannelDataType* sd = &subchannels_[i]; + sd->ShutdownLocked(); + } +} -/// Mark subchannel_list as discarded. Unsubscribes all its subchannels. The -/// connectivity state notification callback will ultimately unref it. -void grpc_lb_subchannel_list_shutdown_and_unref( - grpc_lb_subchannel_list* subchannel_list, const char* reason); +} // namespace grpc_core #endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_SUBCHANNEL_LIST_H */ diff --git a/src/cpp/server/load_reporter/load_data_store.cc b/src/cpp/server/load_reporter/load_data_store.cc new file mode 100644 index 00000000000..70f12c1102d --- /dev/null +++ b/src/cpp/server/load_reporter/load_data_store.cc @@ -0,0 +1,273 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include +#include +#include +#include + +#include "src/cpp/server/load_reporter/load_data_store.h" + +namespace grpc { +namespace load_reporter { + +// Some helper functions. +namespace { + +// Given a map from type K to a set of value type V, finds the set associated +// with the given key and erases the value from the set. If the set becomes +// empty, also erases the key-set pair. Returns true if the value is erased +// successfully. 
+template +bool UnorderedMapOfSetEraseKeyValue(std::unordered_map>& map, + const K& key, const V& value) { + auto it = map.find(key); + if (it != map.end()) { + size_t erased = it->second.erase(value); + if (it->second.size() == 0) { + map.erase(it); + } + return erased; + } + return false; +}; + +// Given a map from type K to a set of value type V, removes the given key and +// the associated set, and returns the set. Returns an empty set if the key is +// not found. +template +std::set UnorderedMapOfSetExtract(std::unordered_map>& map, + const K& key) { + auto it = map.find(key); + if (it != map.end()) { + auto set = std::move(it->second); + map.erase(it); + return set; + } + return {}; +}; + +// From a non-empty container, returns a pointer to a random element. +template +const typename C::value_type* RandomElement(const C& container) { + GPR_ASSERT(!container.empty()); + auto it = container.begin(); + std::advance(it, std::rand() % container.size()); + return &(*it); +} + +} // namespace + +void PerBalancerStore::MergeRow(const LoadRecordKey& key, + const LoadRecordValue& value) { + // During suspension, the load data received will be dropped. + if (!suspended_) { + load_record_map_[key].MergeFrom(value); + gpr_log(GPR_DEBUG, + "[PerBalancerStore %p] Load data merged (Key: %s, Value: %s).", + this, key.ToString().c_str(), value.ToString().c_str()); + } else { + gpr_log(GPR_DEBUG, + "[PerBalancerStore %p] Load data dropped (Key: %s, Value: %s).", + this, key.ToString().c_str(), value.ToString().c_str()); + } + // We always keep track of num_calls_in_progress_, so that when this + // store is resumed, we still have a correct value of + // num_calls_in_progress_. 
+ GPR_ASSERT(static_cast(num_calls_in_progress_) + + value.GetNumCallsInProgressDelta() >= + 0); + num_calls_in_progress_ += value.GetNumCallsInProgressDelta(); +} + +void PerBalancerStore::Suspend() { + suspended_ = true; + load_record_map_.clear(); + gpr_log(GPR_DEBUG, "[PerBalancerStore %p] Suspended.", this); +} + +void PerBalancerStore::Resume() { + suspended_ = false; + gpr_log(GPR_DEBUG, "[PerBalancerStore %p] Resumed.", this); +} + +uint64_t PerBalancerStore::GetNumCallsInProgressForReport() { + GPR_ASSERT(!suspended_); + last_reported_num_calls_in_progress_ = num_calls_in_progress_; + return num_calls_in_progress_; +} + +void PerHostStore::ReportStreamCreated(const grpc::string& lb_id, + const grpc::string& load_key) { + GPR_ASSERT(lb_id != kInvalidLbId); + SetUpForNewLbId(lb_id, load_key); + // Prior to this one, there was no load balancer receiving reports, so we may + // have unassigned orphaned stores to assign to this new balancer. + // TODO(juanlishen): If the load key of this new stream is the same as that of + // some previously adopted orphan store, we may want to take the orphan to + // this stream. Need to discuss with LB team. + if (assigned_stores_.size() == 1) { + for (const auto& p : per_balancer_stores_) { + const grpc::string& other_lb_id = p.first; + const std::unique_ptr& orphaned_store = p.second; + if (other_lb_id != lb_id) { + orphaned_store->Resume(); + AssignOrphanedStore(orphaned_store.get(), lb_id); + } + } + } + // The first connected balancer will adopt the kInvalidLbId. + if (per_balancer_stores_.size() == 1) { + SetUpForNewLbId(kInvalidLbId, ""); + ReportStreamClosed(kInvalidLbId); + } +} + +void PerHostStore::ReportStreamClosed(const grpc::string& lb_id) { + auto it_store_for_gone_lb = per_balancer_stores_.find(lb_id); + GPR_ASSERT(it_store_for_gone_lb != per_balancer_stores_.end()); + // Remove this closed stream from our records.
+ GPR_ASSERT(UnorderedMapOfSetEraseKeyValue( + load_key_to_receiving_lb_ids_, it_store_for_gone_lb->second->load_key(), + lb_id)); + std::set orphaned_stores = + UnorderedMapOfSetExtract(assigned_stores_, lb_id); + // The stores that were assigned to this balancer are orphaned now. They + // should be re-assigned to other balancers which are still receiving reports. + for (PerBalancerStore* orphaned_store : orphaned_stores) { + const grpc::string* new_receiver = nullptr; + auto it = load_key_to_receiving_lb_ids_.find(orphaned_store->load_key()); + if (it != load_key_to_receiving_lb_ids_.end()) { + // First, try to pick from the active balancers with the same load key. + new_receiver = RandomElement(it->second); + } else if (!assigned_stores_.empty()) { + // If failed, pick from all the remaining active balancers. + new_receiver = &(RandomElement(assigned_stores_)->first); + } + if (new_receiver != nullptr) { + AssignOrphanedStore(orphaned_store, *new_receiver); + } else { + // Load data for an LB ID that can't be assigned to any stream should + // be dropped. + orphaned_store->Suspend(); + } + } +} + +PerBalancerStore* PerHostStore::FindPerBalancerStore( + const grpc::string& lb_id) const { + return per_balancer_stores_.find(lb_id) != per_balancer_stores_.end() + ? 
per_balancer_stores_.find(lb_id)->second.get() + : nullptr; +} + +const std::set* PerHostStore::GetAssignedStores( + const grpc::string& lb_id) const { + auto it = assigned_stores_.find(lb_id); + if (it == assigned_stores_.end()) return nullptr; + return &(it->second); +} + +void PerHostStore::AssignOrphanedStore(PerBalancerStore* orphaned_store, + const grpc::string& new_receiver) { + auto it = assigned_stores_.find(new_receiver); + GPR_ASSERT(it != assigned_stores_.end()); + it->second.insert(orphaned_store); + gpr_log(GPR_INFO, + "[PerHostStore %p] Re-assigned orphaned store (%p) with original LB" + " ID of %s to new receiver %s", + this, orphaned_store, orphaned_store->lb_id().c_str(), + new_receiver.c_str()); +} + +void PerHostStore::SetUpForNewLbId(const grpc::string& lb_id, + const grpc::string& load_key) { + // The top-level caller (i.e., LoadReportService) should guarantee the + // lb_id is unique for each reporting stream. + GPR_ASSERT(per_balancer_stores_.find(lb_id) == per_balancer_stores_.end()); + GPR_ASSERT(assigned_stores_.find(lb_id) == assigned_stores_.end()); + load_key_to_receiving_lb_ids_[load_key].insert(lb_id); + std::unique_ptr per_balancer_store( + new PerBalancerStore(lb_id, load_key)); + assigned_stores_[lb_id] = {per_balancer_store.get()}; + per_balancer_stores_[lb_id] = std::move(per_balancer_store); +} + +PerBalancerStore* LoadDataStore::FindPerBalancerStore( + const string& hostname, const string& lb_id) const { + auto it = per_host_stores_.find(hostname); + if (it != per_host_stores_.end()) { + const PerHostStore& per_host_store = it->second; + return per_host_store.FindPerBalancerStore(lb_id); + } else { + return nullptr; + } +} + +void LoadDataStore::MergeRow(const grpc::string& hostname, + const LoadRecordKey& key, + const LoadRecordValue& value) { + PerBalancerStore* per_balancer_store = + FindPerBalancerStore(hostname, key.lb_id()); + if (per_balancer_store != nullptr) { + per_balancer_store->MergeRow(key, value); + return; + } 
+ // Unknown LB ID. Track it until its number of in-progress calls drops to + // zero. + int64_t in_progress_delta = value.GetNumCallsInProgressDelta(); + if (in_progress_delta != 0) { + auto it_tracker = unknown_balancer_id_trackers_.find(key.lb_id()); + if (it_tracker == unknown_balancer_id_trackers_.end()) { + gpr_log( + GPR_DEBUG, + "[LoadDataStore %p] Start tracking unknown balancer (lb_id_: %s).", + this, key.lb_id().c_str()); + unknown_balancer_id_trackers_.insert( + {key.lb_id(), static_cast(in_progress_delta)}); + } else if ((it_tracker->second += in_progress_delta) == 0) { + unknown_balancer_id_trackers_.erase(it_tracker); + gpr_log(GPR_DEBUG, + "[LoadDataStore %p] Stop tracking unknown balancer (lb_id_: %s).", + this, key.lb_id().c_str()); + } + } +} + +const std::set* LoadDataStore::GetAssignedStores( + const grpc::string& hostname, const grpc::string& lb_id) { + auto it = per_host_stores_.find(hostname); + if (it == per_host_stores_.end()) return nullptr; + return it->second.GetAssignedStores(lb_id); +} + +void LoadDataStore::ReportStreamCreated(const grpc::string& hostname, + const grpc::string& lb_id, + const grpc::string& load_key) { + per_host_stores_[hostname].ReportStreamCreated(lb_id, load_key); +} + +void LoadDataStore::ReportStreamClosed(const grpc::string& hostname, + const grpc::string& lb_id) { + auto it_per_host_store = per_host_stores_.find(hostname); + GPR_ASSERT(it_per_host_store != per_host_stores_.end()); + it_per_host_store->second.ReportStreamClosed(lb_id); +} + +} // namespace load_reporter +} // namespace grpc diff --git a/src/cpp/server/load_reporter/load_data_store.h b/src/cpp/server/load_reporter/load_data_store.h new file mode 100644 index 00000000000..feb8b2fd599 --- /dev/null +++ b/src/cpp/server/load_reporter/load_data_store.h @@ -0,0 +1,339 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#ifndef GRPC_SRC_CPP_SERVER_LOAD_REPORTER_LOAD_DATA_STORE_H +#define GRPC_SRC_CPP_SERVER_LOAD_REPORTER_LOAD_DATA_STORE_H + +#include + +#include +#include +#include + +#include +#include + +namespace grpc { +namespace load_reporter { + +constexpr char kInvalidLbId[] = ""; +constexpr uint8_t kLbIdLen = 8; + +// The load data storage is organized in hierarchy. The LoadDataStore is the +// top-level data store. In LoadDataStore, for each host we keep a +// PerHostStore, in which for each balancer we keep a PerBalancerStore. Each +// PerBalancerStore maintains a map of load records, mapping from LoadRecordKey +// to LoadRecordValue. The LoadRecordValue contains a map of customized call +// metrics, mapping from a call metric name to the CallMetricValue. + +// The value of a customized call metric. +class CallMetricValue { + public: + explicit CallMetricValue(uint64_t num_calls = 0, + double total_metric_value = 0) + : num_calls_(num_calls), total_metric_value_(total_metric_value) {} + + void MergeFrom(CallMetricValue other) { + num_calls_ += other.num_calls_; + total_metric_value_ += other.total_metric_value_; + } + + // Getters. + uint64_t num_calls() const { return num_calls_; } + double total_metric_value() const { return total_metric_value_; } + + private: + // The number of calls that finished with this metric. + uint64_t num_calls_ = 0; + // The sum of metric values across all the calls that finished with this + // metric. + double total_metric_value_ = 0; +}; + +// The key of a load record. 
+class LoadRecordKey { + public: + explicit LoadRecordKey(grpc::string lb_id, grpc::string lb_tag, + grpc::string user_id, grpc::string client_ip_hex) + : lb_id_(std::move(lb_id)), + lb_tag_(std::move(lb_tag)), + user_id_(std::move(user_id)), + client_ip_hex_(std::move(client_ip_hex)) {} + + grpc::string ToString() const { + return "[lb_id_=" + lb_id_ + ", lb_tag_=" + lb_tag_ + + ", user_id_=" + user_id_ + ", client_ip_hex_=" + client_ip_hex_ + + "]"; + } + + bool operator==(const LoadRecordKey& other) const { + return lb_id_ == other.lb_id_ && lb_tag_ == other.lb_tag_ && + user_id_ == other.user_id_ && client_ip_hex_ == other.client_ip_hex_; + } + + // Getters. + const grpc::string& lb_id() const { return lb_id_; } + const grpc::string& lb_tag() const { return lb_tag_; } + const grpc::string& user_id() const { return user_id_; } + const grpc::string& client_ip_hex() const { return client_ip_hex_; } + + struct Hasher { + void hash_combine(size_t* seed, const grpc::string& k) const { + *seed ^= std::hash()(k) + 0x9e3779b9 + (*seed << 6) + + (*seed >> 2); + } + + size_t operator()(const LoadRecordKey& k) const { + size_t h = 0; + hash_combine(&h, k.lb_id_); + hash_combine(&h, k.lb_tag_); + hash_combine(&h, k.user_id_); + hash_combine(&h, k.client_ip_hex_); + return h; + } + }; + + private: + grpc::string lb_id_; + grpc::string lb_tag_; + grpc::string user_id_; + grpc::string client_ip_hex_; +}; + +// The value of a load record. 
+// Aggregated counters for one LoadRecordKey: call counts by outcome, byte and
+// latency totals, and a map of customized call metrics keyed by metric name.
+class LoadRecordValue {
+ public:
+  explicit LoadRecordValue(uint64_t start_count = 0, uint64_t ok_count = 0,
+                           uint64_t error_count = 0, double bytes_sent = 0,
+                           double bytes_recv = 0, double latency_ms = 0)
+      : start_count_(start_count),
+        ok_count_(ok_count),
+        error_count_(error_count),
+        bytes_sent_(bytes_sent),
+        bytes_recv_(bytes_recv),
+        latency_ms_(latency_ms) {}
+
+  // Adds every counter of `other` to this value and merges its call metrics
+  // by name; a metric not yet present here starts from a zero value.
+  void MergeFrom(const LoadRecordValue& other) {
+    start_count_ += other.start_count_;
+    ok_count_ += other.ok_count_;
+    error_count_ += other.error_count_;
+    bytes_sent_ += other.bytes_sent_;
+    bytes_recv_ += other.bytes_recv_;
+    latency_ms_ += other.latency_ms_;
+    for (const auto& p : other.call_metrics_) {
+      const grpc::string& key = p.first;
+      const CallMetricValue& value = p.second;
+      call_metrics_[key].MergeFrom(value);
+    }
+  }
+
+  // Started calls minus finished (ok + error) calls. The subtraction is done
+  // on unsigned counters and then converted, so the result is negative when
+  // this record carries more finishes than starts.
+  int64_t GetNumCallsInProgressDelta() const {
+    return static_cast<int64_t>(start_count_ - ok_count_ - error_count_);
+  }
+
+  // Human-readable dump of the scalar counters (call metrics are omitted).
+  grpc::string ToString() const {
+    return "[start_count_=" + grpc::to_string(start_count_) +
+           ", ok_count_=" + grpc::to_string(ok_count_) +
+           ", error_count_=" + grpc::to_string(error_count_) +
+           ", bytes_sent_=" + grpc::to_string(bytes_sent_) +
+           ", bytes_recv_=" + grpc::to_string(bytes_recv_) +
+           ", latency_ms_=" + grpc::to_string(latency_ms_) + "]";
+  }
+
+  // Inserts a call metric. Returns false, leaving the existing entry
+  // untouched, if a metric with the same name is already present.
+  bool InsertCallMetric(const grpc::string& metric_name,
+                        const CallMetricValue& metric_value) {
+    return call_metrics_.insert({metric_name, metric_value}).second;
+  }
+
+  // Getters.
+  uint64_t start_count() const { return start_count_; }
+  uint64_t ok_count() const { return ok_count_; }
+  uint64_t error_count() const { return error_count_; }
+  double bytes_sent() const { return bytes_sent_; }
+  double bytes_recv() const { return bytes_recv_; }
+  double latency_ms() const { return latency_ms_; }
+  const std::unordered_map<grpc::string, CallMetricValue>& call_metrics()
+      const {
+    return call_metrics_;
+  }
+
+ private:
+  uint64_t start_count_ = 0;
+  uint64_t ok_count_ = 0;
+  uint64_t error_count_ = 0;
+  double bytes_sent_ = 0;
+  double bytes_recv_ = 0;
+  double latency_ms_ = 0;
+  std::unordered_map<grpc::string, CallMetricValue> call_metrics_;
+};
+
+// Stores the data associated with a particular LB ID.
+class PerBalancerStore {
+ public:
+  using LoadRecordMap =
+      std::unordered_map<LoadRecordKey, LoadRecordValue, LoadRecordKey::Hasher>;
+
+  PerBalancerStore(grpc::string lb_id, grpc::string load_key)
+      : lb_id_(std::move(lb_id)), load_key_(std::move(load_key)) {}
+
+  // Merge a load record with the given key and value if the store is not
+  // suspended.
+  void MergeRow(const LoadRecordKey& key, const LoadRecordValue& value);
+
+  // Suspend this store, so that no detailed load data will be recorded.
+  void Suspend();
+  // Resume this store from suspension.
+  void Resume();
+  // Is this store suspended or not?
+  bool IsSuspended() const { return suspended_; }
+
+  // True when the in-progress call count differs from the value recorded as
+  // last reported.
+  bool IsNumCallsInProgressChangedSinceLastReport() const {
+    return num_calls_in_progress_ != last_reported_num_calls_in_progress_;
+  }
+
+  uint64_t GetNumCallsInProgressForReport();
+
+  // Human-readable identification of this store, for logging.
+  grpc::string ToString() const {
+    return "[PerBalancerStore lb_id_=" + lb_id_ + " load_key_=" + load_key_ +
+           "]";
+  }
+
+  void ClearLoadRecordMap() { load_record_map_.clear(); }
+
+  // Getters.
+  const grpc::string& lb_id() const { return lb_id_; }
+  const grpc::string& load_key() const { return load_key_; }
+  const LoadRecordMap& load_record_map() const { return load_record_map_; }
+
+ private:
+  grpc::string lb_id_;
+  // TODO(juanlishen): Use bytestring protobuf type?
+  grpc::string load_key_;
+  LoadRecordMap load_record_map_;
+  uint64_t num_calls_in_progress_ = 0;
+  uint64_t last_reported_num_calls_in_progress_ = 0;
+  bool suspended_ = false;
+};
+
+// Stores the data associated with a particular host.
+class PerHostStore {
+ public:
+  // When a report stream is created, a PerBalancerStore is created for the
+  // LB ID (guaranteed unique) associated with that stream. If it is the only
+  // active store, adopt all the orphaned stores. If it is the first created
+  // store, adopt the store of kInvalidLbId.
+  void ReportStreamCreated(const grpc::string& lb_id,
+                           const grpc::string& load_key);
+
+  // When a report stream is closed, the PerBalancerStores assigned to the
+  // associated LB ID need to be re-assigned to other active balancers,
+  // ideally with the same load key. If there is no active balancer, we have
+  // to suspend those stores and drop the incoming load data until they are
+  // resumed.
+  void ReportStreamClosed(const grpc::string& lb_id);
+
+  // Returns null if not found. Caller doesn't own the returned store.
+  PerBalancerStore* FindPerBalancerStore(const grpc::string& lb_id) const;
+
+  // Returns null if lb_id is not found. The returned pointer points to the
+  // underlying data structure, which is not owned by the caller.
+  const std::set<PerBalancerStore*>* GetAssignedStores(
+      const grpc::string& lb_id) const;
+
+ private:
+  // Creates a PerBalancerStore for the given LB ID, assigns the store to
+  // itself, and records the LB ID to the load key.
+  void SetUpForNewLbId(const grpc::string& lb_id, const grpc::string& load_key);
+
+  void AssignOrphanedStore(PerBalancerStore* orphaned_store,
+                           const grpc::string& new_receiver);
+
+  // Key: load key.
+  // Value: the set of LB IDs recorded for that load key.
+  std::unordered_map<grpc::string, std::set<grpc::string>>
+      load_key_to_receiving_lb_ids_;
+
+  // Key: LB ID. The key set includes all the LB IDs that have been
+  // allocated for reporting streams so far.
+  // Value: the unique pointer to the PerBalancerStore of the LB ID.
+  std::unordered_map<grpc::string, std::unique_ptr<PerBalancerStore>>
+      per_balancer_stores_;
+
+  // Key: LB ID. The key set includes the LB IDs of the balancers that are
+  // currently receiving report.
+  // Value: the set of raw pointers to the PerBalancerStores assigned to the LB
+  // ID. Note that the sets in assigned_stores_ form a partition of the value
+  // set of per_balancer_stores_.
+  std::unordered_map<grpc::string, std::set<PerBalancerStore*>>
+      assigned_stores_;
+};
+
+// Thread-unsafe two-level bookkeeper of all the load data.
+// Note: We never remove any store objects from this class, as per the
+// current spec. That's because premature removal of the store objects
+// may lead to loss of critical information, e.g., mapping from lb_id to
+// load_key, and the number of in-progress calls. Such loss will cause
+// information inconsistency when the balancer is re-connected. Keeping
+// all the stores should be fine for PerHostStore, since we assume there
+// should only be a few hostnames. But it's a potential problem for
+// PerBalancerStore.
+class LoadDataStore {
+ public:
+  // Returns null if not found. Caller doesn't own the returned store.
+  PerBalancerStore* FindPerBalancerStore(const grpc::string& hostname,
+                                         const grpc::string& lb_id) const;
+
+  // Returns null if hostname or lb_id is not found. The returned pointer points
+  // to the underlying data structure, which is not owned by the caller.
+  const std::set<PerBalancerStore*>* GetAssignedStores(
+      const grpc::string& hostname, const grpc::string& lb_id);
+
+  // If a PerBalancerStore can be found by the hostname and LB ID in
+  // LoadRecordKey, the load data will be merged to that store. Otherwise,
+  // only track the number of the in-progress calls for this unknown LB ID.
+  void MergeRow(const grpc::string& hostname, const LoadRecordKey& key,
+                const LoadRecordValue& value);
+
+  // Is the given lb_id a tracked unknown LB ID (i.e., the LB ID was associated
+  // with some received load data but unknown to this load data store)?
+  bool IsTrackedUnknownBalancerId(const grpc::string& lb_id) const {
+    return unknown_balancer_id_trackers_.find(lb_id) !=
+           unknown_balancer_id_trackers_.end();
+  }
+
+  // Wrapper around PerHostStore::ReportStreamCreated.
+  void ReportStreamCreated(const grpc::string& hostname,
+                           const grpc::string& lb_id,
+                           const grpc::string& load_key);
+
+  // Wrapper around PerHostStore::ReportStreamClosed.
+  void ReportStreamClosed(const grpc::string& hostname,
+                          const grpc::string& lb_id);
+
+ private:
+  // Buffered data that was fetched from Census but hasn't been sent to
+  // balancer. We need to keep this data ourselves because Census will
+  // delete the data once it's returned.
+  // Key: hostname.
+  std::unordered_map<grpc::string, PerHostStore> per_host_stores_;
+
+  // Tracks the number of in-progress calls for each unknown LB ID.
+  std::unordered_map<grpc::string, uint64_t> unknown_balancer_id_trackers_;
+};
+
+}  // namespace load_reporter
+}  // namespace grpc
+
+#endif  // GRPC_SRC_CPP_SERVER_LOAD_REPORTER_LOAD_DATA_STORE_H
diff --git a/src/objective-c/ProtoRPC/ProtoService.h b/src/objective-c/ProtoRPC/ProtoService.h index 29c4e9be360..c411bed60f0 100644 --- a/src/objective-c/ProtoRPC/ProtoService.h +++ b/src/objective-c/ProtoRPC/ProtoService.h @@ -22,6 +22,12 @@ @protocol GRXWriteable; @class GRXWriter; +@protocol GRPCProtoServiceInit + +- (instancetype)initWithHost:(NSString *)host; + +@end + __attribute__((deprecated("Please use GRPCProtoService."))) @interface ProtoService : NSObject - (instancetype)initWithHost : (NSString *)host packageName diff --git a/src/python/grpcio/grpc_core_dependencies.py b/src/python/grpcio/grpc_core_dependencies.py index b1f9d2018ed..234f7634e2a 100644 --- a/src/python/grpcio/grpc_core_dependencies.py +++ b/src/python/grpcio/grpc_core_dependencies.py @@ -344,7 +344,6 @@ CORE_SOURCE_FILES = [ 'src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.c', 'src/core/ext/filters/client_channel/resolver/fake/fake_resolver.cc',
'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc', - 'src/core/ext/filters/client_channel/lb_policy/subchannel_list.cc', 'src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc', 'src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc', 'src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_posix.cc', diff --git a/test/cpp/server/load_reporter/BUILD b/test/cpp/server/load_reporter/BUILD new file mode 100644 index 00000000000..5cb3a00f82d --- /dev/null +++ b/test/cpp/server/load_reporter/BUILD @@ -0,0 +1,31 @@ +# Copyright 2017 gRPC authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +licenses(["notice"]) # Apache v2 + +load("//bazel:grpc_build_system.bzl", "grpc_cc_test", "grpc_cc_library", "grpc_cc_binary", "grpc_package") + +grpc_package(name = "test/cpp/server/load_reporter") + +grpc_cc_test( + name = "lb_load_data_store_test", + srcs = ["load_data_store_test.cc"], + external_deps = [ + "gtest", + ], + deps = [ + "//:lb_load_data_store", + "//test/core/util:grpc_test_util", + ], +) diff --git a/test/cpp/server/load_reporter/load_data_store_test.cc b/test/cpp/server/load_reporter/load_data_store_test.cc new file mode 100644 index 00000000000..8280dee6a47 --- /dev/null +++ b/test/cpp/server/load_reporter/load_data_store_test.cc @@ -0,0 +1,481 @@ +/* + * + * Copyright 2018 gRPC authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include + +#include +#include + +#include +#include + +#include "src/cpp/server/load_reporter/load_data_store.h" +#include "test/core/util/port.h" +#include "test/core/util/test_config.h" + +namespace grpc { +namespace testing { +namespace { + +using ::grpc::load_reporter::CallMetricValue; +using ::grpc::load_reporter::LoadDataStore; +using ::grpc::load_reporter::LoadRecordKey; +using ::grpc::load_reporter::LoadRecordValue; +using ::grpc::load_reporter::PerBalancerStore; +using ::grpc::load_reporter::kInvalidLbId; + +class LoadDataStoreTest : public ::testing::Test { + public: + LoadDataStoreTest() + : kKey1(kLbId1, kLbTag1, kUser1, kClientIp1), + kKey2(kLbId2, kLbTag2, kUser2, kClientIp2) {} + + // Check whether per_balancer_stores contains a store which was originally + // created for . 
+ bool PerBalancerStoresContains( + const LoadDataStore& load_data_store, + const std::set* per_balancer_stores, + const grpc::string hostname, const grpc::string lb_id, + const grpc::string load_key) { + auto original_per_balancer_store = + load_data_store.FindPerBalancerStore(hostname, lb_id); + EXPECT_NE(original_per_balancer_store, nullptr); + EXPECT_EQ(original_per_balancer_store->lb_id(), lb_id); + EXPECT_EQ(original_per_balancer_store->load_key(), load_key); + for (auto per_balancer_store : *per_balancer_stores) { + if (per_balancer_store == original_per_balancer_store) { + return true; + } + } + return false; + } + + grpc::string FormatLbId(size_t index) { + return "kLbId" + std::to_string(index); + } + + const grpc::string kHostname1 = "kHostname1"; + const grpc::string kHostname2 = "kHostname2"; + const grpc::string kLbId1 = "kLbId1"; + const grpc::string kLbId2 = "kLbId2"; + const grpc::string kLbId3 = "kLbId3"; + const grpc::string kLbId4 = "kLbId4"; + const grpc::string kLoadKey1 = "kLoadKey1"; + const grpc::string kLoadKey2 = "kLoadKey2"; + const grpc::string kLbTag1 = "kLbTag1"; + const grpc::string kLbTag2 = "kLbTag2"; + const grpc::string kUser1 = "kUser1"; + const grpc::string kUser2 = "kUser2"; + const grpc::string kClientIp1 = "00"; + const grpc::string kClientIp2 = "02"; + const grpc::string kMetric1 = "kMetric1"; + const grpc::string kMetric2 = "kMetric2"; + const LoadRecordKey kKey1; + const LoadRecordKey kKey2; +}; + +using PerBalancerStoreTest = LoadDataStoreTest; + +TEST_F(LoadDataStoreTest, AssignToSelf) { + LoadDataStore load_data_store; + load_data_store.ReportStreamCreated(kHostname1, kLbId1, kLoadKey1); + auto assigned_stores = load_data_store.GetAssignedStores(kHostname1, kLbId1); + EXPECT_TRUE(PerBalancerStoresContains(load_data_store, assigned_stores, + kHostname1, kLbId1, kLoadKey1)); +} + +TEST_F(LoadDataStoreTest, ReassignOrphanStores) { + LoadDataStore load_data_store; + load_data_store.ReportStreamCreated(kHostname1, kLbId1, 
kLoadKey1); + load_data_store.ReportStreamCreated(kHostname1, kLbId2, kLoadKey1); + load_data_store.ReportStreamCreated(kHostname1, kLbId3, kLoadKey2); + load_data_store.ReportStreamCreated(kHostname2, kLbId4, kLoadKey1); + // 1. Close the second stream. + load_data_store.ReportStreamClosed(kHostname1, kLbId2); + auto assigned_to_lb_id_1 = + load_data_store.GetAssignedStores(kHostname1, kLbId1); + // The orphaned store is re-assigned to kLbId1 with the same load key. + EXPECT_TRUE(PerBalancerStoresContains(load_data_store, assigned_to_lb_id_1, + kHostname1, kLbId1, kLoadKey1)); + EXPECT_TRUE(PerBalancerStoresContains(load_data_store, assigned_to_lb_id_1, + kHostname1, kLbId2, kLoadKey1)); + // 2. Close the first stream. + load_data_store.ReportStreamClosed(kHostname1, kLbId1); + auto assigned_to_lb_id_3 = + load_data_store.GetAssignedStores(kHostname1, kLbId3); + // The orphaned stores are re-assigned to kLbId3 with the same host, + // because there isn't any LB with the same load key. + EXPECT_TRUE(PerBalancerStoresContains(load_data_store, assigned_to_lb_id_3, + kHostname1, kLbId1, kLoadKey1)); + EXPECT_TRUE(PerBalancerStoresContains(load_data_store, assigned_to_lb_id_3, + kHostname1, kLbId2, kLoadKey1)); + EXPECT_TRUE(PerBalancerStoresContains(load_data_store, assigned_to_lb_id_3, + kHostname1, kLbId3, kLoadKey2)); + // 3. Close the third stream. + load_data_store.ReportStreamClosed(kHostname1, kLbId3); + auto assigned_to_lb_id_4 = + load_data_store.GetAssignedStores(kHostname2, kLbId4); + // There is no active LB for the first host now. kLbId4 is active but + // it's for the second host, so it wll NOT adopt the orphaned stores. 
+ EXPECT_FALSE(PerBalancerStoresContains(load_data_store, assigned_to_lb_id_4, + kHostname1, kLbId1, kLoadKey1)); + EXPECT_FALSE(PerBalancerStoresContains(load_data_store, assigned_to_lb_id_4, + kHostname1, kLbId2, kLoadKey1)); + EXPECT_FALSE(PerBalancerStoresContains(load_data_store, assigned_to_lb_id_4, + kHostname1, kLbId3, kLoadKey2)); + EXPECT_TRUE(PerBalancerStoresContains(load_data_store, assigned_to_lb_id_4, + kHostname2, kLbId4, kLoadKey1)); +} + +TEST_F(LoadDataStoreTest, OrphanAssignmentIsSticky) { + LoadDataStore load_data_store; + std::set active_lb_ids; + size_t num_lb_ids = 1000; + for (size_t i = 0; i < num_lb_ids; ++i) { + load_data_store.ReportStreamCreated(kHostname1, FormatLbId(i), kLoadKey1); + active_lb_ids.insert(FormatLbId(i)); + } + grpc::string orphaned_lb_id = FormatLbId(std::rand() % num_lb_ids); + load_data_store.ReportStreamClosed(kHostname1, orphaned_lb_id); + active_lb_ids.erase(orphaned_lb_id); + // Find which LB is assigned the orphaned store. + grpc::string assigned_lb_id = ""; + for (auto lb_id : active_lb_ids) { + if (PerBalancerStoresContains( + load_data_store, + load_data_store.GetAssignedStores(kHostname1, lb_id), kHostname1, + orphaned_lb_id, kLoadKey1)) { + assigned_lb_id = lb_id; + break; + } + } + EXPECT_STRNE(assigned_lb_id.c_str(), ""); + // Close 10 more stream, skipping the assigned_lb_id. The assignment of + // orphaned_lb_id shouldn't change. + for (size_t _ = 0; _ < 10; ++_) { + grpc::string lb_id_to_close = ""; + for (auto lb_id : active_lb_ids) { + if (lb_id != assigned_lb_id) { + lb_id_to_close = lb_id; + break; + } + } + EXPECT_STRNE(lb_id_to_close.c_str(), ""); + load_data_store.ReportStreamClosed(kHostname1, lb_id_to_close); + active_lb_ids.erase(lb_id_to_close); + EXPECT_TRUE(PerBalancerStoresContains( + load_data_store, + load_data_store.GetAssignedStores(kHostname1, assigned_lb_id), + kHostname1, orphaned_lb_id, kLoadKey1)); + } + // Close the assigned_lb_id, orphaned_lb_id will be re-assigned again. 
+ load_data_store.ReportStreamClosed(kHostname1, assigned_lb_id); + active_lb_ids.erase(assigned_lb_id); + size_t orphaned_lb_id_occurences = 0; + for (auto lb_id : active_lb_ids) { + if (PerBalancerStoresContains( + load_data_store, + load_data_store.GetAssignedStores(kHostname1, lb_id), kHostname1, + orphaned_lb_id, kLoadKey1)) { + orphaned_lb_id_occurences++; + } + } + EXPECT_EQ(orphaned_lb_id_occurences, 1U); +} + +TEST_F(LoadDataStoreTest, HostTemporarilyLoseAllStreams) { + LoadDataStore load_data_store; + load_data_store.ReportStreamCreated(kHostname1, kLbId1, kLoadKey1); + load_data_store.ReportStreamCreated(kHostname2, kLbId2, kLoadKey1); + auto store_lb_id_1 = load_data_store.FindPerBalancerStore(kHostname1, kLbId1); + auto store_invalid_lb_id_1 = + load_data_store.FindPerBalancerStore(kHostname1, kInvalidLbId); + EXPECT_FALSE(store_lb_id_1->IsSuspended()); + EXPECT_FALSE(store_invalid_lb_id_1->IsSuspended()); + // Disconnect all the streams of the first host. + load_data_store.ReportStreamClosed(kHostname1, kLbId1); + // All the streams of that host are suspended. + EXPECT_TRUE(store_lb_id_1->IsSuspended()); + EXPECT_TRUE(store_invalid_lb_id_1->IsSuspended()); + // Detailed load data won't be kept when the PerBalancerStore is suspended. + store_lb_id_1->MergeRow(kKey1, LoadRecordValue()); + store_invalid_lb_id_1->MergeRow(kKey1, LoadRecordValue()); + EXPECT_EQ(store_lb_id_1->load_record_map().size(), 0U); + EXPECT_EQ(store_invalid_lb_id_1->load_record_map().size(), 0U); + // The stores for different hosts won't mix, even if the load key is the same. + auto assigned_to_lb_id_2 = + load_data_store.GetAssignedStores(kHostname2, kLbId2); + EXPECT_EQ(assigned_to_lb_id_2->size(), 2U); + EXPECT_TRUE(PerBalancerStoresContains(load_data_store, assigned_to_lb_id_2, + kHostname2, kLbId2, kLoadKey1)); + EXPECT_TRUE(PerBalancerStoresContains(load_data_store, assigned_to_lb_id_2, + kHostname2, kInvalidLbId, "")); + // A new stream is created for the first host. 
+ load_data_store.ReportStreamCreated(kHostname1, kLbId3, kLoadKey2); + // The stores for the first host are resumed. + EXPECT_FALSE(store_lb_id_1->IsSuspended()); + EXPECT_FALSE(store_invalid_lb_id_1->IsSuspended()); + store_lb_id_1->MergeRow(kKey1, LoadRecordValue()); + store_invalid_lb_id_1->MergeRow(kKey1, LoadRecordValue()); + EXPECT_EQ(store_lb_id_1->load_record_map().size(), 1U); + EXPECT_EQ(store_invalid_lb_id_1->load_record_map().size(), 1U); + // The resumed stores are assigned to the new LB. + auto assigned_to_lb_id_3 = + load_data_store.GetAssignedStores(kHostname1, kLbId3); + EXPECT_EQ(assigned_to_lb_id_3->size(), 3U); + EXPECT_TRUE(PerBalancerStoresContains(load_data_store, assigned_to_lb_id_3, + kHostname1, kLbId1, kLoadKey1)); + EXPECT_TRUE(PerBalancerStoresContains(load_data_store, assigned_to_lb_id_3, + kHostname1, kInvalidLbId, "")); + EXPECT_TRUE(PerBalancerStoresContains(load_data_store, assigned_to_lb_id_3, + kHostname1, kLbId3, kLoadKey2)); +} + +TEST_F(LoadDataStoreTest, OneStorePerLbId) { + LoadDataStore load_data_store; + EXPECT_EQ(load_data_store.FindPerBalancerStore(kHostname1, kLbId1), nullptr); + EXPECT_EQ(load_data_store.FindPerBalancerStore(kHostname1, kInvalidLbId), + nullptr); + EXPECT_EQ(load_data_store.FindPerBalancerStore(kHostname2, kLbId2), nullptr); + EXPECT_EQ(load_data_store.FindPerBalancerStore(kHostname2, kLbId3), nullptr); + // Create The first stream. + load_data_store.ReportStreamCreated(kHostname1, kLbId1, kLoadKey1); + auto store_lb_id_1 = load_data_store.FindPerBalancerStore(kHostname1, kLbId1); + auto store_invalid_lb_id_1 = + load_data_store.FindPerBalancerStore(kHostname1, kInvalidLbId); + // Two stores will be created: one is for the stream; the other one is for + // kInvalidLbId. 
+ EXPECT_NE(store_lb_id_1, nullptr); + EXPECT_NE(store_invalid_lb_id_1, nullptr); + EXPECT_NE(store_lb_id_1, store_invalid_lb_id_1); + EXPECT_EQ(load_data_store.FindPerBalancerStore(kHostname2, kLbId2), nullptr); + EXPECT_EQ(load_data_store.FindPerBalancerStore(kHostname2, kLbId3), nullptr); + // Create the second stream. + load_data_store.ReportStreamCreated(kHostname2, kLbId3, kLoadKey1); + auto store_lb_id_3 = load_data_store.FindPerBalancerStore(kHostname2, kLbId3); + auto store_invalid_lb_id_2 = + load_data_store.FindPerBalancerStore(kHostname2, kInvalidLbId); + EXPECT_NE(store_lb_id_3, nullptr); + EXPECT_NE(store_invalid_lb_id_2, nullptr); + EXPECT_NE(store_lb_id_3, store_invalid_lb_id_2); + // The PerBalancerStores created for different hosts are independent. + EXPECT_NE(store_lb_id_3, store_invalid_lb_id_1); + EXPECT_NE(store_invalid_lb_id_2, store_invalid_lb_id_1); + EXPECT_EQ(load_data_store.FindPerBalancerStore(kHostname2, kLbId2), nullptr); +} + +TEST_F(LoadDataStoreTest, ExactlyOnceAssignment) { + LoadDataStore load_data_store; + size_t num_create = 100; + size_t num_close = 50; + for (size_t i = 0; i < num_create; ++i) { + load_data_store.ReportStreamCreated(kHostname1, FormatLbId(i), kLoadKey1); + } + for (size_t i = 0; i < num_close; ++i) { + load_data_store.ReportStreamClosed(kHostname1, FormatLbId(i)); + } + std::set reported_lb_ids; + for (size_t i = num_close; i < num_create; ++i) { + for (auto assigned_store : + *load_data_store.GetAssignedStores(kHostname1, FormatLbId(i))) { + EXPECT_TRUE(reported_lb_ids.insert(assigned_store->lb_id()).second); + } + } + // Add one for kInvalidLbId. + EXPECT_EQ(reported_lb_ids.size(), (num_create + 1)); + EXPECT_NE(reported_lb_ids.find(kInvalidLbId), reported_lb_ids.end()); +} + +TEST_F(LoadDataStoreTest, UnknownBalancerIdTracking) { + LoadDataStore load_data_store; + load_data_store.ReportStreamCreated(kHostname1, kLbId1, kLoadKey1); + // Merge data for a known LB ID. 
+ LoadRecordValue v1(192); + load_data_store.MergeRow(kHostname1, kKey1, v1); + // Merge data for unknown LB ID. + LoadRecordValue v2(23); + EXPECT_FALSE(load_data_store.IsTrackedUnknownBalancerId(kLbId2)); + load_data_store.MergeRow( + kHostname1, LoadRecordKey(kLbId2, kLbTag1, kUser1, kClientIp1), v2); + EXPECT_TRUE(load_data_store.IsTrackedUnknownBalancerId(kLbId2)); + LoadRecordValue v3(952); + load_data_store.MergeRow( + kHostname2, LoadRecordKey(kLbId3, kLbTag1, kUser1, kClientIp1), v3); + EXPECT_TRUE(load_data_store.IsTrackedUnknownBalancerId(kLbId3)); + // The data kept for a known LB ID is correct. + auto store_lb_id_1 = load_data_store.FindPerBalancerStore(kHostname1, kLbId1); + EXPECT_EQ(store_lb_id_1->load_record_map().size(), 1U); + EXPECT_EQ(store_lb_id_1->load_record_map().find(kKey1)->second.start_count(), + v1.start_count()); + EXPECT_EQ(store_lb_id_1->GetNumCallsInProgressForReport(), v1.start_count()); + // No PerBalancerStore created for Unknown LB ID. + EXPECT_EQ(load_data_store.FindPerBalancerStore(kHostname1, kLbId2), nullptr); + EXPECT_EQ(load_data_store.FindPerBalancerStore(kHostname2, kLbId3), nullptr); + // End all the started RPCs for kLbId1. + LoadRecordValue v4(0, v1.start_count()); + load_data_store.MergeRow(kHostname1, kKey1, v4); + EXPECT_EQ(store_lb_id_1->load_record_map().size(), 1U); + EXPECT_EQ(store_lb_id_1->load_record_map().find(kKey1)->second.start_count(), + v1.start_count()); + EXPECT_EQ(store_lb_id_1->load_record_map().find(kKey1)->second.ok_count(), + v4.ok_count()); + EXPECT_EQ(store_lb_id_1->GetNumCallsInProgressForReport(), 0U); + EXPECT_FALSE(load_data_store.IsTrackedUnknownBalancerId(kLbId1)); + // End all the started RPCs for kLbId2. + LoadRecordValue v5(0, v2.start_count()); + load_data_store.MergeRow( + kHostname1, LoadRecordKey(kLbId2, kLbTag1, kUser1, kClientIp1), v5); + EXPECT_FALSE(load_data_store.IsTrackedUnknownBalancerId(kLbId2)); + // End some of the started RPCs for kLbId3. 
+ LoadRecordValue v6(0, v3.start_count() / 2); + load_data_store.MergeRow( + kHostname2, LoadRecordKey(kLbId3, kLbTag1, kUser1, kClientIp1), v6); + EXPECT_TRUE(load_data_store.IsTrackedUnknownBalancerId(kLbId3)); +} + +TEST_F(PerBalancerStoreTest, Suspend) { + PerBalancerStore per_balancer_store(kLbId1, kLoadKey1); + EXPECT_FALSE(per_balancer_store.IsSuspended()); + // Suspend the store. + per_balancer_store.Suspend(); + EXPECT_TRUE(per_balancer_store.IsSuspended()); + EXPECT_EQ(0U, per_balancer_store.load_record_map().size()); + // Data merged when the store is suspended won't be kept. + LoadRecordValue v1(139, 19); + per_balancer_store.MergeRow(kKey1, v1); + EXPECT_EQ(0U, per_balancer_store.load_record_map().size()); + // Resume the store. + per_balancer_store.Resume(); + EXPECT_FALSE(per_balancer_store.IsSuspended()); + EXPECT_EQ(0U, per_balancer_store.load_record_map().size()); + // Data merged after the store is resumed will be kept. + LoadRecordValue v2(23, 0, 51); + per_balancer_store.MergeRow(kKey1, v2); + EXPECT_EQ(1U, per_balancer_store.load_record_map().size()); + // Suspend the store. + per_balancer_store.Suspend(); + EXPECT_TRUE(per_balancer_store.IsSuspended()); + EXPECT_EQ(0U, per_balancer_store.load_record_map().size()); + // Data merged when the store is suspended won't be kept. + LoadRecordValue v3(62, 11); + per_balancer_store.MergeRow(kKey1, v3); + EXPECT_EQ(0U, per_balancer_store.load_record_map().size()); + // Resume the store. + per_balancer_store.Resume(); + EXPECT_FALSE(per_balancer_store.IsSuspended()); + EXPECT_EQ(0U, per_balancer_store.load_record_map().size()); + // Data merged after the store is resumed will be kept. + LoadRecordValue v4(225, 98); + per_balancer_store.MergeRow(kKey1, v4); + EXPECT_EQ(1U, per_balancer_store.load_record_map().size()); + // In-progress count is always kept. 
+ EXPECT_EQ(per_balancer_store.GetNumCallsInProgressForReport(), + v1.start_count() - v1.ok_count() + v2.start_count() - + v2.error_count() + v3.start_count() - v3.ok_count() + + v4.start_count() - v4.ok_count()); +} + +TEST_F(PerBalancerStoreTest, DataAggregation) { + PerBalancerStore per_balancer_store(kLbId1, kLoadKey1); + // Construct some Values. + LoadRecordValue v1(992, 34, 13, 234.0, 164.0, 173467.38); + v1.InsertCallMetric(kMetric1, CallMetricValue(3, 2773.2)); + LoadRecordValue v2(4842, 213, 9, 393.0, 974.0, 1345.2398); + v2.InsertCallMetric(kMetric1, CallMetricValue(7, 25.234)); + v2.InsertCallMetric(kMetric2, CallMetricValue(2, 387.08)); + // v3 doesn't change the number of in-progress RPCs. + LoadRecordValue v3(293, 55, 293 - 55, 28764, 5284, 5772); + v3.InsertCallMetric(kMetric1, CallMetricValue(61, 3465.0)); + v3.InsertCallMetric(kMetric2, CallMetricValue(13, 672.0)); + // The initial state of the store. + uint64_t num_calls_in_progress = 0; + EXPECT_FALSE(per_balancer_store.IsNumCallsInProgressChangedSinceLastReport()); + EXPECT_EQ(per_balancer_store.GetNumCallsInProgressForReport(), + num_calls_in_progress); + // Merge v1 and get report of the number of in-progress calls. + per_balancer_store.MergeRow(kKey1, v1); + EXPECT_TRUE(per_balancer_store.IsNumCallsInProgressChangedSinceLastReport()); + EXPECT_EQ(per_balancer_store.GetNumCallsInProgressForReport(), + num_calls_in_progress += + (v1.start_count() - v1.ok_count() - v1.error_count())); + EXPECT_FALSE(per_balancer_store.IsNumCallsInProgressChangedSinceLastReport()); + // Merge v2 and get report of the number of in-progress calls. 
+ per_balancer_store.MergeRow(kKey2, v2); + EXPECT_TRUE(per_balancer_store.IsNumCallsInProgressChangedSinceLastReport()); + EXPECT_EQ(per_balancer_store.GetNumCallsInProgressForReport(), + num_calls_in_progress += + (v2.start_count() - v2.ok_count() - v2.error_count())); + EXPECT_FALSE(per_balancer_store.IsNumCallsInProgressChangedSinceLastReport()); + // Merge v3 and get report of the number of in-progress calls. + per_balancer_store.MergeRow(kKey1, v3); + EXPECT_FALSE(per_balancer_store.IsNumCallsInProgressChangedSinceLastReport()); + EXPECT_EQ(per_balancer_store.GetNumCallsInProgressForReport(), + num_calls_in_progress); + // LoadRecordValue for kKey1 is aggregated correctly. + LoadRecordValue value_for_key1 = + per_balancer_store.load_record_map().find(kKey1)->second; + EXPECT_EQ(value_for_key1.start_count(), v1.start_count() + v3.start_count()); + EXPECT_EQ(value_for_key1.ok_count(), v1.ok_count() + v3.ok_count()); + EXPECT_EQ(value_for_key1.error_count(), v1.error_count() + v3.error_count()); + EXPECT_EQ(value_for_key1.bytes_sent(), v1.bytes_sent() + v3.bytes_sent()); + EXPECT_EQ(value_for_key1.bytes_recv(), v1.bytes_recv() + v3.bytes_recv()); + EXPECT_EQ(value_for_key1.latency_ms(), v1.latency_ms() + v3.latency_ms()); + EXPECT_EQ(value_for_key1.call_metrics().size(), 2U); + EXPECT_EQ(value_for_key1.call_metrics().find(kMetric1)->second.num_calls(), + v1.call_metrics().find(kMetric1)->second.num_calls() + + v3.call_metrics().find(kMetric1)->second.num_calls()); + EXPECT_EQ( + value_for_key1.call_metrics().find(kMetric1)->second.total_metric_value(), + v1.call_metrics().find(kMetric1)->second.total_metric_value() + + v3.call_metrics().find(kMetric1)->second.total_metric_value()); + EXPECT_EQ(value_for_key1.call_metrics().find(kMetric2)->second.num_calls(), + v3.call_metrics().find(kMetric2)->second.num_calls()); + EXPECT_EQ( + value_for_key1.call_metrics().find(kMetric2)->second.total_metric_value(), + 
v3.call_metrics().find(kMetric2)->second.total_metric_value()); + // LoadRecordValue for kKey2 is aggregated (trivially) correctly. + LoadRecordValue value_for_key2 = + per_balancer_store.load_record_map().find(kKey2)->second; + EXPECT_EQ(value_for_key2.start_count(), v2.start_count()); + EXPECT_EQ(value_for_key2.ok_count(), v2.ok_count()); + EXPECT_EQ(value_for_key2.error_count(), v2.error_count()); + EXPECT_EQ(value_for_key2.bytes_sent(), v2.bytes_sent()); + EXPECT_EQ(value_for_key2.bytes_recv(), v2.bytes_recv()); + EXPECT_EQ(value_for_key2.latency_ms(), v2.latency_ms()); + EXPECT_EQ(value_for_key2.call_metrics().size(), 2U); + EXPECT_EQ(value_for_key2.call_metrics().find(kMetric1)->second.num_calls(), + v2.call_metrics().find(kMetric1)->second.num_calls()); + EXPECT_EQ( + value_for_key2.call_metrics().find(kMetric1)->second.total_metric_value(), + v2.call_metrics().find(kMetric1)->second.total_metric_value()); + EXPECT_EQ(value_for_key2.call_metrics().find(kMetric2)->second.num_calls(), + v2.call_metrics().find(kMetric2)->second.num_calls()); + EXPECT_EQ( + value_for_key2.call_metrics().find(kMetric2)->second.total_metric_value(), + v2.call_metrics().find(kMetric2)->second.total_metric_value()); +} + +} // namespace +} // namespace testing +} // namespace grpc + +int main(int argc, char** argv) { + grpc_test_init(argc, argv); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tools/codegen/core/gen_stats_data.py b/tools/codegen/core/gen_stats_data.py index 5c9d9e5ea52..3ebd00d049f 100755 --- a/tools/codegen/core/gen_stats_data.py +++ b/tools/codegen/core/gen_stats_data.py @@ -230,13 +230,11 @@ with open('src/core/lib/debug/stats_data.h', 'w') as H: print >> H, "#ifndef GRPC_CORE_LIB_DEBUG_STATS_DATA_H" print >> H, "#define GRPC_CORE_LIB_DEBUG_STATS_DATA_H" print >> H + print >> H, "#include " + print >> H print >> H, "#include " print >> H, "#include \"src/core/lib/iomgr/exec_ctx.h\"" print >> H - print >> H, "#ifdef __cplusplus" 
- print >> H, "extern \"C\" {" - print >> H, "#endif" - print >> H for typename, instances in sorted(inst_map.items()): print >> H, "typedef enum {" @@ -289,10 +287,6 @@ with open('src/core/lib/debug/stats_data.h', 'w') as H: print >> H, "extern void (*const grpc_stats_inc_histogram[%d])(int x);" % len( inst_map['Histogram']) - print >> H - print >> H, "#ifdef __cplusplus" - print >> H, "}" - print >> H, "#endif" print >> H print >> H, "#endif /* GRPC_CORE_LIB_DEBUG_STATS_DATA_H */" @@ -316,10 +310,13 @@ with open('src/core/lib/debug/stats_data.cc', 'w') as C: [C], ["Automatically generated by tools/codegen/core/gen_stats_data.py"]) - print >> C, "#include \"src/core/lib/debug/stats_data.h\"" + print >> C, "#include " + print >> C print >> C, "#include \"src/core/lib/debug/stats.h\"" + print >> C, "#include \"src/core/lib/debug/stats_data.h\"" + print >> C, "#include \"src/core/lib/gpr/useful.h\"" print >> C, "#include \"src/core/lib/iomgr/exec_ctx.h\"" - print >> C, "#include " + print >> C histo_code = [] for histogram in inst_map['Histogram']: diff --git a/tools/doxygen/Doxyfile.core.internal b/tools/doxygen/Doxyfile.core.internal index b5869a6231e..b28641156fa 100644 --- a/tools/doxygen/Doxyfile.core.internal +++ b/tools/doxygen/Doxyfile.core.internal @@ -895,7 +895,6 @@ src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balan src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.h \ src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc \ src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc \ -src/core/ext/filters/client_channel/lb_policy/subchannel_list.cc \ src/core/ext/filters/client_channel/lb_policy/subchannel_list.h \ src/core/ext/filters/client_channel/lb_policy_factory.cc \ src/core/ext/filters/client_channel/lb_policy_factory.h \ diff --git a/tools/gce/create_windows_debug_worker.sh b/tools/gce/create_windows_debug_worker.sh index 3829b71e190..3625df832a6 
100755 --- a/tools/gce/create_windows_debug_worker.sh +++ b/tools/gce/create_windows_debug_worker.sh @@ -44,12 +44,12 @@ gcloud compute disks create "$TMP_DISK_NAME" \ echo 'Created scratch disk, waiting for it to become available.' sleep 15 +# The image version might need updating. gcloud compute instances create "$INSTANCE_NAME" \ --project="$CLOUD_PROJECT" \ --zone "$ZONE" \ --machine-type "$MACHINE_TYPE" \ --image-project google.com:kokoro \ - # The version might need updating. --image kokoro-win7build-v11-prod-debug \ --boot-disk-size 500 \ --boot-disk-type pd-ssd \ diff --git a/tools/internal_ci/linux/grpc_bazel_on_foundry_base.sh b/tools/internal_ci/linux/grpc_bazel_on_foundry_base.sh index 3102992beff..d38356c9669 100755 --- a/tools/internal_ci/linux/grpc_bazel_on_foundry_base.sh +++ b/tools/internal_ci/linux/grpc_bazel_on_foundry_base.sh @@ -22,8 +22,8 @@ mkdir -p ${KOKORO_KEYSTORE_DIR} cp ${KOKORO_GFILE_DIR}/GrpcTesting-d0eeee2db331.json ${KOKORO_KEYSTORE_DIR}/4321_grpc-testing-service temp_dir=$(mktemp -d) -ln -f "${KOKORO_GFILE_DIR}/bazel-canary" ${temp_dir}/bazel -chmod 755 "${KOKORO_GFILE_DIR}/bazel-canary" +ln -f "${KOKORO_GFILE_DIR}/bazel-release-0.12.0" ${temp_dir}/bazel +chmod 755 "${KOKORO_GFILE_DIR}/bazel-release-0.12.0" export PATH="${temp_dir}:${PATH}" # This should show ${temp_dir}/bazel which bazel @@ -53,4 +53,14 @@ source tools/internal_ci/helper_scripts/prepare_build_linux_rc --crosstool_top=@com_github_bazelbuild_bazeltoolchains//configs/debian8_clang/0.3.0/bazel_0.10.0:toolchain \ --define GRPC_PORT_ISOLATED_RUNTIME=1 \ $1 \ - -- //test/... + -- //test/... 
|| FAILED="true" + +if [ "$UPLOAD_TEST_RESULTS" != "" ] +then + python ./tools/run_tests/python_utils/upload_rbe_results.py +fi + +if [ "$FAILED" != "" ] +then + exit 1 +fi diff --git a/tools/internal_ci/linux/grpc_msan_on_foundry.sh b/tools/internal_ci/linux/grpc_msan_on_foundry.sh index 6858d971cb7..390ca3428de 100644 --- a/tools/internal_ci/linux/grpc_msan_on_foundry.sh +++ b/tools/internal_ci/linux/grpc_msan_on_foundry.sh @@ -23,8 +23,8 @@ mkdir -p ${KOKORO_KEYSTORE_DIR} cp ${KOKORO_GFILE_DIR}/GrpcTesting-d0eeee2db331.json ${KOKORO_KEYSTORE_DIR}/4321_grpc-testing-service temp_dir=$(mktemp -d) -ln -f "${KOKORO_GFILE_DIR}/bazel-canary" ${temp_dir}/bazel -chmod 755 "${KOKORO_GFILE_DIR}/bazel-canary" +ln -f "${KOKORO_GFILE_DIR}/bazel-release-0.12.0" ${temp_dir}/bazel +chmod 755 "${KOKORO_GFILE_DIR}/bazel-release-0.12.0" export PATH="${temp_dir}:${PATH}" # This should show ${temp_dir}/bazel which bazel diff --git a/tools/internal_ci/linux/grpc_ubsan_on_foundry.sh b/tools/internal_ci/linux/grpc_ubsan_on_foundry.sh index 0f0c12db12b..ba50011e5b8 100644 --- a/tools/internal_ci/linux/grpc_ubsan_on_foundry.sh +++ b/tools/internal_ci/linux/grpc_ubsan_on_foundry.sh @@ -23,8 +23,8 @@ mkdir -p ${KOKORO_KEYSTORE_DIR} cp ${KOKORO_GFILE_DIR}/GrpcTesting-d0eeee2db331.json ${KOKORO_KEYSTORE_DIR}/4321_grpc-testing-service temp_dir=$(mktemp -d) -ln -f "${KOKORO_GFILE_DIR}/bazel-canary" ${temp_dir}/bazel -chmod 755 "${KOKORO_GFILE_DIR}/bazel-canary" +ln -f "${KOKORO_GFILE_DIR}/bazel-release-0.12.0" ${temp_dir}/bazel +chmod 755 "${KOKORO_GFILE_DIR}/bazel-release-0.12.0" export PATH="${temp_dir}:${PATH}" # This should show ${temp_dir}/bazel which bazel diff --git a/tools/run_tests/artifacts/build_artifact_python.sh b/tools/run_tests/artifacts/build_artifact_python.sh index 9ea0f05660a..cd794a1391b 100755 --- a/tools/run_tests/artifacts/build_artifact_python.sh +++ b/tools/run_tests/artifacts/build_artifact_python.sh @@ -17,7 +17,6 @@ set -ex cd "$(dirname "$0")/../../.." 
-export GRPC_PYTHON_USE_CUSTOM_BDIST=0 export GRPC_PYTHON_BUILD_WITH_CYTHON=1 export PYTHON=${PYTHON:-python} export PIP=${PIP:-pip} diff --git a/tools/run_tests/generated/sources_and_headers.json b/tools/run_tests/generated/sources_and_headers.json index 41722e063d5..6659724fb4c 100644 --- a/tools/run_tests/generated/sources_and_headers.json +++ b/tools/run_tests/generated/sources_and_headers.json @@ -3903,6 +3903,26 @@ "third_party": false, "type": "target" }, + { + "deps": [ + "gpr", + "gpr_test_util", + "grpc", + "grpc++", + "grpc++_test_util", + "grpc_test_util", + "lb_load_data_store" + ], + "headers": [], + "is_filegroup": false, + "language": "c++", + "name": "lb_load_data_store_test", + "src": [ + "test/cpp/server/load_reporter/load_data_store_test.cc" + ], + "third_party": false, + "type": "target" + }, { "deps": [ "gpr", @@ -7467,6 +7487,23 @@ "third_party": false, "type": "lib" }, + { + "deps": [ + "grpc++" + ], + "headers": [ + "src/cpp/server/load_reporter/load_data_store.h" + ], + "is_filegroup": false, + "language": "c++", + "name": "lb_load_data_store", + "src": [ + "src/cpp/server/load_reporter/load_data_store.cc", + "src/cpp/server/load_reporter/load_data_store.h" + ], + "third_party": false, + "type": "lib" + }, { "deps": [ "grpc", @@ -9863,7 +9900,6 @@ "language": "c", "name": "grpc_lb_subchannel_list", "src": [ - "src/core/ext/filters/client_channel/lb_policy/subchannel_list.cc", "src/core/ext/filters/client_channel/lb_policy/subchannel_list.h" ], "third_party": false, diff --git a/tools/run_tests/generated/tests.json b/tools/run_tests/generated/tests.json index 067d9b300d3..8eb5303e823 100644 --- a/tools/run_tests/generated/tests.json +++ b/tools/run_tests/generated/tests.json @@ -4389,6 +4389,30 @@ ], "uses_polling": true }, + { + "args": [], + "benchmark": false, + "ci_platforms": [ + "linux", + "mac", + "posix", + "windows" + ], + "cpu_cost": 1.0, + "exclude_configs": [], + "exclude_iomgrs": [], + "flaky": false, + "gtest": true, + 
"language": "c++", + "name": "lb_load_data_store_test", + "platforms": [ + "linux", + "mac", + "posix", + "windows" + ], + "uses_polling": true + }, { "args": [], "benchmark": false, diff --git a/tools/run_tests/python_utils/upload_rbe_results.py b/tools/run_tests/python_utils/upload_rbe_results.py new file mode 100644 index 00000000000..d3020248832 --- /dev/null +++ b/tools/run_tests/python_utils/upload_rbe_results.py @@ -0,0 +1,182 @@ +#!/usr/bin/env python +# Copyright 2017 gRPC authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Uploads RBE results to BigQuery""" + +import argparse +import os +import json +import sys +import urllib2 +import uuid + +gcp_utils_dir = os.path.abspath( + os.path.join(os.path.dirname(__file__), '../../gcp/utils')) +sys.path.append(gcp_utils_dir) +import big_query_utils + +_DATASET_ID = 'jenkins_test_results' +_DESCRIPTION = 'Test results from master RBE builds on Kokoro' +# 90 days in milliseconds +_EXPIRATION_MS = 90 * 24 * 60 * 60 * 1000 +_PARTITION_TYPE = 'DAY' +_PROJECT_ID = 'grpc-testing' +_RESULTS_SCHEMA = [ + ('job_name', 'STRING', 'Name of Kokoro job'), + ('build_id', 'INTEGER', 'Build ID of Kokoro job'), + ('build_url', 'STRING', 'URL of Kokoro build'), + ('test_target', 'STRING', 'Bazel target path'), + ('test_case', 'STRING', 'Name of test case'), + ('result', 'STRING', 'Test or build result'), + ('timestamp', 'TIMESTAMP', 'Timestamp of test run'), +] +_TABLE_ID = 'rbe_test_results' + + +def _get_api_key(): + """Returns string with API key to access ResultStore. + Intended to be used in Kokoro envrionment.""" + api_key_directory = os.getenv('KOKORO_GFILE_DIR') + api_key_file = os.path.join(api_key_directory, 'resultstore_api_key') + assert os.path.isfile(api_key_file), 'Must add --api_key arg if not on ' \ + 'Kokoro or Kokoro envrionment is not set up properly.' + with open(api_key_file, 'r') as f: + return f.read().replace('\n', '') + + +def _get_invocation_id(): + """Returns String of Bazel invocation ID. Intended to be used in + Kokoro envirionment.""" + bazel_id_directory = os.getenv('KOKORO_ARTIFACTS_DIR') + bazel_id_file = os.path.join(bazel_id_directory, 'bazel_invocation_ids') + assert os.path.isfile(bazel_id_file), 'bazel_invocation_ids file, written ' \ + 'by bazel_wrapper.py, expected but not found.' + with open(bazel_id_file, 'r') as f: + return f.read().replace('\n', '') + + +def _upload_results_to_bq(rows): + """Upload test results to a BQ table. 
+ + Args: + rows: A list of dictionaries containing data for each row to insert + """ + bq = big_query_utils.create_big_query() + big_query_utils.create_partitioned_table( + bq, + _PROJECT_ID, + _DATASET_ID, + _TABLE_ID, + _RESULTS_SCHEMA, + _DESCRIPTION, + partition_type=_PARTITION_TYPE, + expiration_ms=_EXPIRATION_MS) + + max_retries = 3 + for attempt in range(max_retries): + if big_query_utils.insert_rows(bq, _PROJECT_ID, _DATASET_ID, _TABLE_ID, + rows): + break + else: + if attempt < max_retries - 1: + print('Error uploading result to bigquery, will retry.') + else: + print( + 'Error uploading result to bigquery, all attempts failed.') + sys.exit(1) + + +def _get_resultstore_data(api_key, invocation_id): + """Returns dictionary of test results by querying ResultStore API. + Args: + api_key: String of ResultStore API key + invocation_id: String of ResultStore invocation ID to results from + """ + all_actions = [] + page_token = '' + # ResultStore's API returns data on a limited number of tests. When we exceed + # that limit, the 'nextPageToken' field is included in the request to get + # subsequent data, so keep requesting until 'nextPageToken' field is omitted. + while True: + req = urllib2.Request( + url= + 'https://resultstore.googleapis.com/v2/invocations/%s/targets/-/configuredTargets/-/actions?key=%s&pageToken=%s' + % (invocation_id, api_key, page_token), + headers={ + 'Content-Type': 'application/json' + }) + results = json.loads(urllib2.urlopen(req).read()) + all_actions.extend(results['actions']) + if 'nextPageToken' not in results: + break + page_token = results['nextPageToken'] + return all_actions + + +if __name__ == "__main__": + # Arguments are necessary if running in a non-Kokoro envrionment. 
+ argp = argparse.ArgumentParser(description='Upload RBE results.') + argp.add_argument('--api_key', default='', type=str) + argp.add_argument('--invocation_id', default='', type=str) + args = argp.parse_args() + + api_key = args.api_key or _get_api_key() + invocation_id = args.invocation_id or _get_invocation_id() + resultstore_actions = _get_resultstore_data(api_key, invocation_id) + + bq_rows = [] + for action in resultstore_actions: + # Filter out non-test related data, such as build results. + if 'testAction' not in action: + continue + # Some test results contain the fileProcessingErrors field, which indicates + # an issue with parsing results individual test cases. + if 'fileProcessingErrors' in action: + test_cases = [{ + 'testCase': { + 'caseName': str(action['id']['actionId']), + 'result': str(action['statusAttributes']['status']) + } + }] + else: + test_cases = action['testAction']['testSuite']['tests'][0][ + 'testSuite']['tests'] + for test_case in test_cases: + if 'errors' in test_case['testCase']: + result = 'FAILED' + else: + result = 'PASSED' + bq_rows.append({ + 'insertId': str(uuid.uuid4()), + 'json': { + 'job_name': + os.getenv('KOKORO_JOB_NAME'), + 'build_id': + os.getenv('KOKORO_BUILD_NUMBER'), + 'build_url': + 'https://sponge.corp.google.com/invocation?id=%s' % + os.getenv('KOKORO_BUILD_ID'), + 'test_target': + action['id']['targetId'], + 'test_case': + test_case['testCase']['caseName'], + 'result': + result, + 'timestamp': + action['timing']['startTime'], + } + }) + # BigQuery sometimes fails with large uploads, so batch 1,000 rows at a time. + for i in range((len(bq_rows) / 1000) + 1): + _upload_results_to_bq(bq_rows[i * 1000:(i + 1) * 1000])