More work on benchmarks (performance tests).

16 years ago · 306bc554c6
parent 1733b58257
commit 306bc554c6
8 changed files with 103 additions and 23 deletions
--- a/18
+++ b/18
@ -2,7 +2,7 @@
 # Function to expand a wildcard pattern recursively.
 rwildcard=$(strip $(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2)$(filter $(subst *,%,$2),$d)))

-.PHONY: all clean test benchmarks
+.PHONY: all clean test benchmarks benchmark upb_benchmarks  # upb_benchmarks is an alias target like benchmarks
 CC=gcc
 CXX=g++
 CFLAGS=-std=c99
@ -33,15 +33,21 @@ tests/tests: src/libupb.a
 tools/upbc: src/libupb.a

 # Benchmarks
-BENCHMARKS=benchmarks/b.parsetostruct_googlemessage1.upb_table_byval \
-           benchmarks/b.parsetostruct_googlemessage1.upb_table_byref \
-           benchmarks/b.parsetostruct_googlemessage2.upb_table_byval \
-           benchmarks/b.parsetostruct_googlemessage2.upb_table_byref \
+UPB_BENCHMARKS=benchmarks/b.parsetostruct_googlemessage1.upb_table_byval \
+               benchmarks/b.parsetostruct_googlemessage1.upb_table_byref \
+               benchmarks/b.parsetostruct_googlemessage2.upb_table_byval \
+               benchmarks/b.parsetostruct_googlemessage2.upb_table_byref
+
+BENCHMARKS=$(UPB_BENCHMARKS) \
           benchmarks/b.parsetostruct_googlemessage1.proto2_table \
           benchmarks/b.parsetostruct_googlemessage2.proto2_table \
           benchmarks/b.parsetostruct_googlemessage1.proto2_compiled \
           benchmarks/b.parsetostruct_googlemessage2.proto2_compiled
+upb_benchmarks: $(UPB_BENCHMARKS)
 benchmarks: $(BENCHMARKS)
+benchmark:
+	@rm -f benchmarks/results
+	@for test in benchmarks/b.* ; do ./$$test ; done

 benchmarks/google_messages.proto.pb: benchmarks/google_messages.proto
 	# TODO: replace with upbc.
@ -107,4 +113,4 @@ benchmarks/b.parsetostruct_googlemessage2.proto2_compiled: \

 -include deps
 deps: $(SRC) $(HEADERS) gen-deps.sh Makefile
-	./gen-deps.sh $(SRC)
+	@./gen-deps.sh $(SRC)
--- a/benchmarks/main.c
+++ b/benchmarks/main.c
@ -5,9 +5,15 @@
 #include <unistd.h>
 #include <string.h>

+/* Cycle between a bunch of different messages, to avoid performance
+ * variations due to memory effects of a particular allocation pattern. */
+#ifndef NUM_MESSAGES
+#define NUM_MESSAGES 32
+#endif
+
 static bool initialize();
 static void cleanup();
-static size_t run();
+static size_t run(int i);

 int main (int argc, char *argv[])
 {
@ -32,7 +38,7 @@ int main (int argc, char *argv[])
  clock_t before = clock();
  for(int i = 0; true; i++) {
    if((i & 0xFF) == 0 && (clock() - before > CLOCKS_PER_SEC)) break;
-    size_t bytes = run();
+    size_t bytes = run(i);
    if(bytes == 0) {
      fprintf(stderr, "%s: failed.\n", argv[0]);
      return 2;
--- a/benchmarks/parsetostruct.proto2_compiled.cc
+++ b/benchmarks/parsetostruct.proto2_compiled.cc
@ -7,7 +7,7 @@
 #include <fstream>

 static std::string str;
-MESSAGE_CIDENT msg;
+MESSAGE_CIDENT msg[NUM_MESSAGES];

 static bool initialize()
 {
@ -27,9 +27,9 @@ static void cleanup()
 {
 }

-static size_t run()
+static size_t run(int i)
 {
-  if(!msg.ParseFromString(str)) {
+  if(!msg[i%NUM_MESSAGES].ParseFromString(str)) {
    fprintf(stderr, "Error parsing with proto2.\n");
    return 0;
  }
--- a/benchmarks/parsetostruct.proto2_table.cc
+++ b/benchmarks/parsetostruct.proto2_table.cc
@ -8,7 +8,7 @@

 static std::string str;
 static google::protobuf::DynamicMessageFactory factory;
-static google::protobuf::Message *msg;
+static google::protobuf::Message *msg[NUM_MESSAGES];

 static bool initialize()
 {
@ -25,18 +25,20 @@ static bool initialize()
  /* Create the DynamicMessage. */
  const google::protobuf::Message *dynamic_msg_prototype =
      factory.GetPrototype(MESSAGE_CIDENT::descriptor());
-  msg = dynamic_msg_prototype->New();
+  for(int i = 0; i < NUM_MESSAGES; i++)
+    msg[i] = dynamic_msg_prototype->New();
  return true;
 }

 static void cleanup()
 {
-  delete msg;
+  for(int i = 0; i < NUM_MESSAGES; i++)
+    delete msg[i];
 }

-static size_t run()
+static size_t run(int i)
 {
-  if(!msg->ParseFromString(str)) {
+  if(!msg[i%NUM_MESSAGES]->ParseFromString(str)) {
    fprintf(stderr, "Error parsing with proto2.\n");
    return 0;
  }
--- a/benchmarks/parsetostruct.upb_table.c
+++ b/benchmarks/parsetostruct.upb_table.c
@ -8,7 +8,7 @@ static struct upb_context c;
 static struct upb_string str;
 static struct upb_msg_parse_state s;
 static struct upb_msg *m;
-static void *data;
+static void *data[NUM_MESSAGES];

 static bool initialize()
 {
@ -37,8 +37,8 @@ static bool initialize()
  }

  m = e->ref.msg;
-  data = upb_msgdata_new(m);
-  upb_msg_parse_init(&s, data, m, false, true);
+  for(int i = 0; i < NUM_MESSAGES; i++)  /* data[] has NUM_MESSAGES slots; do not hard-code 32 */
+    data[i] = upb_msgdata_new(m);

  /* Read the message data itself. */
  if(!upb_strreadfile(MESSAGE_FILE, &str)) {
@ -50,17 +50,19 @@ static bool initialize()

 static void cleanup()
 {
+  for(int i = 0; i < NUM_MESSAGES; i++)  /* one entry per data[] slot; bound must match the array size */
+    upb_msgdata_free(data[i], m, true);
  upb_strfree(str);
  upb_context_free(&c);
 }

-static size_t run()
+static size_t run(int i)
 {
  size_t read;
-  upb_msg_parse_reset(&s, data, m, false, BYREF);
+  upb_msg_parse_reset(&s, data[i%NUM_MESSAGES], m, false, BYREF);
  upb_status_t status = upb_msg_parse(&s, str.ptr, str.byte_len, &read);
  if(status != UPB_STATUS_OK && read != str.byte_len) {
-    fprintf(stderr, "Error. :(  error=%d, read=%lu\n", status, read);
+    fprintf(stderr, "Error. :(  error=%d, read=%zu\n", status, read);
    return 0;
  }
  return read;
--- a/2
+++ b/2
@ -1 +1 @@
-DNDEBUG -fomit-frame-pointer
+-DNDEBUG -fomit-frame-pointer -msse3
--- a/perf-regression-test.py
+++ b/perf-regression-test.py
@ -0,0 +1,30 @@
+#!/usr/bin/env python
+# Use to test the current working directory's performance against HEAD.
+
+import os
+
+os.system("""
+set -e
+set -v
+
+# Generate numbers for baseline.
+rm -rf perf-tmp
+git clone . perf-tmp
+(cd perf-tmp && ./perf-tests.sh upb)
+cp perf-tmp/perf-tests.out perf-tests.baseline
+
+# Generate numbers for working directory.
+./perf-tests.sh upb""")
+
+baseline = {}
+baseline_file = open("perf-tests.baseline")
+for line in baseline_file:
+  test, speed = line.split(":")
+  baseline[test] = int(speed)
+
+wd_file = open("perf-tests.out")
+for line in wd_file:
+  test, speed = line.split(":")
+  baseline_val = baseline[test]
+  change = float(int(speed) - baseline_val) / float(baseline_val) * 100
+  print "%s: %d -> %d (%f)" % (test, baseline_val, speed, change)
--- a/perf-tests.sh
+++ b/perf-tests.sh
@ -0,0 +1,34 @@
+#!/bin/sh
+# Builds and runs all available benchmarks, or only the upb ones when the
+# first argument is "upb".  The tree is built multiple times with different
+# compiler flags; output goes to stdout and is appended to perf-tests.out.
+
+MAKETARGET=benchmarks
+if [ "x$1" = xupb ]; then  # POSIX test only defines "=", "==" fails under strict /bin/sh
+  MAKETARGET=upb_benchmarks
+fi
+
+rm -f perf-tests.out
+
+make clean
+echo "-DNDEBUG -msse3" > perf-cppflags
+make $MAKETARGET
+make benchmark | sed -e 's/^/plain./g' | tee -a perf-tests.out
+
+make clean
+echo "-DNDEBUG -fomit-frame-pointer -msse3" > perf-cppflags
+make $MAKETARGET
+make benchmark | sed -e 's/^/omitfp./g' | tee -a perf-tests.out
+
+if [ "x`uname -m`" = xx86_64 ]; then  # extra -m32 passes; these always build upb_benchmarks
+  make clean
+  echo "-DNDEBUG -msse3 -m32" > perf-cppflags
+  make upb_benchmarks
+  make benchmark | sed -e 's/^/plain32./g' | tee -a perf-tests.out
+
+  make clean
+  echo "-DNDEBUG -fomit-frame-pointer -msse3 -m32" > perf-cppflags
+  make upb_benchmarks
+  make benchmark | sed -e 's/^/omitfp32./g' | tee -a perf-tests.out
+fi
+