Browse Source

H2O: Optimize the implementation further for round 14 (#2694)

* In the cloud environment, make sure that at most 2 database connections
are used for each logical CPU core that the database server has.
* Increase the maximum number of connections accepted simultaneously
even more.
* Enable link-time optimization for H2O.
* Set the CPU affinity mask before starting each thread, so that on
NUMA systems newly allocated memory resides on the same node that the
thread executes on.
* Print error messages if there are issues when generating the
database update query.
Anton Kirilov 8 years ago
parent
commit
5bb1cc6172

+ 9 - 7
frameworks/C/h2o/setup.sh

@@ -4,20 +4,22 @@ fw_depends postgresql h2o mustache-c yajl
 
 H2O_APP_HOME="${IROOT}/h2o_app"
 BUILD_DIR="${H2O_APP_HOME}_build"
-H2O_APP_PROFILE_PORT="54321"
+# Use 2 database connections for each logical CPU core the database server has.
+DB_CONN=2
+H2O_APP_PROFILE_PORT=54321
 H2O_APP_PROFILE_URL="http://127.0.0.1:$H2O_APP_PROFILE_PORT"
 NUM_PROC=$(nproc)
+PHYSICAL_ENVIRONMENT_THREADS=16
 
 # A hacky way to detect whether we are running in the physical hardware or the cloud environment.
 if [[ "$NUM_PROC" -gt 16 ]]; then
 	CLOUD_ENVIRONMENT=false
-	# In the physical hardware environment the application server has more CPU cores than the
-	# database server, so we need to reduce the maximum number of database connections per
-	# thread accordingly.
+	# In the physical hardware environment the number of threads used by the application is not
+	# the same as the number of logical CPU cores that the database server has, so we need to
+	# adjust the maximum number of database connections per thread accordingly.
 	DB_CONN=4
 else
 	CLOUD_ENVIRONMENT=true
-	DB_CONN=8
 fi
 
 build_h2o_app()
@@ -38,7 +40,7 @@ run_curl()
 
 run_h2o_app()
 {
-	taskset -c "$1" "$2/h2o_app" -f "$3/template/fortunes.mustache" -m "$DB_CONN" "$4" "$5" \
+	taskset -c "$1" "$2/h2o_app" -a20 -f "$3/template/fortunes.mustache" -m "$DB_CONN" "$4" "$5" \
 		-d "host=TFB-database dbname=hello_world user=benchmarkdbuser password=benchmarkdbpass" &
 }
 
@@ -72,7 +74,7 @@ echo "Maximum database connections per thread: $DB_CONN"
 if "$CLOUD_ENVIRONMENT"; then
 	run_h2o_app "0-$((NUM_PROC - 1))" "${H2O_APP_HOME}/bin" "${H2O_APP_HOME}/share/h2o_app"
 else
-	for ((i = 0; i < 16; i++)); do
+	for ((i = 0; i < PHYSICAL_ENVIRONMENT_THREADS; i++)); do
 		run_h2o_app "$i" "${H2O_APP_HOME}/bin" "${H2O_APP_HOME}/share/h2o_app" -t1
 	done
 fi

+ 2 - 4
frameworks/C/h2o/src/event_loop.c

@@ -108,7 +108,7 @@ static void do_epoll_wait(h2o_socket_t *epoll_sock, const char *err)
 		struct epoll_event event[MAX_EPOLL_EVENTS];
 
 		do
-			ready = epoll_wait(ctx->event_loop.epoll_fd, event, MAX_EPOLL_EVENTS, 0);
+			ready = epoll_wait(ctx->event_loop.epoll_fd, event, ARRAY_SIZE(event), 0);
 		while (ready < 0 && errno == EINTR);
 
 		if (ready > 0)
@@ -141,9 +141,7 @@ static int get_listener_socket(const char *bind_address, uint16_t port)
 		return ret;
 	}
 
-	struct addrinfo *iter = res;
-
-	for (; iter; iter = iter->ai_next) {
+	for (const struct addrinfo *iter = res; iter; iter = iter->ai_next) {
 		const int s = socket(iter->ai_family,
 		                     iter->ai_socktype | SOCK_NONBLOCK | SOCK_CLOEXEC,
 		                     iter->ai_protocol);

+ 22 - 15
frameworks/C/h2o/src/thread.c

@@ -93,33 +93,40 @@ void initialize_thread_context(global_thread_data_t *global_thread_data,
 
 void start_threads(global_thread_data_t *global_thread_data)
 {
+	pthread_attr_t attr;
 	const size_t num_cpus = h2o_numproc();
+	const size_t cpusetsize = CPU_ALLOC_SIZE(num_cpus);
+	cpu_set_t * const cpuset = CPU_ALLOC(num_cpus);
 
+	if (!cpuset)
+		abort();
+
+	CHECK_ERROR(pthread_attr_init, &attr);
 	// The first thread context is used by the main thread.
 	global_thread_data->thread = pthread_self();
 
-	for (size_t i = global_thread_data->config->thread_num - 1; i > 0; i--)
-		CHECK_ERROR(pthread_create,
-		            &global_thread_data[i].thread,
-		            NULL,
-		            run_thread,
-		            global_thread_data + i);
-
 	// If the number of threads is not equal to the number of processors, then let the scheduler
 	// decide how to balance the load.
 	if (global_thread_data->config->thread_num == num_cpus) {
-		const size_t cpusetsize = CPU_ALLOC_SIZE(num_cpus);
-		cpu_set_t * const cpuset = CPU_ALLOC(num_cpus);
-
-		if (!cpuset)
-			abort();
+		CPU_ZERO_S(cpusetsize, cpuset);
+		CPU_SET_S(0, cpusetsize, cpuset);
+		CHECK_ERROR(pthread_setaffinity_np, global_thread_data->thread, cpusetsize, cpuset);
+	}
 
-		for (size_t i = 0; i < global_thread_data->config->thread_num; i++) {
+	for (size_t i = global_thread_data->config->thread_num - 1; i > 0; i--) {
+		if (global_thread_data->config->thread_num == num_cpus) {
 			CPU_ZERO_S(cpusetsize, cpuset);
 			CPU_SET_S(i, cpusetsize, cpuset);
-			CHECK_ERROR(pthread_setaffinity_np, global_thread_data[i].thread, cpusetsize, cpuset);
+			CHECK_ERROR(pthread_attr_setaffinity_np, &attr, cpusetsize, cpuset);
 		}
 
-		CPU_FREE(cpuset);
+		CHECK_ERROR(pthread_create,
+		            &global_thread_data[i].thread,
+		            &attr,
+		            run_thread,
+		            global_thread_data + i);
 	}
+
+	pthread_attr_destroy(&attr);
+	CPU_FREE(cpuset);
 }

+ 4 - 3
frameworks/C/h2o/src/world.c

@@ -220,7 +220,7 @@ static void do_updates(multiple_query_ctx_t *query_ctx)
 	                 query_ctx->res->id,
 	                 query_ctx->res->random_number);
 
-	if (c < 0 || (size_t) c >= sz)
+	if ((size_t) c >= sz)
 		goto error;
 
 	iter += c;
@@ -234,7 +234,7 @@ static void do_updates(multiple_query_ctx_t *query_ctx)
 		             query_ctx->res[i].id,
 		             query_ctx->res[i].random_number);
 
-		if (c < 0 || (size_t) c >= sz)
+		if ((size_t) c >= sz)
 			goto error;
 
 		iter += c;
@@ -243,7 +243,7 @@ static void do_updates(multiple_query_ctx_t *query_ctx)
 
 	c = snprintf(iter, sz, UPDATE_QUERY_END);
 
-	if (c < 0 || (size_t) c >= sz)
+	if ((size_t) c >= sz)
 		goto error;
 
 	if (execute_query(ctx, &query_ctx->query_param->param))
@@ -255,6 +255,7 @@ static void do_updates(multiple_query_ctx_t *query_ctx)
 
 	return;
 error:
+	LIBRARY_ERROR("snprintf", "Truncated output.");
 	send_error(INTERNAL_SERVER_ERROR, REQ_ERROR, query_ctx->req);
 }
 

+ 2 - 1
toolset/setup/linux/webservers/h2o.sh

@@ -11,7 +11,8 @@ pushd "${IROOT}"
 fw_get -O "https://github.com/h2o/h2o/archive/$ARCHIVE"
 fw_untar "$ARCHIVE"
 pushd "$BUILD_DIR"
-cmake -DCMAKE_INSTALL_PREFIX="$H2O_HOME" -DCMAKE_C_FLAGS="-march=native"
+cmake -DCMAKE_INSTALL_PREFIX="$H2O_HOME" -DCMAKE_C_FLAGS="-flto -march=native" \
+	-DCMAKE_AR=/usr/bin/gcc-ar -DCMAKE_RANLIB=/usr/bin/gcc-ranlib
 make -j "$(nproc)" install
 popd
 rm -rf "$BUILD_DIR"