Browse Source

H2O: Set thread memory allocation policy (#3163)

In particular, this change ensures that each thread stack is allocated
on the memory node the thread is currently running on.
Anton Kirilov 7 years ago
parent
commit
d11851d616

+ 1 - 1
frameworks/C/h2o/CMakeLists.txt

@@ -16,7 +16,7 @@ include_directories(src ${H2O_INCLUDE} ${MUSTACHE_C_INCLUDE} ${YAJL_INCLUDE})
 file(GLOB SOURCES "src/*.c")
 add_executable(${PROJECT_NAME} ${SOURCES})
 target_link_libraries(${PROJECT_NAME} ${COMMON_OPTIONS})
-target_link_libraries(${PROJECT_NAME} ${H2O_LIB} ssl crypto pq z ${MUSTACHE_C_LIB} ${YAJL_LIB})
+target_link_libraries(${PROJECT_NAME} ${H2O_LIB} ssl crypto numa pq z ${MUSTACHE_C_LIB} ${YAJL_LIB})
 install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin)
 file(GLOB TEMPLATES "template/*")
 install(FILES ${TEMPLATES} DESTINATION share/${PROJECT_NAME}/template)

+ 3 - 2
frameworks/C/h2o/src/bitset.h

@@ -34,8 +34,9 @@ typedef uint_fast32_t bitset_base_t;
 #define BITSET_SET(i, b) bitset_set((i), (b), sizeof(b) * CHAR_BIT)
 // Use a designated initializer to set all array elements to zero.
 #define DEFINE_BITSET(b, s) \
-	assert(s); \
-	bitset_base_t (b)[((s) - 1) / (sizeof(bitset_base_t) * CHAR_BIT) + 1] = {[0] = 0}
+	bitset_base_t (b)[ \
+		((s) + sizeof(bitset_base_t) * CHAR_BIT - 1) / (sizeof(bitset_base_t) * CHAR_BIT)] = \
+		{[0] = 0}
 
 static inline bool bitset_isset(size_t i, bitset_base_t *b, size_t num)
 {

+ 38 - 0
frameworks/C/h2o/src/thread.c

@@ -21,10 +21,13 @@
 
 #include <errno.h>
 #include <h2o.h>
+#include <limits.h>
+#include <numaif.h>
 #include <pthread.h>
 #include <stdbool.h>
 #include <stdlib.h>
 #include <string.h>
+#include <unistd.h>
 #include <h2o/serverutil.h>
 #include <sys/syscall.h>
 
@@ -35,12 +38,14 @@
 #include "utility.h"
 
 static void *run_thread(void *arg);
+static void set_thread_memory_allocation_policy(size_t thread_num);
 
 static void *run_thread(void *arg)
 {
 	thread_context_t ctx;
 
 	initialize_thread_context(arg, false, &ctx);
+	set_thread_memory_allocation_policy(ctx.config->thread_num);
 	// This is just an optimization, so that the application does not try to
 	// establish database connections in the middle of servicing requests.
 	connect_to_database(&ctx);
@@ -49,6 +54,39 @@ static void *run_thread(void *arg)
 	pthread_exit(NULL);
 }
 
+// Asks the kernel to keep the calling thread's stack on the memory node of
+// the CPU the thread is currently running on, and then resets the thread's
+// default memory policy to MPOL_PREFERRED with an empty node mask.
+//
+// thread_num: the thread_num value from the configuration; nothing is done
+// unless it is a multiple of h2o_numproc().
+static void set_thread_memory_allocation_policy(size_t thread_num)
+{
+	// There is no need to set a memory allocation policy unless
+	// the application controls the processor affinity as well.
+	// NOTE(review): presumably the affinity is only controlled when thread_num
+	// is a multiple of the processor count - confirm against the code that
+	// sets the affinity.
+	if (thread_num % h2o_numproc())
+		return;
+
+	void *stack_addr;
+	size_t stack_size;
+	unsigned memory_node;
+	pthread_attr_t attr;
+
+	// Only the node is of interest, so the CPU and cache arguments are NULL.
+	CHECK_ERRNO(syscall, SYS_getcpu, NULL, &memory_node, NULL);
+	CHECK_ERROR(pthread_getattr_np, pthread_self(), &attr);
+	CHECK_ERROR(pthread_attr_getstack, &attr, &stack_addr, &stack_size);
+	pthread_attr_destroy(&attr);
+
+	// Enough unsigned long elements to hold bit number memory_node.
+	unsigned long nodemask[
+		(memory_node + sizeof(unsigned long) * CHAR_BIT) / (sizeof(unsigned long) * CHAR_BIT)];
+
+	memset(nodemask, 0, sizeof(nodemask));
+	nodemask[memory_node / (sizeof(*nodemask) * CHAR_BIT)] |=
+		1UL << (memory_node % (sizeof(*nodemask) * CHAR_BIT));
+	// The kernel decrements maxnode before reading the mask, so it only looks
+	// at the first maxnode - 1 bits. Pass one more than the number of bits in
+	// use (memory_node + 1), as libnuma does; otherwise the bit for
+	// memory_node is dropped and the mask is treated as empty.
+	CHECK_ERRNO(mbind,
+	            stack_addr,
+	            stack_size,
+	            MPOL_PREFERRED,
+	            nodemask,
+	            memory_node + 2,
+	            MPOL_MF_MOVE | MPOL_MF_STRICT);
+	// An empty node mask with MPOL_PREFERRED selects allocation on the node
+	// of the CPU that triggers the allocation.
+	CHECK_ERRNO(set_mempolicy, MPOL_PREFERRED, NULL, 0);
+}
+
 void free_thread_context(thread_context_t *ctx)
 {
 	free_database_state(ctx->event_loop.h2o_ctx.loop, &ctx->db_state);

+ 1 - 1
toolset/setup/linux/prerequisites.sh

@@ -28,7 +28,7 @@ sudo apt-get -qqy install -o Dpkg::Options::="--force-confdef" -o Dpkg::Options:
   libpq-dev mlton \
   cloc dstat                        `# Collect resource usage statistics` \
   python-dev \
-  python-pip re2c
+  python-pip re2c libnuma-dev
 
 sudo pip install colorama==0.3.1
 # Version 2.3 has a nice Counter() and other features