瀏覽代碼

Reaper update (#2825)

Moved process killing to the TFBReaper.
Mike Smith 8 年之前
父節點
當前提交
3a3647ed32
共有 3 個文件被更改,包括 89 次插入和 69 次刪除
  1. 13 67
      toolset/benchmark/benchmarker.py
  2. 1 1
      toolset/benchmark/framework_test.py
  3. 75 1
      toolset/setup/linux/TFBReaper.c

+ 13 - 67
toolset/benchmark/benchmarker.py

@@ -483,6 +483,7 @@ class Benchmarker:
                     if test_process.exitcode != 0:
                         error_happened = True
             pbar.finish()
+
         if os.path.isfile(self.current_benchmark):
             os.remove(self.current_benchmark)
         logging.debug("End __run_tests.")
@@ -563,8 +564,9 @@ class Benchmarker:
                     return exit_with_code(1)
 
                 result, process = test.start(out)
+                self.__process = process
                 if result != 0:
-                    self.__stop_test(out, process)
+                    self.__process.terminate()
                     time.sleep(5)
                     out.write( "ERROR: Problem starting {name}\n".format(name=test.name) )
                     out.flush()
@@ -607,13 +609,13 @@ class Benchmarker:
                 ##########################
                 out.write(header("Stopping %s" % test.name))
                 out.flush()
-                self.__stop_test(out, process)
+                self.__process.terminate()
                 out.flush()
                 time.sleep(5)
 
                 if self.__is_port_bound(test.port):
                     # This can happen sometimes - let's try again
-                    self.__stop_test(out, process)
+                    self.__process.terminate()
                     out.flush()
                     time.sleep(5)
                     if self.__is_port_bound(test.port):
@@ -671,28 +673,16 @@ class Benchmarker:
                 if self.mode == "verify" and not passed_verify:
                     print "Failed verify!"
                     return exit_with_code(1)
+            except KeyboardInterrupt:
+                if self.__process is not None:
+                    self.__process.terminate()
             except (OSError, IOError, subprocess.CalledProcessError) as e:
                 self.__write_intermediate_results(test.name,"<setup.py> raised an exception")
                 out.write(header("Subprocess Error %s" % test.name))
                 traceback.print_exc(file=out)
                 out.flush()
-                try:
-                    self.__stop_test(out, process)
-                except (subprocess.CalledProcessError) as e:
-                    self.__write_intermediate_results(test.name,"<setup.py>#stop() raised an error")
-                    out.write(header("Subprocess Error: Test .stop() raised exception %s" % test.name))
-                    traceback.print_exc(file=out)
-                    out.flush()
                 out.close()
                 return exit_with_code(1)
-            # TODO - subprocess should not catch this exception!
-            # Parent process should catch it and cleanup/exit
-            except (KeyboardInterrupt) as e:
-                self.__stop_test(out, process)
-                out.write(header("Cleaning up..."))
-                out.flush()
-                self.__finish()
-                sys.exit(1)
 
             out.close()
             return exit_with_code(0)
@@ -701,52 +691,6 @@ class Benchmarker:
     # End __run_tests
     ############################################################
 
-    ############################################################
-    # __stop_test(benchmarker)
-    # Stops all running tests
-    ############################################################
-    def __stop_test(self, out, process):
-        if process is not None and process.poll() is None:
-            # Stop
-            pids = self.__find_child_processes(process.pid)
-            if pids is not None:
-                stop = ['kill', '-STOP'] + pids
-                subprocess.call(stop, stderr=out, stdout=out)
-            pids = self.__find_child_processes(process.pid)
-            if pids is not None:
-                term = ['kill', '-TERM'] + pids
-                subprocess.call(term, stderr=out, stdout=out)
-            # Okay, if there are any more PIDs, kill them harder
-            pids = self.__find_child_processes(process.pid)
-            if pids is not None:
-                kill = ['kill', '-KILL'] + pids
-                subprocess.call(kill, stderr=out, stdout=out)
-            process.terminate()
-    ############################################################
-    # End __stop_test
-    ############################################################
-
-    ############################################################
-    # __find_child_processes
-    # Recursively finds all child processes for the given PID.
-    ############################################################
-    def __find_child_processes(self, pid):
-        toRet = []
-        try:
-            pids = subprocess.check_output(['pgrep','-P',str(pid)]).split()
-            toRet.extend(pids)
-            for aPid in pids:
-                toRet.extend(self.__find_child_processes(aPid))
-        except:
-            # pgrep will return a non-zero status code if there are no
-            # processes who have a PPID of PID.
-            pass
-
-        return toRet
-    ############################################################
-    # End __find_child_processes
-    ############################################################
-
     def is_port_bound(self, port):
         return self.__is_port_bound(port)
 
@@ -1086,9 +1030,11 @@ class Benchmarker:
         if self.client_identity_file != None:
             self.client_ssh_string = self.client_ssh_string + " -i " + self.client_identity_file
 
-            ############################################################
-            # End __init__
-            ############################################################
+        self.__process = None
+
+    ############################################################
+    # End __init__
+    ############################################################
 
 
 class QuietOutputStream:

+ 1 - 1
toolset/benchmark/framework_test.py

@@ -189,7 +189,7 @@ class FrameworkTest:
       subprocess.check_call(['gcc', 
         '-std=c99', 
         '-o%s/TFBReaper' % self.install_root, 
-        os.path.join(self.fwroot,'toolset/setup/linux/TFBReaper.c')  ],
+        os.path.join(self.fwroot,'toolset/setup/linux/TFBReaper.c')],
         stderr=out, stdout=out)
 
     # Check that the client is setup

+ 75 - 1
toolset/setup/linux/TFBReaper.c

@@ -1,13 +1,87 @@
 #define _DEFAULT_SOURCE
 
+#include <signal.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <sys/prctl.h>
-#include <sys/syscall.h>
 #include <string.h>
 
+typedef struct Node Node;
+
+/**
+ * Singly linked list node; reap() stores one line of child-PID output per node.
+ */
+struct Node
+{
+  char *str; // heap-allocated copy of one output line (PID text as read by fgets)
+  Node *next; // following node, or NULL for the last node in the list
+};
+
+/**
+ * Global head and tail of the PID list built by reap(); both are NULL while the list is empty.
+ */
+Node *head = NULL;
+Node *tail = NULL;
+
+/**
+ * Signal handler (installed by main() for SIGTERM and SIGINT): recursively finds every process with this process as an ancestor and sends each one SIGKILL,
+ * then exits with status 0. `signum` is unused. NOTE(review): popen/malloc/exit are not async-signal-safe, so calling them from a handler is formally unsafe.
+ */
+void reap(int signum)
+{
+  int pid = getpid(); // root of the process tree to walk: this process
+
+  FILE *fp;
+  char buf[256]; // receives one line of the shell command's output per fgets call
+
+  char command[256]; // shell snippet: echo each child PID of $1 (via `ps --ppid`), then recurse into that child
+  sprintf(command, "findChilds() { for child in $(ps --ppid $1 ho pid); do echo $child; findChilds $child; done } && findChilds %d", pid);
+
+  char *pids[256]; // NOTE(review): never referenced anywhere in this function
+  fp = popen(command, "r"); // NOTE(review): result is not checked for NULL before being passed to fgets
+  while(fgets(buf, sizeof(buf), fp) != 0) // append every output line to the global list
+  {
+    Node *newNode = malloc(sizeof(Node));
+    newNode->str = malloc(strlen(buf)+1); // +1 for the terminating NUL
+    strcpy(newNode->str, buf);
+    newNode->next = NULL;
+
+    if(tail == NULL) // empty list: the new node becomes both head and tail
+    {
+      tail = newNode;
+      head = newNode;
+    }
+    else
+    {
+      if(head->next == NULL) // NOTE(review): appears redundant; when head has no successor it is also the tail, which is linked just below
+      {
+        head->next = newNode;
+      }
+      tail->next = newNode;
+      tail = newNode;
+    }
+  }
+
+  Node *curr = head; // second pass: signal every collected PID in list order
+  while(curr != NULL)
+  {
+    kill(atoi(curr->str), SIGKILL); // NOTE(review): atoi yields 0 for a non-numeric line, and kill(0, ...) signals the caller's whole process group
+    curr = curr->next;
+  }
+
+  exit(0); // NOTE(review): fp is never pclose'd and the list nodes are never freed before exiting
+}
+
 int main(int argc, char *argv[])
 {
+  // Interrupt SIGTERM and SIGINT and pass to our handler.
+  struct sigaction action;
+  memset(&action, 0, sizeof(action));
+  action.sa_handler = reap;
+  sigaction(SIGTERM, &action, NULL);
+  sigaction(SIGINT, &action, NULL);
+
   // Gather the command line arguments for the pass-through.
   int count = argc - 1;
   int *sizes = malloc(sizeof(int) * count);