Browse Source

Merge pull request #1133 from TechEmpower/scorched_earth

Scorched earth
Alexander Schneider 10 years ago
parent
commit
178f04c4fb

+ 25 - 7
frameworks/Perl/plack/setup.py

@@ -15,11 +15,29 @@ def start(args, logfile, errfile):
     return 0
   except subprocess.CalledProcessError:
     return 1
-def stop(logfile, errfile):
-  try:
-    subprocess.call('kill -TERM $(cat $TROOT/app.pid)', shell=True, stderr=errfile, stdout=logfile)
-    subprocess.call("sudo /usr/local/nginx/sbin/nginx -c $TROOT/nginx.conf -s stop", shell=True, stderr=errfile, stdout=logfile)
-    return 0
-  except subprocess.CalledProcessError:
-    return 1
 
+ def stop(logfile, errfile):
+   """
+   Stops the plack test: sends TERM to the children of the process whose pid
+   is stored in plack/app.pid, then force-kills any remaining plackup process.
+ 
+   logfile -- file object that receives stdout of the spawned commands
+   errfile -- file object that receives stderr of the spawned commands
+ 
+   Returns 0 once both commands have been run, or 1 if one of them could not
+   be launched at all.
+   """
+   try:
+     # Wait for the TERM to be delivered before escalating to KILL below, so
+     # the two signals cannot race each other.
+     subprocess.call("kill -TERM $(ps --ppid `cat app.pid` -o pid --no-header)", shell=True, cwd="plack", stderr=errfile, stdout=logfile)
+     # TE - There was an issue on the EC2 machines where this, for reasons unknown,
+     # was not sufficient in cleanly ending plack. In fact, the above would not
+     # successfully kill the starter process which would result in this 'stop' call
+     # to report success but leave port 8080 bound to a plackup instance. We tried
+     # adding a 'nuke' approach which detects the test's port still being bound
+     # after calling stop and then directly kills those pids a few times to try and
+     # cleanly release any/all ports (over 6000).
+     # Why this only happens on EC2 is just a guess, but somehow the server seems
+     # overwhelmed by the sheer volume of requests from the client and gets into
+     # a deadlock state. Calling "kill -15 [pid]" against the server process does
+     # nothing; so we needed a general way to kill all the processes that were
+     # spawned by the original process. For plack, this was as simple as the next
+     # subprocess.call (killall -s 9 plackup), but to do this generally is
+     # a bit more difficult.
+ 
+     # TE - In general, no test should ever be forced to use the KILL signal;
+     # TERM should be sufficient. However, in this case it seems that the plack
+     # server gets into a deadlock state and will not respond to a TERM signal.
+     #
+     # The command is passed as an argument list: a plain string without
+     # shell=True would be looked up as a single program name and fail.
+     # The exit status is ignored on purpose; killall reports failure when
+     # there is nothing left to kill, which is the normal case.
+     subprocess.call(["killall", "-s", "9", "plackup"], stderr=errfile, stdout=logfile)
+     return 0
+   except (subprocess.CalledProcessError, OSError):
+     # OSError is what subprocess raises when a command (or the cwd) is missing
+     return 1

+ 39 - 4
toolset/benchmark/benchmarker.py

@@ -325,7 +325,7 @@ class Benchmarker:
     # off, rather than starting from the beginning
     if os.path.isfile('current_benchmark.txt'):
         with open('current_benchmark.txt', 'r') as interrupted_benchmark:
-            interrupt_bench = interrupted_benchmark.read()
+            interrupt_bench = interrupted_benchmark.read().strip()
             for index, atest in enumerate(tests):
                 if atest.name == interrupt_bench:
                     tests = tests[index:]
@@ -591,9 +591,13 @@ class Benchmarker:
         time.sleep(5)
 
         if self.__is_port_bound(test.port):
-          self.__write_intermediate_results(test.name, "port " + str(test.port) + " was not released by stop")
-          err.write(header("Error: Port %s was not released by stop %s" % (test.port, test.name)))
-          err.flush()
+          self.__forciblyEndPortBoundProcesses(test.port, out, err)
+          time.sleep(5)
+          if self.__is_port_bound(test.port):
+            err.write(header("Error: Port %s was not released by stop %s" % (test.port, test.name)))
+            err.flush()
+            self.__write_intermediate_results(test.name, "port " + str(test.port) + " was not released by stop")
+
           return exit_with_code(1)
 
         out.write(header("Stopped %s" % test.name))
@@ -680,6 +684,37 @@ class Benchmarker:
   # End __is_port_bound
   ############################################################
 
+  def __forciblyEndPortBoundProcesses(self, test_port, out, err):
+    """
+    Last-resort cleanup for a test that did not release its port: looks up
+    every process listening on test_port via 'sudo netstat -lnp', sends it
+    TERM, waits 10 seconds and sends KILL if 'ps' still lists the pid.
+
+    test_port -- port number (int) that should have been released
+    out       -- writable log file; receives a note when a pid cannot be killed
+    err       -- error log file (currently unused, kept for the caller)
+    """
+    p = subprocess.Popen(['sudo', 'netstat', '-lnp'], stdout=subprocess.PIPE)
+    # Keep the netstat output under its own name; 'out' and 'err' are the
+    # caller's log files and 'out' is still written to below.
+    netstat_out, _ = p.communicate()
+    for line in netstat_out.splitlines():
+      # Only tcp/tcp6 socket rows are of interest; matching on the line start
+      # avoids header rows or unix-socket paths that merely contain 'tcp'.
+      if not line.startswith('tcp'):
+        continue
+      splitline = line.split()
+      if len(splitline) < 7:
+        continue
+      try:
+        # Local address looks like 0.0.0.0:8080 or :::8080; port is last
+        port = int(splitline[3].split(':')[-1].strip())
+      except ValueError:
+        continue
+      if port != test_port:
+        continue
+      # Last column is "PID/Program name", or "-" when the owner is not shown
+      pid = splitline[6].split('/')[0].strip()
+      if not pid.isdigit():
+        continue
+      try:
+        os.kill(int(pid), 15)
+        # Sleep for 10 sec; kill can be finicky
+        time.sleep(10)
+        # Check that PID again: ps prints a header line plus one line per
+        # matching process, so more than one line means it is still alive.
+        ps = subprocess.Popen(['ps','p',pid], stdout=subprocess.PIPE)
+        still_listed = ps.communicate()[0]
+        if len(still_listed.splitlines()) > 1:
+          os.kill(int(pid), 9)
+      except OSError:
+        out.write( textwrap.dedent("""
+          -----------------------------------------------------
+            Error: Could not kill pid {pid}
+          -----------------------------------------------------
+          """.format(pid=str(pid))) )
+        # This is okay; likely we killed a parent that ended
+        # up automatically killing this before we could.
+
   ############################################################
   # __parse_results
   # Ensures that the system has all necessary software to run

+ 1 - 0
toolset/benchmark/framework_test.py

@@ -380,6 +380,7 @@ class FrameworkTest:
 
       valid = parser.isValidFortune(out)
       return (valid, '' if valid else 'Did not pass validation')
+
     except:
       print "Got exception when trying to validate the fortune test: {exception} ".format(exception=traceback.format_exc())
     return (False, err_str)