ソースを参照

Fix some ThreadPool performance counters. (#6118)

* Fix some ThreadPool performance counters.
https://bugzilla.xamarin.com/show_bug.cgi?id=41294

The bug is that some performance counters are always zero,
because we never update them.
This addresses two of them: Work items added and thread count.

There is a PerformanceCounter class whose instances can be incremented;
however, its metadata and some of its implementation are in System.dll.
Most of the code is native.
ThreadPool is in mscorlib.dll, so can't use System.dll.
A few options exist to address this.
We could move and forward the type. We could duplicate and rename
and possibly subset the type.
Here we add a new icall very specifically for the scenario.

There is a slight cost for the icall, and for the atomic increment.
An attempt was made to measure it, but the results were inconclusive.

Note that the count of work items is not specifically for QueueUserWorkItem
as one might expect, but also for any async calls.

Fix a few typos.

* Take the benchmark commands from the command line, per PR feedback.

* Add newline at end of file.
jaykrell 8 年 前
コミット
dc312c772f

+ 25 - 1
mcs/class/corlib/Test/System.Threading/ThreadPoolTest.cs

@@ -255,5 +255,29 @@ namespace MonoTests.System.Threading
 			Assert.AreEqual (0, var_3, "var_3");
 		}
 
+		// Regression test for https://bugzilla.xamarin.com/show_bug.cgi?id=41294,
+		// where the "Mono Threadpool" performance counters always returned 0.
+		// "Work Items Added" and "# of Threads" are fixed, the others are not.
+		[Test]
+		public static void PerformanceCounter ()
+		{
+			const int N = 99;
+
+			// PerformanceCounter is IDisposable; release both counters when done.
+			using (var workItems = new PerformanceCounter ("Mono Threadpool", "Work Items Added"))
+			using (var threads = new PerformanceCounter ("Mono Threadpool", "# of Threads")) {
+				var workItems0 = workItems.NextValue ();
+
+				for (var i = 0; i < N; i++)
+					ThreadPool.QueueUserWorkItem (_ => {});
+
+				var workItems1 = workItems.NextValue ();
+				var threads0 = threads.NextValue ();
+
+				Console.WriteLine ("workItems0:{0} workItems1:{1}", workItems0, workItems1);
+				Console.WriteLine ("{0}:{1}", "threads", threads0);
+
+				// The counter is process-wide and counts every queued work item
+				// (including async calls made by other code), so anything queued
+				// concurrently can push the delta above N. Require at least N.
+				Assert.IsTrue (workItems1 - workItems0 >= N, "#1");
+				Assert.IsTrue (threads0 > 0, "#2");
+			}
+		}
 	}
-}
+}

+ 28 - 0
mcs/class/corlib/Test/System.Threading/icallperf.cs

@@ -0,0 +1,28 @@
+// This is a benchmark to be run w/ and w/o the change
+// to compute PerformanceCounter ("Mono Threadpool", "Work Items Added").
+
+using System;
+using System.Threading;
+using System.Diagnostics;
+
+// Benchmark driver: times how long it takes to queue N empty work items and
+// prints the result as "<N> items in <ms>ms, ...", then prints the current
+// value of the "Work Items Added" counter.
+class Program
+{
+	static void Main ()
+	{
+		const int N = 100 * 100 * 100;
+
+		// The counter is created before timing starts so that its setup cost
+		// is not part of the measurement; it is disposed when Main exits.
+		using (var workItems = new PerformanceCounter ("Mono Threadpool", "Work Items Added")) {
+			// Stopwatch is meant for measuring elapsed time; DateTime.Now is
+			// wall-clock time and can jump if the system clock is adjusted.
+			var timer = Stopwatch.StartNew ();
+
+			for (var i = 0; i < N; i++)
+				ThreadPool.QueueUserWorkItem (_ => {});
+
+			timer.Stop ();
+			var d1 = timer.Elapsed.TotalMilliseconds;
+
+			Console.WriteLine("{0} items in {1}ms, {2}ms per queue", N, d1, d1 / (double)N);
+			var workItems0 = workItems.NextValue();
+			Console.WriteLine("workItems0:{0}", workItems0);
+		}
+	}
+}

+ 96 - 0
mcs/class/corlib/Test/System.Threading/icallperf.pl

@@ -0,0 +1,96 @@
+# Attempt to analyze the performance before and after collecting
+# performance counters for ThreadPool.
+#
+# The added costs are an icall per queued item.
+# The icall takes/returns nothing -- no marshaling -- and
+# internally does an atomic increment. Theoretically the
+# bulk of the cost is the increment, not the call/ret.
+#
+# Doing the work on the managed side would still need the same atomic
+# increment, but would save the transition to/from native code.
+# We could also count thread-locally and occasionally sweep.
+#
+# optbase is optimized baseline
+# opt is optimized local changes
+
+# Run $cmd, reading its output through a pipe, and return the digits that
+# follow " items in " on the first output line that contains them.
+# Returns nothing (empty list / undef) if no such line is printed.
+# Dies if the command cannot be started.
+sub run
+{
+	my $cmd = shift;
+	open(my $pipe, "-|", $cmd) || die("unable to run $cmd: $!");
+	while (my $line = <$pipe>)
+	{
+		if ($line =~ / items in (\d+)/)
+		{
+			return $1;
+		}
+	}
+	# Explicit bare return: the value of a sub that ends in a loop is
+	# unspecified, so make the "no match" result well defined.
+	return;
+}
+
+# Run $cmd $count times via run() and return a reference to the array of
+# collected results. No warm-up run is discarded here; report() later
+# throws out the slowest and fastest samples.
+sub runloop
+{
+	my ($cmd, $count) = @_;
+	my @samples;
+	my $done = 0;
+	while ($done < $count)
+	{
+		push(@samples, run($cmd));
+		++$done;
+	}
+	return \@samples;
+}
+
+# Print summary statistics for one set of timings.
+#   $name - label printed in front of the data
+#   $data - array reference of numeric samples; it is sorted in place and
+#           its smallest and largest elements are removed before averaging
+# Prints the remaining samples, their average and their (population)
+# standard deviation on a single line.
+sub report
+{
+	my ($name, $data) = @_;
+
+	@{$data} = sort { $a <=> $b } @{$data}; # sort numerically
+	# Dereference explicitly: shift/pop on an array reference was an
+	# experimental feature and is a fatal error on perl 5.24 and later.
+	shift(@{$data}); # remove the smallest sample
+	pop(@{$data});   # remove the largest sample
+
+	my $n = scalar(@{$data});
+
+	print("data $name:$n:");
+	if ($n == 0)
+	{
+		# Two or fewer samples were collected; avoid dividing by zero.
+		print("no samples left after trimming\n");
+		return;
+	}
+
+	my $sum = 0;
+	for my $i (@{$data})
+	{
+		print("$i ");
+		$sum += $i;
+	}
+	my $avg = $sum / $n;
+	print("avg:$avg ");
+
+	# Sum of squared deviations from the mean. Named so that it does not
+	# shadow the special sort variables $a and $b.
+	my $sumsq = 0;
+	for my $i (@{$data})
+	{
+		my $delta = $i - $avg;
+		$sumsq += $delta * $delta;
+	}
+	# Standard deviation is the square root of the mean squared deviation.
+	my $dev = sqrt($sumsq / $n);
+	print("stddev: $dev");
+	print("\n");
+}
+
+# Show usage if any argument is a help request. The patterns are anchored
+# at both ends so that a command which merely starts with "h" or "usage"
+# (e.g. a relative path) is not mistaken for a help flag.
+for (@ARGV)
+{
+	if (/^-{0,2}(?:help|h|\?|usage)$/i)
+	{
+		print("usage: perl $0 private_command baseline_command iterations\n");
+		print(" (quote each command so that it is passed as a single argument)\n");
+		print(" e.g. perl $0"
+			  . " \"/inst/monoopt/bin/mono /dev2/monoopt/mcs/class/corlib/Test/System.Threading/icallperf.exe\""
+		      . " \"/inst/monooptbase/bin/mono /dev2/monoopt/mcs/class/corlib/Test/System.Threading/icallperf.exe\""
+		      . " 20\n");
+		exit(0);
+	}
+}
+
+# Positional arguments, each with a default: the command for the build with
+# local changes, the command for the baseline build, and the number of runs.
+my $optcmd = shift || "/inst/monoopt/bin/mono /dev2/monoopt/mcs/class/corlib/Test/System.Threading/icallperf.exe";
+my $optbasecmd = shift || "/inst/monooptbase/bin/mono /dev2/monoopt/mcs/class/corlib/Test/System.Threading/icallperf.exe";
+my $n = shift || 20;
+
+my $optdata = runloop($optcmd, $n);
+my $optbasedata = runloop($optbasecmd, $n);
+
+report("opt", $optdata);
+report("base", $optbasedata);

+ 8 - 1
mcs/class/referencesource/mscorlib/system/threading/threadpool.cs

@@ -675,7 +675,9 @@ namespace System.Threading
                     }
                 }
             }
-
+#if MONO
+            ThreadPool.NotifyWorkItemQueued();
+#endif
             EnsureThreadRequested();
         }
 
@@ -1897,6 +1899,11 @@ namespace System.Threading
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         internal static extern void NotifyWorkItemProgressNative();
 
+        // Runtime-implemented internal call, invoked once per queued work item
+        // (see the MONO-only call site earlier in this file).
+        [System.Security.SecurityCritical]
+        [ResourceExposure(ResourceScope.None)]
+        [MethodImplAttribute(MethodImplOptions.InternalCall)]
+        internal static extern void NotifyWorkItemQueued();
+
         [System.Security.SecurityCritical]  // auto-generated
         [ResourceExposure(ResourceScope.None)]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]

+ 3 - 2
mono/metadata/icall-def.h

@@ -9,7 +9,7 @@
  * typeid must be a C symbol name unique to the type, don't worry about namespace
  * 	pollution, since it will be automatically prefixed to avoid it.
  * typename is a C string containing the full name of the type
- * first_icall_id s the symbol ID of the first internal call of the declared
+ * first_icall_id is the symbol ID of the first internal call of the declared
  * 	type (see below)
  *
  * The list of internal calls of the methods of a type must follow the
@@ -18,7 +18,7 @@
  * 	ICALL(icallid, methodname, cfuncptr)
  *
  * icallid must be a C symbol, unique for each icall defined in this file and
- * tipically equal to the typeid + '_' + a sequential number.
+ * typically equal to the typeid + '_' + a sequential number.
  * methodname is a C string defining the method name and the optional signature
  * (the signature is required only when several internal calls in the type
  * have the same name)
@@ -994,6 +994,7 @@ ICALL(THREADP_5, "InitializeVMTp", ves_icall_System_Threading_ThreadPool_Initial
 ICALL(THREADP_6, "IsThreadPoolHosted", ves_icall_System_Threading_ThreadPool_IsThreadPoolHosted)
 ICALL(THREADP_7, "NotifyWorkItemComplete", ves_icall_System_Threading_ThreadPool_NotifyWorkItemComplete)
 ICALL(THREADP_8, "NotifyWorkItemProgressNative", ves_icall_System_Threading_ThreadPool_NotifyWorkItemProgressNative)
+ICALL(THREADP_8m, "NotifyWorkItemQueued", ves_icall_System_Threading_ThreadPool_NotifyWorkItemQueued)
 ICALL(THREADP_9, "PostQueuedCompletionStatus", ves_icall_System_Threading_ThreadPool_PostQueuedCompletionStatus)
 ICALL(THREADP_11, "ReportThreadStatus", ves_icall_System_Threading_ThreadPool_ReportThreadStatus)
 ICALL(THREADP_12, "RequestWorkerThread", ves_icall_System_Threading_ThreadPool_RequestWorkerThread)

+ 2 - 2
mono/metadata/mono-perfcounters.c

@@ -993,10 +993,10 @@ mono_mem_counter (ImplVtable *vtable, MonoBoolean only_value, MonoCounterSample
 		sample->rawValue = 0;
 		return TRUE;
 	case COUNTER_MEM_PHYS_TOTAL:
-		sample->rawValue = mono_determine_physical_ram_size ();;
+		sample->rawValue = mono_determine_physical_ram_size ();
 		return TRUE;
 	case COUNTER_MEM_PHYS_AVAILABLE:
-		sample->rawValue = mono_determine_physical_ram_available_size ();;
+		sample->rawValue = mono_determine_physical_ram_available_size ();
 		return TRUE;
 	}
 	return FALSE;

+ 4 - 0
mono/metadata/threadpool-worker-default.c

@@ -566,6 +566,10 @@ worker_try_create (void)
 		return FALSE;
 	}
 
+#ifndef DISABLE_PERFCOUNTERS
+	mono_atomic_inc_i32 (&mono_perfcounters->threadpool_threads);
+#endif
+
 	worker.worker_creation_current_count += 1;
 
 	mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_THREADPOOL, "[%p] try create worker, created %p, now = %d count = %d",

+ 8 - 0
mono/metadata/threadpool.c

@@ -736,6 +736,14 @@ ves_icall_System_Threading_ThreadPool_NotifyWorkItemProgressNative (void)
 	mono_threadpool_worker_notify_completed ();
 }
 
+/*
+ * Internal call registered for the managed "NotifyWorkItemQueued" method.
+ * Atomically increments the threadpool_workitems performance counter.
+ * Compiles to an empty function when DISABLE_PERFCOUNTERS is defined.
+ */
+void
+ves_icall_System_Threading_ThreadPool_NotifyWorkItemQueued (void)
+{
+#ifndef DISABLE_PERFCOUNTERS
+	mono_atomic_inc_i64 (&mono_perfcounters->threadpool_workitems);
+#endif
+}
+
 void
 ves_icall_System_Threading_ThreadPool_ReportThreadStatus (MonoBoolean is_working)
 {

+ 2 - 0
mono/metadata/threadpool.h

@@ -46,6 +46,8 @@ ves_icall_System_Threading_ThreadPool_NotifyWorkItemComplete (void);
 void
 ves_icall_System_Threading_ThreadPool_NotifyWorkItemProgressNative (void);
 void
+ves_icall_System_Threading_ThreadPool_NotifyWorkItemQueued (void);
+void
 ves_icall_System_Threading_ThreadPool_ReportThreadStatus (MonoBoolean is_working);
 MonoBoolean
 ves_icall_System_Threading_ThreadPool_RequestWorkerThread (void);