Bläddra i källkod

Add extra size when resizing tables with deleted keys

Without this extra space, sequences of insertions/deletions (and
some other uses) can have unpexpected low performances.  See the
added tests for an example, and *Mathematical Models to Analyze Lua
Hybrid Tables and Why They Need a Fix* (Martínez, Nicaud, Rotondo;
arXiv:2208.13602v2) for detais.
Roberto Ierusalimschy 8 månader sedan
förälder
incheckning
9a91fe1640
2 ändrade filer med 82 tillägg och 6 borttagningar
  1. 21 5
      ltable.c
  2. 61 1
      testes/nextvar.lua

+ 21 - 5
ltable.c

@@ -461,6 +461,7 @@ static int keyinarray (Table *t, lua_Integer key) {
 ** Structure to count the keys in a table.
 ** Structure to count the keys in a table.
 ** 'total' is the total number of keys in the table.
 ** 'total' is the total number of keys in the table.
 ** 'na' is the number of *array indices* in the table (see 'arrayindex').
 ** 'na' is the number of *array indices* in the table (see 'arrayindex').
+** 'deleted' is true if there are deleted nodes in the hash part.
 ** 'nums' is a "count array" where 'nums[i]' is the number of integer
 ** 'nums' is a "count array" where 'nums[i]' is the number of integer
 ** keys between 2^(i - 1) + 1 and 2^i. Note that 'na' is the summation
 ** keys between 2^(i - 1) + 1 and 2^i. Note that 'na' is the summation
 ** of 'nums'.
 ** of 'nums'.
@@ -468,6 +469,7 @@ static int keyinarray (Table *t, lua_Integer key) {
 typedef struct {
 typedef struct {
   unsigned total;
   unsigned total;
   unsigned na;
   unsigned na;
+  int deleted;
   unsigned nums[MAXABITS + 1];
   unsigned nums[MAXABITS + 1];
 } Counters;
 } Counters;
 
 
@@ -560,14 +562,21 @@ static void numusearray (const Table *t, Counters *ct) {
 
 
 
 
 /*
 /*
-** Count keys in hash part of table 't'.
+** Count keys in hash part of table 't'. As this only happens during
+** a rehash, all nodes have been used. A node can have a nil value only
+** if it was deleted after being created.
 */
 */
 static void numusehash (const Table *t, Counters *ct) {
 static void numusehash (const Table *t, Counters *ct) {
   unsigned i = sizenode(t);
   unsigned i = sizenode(t);
   unsigned total = 0;
   unsigned total = 0;
   while (i--) {
   while (i--) {
     Node *n = &t->node[i];
     Node *n = &t->node[i];
-    if (!isempty(gval(n))) {
+    if (isempty(gval(n))) {
+      /* entry was deleted; key cannot be nil */
+      lua_assert(isdummy(t) || !keyisnil(n));
+      ct->deleted = 1;
+    }
+    else {
       total++;
       total++;
       if (keyisinteger(n))
       if (keyisinteger(n))
         countint(keyival(n), ct);
         countint(keyival(n), ct);
@@ -799,10 +808,12 @@ static void rehash (lua_State *L, Table *t, const TValue *ek) {
   unsigned asize;  /* optimal size for array part */
   unsigned asize;  /* optimal size for array part */
   Counters ct;
   Counters ct;
   unsigned i;
   unsigned i;
+  unsigned nsize;  /* size for the hash part */
   setlimittosize(t);
   setlimittosize(t);
   /* reset counts */
   /* reset counts */
   for (i = 0; i <= MAXABITS; i++) ct.nums[i] = 0;
   for (i = 0; i <= MAXABITS; i++) ct.nums[i] = 0;
   ct.na = 0;
   ct.na = 0;
+  ct.deleted = 0;
   ct.total = 1;  /* count extra key */
   ct.total = 1;  /* count extra key */
   if (ttisinteger(ek))
   if (ttisinteger(ek))
     countint(ivalue(ek), &ct);  /* extra key may go to array */
     countint(ivalue(ek), &ct);  /* extra key may go to array */
@@ -815,12 +826,17 @@ static void rehash (lua_State *L, Table *t, const TValue *ek) {
     numusearray(t, &ct);  /* count keys in array part */
     numusearray(t, &ct);  /* count keys in array part */
     asize = computesizes(&ct);  /* compute new size for array part */
     asize = computesizes(&ct);  /* compute new size for array part */
   }
   }
+  /* all keys not in the array part go to the hash part */
+  nsize = ct.total - ct.na;
+  if (ct.deleted) {  /* table has deleted entries? */
+    /* insertion-deletion-insertion: give hash some extra size to
+       avoid constant resizings */
+    nsize += nsize >> 2;
+  }
   /* resize the table to new computed sizes */
   /* resize the table to new computed sizes */
-  luaH_resize(L, t, asize, ct.total - ct.na);
+  luaH_resize(L, t, asize, nsize);
 }
 }
 
 
-
-
 /*
 /*
 ** }=============================================================
 ** }=============================================================
 */
 */

+ 61 - 1
testes/nextvar.lua

@@ -23,6 +23,12 @@ local function printTable (t)
   end
   end
 end
 end
 ----------------------------------------------------------------
 ----------------------------------------------------------------
+local function countentries (t)
+  local e = 0
+  for _ in pairs(t) do e = e + 1 end
+  return e
+end
+----------------------------------------------------------------
 
 
 
 
 local function check (t, na, nh)
 local function check (t, na, nh)
@@ -115,6 +121,24 @@ do   -- overflow (must wrap-around)
   assert(k == nil)
   assert(k == nil)
 end
 end
 
 
+
+do
+  -- alternate insertions and deletions in an almost full hash.
+  -- In versions pre-5.5, that causes constant rehashings and
+  -- takes a long time to complete.
+  local a = {}
+  for i = 1, 2^11 - 1 do
+    a[i .. ""] = true
+  end
+
+  for i = 1, 1e5 do
+    local key = i .. "."
+    a[key] = true
+    a[key] = nil
+  end
+  assert(countentries(a) == 2^11 - 1)
+end
+
 if not T then
 if not T then
   (Message or print)
   (Message or print)
     ('\n >>> testC not active: skipping tests for table sizes <<<\n')
     ('\n >>> testC not active: skipping tests for table sizes <<<\n')
@@ -202,6 +226,23 @@ for i = 1,lim do
   check(a, 0, mp2(i))
   check(a, 0, mp2(i))
 end
 end
 
 
+
+-- insert and delete elements until a rehash occurr. Caller must ensure
+-- that a rehash will change the shape of the table. Must repeat because
+-- the insertion may collide with the deleted element, and then there is
+-- no rehash.
+local function forcerehash (t)
+  local na, nh = T.querytab(t)
+  local i = 10000
+  repeat
+    i = i + 1
+    t[i] = true
+    t[i] = undef
+    local nna, nnh = T.querytab(t)
+  until nna ~= na or nnh ~= nh
+end
+
+
 do
 do
   local a = {}
   local a = {}
   for i=1,16 do a[i] = i end
   for i=1,16 do a[i] = i end
@@ -212,7 +253,7 @@ do
   a[30] = undef
   a[30] = undef
   check(a, 0, 8)   -- 5 elements in the hash part: [12]-[16]
   check(a, 0, 8)   -- 5 elements in the hash part: [12]-[16]
   a[10] = 1
   a[10] = 1
-  for i=30,50 do a[i] = true; a[i] = undef end   -- force a rehash
+  forcerehash(a)
   check(a, 16, 1)
   check(a, 16, 1)
   for i=1,14 do a[i] = true; a[i] = undef end
   for i=1,14 do a[i] = true; a[i] = undef end
   check(a, 16, 1)   -- no rehash...
   check(a, 16, 1)   -- no rehash...
@@ -242,6 +283,25 @@ do     -- "almost sparse" arrays
 end
 end
 
 
 
 
+do
+  -- alternate insertions and deletions should give some extra
+  -- space for the hash part. Otherwise, a mix of insertions/deletions
+  -- could cause too many rehashes. (See the other test for "alternate
+  -- insertions and deletions" in this file.)
+  local a = {}
+  for i = 1, 256 do
+    a[i .. ""] = true
+  end
+  check(a, 0, 256)    -- hash part is full
+  a["256"] = nil    -- delete a key
+  forcerehash(a)
+  -- table has only 255 elements, but it got some extra space;
+  -- otherwise, almost each delete-insert would rehash the table again.
+  assert(countentries(a) == 255)
+  check(a, 0, 512)
+end
+
+
 -- size tests for vararg
 -- size tests for vararg
 lim = 35
 lim = 35
 local function foo (n, ...)
 local function foo (n, ...)