Bläddra i källkod

adding itersorted(), a generator-driven lazy sorting function.

Josh Wilson 18 år sedan
förälder
incheckning
6e5a6da175
1 ändrade filer med 190 tillägg och 0 borttagningar
  1. 190 0
      direct/src/showbase/LazySort.py

+ 190 - 0
direct/src/showbase/LazySort.py

@@ -0,0 +1,190 @@
+from itertools import tee
+
try:
    # Python 2: keep using the builtin three-way comparison as the default.
    _default_cmp = cmp
except NameError:
    # Python 3 removed cmp(); supply an equivalent for orderable values.
    def _default_cmp(a, b):
        """Return -1, 0 or 1 as 'a' is less than, equal to or greater than 'b'."""
        return (a > b) - (a < b)


def itersorted(iterable, cmp = _default_cmp, key = lambda x: x, reverse = False):
    """
    This function returns a generator object that yields sorted items from
    'iterable'.

    It implements a form of lazy sorting that's most useful in two cases:
    1) When you only need the first few values in the sorted data.
    2) When you want to amortize the cost of the sort over the time
       you use the data.

    It is to be considered a 'stable sort', where values with equivalent
    sorting criteria maintain their relative order as it is in the input
    data set.  Every input item is yielded exactly once, including items
    whose keys compare equal.

    Parameters:
    iterable -- any iterable; it is consumed incrementally as values are
                requested from the returned generator.
    cmp      -- two-argument comparison called on pairs of key values.
                It returns a negative number, zero or a positive number
                when its first argument sorts before, equal to or after
                its second.  Only the sign of the result is used.
    key      -- one-argument function mapping an item to the value that
                'cmp' compares.  Defaults to the item itself.
    reverse  -- when true, the order defined by 'cmp' is reversed (equal
                items still keep their input order).
    """

    # Notes:
    # Understanding the concepts of 'left' and 'right' here is important.
    # 'left' values are those that are yielded earlier in the sort. So
    # each subsequent value yielded is 'to the right' of the previous one.
    # A stack is used to maintain sets of values who share the same key
    # value.  Each layer corresponds to one key.  During the traversals of
    # the input data, values are added to each layer in such a way that
    # they maintain their relative position (to others in the same layer)
    # from the original data.  This ensures a 'stable sort'.

    # Use this to easily switch the direction of the sort.
    direction = -1 if reverse else 1

    def side(k, ref):
        """
        Return a negative number if 'k' is 'to the left' of 'ref', zero if
        they are equivalent and a positive number if 'k' is 'to the right'.
        Normalising here means 'cmp' may return any magnitude.
        """
        result = cmp(k, ref)
        if result < 0:
            return -direction
        if result > 0:
            return direction
        return 0

    # Create our working structures
    stack = []      # holds a stack of 'layers', each a 3-item list of the
                    # form [key, data iterator, [values]].
                    # 'left' value layers are above 'right' ones.
    init = True     # True only during the first pass through the data,
                    # while there is no lLimit to compare against yet.
    lLimit = None   # key of the most recently yielded layer.

    # Create the base iterator that will track our
    # main progress through the data.
    pairs = ((key(x), x) for x in iterable)

    # Begin the main loop
    while True:
        # If the stack is empty, we must now seed it.
        # Advance the base iterator until we find a value 'to the right' of
        # anything we've yielded so far. (All values 'to the left' have
        # already been yielded)
        if not stack:
            try:
                k, val = next(pairs)
                # Every value equivalent to lLimit was yielded together
                # with it, so equivalent keys must be skipped as well as
                # those 'to the left'; seeding from one would yield it a
                # second time.
                while not init and side(k, lLimit) <= 0:
                    k, val = next(pairs)
            except StopIteration:
                # The base iterator is exhausted: everything has been
                # yielded, so end the generator explicitly.
                return

            # Place the found value as the initial stack value
            # (and store its iteration progress as well).
            pairs, resume = tee(pairs)
            stack.append([k, resume, [val]])

        # We now iterate through the data, starting where the value
        # at the top of the stack left off.
        layer = stack[-1]
        scan = layer[1]
        for k, val in scan:
            rel = side(k, layer[0])
            # If the next data element is 'to the left' of (or equal to)
            # the top of the stack and 'to the right' of the last element
            # yielded, add it to the stack.
            if rel <= 0 and (init or side(k, lLimit) > 0):
                # If it's 'to the left' of the current stack value,
                # make a new layer and add it to the top of the stack.
                # Otherwise, it's equivalent so we'll just append it
                # to the values in the top layer of the stack.
                if rel < 0:
                    # Remember where the layer being covered must resume.
                    # The second item returned by tee() is always an
                    # independent copy positioned at the current element,
                    # so this loop can keep consuming 'scan' itself.
                    layer[1] = tee(scan)[1]
                    # The new top layer is popped as soon as this scan
                    # ends, so its iterator slot is never resumed from.
                    stack.append([k, scan, []])
                    layer = stack[-1]
                layer[2].append(val)

        # Remove the initialization condition to enable lLimit checking.
        init = False

        # Whatever values that are on the top stack at this point are
        # the 'left-most' we've found that we haven't yet yielded. Yield
        # them in the order that we discovered them in the source data.
        # Define lLimit as the rightmost limit for values that have
        # already been yielded.  This will allow us to ignore these values
        # on future iterations.
        lLimit, _, vals = stack.pop()
        for val in vals:
            yield val
+
if __debug__:
    def pp(i):
        """Print each item of the iterable 'i' on its own line."""
        for x in i:
            # A single parenthesised argument prints the same text under
            # the Python 2 print statement and the Python 3 print function.
            print(x)

    from unittest import TestCase, main
    from random import shuffle
    from itertools import islice

    class LazySortTest(TestCase):
        """
        Compares itersorted() with the builtin sorted() for each
        combination of the 'cmp', 'key' and 'reverse' options.

        DATA is a shuffled list of RANGELEN (a, b) pairs built from two
        independently shuffled copies of range(RANGELEN), so no first
        element and no second element is repeated; these fixtures do not
        exercise duplicate keys.

        Run these tests with:
        > python LazySort.py
        """

        RANGELEN = 10   # number of pairs in DATA
        TESTLEN = 10    # number of leading sorted items that are compared

        # Materialise the sequences so shuffle() can reorder them in place
        # whether or not range() and zip() return lists.
        a = list(range(RANGELEN))
        b = list(range(RANGELEN))
        shuffle(a)
        shuffle(b)
        DATA = list(zip(a,b))
        shuffle(DATA)

        # Keep the temporaries out of the class namespace.
        del a
        del b

        def _assertMatchesSorted(self, **options):
            """
            Assert that the first TESTLEN items yielded by
            itersorted(DATA, **options) equal the first TESTLEN items of
            sorted(DATA, **options).
            """
            control = sorted(self.DATA, **options)
            variable = itersorted(self.DATA, **options)
            self.assertEqual(control[:self.TESTLEN],
                             list(islice(variable, self.TESTLEN)))

        def testRange(self):
            self._assertMatchesSorted()

        def testRangeCompare(self):
            self._assertMatchesSorted(cmp = lambda a,b: -cmp(a,b))

        def testRangeKey(self):
            self._assertMatchesSorted(key = lambda x: x[0])

        def testRangeReverse(self):
            self._assertMatchesSorted(reverse = True)

        def testRangeCompareKey(self):
            self._assertMatchesSorted(cmp = lambda a,b: -cmp(a,b),
                                      key = lambda x: x[0])

        def testRangeCompareReverse(self):
            self._assertMatchesSorted(cmp = lambda a,b: -cmp(a,b),
                                      reverse = True)

        def testRangeKeyReverse(self):
            # Sort on each element of the pairs in turn.
            self._assertMatchesSorted(key = lambda x: x[0], reverse = True)
            self._assertMatchesSorted(key = lambda x: x[1], reverse = True)

        def testRangeCompareKeyReverse(self):
            self._assertMatchesSorted(cmp = lambda a,b: -cmp(a,b),
                                      key = lambda x: x[0],
                                      reverse = True)


    if __name__ == '__main__':
        main() # unittest.main
+