From c87ccb33f7232727dbf7bb32589c3767fb184238 Mon Sep 17 00:00:00 2001 From: alexcjohnson Date: Fri, 24 Aug 2018 15:35:18 -0400 Subject: [PATCH] Lib.sort faster sort of already-sorted arrays with minimal penalty for unsorted arrays --- src/lib/index.js | 1 + src/lib/search.js | 41 +++++++++ test/jasmine/tests/lib_test.js | 158 +++++++++++++++++++++++++++++++++ 3 files changed, 200 insertions(+) diff --git a/src/lib/index.js b/src/lib/index.js index cd9f7e2ad6b..0448e9599ba 100644 --- a/src/lib/index.js +++ b/src/lib/index.js @@ -62,6 +62,7 @@ lib.sorterAsc = searchModule.sorterAsc; lib.sorterDes = searchModule.sorterDes; lib.distinctVals = searchModule.distinctVals; lib.roundUp = searchModule.roundUp; +lib.sort = searchModule.sort; var statsModule = require('./stats'); lib.aggNums = statsModule.aggNums; diff --git a/src/lib/search.js b/src/lib/search.js index d2684f4b79e..6e626712dbf 100644 --- a/src/lib/search.js +++ b/src/lib/search.js @@ -113,3 +113,44 @@ exports.roundUp = function(val, arrayIn, reverse) { } return arrayIn[low]; }; + +/** + * Tweak to Array.sort(sortFn) that improves performance for pre-sorted arrays + * + * Motivation: sometimes we need to sort arrays but the input is likely to + * already be sorted. Browsers don't seem to pick up on pre-sorted arrays, + * and in fact Chrome is actually *slower* sorting pre-sorted arrays than purely + * random arrays. FF is at least faster if the array is pre-sorted, but still + * not as fast as it could be. + * Here's how this plays out sorting a length-1e6 array: + * + * Calls to Sort FN | Chrome bare | FF bare | Chrome tweak | FF tweak + * ------------------+---------------+-----------+----------------+------------ + * ordered | 30.4e6 | 10.1e6 | 1e6 | 1e6 + * reversed | 29.4e6 | 9.9e6 | 1e6 + reverse | 1e6 + reverse + * random | ~21e6 | ~18.7e6 | ~21e6 | ~18.7e6 + * + * So this is a substantial win for pre-sorted (ordered or exactly reversed) + * arrays. Including this wrapper on an unsorted array adds a penalty that will + * in general be only a few calls to the sort function. The only case this + * penalty will be significant is if the array is mostly sorted but there are + * a few unsorted items near the end, but the penalty is still at most N calls + * out of (for N=1e6) ~20N total calls + * + * @param {Array} array: the array, to be sorted in place + * @param {function} sortFn: As in Array.sort, function(a, b) that puts + * item a before item b if the return is negative, a after b if positive, + * and no change if zero. + * @return {Array}: the original array, sorted in place. + */ +exports.sort = function(array, sortFn) { + var notOrdered = 0; + var notReversed = 0; + for(var i = 1; i < array.length; i++) { + var pairOrder = sortFn(array[i], array[i - 1]); + if(pairOrder < 0) notOrdered = 1; + else if(pairOrder > 0) notReversed = 1; + if(notOrdered && notReversed) return array.sort(sortFn); + } + return notReversed ? array : array.reverse(); +}; diff --git a/test/jasmine/tests/lib_test.js b/test/jasmine/tests/lib_test.js index dffebe8ae9a..e4ed92e3a3c 100644 --- a/test/jasmine/tests/lib_test.js +++ b/test/jasmine/tests/lib_test.js @@ -2189,6 +2189,164 @@ describe('Test lib.js:', function() { }); }); + describe('sort', function() { + var callCount; + beforeEach(function() { + callCount = 0; + }); + + function sortCounter(a, b) { + callCount++; + return a - b; + } + + function sortCounterReversed(a, b) { + callCount++; + return b - a; + } + + function ascending(n) { + var out = new Array(n); + for(var i = 0; i < n; i++) { + out[i] = i; + } + assertAscending(out); + return out; + } + + function descending(n) { + var out = new Array(n); + for(var i = 0; i < n; i++) { + out[i] = n - 1 - i; + } + assertDescending(out); + return out; + } + + function rand(n) { + Lib.seedPseudoRandom(); + var out = new Array(n); + for(var i = 0; i < n; i++) { + out[i] = Lib.pseudoRandom(); + } + return out; + } + + function assertAscending(array) { + for(var i = 1; i < array.length; i++) { + if(array[i] < array[i - 1]) { + // we already know this expect will fail, + // just want to format the message nicely and then + // quit so we don't get a million messages + expect(array[i]).not.toBeLessThan(array[i - 1]); + break; + } + } + } + + function assertDescending(array) { + for(var i = 1; i < array.length; i++) { + if(array[i] < array[i - 1]) { + expect(array[i]).not.toBeGreaterThan(array[i - 1]); + break; + } + } + } + + function _sort(array, sortFn) { + var arrayOut = Lib.sort(array, sortFn); + expect(arrayOut).toBe(array); + return array; + } + + it('sorts ascending arrays ascending in N-1 calls', function() { + var arrayIn = _sort(ascending(100000), sortCounter); + expect(callCount).toBe(99999); + assertAscending(arrayIn); + }); + + it('sorts descending arrays ascending in N-1 calls', function() { + var arrayIn = _sort(descending(100000), sortCounter); + expect(callCount).toBe(99999); + assertAscending(arrayIn); + }); + + it('sorts ascending arrays descending in N-1 calls', function() { + var arrayIn = _sort(ascending(100000), sortCounterReversed); + expect(callCount).toBe(99999); + assertDescending(arrayIn); + }); + + it('sorts descending arrays descending in N-1 calls', function() { + var arrayIn = _sort(descending(100000), sortCounterReversed); + expect(callCount).toBe(99999); + assertDescending(arrayIn); + }); + + it('sorts random arrays ascending in a few more calls than bare sort', function() { + var arrayIn = _sort(rand(100000), sortCounter); + assertAscending(arrayIn); + + var ourCallCount = callCount; + callCount = 0; + rand(100000).sort(sortCounter); + // in general this will be ~N*log_2(N) + expect(callCount).toBeGreaterThan(1e6); + // This number (2) is only repeatable because we used Lib.pseudoRandom + // should always be at least 2 and less than N - 1, and if + // the input array is really not sorted it will be close to 2. It will + // only be large if the array is sorted until near the end. + expect(ourCallCount - callCount).toBe(2); + }); + + it('sorts random arrays descending in a few more calls than bare sort', function() { + var arrayIn = _sort(rand(100000), sortCounterReversed); + assertDescending(arrayIn); + + var ourCallCount = callCount; + callCount = 0; + rand(100000).sort(sortCounterReversed); + expect(callCount).toBeGreaterThan(1e6); + expect(ourCallCount - callCount).toBe(2); + }); + + it('supports short arrays', function() { + expect(_sort([], sortCounter)).toEqual([]); + expect(_sort([1], sortCounter)).toEqual([1]); + expect(callCount).toBe(0); + + expect(_sort([1, 2], sortCounter)).toEqual([1, 2]); + expect(_sort([2, 3], sortCounterReversed)).toEqual([3, 2]); + expect(callCount).toBe(2); + }); + + function dupes() { + return [0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 6, 7, 8, 9]; + } + + it('still short-circuits in order with duplicates', function() { + expect(_sort(dupes(), sortCounter)) + .toEqual(dupes()); + + expect(callCount).toEqual(18); + + callCount = 0; + dupes().sort(sortCounter); + expect(callCount).toBeGreaterThan(18); + }); + + it('still short-circuits reversed with duplicates', function() { + expect(_sort(dupes(), sortCounterReversed)) + .toEqual(dupes().reverse()); + + expect(callCount).toEqual(18); + + callCount = 0; + dupes().sort(sortCounterReversed); + expect(callCount).toBeGreaterThan(18); + }); + }); + describe('relinkPrivateKeys', function() { it('ignores customdata and ids', function() { var fromContainer = {