TimSort Java源码个人解读

   1 /*JDK 1.8
   2  */
   3 
   4 package java.util;
   5 
   6 /**
   7  * A stable, adaptive, iterative mergesort that requires far fewer than
   8  * n lg(n) comparisons when running on partially sorted arrays, while
   9  * offering performance comparable to a traditional mergesort when run
  10  * on random arrays.  Like all proper mergesorts, this sort is stable and
  11  * runs O(n log n) time (worst case).  In the worst case, this sort requires
  12  * temporary storage space for n/2 object references; in the best case,
  13  * it requires only a small constant amount of space.
  14  *
  15  * This implementation was adapted from Tim Peters's list sort for
  16  * Python, which is described in detail here:
  17  *
  18  *   http://svn.python.org/projects/python/trunk/Objects/listsort.txt
  19  *
  20  * Tim's C code may be found here:
  21  *
  22  *   http://svn.python.org/projects/python/trunk/Objects/listobject.c
  23  *
  24  * The underlying techniques are described in this paper (and may have
  25  * even earlier origins):
  26  *
  27  *  "Optimistic Sorting and Information Theoretic Complexity"
  28  *  Peter McIlroy
  29  *  SODA (Fourth Annual ACM-SIAM Symposium on Discrete Algorithms),
  30  *  pp 467-474, Austin, Texas, 25-27 January 1993.
  31  *
  32  * While the API to this class consists solely of static methods, it is
  33  * (privately) instantiable; a TimSort instance holds the state of an ongoing
  34  * sort, assuming the input array is large enough to warrant the full-blown
  35  * TimSort. Small arrays are sorted in place, using a binary insertion sort.
  36  *
  37  * @author Josh Bloch
  38  */
  39 class TimSort<T> {
  40     /**
  41      * This is the minimum sized sequence that will be merged.  Shorter
  42      * sequences will be lengthened by calling binarySort.  If the entire
  43      * array is less than this length, no merges will be performed.
  44      *
  45      * This constant should be a power of two.  It was 64 in Tim Peter's C
  46      * implementation, but 32 was empirically determined to work better in
  47      * this implementation.  In the unlikely event that you set this constant
  48      * to be a number that's not a power of two, you'll need to change the
  49      * {@link #minRunLength} computation.
  50      *
  51      * If you decrease this constant, you must change the stackLen
  52      * computation in the TimSort constructor, or you risk an
  53      * ArrayOutOfBounds exception.  See listsort.txt for a discussion
  54      * of the minimum stack length required as a function of the length
  55      * of the array being sorted and the minimum merge sequence length.
  56      */
  57     private static final int MIN_MERGE = 32;
  58 
  59     /**
  60      * The array being sorted.
  61      */
  62     private final T[] a;
  63 
  64     /**
  65      * The comparator for this sort.
  66      */
  67     private final Comparator<? super T> c;
  68 
  69     /**
  70      * When we get into galloping mode, we stay there until both runs win less
  71      * often than MIN_GALLOP consecutive times.
  72      */
  73     private static final int  MIN_GALLOP = 7;
  74 
  75     /**
  76      * This controls when we get *into* galloping mode.  It is initialized
  77      * to MIN_GALLOP.  The mergeLo and mergeHi methods nudge it higher for
  78      * random data, and lower for highly structured data.
  79      */
  80     private int minGallop = MIN_GALLOP;
  81 
  82     /**
  83      * Maximum initial size of tmp array, which is used for merging.  The array
  84      * can grow to accommodate demand.
  85      *
  86      * Unlike Tim's original C version, we do not allocate this much storage
  87      * when sorting smaller arrays.  This change was required for performance.
  88      */
  89     private static final int INITIAL_TMP_STORAGE_LENGTH = 256;
  90 
  91     /**
  92      * Temp storage for merges. A workspace array may optionally be
  93      * provided in constructor, and if so will be used as long as it
  94      * is big enough.
  95      */
  96     private T[] tmp;
  97     private int tmpBase; // base of tmp array slice
  98     private int tmpLen;  // length of tmp array slice
  99 
 100     /**
 101      * A stack of pending runs yet to be merged.  Run i starts at
 102      * address base[i] and extends for len[i] elements.  It's always
 103      * true (so long as the indices are in bounds) that:
 104      *
 105      *     runBase[i] + runLen[i] == runBase[i + 1]
 106      *
 107      * so we could cut the storage for this, but it's a minor amount,
 108      * and keeping all the info explicit simplifies the code.
 109      */
 110     private int stackSize = 0;  // Number of pending runs on stack
 111     private final int[] runBase;
 112     private final int[] runLen;
 113 
 114     /**
 115      * Creates a TimSort instance to maintain the state of an ongoing sort.
 116      *
 117      * @param a the array to be sorted
 118      * @param c the comparator to determine the order of the sort
 119      * @param work a workspace array (slice)
 120      * @param workBase origin of usable space in work array
 121      * @param workLen usable size of work array
 122      */
 123     private TimSort(T[] a, Comparator<? super T> c, T[] work, int workBase, int workLen) {
 124         this.a = a;
 125         this.c = c;
 126 
 127         // Allocate temp storage (which may be increased later if necessary)
 128         int len = a.length;
 129         // 确定临时数组的长度, 如果低于默认值256的2倍, 则空间大小为原始数组a的长度乘以2, 否则为默认长度
 130         int tlen = (len < 2 * INITIAL_TMP_STORAGE_LENGTH) ?
 131             len >>> 1 : INITIAL_TMP_STORAGE_LENGTH;
 132         if (work == null || workLen < tlen || workBase + tlen > work.length) {
 133             @SuppressWarnings({"unchecked", "UnnecessaryLocalVariable"})
 134             T[] newArray = (T[])java.lang.reflect.Array.newInstance
 135                 (a.getClass().getComponentType(), tlen);
 136             tmp = newArray;
 137             tmpBase = 0;
 138             tmpLen = tlen;
 139         }
 140         else {
 141             // 当指定的work数组不为空, 且workLen大于计算出的tlen的长度, 并且work数组的有效长度大于tlen的长度时, 使用指定的临时数组
 142             tmp = work;
 143             tmpBase = workBase;
 144             tmpLen = workLen;
 145         }
 146 
 147         /*
 148          * Allocate runs-to-be-merged stack (which cannot be expanded).  The
 149          * stack length requirements are described in listsort.txt.  The C
 150          * version always uses the same stack length (85), but this was
 151          * measured to be too expensive when sorting "mid-sized" arrays (e.g.,
 152          * 100 elements) in Java.  Therefore, we use smaller (but sufficiently
 153          * large) stack lengths for smaller arrays.  The "magic numbers" in the
 154          * computation below must be changed if MIN_MERGE is decreased.  See
 155          * the MIN_MERGE declaration above for more information.
 156          * The maximum value of 49 allows for an array up to length
 157          * Integer.MAX_VALUE-4, if array is filled by the worst case stack size
 158          * increasing scenario. More explanations are given in section 4 of:
 159          * http://envisage-project.eu/wp-content/uploads/2015/02/sorting.pdf
 160          */
 161         int stackLen = (len <    120  ?  5 :
 162                         len <   1542  ? 10 :
 163                         len < 119151  ? 24 : 49);
 164         runBase = new int[stackLen];
 165         runLen = new int[stackLen];
 166     }
 167 
 168     /*
 169      * The next method (package private and static) constitutes the
 170      * entire API of this class.
 171      */
 172 
 173     /**
 174      * Sorts the given range, using the given workspace array slice
 175      * for temp storage when possible. This method is designed to be
 176      * invoked from public methods (in class Arrays) after performing
 177      * any necessary array bounds checks and expanding parameters into
 178      * the required forms.
 179      *
 180      * @param a the array to be sorted
 181      * @param lo the index of the first element, inclusive, to be sorted
 182      * @param hi the index of the last element, exclusive, to be sorted
 183      * @param c the comparator to use
 184      * @param work a workspace array (slice)
 185      * @param workBase origin of usable space in work array
 186      * @param workLen usable size of work array
 187      * @since 1.8
 188      */
 189     static <T> void sort(T[] a, int lo, int hi, Comparator<? super T> c,
 190                          T[] work, int workBase, int workLen) {
 191         assert c != null && a != null && lo >= 0 && lo <= hi && hi <= a.length;
 192 
 193         int nRemaining  = hi - lo;        //总共的待排序的元素个数
 194         if (nRemaining < 2)
 195             return;  // Arrays of size 0 and 1 are always sorted
 196 
 197         // If array is small, do a "mini-TimSort" with no merges
 198         // 当元素个数小于7时, 使用折半插入排序, 因为插入排序对于元素个数少的数组更快。
 199         if (nRemaining < MIN_MERGE) {
 200             // initRunLen是初始的有序的元素的个数,而从索引位置lo + initRunLen开始, 后面为乱序的元素。 一种优化方法, lo + initRunLen之前的不用排序了
 201             int initRunLen = countRunAndMakeAscending(a, lo, hi, c);
 202             // 折半插入排序
 203             binarySort(a, lo, hi, lo + initRunLen, c);
 204             return;
 205         }
 206 
 207         /**
 208          * March over the array once, left to right, finding natural runs,
 209          * extending short natural runs to minRun elements, and merging runs
 210          * to maintain stack invariant.
 211          */
 212          // 新建一个TimSort实例, 存储运行时的状态, 比如临时的run(一个run即一个有序的数组)
 213         TimSort<T> ts = new TimSort<>(a, c, work, workBase, workLen);
 214         // 查找一个minRun值, 小于minRun时使用折半插入排序,MIN_MERGE/2 <= minRun <= MIN_MERGE
 215         int minRun = minRunLength(nRemaining);
 216         do {
 217             // Identify next run
 218             // 找到下一个有序的数组的长度
 219             int runLen = countRunAndMakeAscending(a, lo, hi, c);
 220 
 221             // If run is short, extend to min(minRun, nRemaining)
 222             if (runLen < minRun) {
 223                 // 使用折半插入排序扩展数组, 使之达到minRun, 因为元素个数小于minRun, 折半插入排序更快速
 224                 int force = nRemaining <= minRun ? nRemaining : minRun;
 225                 binarySort(a, lo, lo + force, lo + runLen, c);
 226                 runLen = force;
 227             }
 228 
 229             // Push run onto pending-run stack, and maybe merge
 230             // 把这个有序数组压入栈中
 231             ts.pushRun(lo, runLen);
 232             /**
 233             * 判断当 
 234             *    runLen[i - 3] <= runLen[i - 2] + runLen[i - 1]
 235             *        且 runLen[i-3] < runLen[i-1]时
 236              *   或
 237             *    runLen[i - 2] <= runLen[i - 1]
 238             *    合并较小的两个有序数组, 以达到最大的平衡(即每个数组大小基本相同)
 239             */
 240             ts.mergeCollapse();
 241 
 242             // Advance to find next run
 243             lo += runLen;
 244             nRemaining -= runLen;
 245         } while (nRemaining != 0);
 246 
 247         // Merge all remaining runs to complete sort
 248         assert lo == hi;
 249         //合并剩余数组
 250         ts.mergeForceCollapse();
 251         assert ts.stackSize == 1;
 252     }
 253 
 254     /**
 255      * Sorts the specified portion of the specified array using a binary
 256      * insertion sort.  This is the best method for sorting small numbers
 257      * of elements.  It requires O(n log n) compares, but O(n^2) data
 258      * movement (worst case).
 259      *
 260      * If the initial part of the specified range is already sorted,
 261      * this method can take advantage of it: the method assumes that the
 262      * elements from index {@code lo}, inclusive, to {@code start},
 263      * exclusive are already sorted.
 264      *
 265      * @param a the array in which a range is to be sorted
 266      * @param lo the index of the first element in the range to be sorted
 267      * @param hi the index after the last element in the range to be sorted
 268      * @param start the index of the first element in the range that is
 269      *        not already known to be sorted ({@code lo <= start <= hi})
 270      * @param c comparator to used for the sort
 271      */
 272     @SuppressWarnings("fallthrough")
 273     private static <T> void binarySort(T[] a, int lo, int hi, int start,
 274                                        Comparator<? super T> c) {
 275         assert lo <= start && start <= hi;
 276         // start之前的有序元素直接略过
 277         if (start == lo)
 278             start++;
 279         // 从start到hi, 使用折半插入排序进行数组排序
 280         for ( ; start < hi; start++) {
 281             //待插入的元素
 282             T pivot = a[start];
 283 
 284             // Set left (and right) to the index where a[start] (pivot) belongs
 285             // 从left到right, 找到插入位置
 286             int left = lo;
 287             int right = start;
 288             assert left <= right;
 289             /*
 290              * Invariants:
 291              *   pivot >= all in [lo, left).
 292              *   pivot <  all in [right, start).
 293              */
 294             while (left < right) {
 295                 int mid = (left + right) >>> 1;
 296                 if (c.compare(pivot, a[mid]) < 0)
 297                     right = mid;
 298                 else
 299                     left = mid + 1;
 300             }
 301             // left即为最终的插入位置, 因为start>=lo && start <=hi, 所以最终一定会找到一个位置使得pivot>=a[mid], 因此最终一定是pivot >= right, 因此最终为left的位置, 即mid+1
 302             assert left == right;
 303 
 304             /*
 305              * The invariants still hold: pivot >= all in [lo, left) and
 306              * pivot < all in [left, start), so pivot belongs at left.  Note
 307              * that if there are elements equal to pivot, left points to the
 308              * first slot after them -- that's why this sort is stable.
 309              * Slide elements over to make room for pivot.
 310              */
 311             int n = start - left;  // The number of elements to move
 312             // Switch is just an optimization for arraycopy in default case
 313             switch (n) {
 314                 case 2:  a[left + 2] = a[left + 1]; // 如果待移动元素个数小于等于2则直接移动
 315                 case 1:  a[left + 1] = a[left];
 316                          break;
 317                 default: System.arraycopy(a, left, a, left + 1, n);  // 从left开始往后移, 然后把start位置的元素插入到原来的left的位置
 318             }
 319             a[left] = pivot;
 320         }
 321     }
 322 
 323     /**
 324      * Returns the length of the run beginning at the specified position in
 325      * the specified array and reverses the run if it is descending (ensuring
 326      * that the run will always be ascending when the method returns).
 327      *
 328      * A run is the longest ascending sequence with:
 329      *
 330      *    a[lo] <= a[lo + 1] <= a[lo + 2] <= ...
 331      *
 332      * or the longest descending sequence with:
 333      *
 334      *    a[lo] >  a[lo + 1] >  a[lo + 2] >  ...
 335      *
 336      * For its intended use in a stable mergesort, the strictness of the
 337      * definition of "descending" is needed so that the call can safely
 338      * reverse a descending sequence without violating stability.
 339      *
 340      * @param a the array in which a run is to be counted and possibly reversed
 341      * @param lo index of the first element in the run
 342      * @param hi index after the last element that may be contained in the run.
 343               It is required that {@code lo < hi}.
 344      * @param c the comparator to used for the sort
 345      * @return  the length of the run beginning at the specified position in
 346      *          the specified array
 347      */
 348     private static <T> int countRunAndMakeAscending(T[] a, int lo, int hi,
 349                                                     Comparator<? super T> c) {
 350         assert lo < hi;
 351         int runHi = lo + 1;
 352         if (runHi == hi)
 353             return 1;  // lo < hi, 且lo + 1 = hi, 因此是有序且升序的, 直接返回
 354 
 355         // Find end of run, and reverse range if descending
 356         if (c.compare(a[runHi++], a[lo]) < 0) { // Descending
 357             // 如果是降序的, 找到最长的有序降序序列的长度, 并且把序列倒置, 使之升序
 358             while (runHi < hi && c.compare(a[runHi], a[runHi - 1]) < 0)
 359                 runHi++;
 360             reverseRange(a, lo, runHi);
 361         } else {                              // Ascending
 362             // 如果是升序的, 同样找到最长的有序序列的长度
 363             while (runHi < hi && c.compare(a[runHi], a[runHi - 1]) >= 0)
 364                 runHi++;
 365         }
 366 
 367         // 返回有序序列的长度
 368         return runHi - lo;
 369     }
 370 
 371     /**
 372      * Reverse the specified range of the specified array.
 373      *
 374      * @param a the array in which a range is to be reversed
 375      * @param lo the index of the first element in the range to be reversed
 376      * @param hi the index after the last element in the range to be reversed
 377      */
 378     private static void reverseRange(Object[] a, int lo, int hi) {
 379         hi--;
 380         // 首尾倒置
 381         while (lo < hi) {
 382             Object t = a[lo];
 383             a[lo++] = a[hi];
 384             a[hi--] = t;
 385         }
 386     }
 387 
 388     /**
 389      * Returns the minimum acceptable run length for an array of the specified
 390      * length. Natural runs shorter than this will be extended with
 391      * {@link #binarySort}.
 392      *
 393      * Roughly speaking, the computation is:
 394      *
 395      *  If n < MIN_MERGE, return n (it's too small to bother with fancy stuff).
 396      *  Else if n is an exact power of 2, return MIN_MERGE/2.
 397      *  Else return an int k, MIN_MERGE/2 <= k <= MIN_MERGE, such that n/k
 398      *   is close to, but strictly less than, an exact power of 2.
 399      *
 400      * For the rationale, see listsort.txt.
 401      *
 402      * @param n the length of the array to be sorted
 403      * @return the length of the minimum run to be merged
 404      */
 405     private static int minRunLength(int n) {
 406         assert n >= 0;
 407         int r = 0;      // Becomes 1 if any 1 bits are shifted off
 408         while (n >= MIN_MERGE) {
 409             // n&1是判断n是能否被2整除, 如果不能被2整除, 最后一个Bit位一定是1,则1&1为1, r = r | 1 为1
 410             r |= (n & 1);
 411             n >>= 1;
 412         }
 413         return n + r;
 414     }
 415 
 416     /**
 417      * Pushes the specified run onto the pending-run stack.
 418      *
 419      * @param runBase index of the first element in the run
 420      * @param runLen  the number of elements in the run
 421      */
 422      // 把有序序列起始位置和长度放入栈中
 423     private void pushRun(int runBase, int runLen) {
 424         this.runBase[stackSize] = runBase;
 425         this.runLen[stackSize] = runLen;
 426         stackSize++;
 427     }
 428 
 429     /**
 430      * Examines the stack of runs waiting to be merged and merges adjacent runs
 431      * until the stack invariants are reestablished:
 432      *
 433      *     1. runLen[i - 3] > runLen[i - 2] + runLen[i - 1]
 434      *     2. runLen[i - 2] > runLen[i - 1]
 435      *
 436      * This method is called each time a new run is pushed onto the stack,
 437      * so the invariants are guaranteed to hold for i < stackSize upon
 438      * entry to the method.
 439      */
 440      // 判断合并栈顶的三个元素中较小的两个, 或如果第二个元素比第一个小, 则合并, 使栈中所有的序列大小达到近似相等
 441     private void mergeCollapse() {
 442         while (stackSize > 1) {
 443             int n = stackSize - 2;
 444             if (n > 0 && runLen[n-1] <= runLen[n] + runLen[n+1]) {
 445                 if (runLen[n - 1] < runLen[n + 1])
 446                     n--;
 447                 mergeAt(n);
 448             } else if (runLen[n] <= runLen[n + 1]) {
 449                 mergeAt(n);
 450             } else {
 451                 break; // Invariant is established
 452             }
 453         }
 454     }
 455 
 456     /**
 457      * Merges all runs on the stack until only one remains.  This method is
 458      * called once, to complete the sort.
 459      */
 460      // 最后合并栈中所有的序列, 直到最后只剩一个有序序列
 461     private void mergeForceCollapse() {
 462         while (stackSize > 1) {
 463             int n = stackSize - 2;
 464             // 如果runLen[n-1] < runLen[n+1], 则合并较小的较小的runBase[n-1]和runBase[n], 否则合并runBase[n]和runBase[n+1]
 465             if (n > 0 && runLen[n - 1] < runLen[n + 1])
 466                 n--;
 467             mergeAt(n);
 468         }
 469     }
 470 
 471     /**
 472      * Merges the two runs at stack indices i and i+1.  Run i must be
 473      * the penultimate or antepenultimate run on the stack.  In other words,
 474      * i must be equal to stackSize-2 or stackSize-3.
 475      *
 476      * @param i stack index of the first of the two runs to merge
 477      */
 478     private void mergeAt(int i) {
 479         assert stackSize >= 2;
 480         assert i >= 0;
 481         assert i == stackSize - 2 || i == stackSize - 3;
 482 
 483         int base1 = runBase[i];
 484         int len1 = runLen[i];
 485         int base2 = runBase[i + 1];
 486         int len2 = runLen[i + 1];
 487         assert len1 > 0 && len2 > 0;
 488         assert base1 + len1 == base2;
 489 
 490         /*
 491          * Record the length of the combined runs; if i is the 3rd-last
 492          * run now, also slide over the last run (which isn't involved
 493          * in this merge).  The current run (i+1) goes away in any case.
 494          */
 495         runLen[i] = len1 + len2;
 496         // 如果i是栈顶倒数第三个元素, 则最后i+1一定会合进i数组, 因此i+1的位置替换成i+2
 497         if (i == stackSize - 3) {
 498             runBase[i + 1] = runBase[i + 2];
 499             runLen[i + 1] = runLen[i + 2];
 500         }
 501         stackSize--;
 502 
 503         /*
 504          * Find where the first element of run2 goes in run1. Prior elements
 505          * in run1 can be ignored (because they're already in place).
 506          */
 507          // 找到run2的首元素在run1中的位置
 508         int k = gallopRight(a[base2], a, base1, len1, 0, c);
 509         assert k >= 0;
 510         // 忽略k之前的序列, 因为已经有序, 减少比较次数
 511         base1 += k;
 512         len1 -= k;
 513         if (len1 == 0)
 514             return;
 515 
 516         /*
 517          * Find where the last element of run1 goes in run2. Subsequent elements
 518          * in run2 can be ignored (because they're already in place).
 519          */
 520          // 找打run1的尾元素在run2中的位置
 521         len2 = gallopLeft(a[base1 + len1 - 1], a, base2, len2, len2 - 1, c);
 522         assert len2 >= 0;
 523         // len2 == 0, 说明run1和run2已经是一个整体有序的序列了, 直接返回。
 524         if (len2 == 0)
 525             return;
 526 
 527         // Merge remaining runs, using tmp array with min(len1, len2) elements
 528         if (len1 <= len2)
 529             mergeLo(base1, len1, base2, len2);
 530         else
 531             mergeHi(base1, len1, base2, len2);
 532     }
 533 
 534     /**
 535      * Locates the position at which to insert the specified key into the
 536      * specified sorted range; if the range contains an element equal to key,
 537      * returns the index of the leftmost equal element.
 538      *
 539      * @param key the key whose insertion point to search for
 540      * @param a the array in which to search
 541      * @param base the index of the first element in the range
 542      * @param len the length of the range; must be > 0
 543      * @param hint the index at which to begin the search, 0 <= hint < n.
 544      *     The closer hint is to the result, the faster this method will run.
 545      * @param c the comparator used to order the range, and to search
 546      * @return the int k,  0 <= k <= n such that a[b + k - 1] < key <= a[b + k],
 547      *    pretending that a[b - 1] is minus infinity and a[b + n] is infinity.
 548      *    In other words, key belongs at index b + k; or in other words,
 549      *    the first k elements of a should precede key, and the last n - k
 550      *    should follow it.
 551      */
 552     private static <T> int gallopLeft(T key, T[] a, int base, int len, int hint,
 553                                       Comparator<? super T> c) {
 554         assert len > 0 && hint >= 0 && hint < len;
 555         int lastOfs = 0;
 556         int ofs = 1;
 557         if (c.compare(key, a[base + hint]) > 0) {
 558             // 查找区间[base + hint, len], 查找使得a[base+hint+lastOfs] < key <= a[base+hint+ofs]成立的ofs值。
 559             int maxOfs = len - hint;
 560             // 向右查找, 最大可能Ofs为maxOfs-1, 即len - hint - 1, 即a[base + len - 1]
 561             while (ofs < maxOfs && c.compare(key, a[base + hint + ofs]) > 0) {
 562                 // 记录上一次ofs的值, 存到lastOfs中
 563                 lastOfs = ofs;
 564                 // ofs乘以2再加1
 565                 ofs = (ofs << 1) + 1;
 566                 // 整数溢出
 567                 if (ofs <= 0)
 568                     ofs = maxOfs;
 569             }
 570             
 571             // ofs最大值为maxOfs
 572             if (ofs > maxOfs)
 573                 ofs = maxOfs;
 574 
 575             // Make offsets relative to base
 576             // 之前的ofs和lastOfs都是相对于hint位置的, 现在把它重置为相对于base的位置
 577             lastOfs += hint;
 578             ofs += hint;
 579         } else { // key <= a[base + hint]
 580             // 从base+hint向前搜索,查找区间[base, base + hint], 直到找到ofs值使得a[base+hint-ofs] < key <= a[base+hint-lastOfs]
 581             // maxOfs为hint+1, 而ofs < maxOfs, 因此当ofs = maxOfs -1 时, 比较到的最左边的元素为a[base + hint - hint] == a[base]
 582             final int maxOfs = hint + 1;
 583             while (ofs < maxOfs && c.compare(key, a[base + hint - ofs]) <= 0) {
 584                 lastOfs = ofs;
 585                 // ofs乘以2再加1
 586                 ofs = (ofs << 1) + 1;
 587                 // 正整数溢出
 588                 if (ofs <= 0)
 589                     ofs = maxOfs;
 590             }
 591             // 最大为maxOfs
 592             if (ofs > maxOfs)
 593                 ofs = maxOfs;
 594 
 595             // 重置ofs和lastOfs为相对于base的位置索引
 596             int tmp = lastOfs;
 597             lastOfs = hint - ofs;
 598             ofs = hint - tmp;
 599         }
 600         assert -1 <= lastOfs && lastOfs < ofs && ofs <= len;
 601 
 602         /*
 603          * Now a[base+lastOfs] < key <= a[base+ofs], so key belongs somewhere
 604          * to the right of lastOfs but no farther right than ofs.  Do a binary
 605          * search, with invariant a[base + lastOfs - 1] < key <= a[base + ofs].
 606          */
 607         lastOfs++;
 608         // 查找准确位置
 609         while (lastOfs < ofs) {
 610             // 中间位置
 611             int m = lastOfs + ((ofs - lastOfs) >>> 1);
 612 
 613             if (c.compare(key, a[base + m]) > 0)
 614                 lastOfs = m + 1;  // a[base + m] < key
 615             else
 616                 ofs = m;          // key <= a[base + m]
 617         }
 618         assert lastOfs == ofs;    // so a[base + ofs - 1] < key <= a[base + ofs]
 619         return ofs;
 620     }
 621 
 622     /**
 623      * Like gallopLeft, except that if the range contains an element equal to
 624      * key, gallopRight returns the index after the rightmost equal element.
 625      *
 626      * @param key the key whose insertion point to search for
 627      * @param a the array in which to search
 628      * @param base the index of the first element in the range
 629      * @param len the length of the range; must be > 0
 630      * @param hint the index at which to begin the search, 0 <= hint < n.
 631      *     The closer hint is to the result, the faster this method will run.
 632      * @param c the comparator used to order the range, and to search
 633      * @return the int k,  0 <= k <= n such that a[b + k - 1] <= key < a[b + k]
 634      */
 635     private static <T> int gallopRight(T key, T[] a, int base, int len,
 636                                        int hint, Comparator<? super T> c) {
 637         assert len > 0 && hint >= 0 && hint < len;
 638 
 639         int ofs = 1;
 640         int lastOfs = 0;
 641         if (c.compare(key, a[base + hint]) < 0) {
 642             // 从base + hint位置向前搜索区间[base, base + hint], 使得a[b+hint - ofs] <= key < a[b+hint - lastOfs]
 643             int maxOfs = hint + 1;
 644             while (ofs < maxOfs && c.compare(key, a[base + hint - ofs]) < 0) {
 645                 // 记录上次查找位置
 646                 lastOfs = ofs;
 647                 // 乘以2 加1
 648                 ofs = (ofs << 1) + 1;
 649                 // 正整数溢出
 650                 if (ofs <= 0)
 651                     ofs = maxOfs;
 652             }
 653             // 最大为maxOfs
 654             if (ofs > maxOfs)
 655                 ofs = maxOfs;
 656 
 657             // 重置ofs和lastOfs为相对于base的位置索引
 658             int tmp = lastOfs;
 659             lastOfs = hint - ofs;
 660             ofs = hint - tmp;
 661         } else { // a[b + hint] <= key
 662             // 搜索区间[base + hint, base + len -1]使得a[b+hint + lastOfs] <= key < a[b+hint + ofs]
 663             int maxOfs = len - hint;
 664             while (ofs < maxOfs && c.compare(key, a[base + hint + ofs]) >= 0) {
 665                 lastOfs = ofs;
 666                 ofs = (ofs << 1) + 1;
 667                 // 正整数溢出
 668                 if (ofs <= 0)  
 669                     ofs = maxOfs;
 670             }
 671             if (ofs > maxOfs)
 672                 ofs = maxOfs;
 673 
 674             // 重置ofs和lastOfs为相对于base的位置索引
 675             lastOfs += hint;
 676             ofs += hint;
 677         }
 678         assert -1 <= lastOfs && lastOfs < ofs && ofs <= len;
 679 
 680         lastOfs++;
 681         // 查找key的准确位置
 682         while (lastOfs < ofs) {
 683             int m = lastOfs + ((ofs - lastOfs) >>> 1);
 684 
 685             if (c.compare(key, a[base + m]) < 0)
 686                 ofs = m;          // key < a[b + m]
 687             else
 688                 lastOfs = m + 1;  // a[b + m] <= key
 689         }
 690         // 最终会找到m使得k >= a[base + m], 而此时lastOfs == ofs且lastOfs = m +1, 则ofs = m +1, 因此a[base + ofs] > k >= a[base + ofs -1], ofs即m+1, ofs - 1即为m, 因此ofs位置的值大于key
 691         assert lastOfs == ofs;    // so a[b + ofs - 1] <= key < a[b + ofs]
 692         return ofs;
 693     }
 694 
 695     /**
 696     *基于以上gallopRight方法最后查找key的索引的解释, 因此run1的第一个元素一定大于run2的第一个元素,     *而run2中run1最后一个元素所在索引位置之后的值也被忽略掉, 因此run1的最后一个元素大于run2中的所有元素的值。*/
 697     
 698     
 699     /**
 700      * Merges two adjacent runs in place, in a stable fashion.  The first
 701      * element of the first run must be greater than the first element of the
 702      * second run (a[base1] > a[base2]), and the last element of the first run
 703      * (a[base1 + len1-1]) must be greater than all elements of the second run.
 704      *
 705      * For performance, this method should be called only when len1 <= len2;
 706      * its twin, mergeHi should be called if len1 >= len2.  (Either method
 707      * may be called if len1 == len2.)
 708      *
 709      * @param base1 index of first element in first run to be merged
 710      * @param len1  length of first run to be merged (must be > 0)
 711      * @param base2 index of first element in second run to be merged
 712      *        (must be aBase + aLen)
 713      * @param len2  length of second run to be merged (must be > 0)
 714      */
 715     private void mergeLo(int base1, int len1, int base2, int len2) {
 716         assert len1 > 0 && len2 > 0 && base1 + len1 == base2;
 717 
 718         // Copy first run into temp array
 719         T[] a = this.a; // For performance
 720         T[] tmp = ensureCapacity(len1);
 721         int cursor1 = tmpBase; // Indexes into tmp array
 722         int cursor2 = base2;   // Indexes int a
 723         // 目标位置从base1的索引开始, 因为base1在base2之前, 下面会将base1的内容放入临时数组, 这样run1中的内容就可以覆盖了
 724         int dest = base1;      // Indexes int a
 725         // 把第一个序列的内容放入临时数组tmp中
 726         System.arraycopy(a, base1, tmp, cursor1, len1);
 727 
 728         // Move first element of second run and deal with degenerate cases
 729         // 因为run1的第一个元素大于run2的第一个元素, 因此将run2的第一个元素先放进
 730         a[dest++] = a[cursor2++];
 731         // 如果run2只有一个元素, 则将run1中剩余的元素放入正确位置后返回
 732         if (--len2 == 0) {
 733             System.arraycopy(tmp, cursor1, a, dest, len1);
 734             return;
 735         }
 736         // 如果run1只有一个元素, 因为run1的最后一个元素大于run2中的所有元素, 因此先将run2中的元素放入正确位置, 然后将run1的唯一的一个元素放入最后一个位置, 然后返回
 737         if (len1 == 1) {
 738             System.arraycopy(a, cursor2, a, dest, len2);
 739             a[dest + len2] = tmp[cursor1]; // Last elt of run 1 to end of merge
 740             return;
 741         }
 742 
 743         Comparator<? super T> c = this.c;  // Use local variable for performance
 744         int minGallop = this.minGallop;    //  "    "       "     "      "
 745     outer:
 746         while (true) {
 747             int count1 = 0; // Number of times in a row that first run won
 748             int count2 = 0; // Number of times in a row that second run won
 749 
 750             /*
 751              * Do the straightforward thing until (if ever) one run starts
 752              * winning consistently.
 753              */
 754             do {
 755                 assert len1 > 1 && len2 > 0;
 756                 if (c.compare(a[cursor2], tmp[cursor1]) < 0) {
 757                     a[dest++] = a[cursor2++];
 758                     count2++;
 759                     count1 = 0;
 760                     if (--len2 == 0)
 761                         break outer;
 762                 } else {
 763                     a[dest++] = tmp[cursor1++];
 764                     count1++;
 765                     count2 = 0;
 766                     if (--len1 == 1)
 767                         break outer;
 768                 }
 769                 // 当每个序列中的连续放入目标位置的元素个数小于minGallop时, 这样分别拷贝就可以了
 770             } while ((count1 | count2) < minGallop);
 771 
 772             /*
 773              * One run is winning so consistently that galloping may be a
 774              * huge win. So try that, and continue galloping until (if ever)
 775              * neither run appears to be winning consistently anymore.
 776              */
 777              // 因为两个run序列的大小是近似相等的, 如果一个序列连续超过minGallop个数的元素被放入目标位置, 则另一个有接近大小的连续序列等待被放入正确位置,切换成Gallopping模式
 778             do {
 779                 assert len1 > 1 && len2 > 0;
 780                 //查找run1第一个大于run2中第一个元素的元素的位置索引
 781                 count1 = gallopRight(a[cursor2], tmp, cursor1, len1, 0, c);
 782                 if (count1 != 0) {
 783                     // run1中count1之前的元素全部放入目标序列
 784                     System.arraycopy(tmp, cursor1, a, dest, count1);
 785                     // 移动索引位置
 786                     dest += count1;
 787                     cursor1 += count1;
 788                     len1 -= count1;
 789                     if (len1 <= 1) // len1 == 1 || len1 == 0
 790                         break outer;
 791                 }
 792                 // 移动run2的第一个元素到目标序列中
 793                 a[dest++] = a[cursor2++];
 794                 // 如果run2中没有其他元素则跳出
 795                 if (--len2 == 0)
 796                     break outer;
 797 
 798                 // 查找run2中第一个小于等于run1当前元素的元素的位置索引
 799                 count2 = gallopLeft(tmp[cursor1], a, cursor2, len2, 0, c);
 800                 if (count2 != 0) {
 801                     // 拷贝count2之后的元素到目标序列
 802                     System.arraycopy(a, cursor2, a, dest, count2);
 803                     dest += count2;
 804                     cursor2 += count2;
 805                     len2 -= count2;
 806                     // run2中没有其他元素则跳出
 807                     if (len2 == 0)
 808                         break outer;
 809                 }
 810                 
 811                 // 此时run2中的第一个元素大于等于run1中的第一个元素, 拷贝run1中的第一个元素到目标序列
 812                 a[dest++] = tmp[cursor1++];
 813                 // 如果run1中只有一个元素则跳出
 814                 if (--len1 == 1)
 815                     break outer;
 816                 // 动态调整minGallop的值
 817                 minGallop--;
 818             } while (count1 >= MIN_GALLOP | count2 >= MIN_GALLOP);
 819             if (minGallop < 0)
 820                 minGallop = 0;
 821             // 调整minGallop的值, 使得在有序序列不多的情况下不用Gallopping模式
 822             minGallop += 2;  // Penalize for leaving gallop mode
 823         }  // End of "outer" loop
 824         this.minGallop = minGallop < 1 ? 1 : minGallop;  // Write back to field
 825 
 826         // run1中只有一个元素
 827         if (len1 == 1) {
 828             assert len2 > 0;
 829             System.arraycopy(a, cursor2, a, dest, len2);
 830             a[dest + len2] = tmp[cursor1]; //  Last elt of run 1 to end of merge
 831         } else if (len1 == 0) {
 832             // 因为run1中的最后一个元素大于run2中的所有元素, 因此这种情况不存在
 833             throw new IllegalArgumentException(
 834                 "Comparison method violates its general contract!");
 835         } else {
 836             // run2中已经没有元素
 837             assert len2 == 0;
 838             assert len1 > 1;
 839             System.arraycopy(tmp, cursor1, a, dest, len1);
 840         }
 841     }
 842 
 843     /**
 844      * Like mergeLo, except that this method should be called only if
 845      * len1 >= len2; mergeLo should be called if len1 <= len2.  (Either method
 846      * may be called if len1 == len2.)
 847      *
 848      * @param base1 index of first element in first run to be merged
 849      * @param len1  length of first run to be merged (must be > 0)
 850      * @param base2 index of first element in second run to be merged
 851      *        (must be aBase + aLen)
 852      * @param len2  length of second run to be merged (must be > 0)
 853      */
 854     private void mergeHi(int base1, int len1, int base2, int len2) {
 855         assert len1 > 0 && len2 > 0 && base1 + len1 == base2;
 856 
 857         // Copy second run into temp array
 858         T[] a = this.a; // For performance
 859         T[] tmp = ensureCapacity(len2);
 860         int tmpBase = this.tmpBase;
 861         // 将run2中的所有元素放入临时数组tmp中
 862         System.arraycopy(a, base2, tmp, tmpBase, len2);
 863 
 864         int cursor1 = base1 + len1 - 1;  // Indexes into a
 865         int cursor2 = tmpBase + len2 - 1; // Indexes into tmp array
 866         // 从后往前插入元素
 867         int dest = base2 + len2 - 1;     // Indexes into a
 868 
 869         // Move last element of first run and deal with degenerate cases
 870         // run1的最后一个元素导入目标位置
 871         a[dest--] = a[cursor1--];
 872         // 如果run1中只有一个元素, 将run2中的剩余元素放入目标位置(从后往前)
 873         if (--len1 == 0) {
 874             System.arraycopy(tmp, tmpBase, a, dest - (len2 - 1), len2);
 875             return;
 876         }
 877         if (len2 == 1) {
 878             // run2中只有一个元素, 因为run1的第一个元素大于run2的第一个元素, 因此, run2中唯一的一个元素小于run1中所有的元素, 因此将run1中的元素全部放入目标位置, 最后将唯一的run2中的一个元素放入第一个位置
 879             dest -= len1;
 880             cursor1 -= len1;
 881             System.arraycopy(a, cursor1 + 1, a, dest + 1, len1);
 882             a[dest] = tmp[cursor2];
 883             return;
 884         }
 885 
 886         Comparator<? super T> c = this.c;  // Use local variable for performance
 887         int minGallop = this.minGallop;    //  "    "       "     "      "
 888     outer:
 889         while (true) {
 890             int count1 = 0; // Number of times in a row that first run won
 891             int count2 = 0; // Number of times in a row that second run won
 892 
 893             /*
 894              * Do the straightforward thing until (if ever) one run
 895              * appears to win consistently.
 896              */
 897             do {
 898                 assert len1 > 0 && len2 > 1;
 899                 if (c.compare(tmp[cursor2], a[cursor1]) < 0) {
 900                        // 从后往前放入目标位置
 901                     a[dest--] = a[cursor1--];
 902                     count1++;
 903                     count2 = 0;
 904                     // run1中没有了元素
 905                     if (--len1 == 0)
 906                         break outer;
 907                 } else {
 908                     a[dest--] = tmp[cursor2--];
 909                     count2++;
 910                     count1 = 0;
 911                     // run2中只有一个剩余元素
 912                     if (--len2 == 1)
 913                         break outer;
 914                 }
 915             } while ((count1 | count2) < minGallop);
 916 
 917             /*
 918              * One run is winning so consistently that galloping may be a
 919              * huge win. So try that, and continue galloping until (if ever)
 920              * neither run appears to be winning consistently anymore.
 921              */
 922             do {
 923                 assert len1 > 0 && len2 > 1;
 924                 // 找到大于run2当前位置的元素的run1中元素, 因为是从后往前查找, 因此找到的位置比如k1,k1之后的所有元素大于run1, run2中的剩余所有元素
 925                 count1 = len1 - gallopRight(tmp[cursor2], a, base1, len1, len1 - 1, c);
 926                 if (count1 != 0) {
 927                     dest -= count1;
 928                     cursor1 -= count1;
 929                     len1 -= count1;
 930                     // 拷贝run1中的大的元素
 931                     System.arraycopy(a, cursor1 + 1, a, dest + 1, count1);
 932                     // run1中没有元素, 跳出
 933                     if (len1 == 0)
 934                         break outer;
 935                 }
 936                 // run2的当前元素拷贝到目标位置
 937                 a[dest--] = tmp[cursor2--];
 938                 if (--len2 == 1)
 939                     break outer;
 940 
 941                 // 找到run2中大于等于run1当前元素的元素的位置索引比如K2, 则k2之后的所有元素大于run1, run2中的剩余元素
 942                 count2 = len2 - gallopLeft(a[cursor1], tmp, tmpBase, len2, len2 - 1, c);
 943                 if (count2 != 0) {
 944                     dest -= count2;
 945                     cursor2 -= count2;
 946                     len2 -= count2;
 947                     System.arraycopy(tmp, cursor2 + 1, a, dest + 1, count2);
 948                     if (len2 <= 1)  // len2 == 1 || len2 == 0
 949                         break outer;
 950                 }
 951                 // 拷贝run1的当前元素到目标位置, 因为a[cursior1]大于等于run2中的剩余元素
 952                 a[dest--] = a[cursor1--];
 953                 if (--len1 == 0)
 954                     break outer;
 955                 minGallop--;
 956             } while (count1 >= MIN_GALLOP | count2 >= MIN_GALLOP);
 957             if (minGallop < 0)
 958                 minGallop = 0;
 959             minGallop += 2;  // Penalize for leaving gallop mode
 960         }  // End of "outer" loop
 961         this.minGallop = minGallop < 1 ? 1 : minGallop;  // Write back to field
 962 
 963         // run2中只有一个元素
 964         if (len2 == 1) {
 965             assert len1 > 0;
 966             dest -= len1;
 967             cursor1 -= len1;
 968             // 拷贝run1中的所有元素到目标位置
 969             System.arraycopy(a, cursor1 + 1, a, dest + 1, len1);
 970             // run2的最后一个元素放入第一个位置
 971             a[dest] = tmp[cursor2];  // Move first elt of run2 to front of merge
 972         } else if (len2 == 0) {
 973             // 因为run2的第一个元素小于run1, run2中的所有元素, 因此run2不可能最后为空
 974             throw new IllegalArgumentException(
 975                 "Comparison method violates its general contract!");
 976         } else {
 977             assert len1 == 0;
 978             assert len2 > 0;
 979             // 拷贝run2中的剩余元素
 980             System.arraycopy(tmp, tmpBase, a, dest - (len2 - 1), len2);
 981         }
 982     }
 983 
 984     /**
 985      * Ensures that the external array tmp has at least the specified
 986      * number of elements, increasing its size if necessary.  The size
 987      * increases exponentially to ensure amortized linear time complexity.
 988      *
 989      * @param minCapacity the minimum required capacity of the tmp array
 990      * @return tmp, whether or not it grew
 991      */
 992     private T[] ensureCapacity(int minCapacity) {
 993         if (tmpLen < minCapacity) {
 994             // Compute smallest power of 2 > minCapacity
 995             int newSize = minCapacity;
 996             newSize |= newSize >> 1;
 997             newSize |= newSize >> 2;
 998             newSize |= newSize >> 4;
 999             newSize |= newSize >> 8;
1000             newSize |= newSize >> 16;
1001             newSize++;
1002 
1003             if (newSize < 0) // Not bloody likely!
1004                 newSize = minCapacity;
1005             else
1006                 newSize = Math.min(newSize, a.length >>> 1);
1007 
1008             @SuppressWarnings({"unchecked", "UnnecessaryLocalVariable"})
1009             T[] newArray = (T[])java.lang.reflect.Array.newInstance
1010                 (a.getClass().getComponentType(), newSize);
1011             tmp = newArray;
1012             tmpLen = newSize;
1013             tmpBase = 0;
1014         }
1015         return tmp;
1016     }
1017 }
原文地址:https://www.cnblogs.com/helloz/p/11610660.html