KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > python > core > MergeState


1 // Copyright 2002 Finn Bock
2

3 package org.python.core;
4
5 /**
6  * The MergeState class is a java implementation of the sort created
7  * Tim Peters and added to CPython2.3.
8  *
9  * The algorithm is described in details in the file
10  * python/dist/src/Objects/listsort.txt in the CPython development CVS.
11  */

12 class MergeState {
13     /**
14      * The maximum number of entries in a MergeState's pending-runs stack.
15      * This is enough to sort arrays of size up to about
16      * 32 * phi ** MAX_MERGE_PENDING
17      * where phi ~= 1.618. 85 is ridiculously large enough, good for an
18      * array with 2**64 elements.
19      */

20     static final int MAX_MERGE_PENDING = 85;
21
22     /**
23      * If a run wins MIN_GALLOP times in a row, we switch to galloping mode,
24      * and stay there until both runs win less often than MIN_GALLOP
25      * consecutive times. See listsort.txt for more info.
26      */

27     static final int MIN_GALLOP = 8;
28
29     /**
30      * Initial temp array size
31      */

32     static final int MERGESTATE_TEMP_SIZE = 256;
33
34     private PyObject[] a = new PyObject[MERGESTATE_TEMP_SIZE];
35
36     private int[] base = new int[MAX_MERGE_PENDING];
37     private int[] len = new int[MAX_MERGE_PENDING];
38
39     private PyObject compare;
40     private PyObject[] data;
41     private int size;
42     private int n;
43
44     MergeState(PyObject[] data, int size, PyObject compare) {
45         this.data = data;
46         this.compare = compare;
47         this.size = size;
48         this.n = 0;
49     }
50
51     public void sort() {
52         int nremaining = size;
53         if (nremaining < 2) {
54             return;
55         }
56
57         int lo = 0;
58         int hi = nremaining;
59         int minrun = merge_compute_minrun(nremaining);
60
61         boolean[] descending = new boolean[1];
62         do {
63             /* Identify next run. */
64             int n = count_run(lo, hi, descending);
65             if (descending[0])
66                 reverse_slice(lo, lo + n);
67             /* If short, extend to min(minrun, nremaining). */
68             if (n < minrun) {
69                 int force = nremaining < minrun ? nremaining : minrun;
70                 binarysort(lo, lo + force, lo + n);
71                 n = force;
72             }
73             /* Push run onto pending-runs stack, and maybe merge. */
74             //ms.assert_(ms.n < ms.MAX_MERGE_PENDING);
75
base[this.n] = lo;
76             len[this.n] = n;
77             ++this.n;
78             merge_collapse();
79             /* Advance to find next run */
80             lo += n;
81             nremaining -= n;
82         } while (nremaining != 0);
83         //assert_(lo == hi);
84

85         merge_force_collapse();
86         //assert_(ms.n == 1);
87
//assert_(ms.base[0] == 0);
88
//assert_(ms.len[0] == size);
89
}
90
91     public void getmem(int need) {
92         if (need <= a.length)
93             return;
94         a = new PyObject[need];
95     }
96
97     int count_run(int lo, int hi, boolean[] descending) {
98         //assert_(lo < hi);
99
descending[0] = false;
100         ++lo;
101         if (lo == hi)
102             return 1;
103         int n = 2;
104         if (iflt(data[lo], data[lo-1])) {
105             descending[0] = true;
106             for (lo = lo + 1; lo < hi; ++lo, ++n) {
107                 if (! iflt(data[lo], data[lo-1]))
108                     break;
109             }
110         } else {
111             for (lo = lo + 1; lo < hi; ++lo, ++n) {
112                 if (iflt(data[lo], data[lo-1]))
113                     break;
114             }
115         }
116         return n;
117     }
118
119
120     void merge_lo(int pa, int na, int pb, int nb) {
121         //debug("merge_lo pa:" + pa + " na:" + na + " pb:" + pb + " nb:" + nb);
122
int cnt = nb + na;
123         int origpa = pa;
124         //dump_data("padata", pa, na);
125
//dump_data("pbdata", pb, nb);
126

127         //assert_(na > 0 && nb > 0 && pa + na == pb);
128
getmem(na);
129         System.arraycopy(data, pa, a, 0, na);
130         int dest = pa;
131         pa = 0;
132
133         data[dest++] = data[pb++];
134         --nb;
135         if (nb == 0)
136             return;
137         if (na == 1) {
138             // CopyB;
139
System.arraycopy(data, pb, data, dest, nb);
140             data[dest + nb] = a[pa];
141             return;
142         }
143
144         try {
145             PyObject compare = this.compare;
146             for (;;) {
147                 int acount = 0; /* # of time A won in a row */
148                 int bcount = 0; /* # of time B won in a row */
149
150                 /* Do the straightforward thing until (if ever) one run
151                  * appears to win consistently.
152                  */

153                 for (;;) {
154                     boolean k = iflt(data[pb], a[pa]);
155                     if (k) {
156                         data[dest++] = data[pb++];
157                         ++bcount;
158                         acount = 0;
159                         --nb;
160                         if (nb == 0)
161                             return;
162                         if (bcount >= MIN_GALLOP)
163                             break;
164                     } else {
165                         data[dest++] = a[pa++];
166                         ++acount;
167                         bcount = 0;
168                         --na;
169                         if (na == 1) {
170                             // CopyB;
171
System.arraycopy(data, pb, data, dest, nb);
172                             data[dest + nb] = a[pa];
173                             na = 0;
174                             return;
175                         }
176                         if (acount >= MIN_GALLOP)
177                             break;
178                     }
179                 }
180
181                 /* One run is winning so consistently that galloping may
182                  * be a huge win. So try that, and continue galloping until
183                  * (if ever) neither run appears to be winning consistently
184                  * anymore.
185                  */

186                 do {
187                     int k = gallop_right(data[pb], a, pa, na, 0);
188                     acount = k;
189                     if (k != 0) {
190                         System.arraycopy(a, pa, data, dest, k);
191                         dest += k;
192                         pa += k;
193                         na -= k;
194                         if (na == 1) {
195                             // CopyB
196
System.arraycopy(data, pb, data, dest, nb);
197                             data[dest + nb] = a[pa];
198                             na = 0;
199                             return;
200                         }
201                         /* na==0 is impossible now if the comparison
202                          * function is consistent, but we can't assume
203                          * that it is.
204                          */

205                         if (na == 0)
206                             return;
207                     }
208
209                     data[dest++] = data[pb++];
210                     --nb;
211                     if (nb == 0)
212                         return;
213
214                     k = gallop_left(a[pa], data, pb, nb, 0);
215                     bcount = k;
216                     if (k != 0) {
217                         System.arraycopy(data, pb, data, dest, k);
218                         dest += k;
219                         pb += k;
220                         nb -= k;
221                         if (nb == 0)
222                             return;
223                     }
224                     data[dest++] = a[pa++];
225                     --na;
226                     if (na == 1) {
227                         // CopyB;
228
System.arraycopy(data, pb, data, dest, nb);
229                         data[dest + nb] = a[pa];
230                         na = 0;
231                         return;
232                     }
233                 } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
234             }
235         } finally {
236             if (na != 0)
237                 System.arraycopy(a, pa, data, dest, na);
238
239             //dump_data("result", origpa, cnt);
240
}
241     }
242
243
244
245     void merge_hi(int pa, int na, int pb, int nb) {
246         //debug("merge_hi pa:" + pa + " na:" + na + " pb:" + pb + " nb:" + nb);
247
int cnt = nb + na;
248         int origpa = pa;
249         //dump_data("padata", pa, na);
250
//dump_data("pbdata", pb, nb);
251

252         //assert_(na > 0 && nb > 0 && pa + na == pb);
253
getmem(nb);
254         int dest = pb + nb - 1;
255         int basea = pa;
256         System.arraycopy(data, pb, a, 0, nb);
257
258         pb = nb - 1;
259         pa += na - 1;
260
261         data[dest--] = data[pa--];
262         --na;
263         if (na == 0)
264             return;
265         if (nb == 1) {
266             // CopyA;
267
dest -= na;
268             pa -= na;
269             System.arraycopy(data, pa+1, data, dest+1, na);
270             data[dest] = a[pb];
271             nb = 0;
272             return;
273         }
274
275         try {
276             PyObject compare = this.compare;
277             for (;;) {
278                 int acount = 0; /* # of time A won in a row */
279                 int bcount = 0; /* # of time B won in a row */
280
281                 /* Do the straightforward thing until (if ever) one run
282                  * appears to win consistently.
283                  */

284                 for (;;) {
285                     boolean k = iflt(a[pb], data[pa]);
286                     if (k) {
287                         data[dest--] = data[pa--];
288                         ++acount;
289                         bcount = 0;
290                         --na;
291                         if (na == 0)
292                             return;
293                         if (acount >= MIN_GALLOP)
294                             break;
295                     } else {
296                         data[dest--] = a[pb--];
297                         ++bcount;
298                         acount = 0;
299                         --nb;
300                         if (nb == 1) {
301                             // CopyA
302
dest -= na;
303                             pa -= na;
304                             System.arraycopy(data, pa+1, data, dest+1, na);
305                             data[dest] = a[pb];
306                             nb = 0;
307                             return;
308                         }
309                         if (bcount >= MIN_GALLOP)
310                             break;
311                     }
312                 }
313
314                 /* One run is winning so consistently that galloping may
315                  * be a huge win. So try that, and continue galloping until
316                  * (if ever) neither run appears to be winning consistently
317                  * anymore.
318                  */

319                 do {
320                     int k = gallop_right(a[pb], data, basea, na, na-1);
321                     acount = k = na - k;
322                     if (k != 0) {
323                         dest -= k;
324                         pa -= k;
325                         System.arraycopy(data, pa+1, data, dest+1, k);
326                         na -= k;
327                         if (na == 0)
328                             return;
329                     }
330
331                     data[dest--] = a[pb--];
332                     --nb;
333                     if (nb == 1) {
334                         // CopyA
335
dest -= na;
336                         pa -= na;
337                         System.arraycopy(data, pa+1, data, dest+1, na);
338                         data[dest] = a[pb];
339                         nb = 0;
340                         return;
341                     }
342
343                     k = gallop_left(data[pa], a, 0, nb, nb-1);
344                     bcount = k = nb - k;
345                     if (k != 0) {
346                         dest -= k;
347                         pb -= k;
348                         System.arraycopy(a, pb+1, data, dest+1, k);
349                         nb -= k;
350                         if (nb == 1) {
351                             // CopyA
352
dest -= na;
353                             pa -= na;
354                             System.arraycopy(data, pa+1, data, dest+1, na);
355                             data[dest] = a[pb];
356                             nb = 0;
357                             return;
358                         }
359                         /* nb==0 is impossible now if the comparison
360                          * function is consistent, but we can't assume
361                          * that it is.
362                          */

363                         if (nb == 0)
364                             return;
365                     }
366                     data[dest--] = data[pa--];
367                     --na;
368                     if (na == 0)
369                         return;
370                 } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
371             }
372         } finally {
373             if (nb != 0)
374                 System.arraycopy(a, 0, data, dest-(nb-1), nb);
375
376             //dump_data("result", origpa, cnt);
377
}
378     }
379
380
381
382    /*
383     Locate the proper position of key in a sorted vector; if the vector contains
384     an element equal to key, return the position immediately to the left of
385     the leftmost equal element. [gallop_right() does the same except returns
386     the position to the right of the rightmost equal element (if any).]
387     
388     "a" is a sorted vector with n elements, starting at a[0]. n must be > 0.
389     
390     "hint" is an index at which to begin the search, 0 <= hint < n. The closer
391     hint is to the final result, the faster this runs.
392     
393     The return value is the int k in 0..n such that
394     
395         a[k-1] < key <= a[k]
396     
397     pretending that *(a-1) is minus infinity and a[n] is plus infinity. IOW,
398     key belongs at index k; or, IOW, the first k elements of a should precede
399     key, and the last n-k should follow key.
400
401     Returns -1 on error. See listsort.txt for info on the method.
402     */

403
404     private int gallop_left(PyObject key, PyObject[] data, int a, int n,
405                             int hint)
406     {
407         //assert_(n > 0 && hint >= 0 && hint < n);
408
a += hint;
409         int ofs = 1;
410         int lastofs = 0;
411     
412         if (iflt(data[a], key)) {
413             /* a[hint] < key -- gallop right, until
414              * a[hint + lastofs] < key <= a[hint + ofs]
415              */

416             int maxofs = n - hint; // data[a + n - 1] is highest
417
while (ofs < maxofs) {
418                 if (iflt(data[a + ofs], key)) {
419                     lastofs = ofs;
420                     ofs = (ofs << 1) + 1;
421                     if (ofs <= 0) // int overflow
422
ofs = maxofs;
423                 } else {
424                     // key < data[a + hint + ofs]
425
break;
426                 }
427             }
428             if (ofs > maxofs)
429                 ofs = maxofs;
430             // Translate back to offsets relative to a.
431
lastofs += hint;
432             ofs += hint;
433         } else {
434             /* key <= a[hint] -- gallop left, until
435              * a[hint - ofs] < key <= a[hint - lastofs]
436              */

437             int maxofs = hint + 1; // data[a] is lowest
438
while (ofs < maxofs) {
439                 if (iflt(data[a - ofs], key))
440                     break;
441                 // key <= data[a + hint - ofs]
442
lastofs = ofs;
443                 ofs = (ofs << 1) + 1;
444                 if (ofs <= 0) // int overflow
445
ofs = maxofs;
446             }
447             if (ofs > maxofs)
448                 ofs = maxofs;
449             // Translate back to offsets relative to a.
450
int k = lastofs;
451             lastofs = hint - ofs;
452             ofs = hint - k;
453         }
454         a -= hint;
455         //assert_(-1 <= lastofs && lastofs < ofs && ofs <= n);
456
/* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the
457          * right of lastofs but no farther right than ofs. Do a binary
458          * search, with invariant a[lastofs-1] < key <= a[ofs].
459          */

460         ++lastofs;
461         while (lastofs < ofs) {
462             int m = lastofs + ((ofs - lastofs) >> 1);
463             if (iflt(data[a + m], key))
464                 lastofs = m+1; // data[a + m] < key
465
else
466                 ofs = m; // key <= data[a + m]
467
}
468         //assert_(lastofs == ofs); // so data[a + ofs -1] < key <= data[a+ofs]
469
return ofs;
470     }
471
472
473     /*
474     * Exactly like gallop_left(), except that if key already exists in a[0:n],
475     * finds the position immediately to the right of the rightmost equal value.
476     *
477     * The return value is the int k in 0..n such that
478     * a[k-1] <= key < a[k]
479     * or -1 if error.
480     *
481     * The code duplication is massive, but this is enough different given that
482     * we're sticking to "<" comparisons that it's much harder to follow if
483     * written as one routine with yet another "left or right?" flag.
484     */

485
486     private int gallop_right(PyObject key, PyObject[] data, int a, int n,
487                              int hint)
488     {
489         //assert_(n > 0 && hint >= 0 && hint < n);
490
a += hint;
491         int lastofs = 0;
492         int ofs = 1;
493
494         if (iflt(key, data[a])) {
495             /* key < a[hint] -- gallop left, until
496              * a[hint - ofs] <= key < a[hint - lastofs]
497              */

498             int maxofs = hint + 1; /* data[a] is lowest */
499             while (ofs < maxofs) {
500                 if (iflt(key, data[a - ofs])) {
501                     lastofs = ofs;
502                     ofs = (ofs << 1) + 1;
503                     if (ofs <= 0) // int overflow
504
ofs = maxofs;
505                 } else {
506                     /* a[hint - ofs] <= key */
507                     break;
508                 }
509             }
510             if (ofs > maxofs)
511                 ofs = maxofs;
512             /* Translate back to positive offsets relative to &a[0]. */
513             int k = lastofs;
514             lastofs = hint - ofs;
515             ofs = hint - k;
516         } else {
517             /* a[hint] <= key -- gallop right, until
518              * a[hint + lastofs] <= key < a[hint + ofs]
519              */

520             int maxofs = n - hint; /* data[a + n - 1] is highest */
521             while (ofs < maxofs) {
522                 if (iflt(key, data[a + ofs]))
523                     break;
524                 /* a[hint + ofs] <= key */
525                 lastofs = ofs;
526                 ofs = (ofs << 1) + 1;
527                 if (ofs <= 0) /* int overflow */
528                     ofs = maxofs;
529             }
530             if (ofs > maxofs)
531                 ofs = maxofs;
532             /* Translate back to offsets relative to &a[0]. */
533             lastofs += hint;
534             ofs += hint;
535         }
536         a -= hint;
537
538         //assert_(-1 <= lastofs && lastofs < ofs && ofs <= n);
539

540         /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the
541          * right of lastofs but no farther right than ofs. Do a binary
542          * search, with invariant a[lastofs-1] <= key < a[ofs].
543          */

544         ++lastofs;
545         while (lastofs < ofs) {
546             int m = lastofs + ((ofs - lastofs) >> 1);
547             if (iflt(key, data[a + m]))
548                 ofs = m; // key < data[a + m]
549
else
550                 lastofs = m+1; // data[a + m] <= key
551
}
552         //assert_(lastofs == ofs); // so data[a + ofs -1] <= key < data[a+ofs]
553
return ofs;
554     }
555
556
557     void merge_at(int i) {
558         //assert_(n >= 2);
559
//assert_(i >= 0);
560
//assert_(i == n - 2 || i == n - 3);
561

562         int pa = base[i];
563         int pb = base[i+1];
564         int na = len[i];
565         int nb = len[i+1];
566
567         //assert_(na > 0 && nb > 0);
568
//assert_(pa + na == pb);
569

570         // Record the length of the combined runs; if i is the 3rd-last
571
// run now, also slide over the last run (which isn't involved
572
// in this merge). The current run i+1 goes away in any case.
573
if (i == n - 3) {
574             len[i+1] = len[i+2];
575             base[i+1] = base[i+2];
576         }
577         len[i] = na + nb;
578         --n;
579
580         // Where does b start in a? Elements in a before that can be
581
// ignored (already in place).
582
int k = gallop_right(data[pb], data, pa, na, 0);
583         pa += k;
584         na -= k;
585         if (na == 0)
586             return;
587         
588         // Where does a end in b? Elements in b after that can be
589
// ignored (already in place).
590
nb = gallop_left(data[pa + na - 1], data, pb, nb, nb-1);
591         if (nb == 0)
592             return;
593
594         // Merge what remains of the runs, using a temp array with
595
// min(na, nb) elements.
596
if (na <= nb)
597             merge_lo(pa, na, pb, nb);
598         else
599             merge_hi(pa, na, pb, nb);
600     }
601
602
603     /* Examine the stack of runs waiting to be merged, merging adjacent runs
604      * until the stack invariants are re-established:
605      *
606      * 1. len[-3] > len[-2] + len[-1]
607      * 2. len[-2] > len[-1]
608      *
609      * See listsort.txt for more info.
610      */

611     void merge_collapse() {
612         while (this.n > 1) {
613             int n = this.n - 2;
614             if (n > 0 && len[n-1] <= len[n] + len[n+1]) {
615                 if (len[n-1] < len[n+1])
616                     --n;
617                 merge_at(n);
618             } else if (len[n] <= len[n+1]) {
619                 merge_at(n);
620             } else {
621                 break;
622             }
623         }
624     }
625
626
627     /* Regardless of invariants, merge all runs on the stack until only one
628      * remains. This is used at the end of the mergesort.
629      *
630      * Returns 0 on success, -1 on error.
631      */

632     void merge_force_collapse() {
633         while (this.n > 1) {
634             int n = this.n - 2;
635             if (n > 0 && len[n-1] < len[n+1])
636                 --n;
637             merge_at(n);
638         }
639     }
640
641
642     /* Compute a good value for the minimum run length; natural runs shorter
643      * than this are boosted artificially via binary insertion.
644      *
645      * If n < 64, return n (it's too small to bother with fancy stuff).
646      * Else if n is an exact power of 2, return 32.
647      * Else return an int k, 32 <= k <= 64, such that n/k is close to, but
648      * strictly less than, an exact power of 2.
649      *
650      * See listsort.txt for more info.
651      */

652     int merge_compute_minrun(int n) {
653         int r = 0; // becomes 1 if any 1 bits are shifted off
654

655         //assert_(n >= 0);
656
while (n >= 64) {
657              r |= n & 1;
658              n >>= 1;
659         }
660         return n + r;
661     }
662
663
664     void assert_(boolean expr) {
665         if (!expr)
666             throw new RuntimeException JavaDoc("assert");
667     }
668
669
670    private boolean iflt(PyObject x, PyObject y) {
671         //assert_(x != null);
672
//assert_(y != null);
673

674         if (compare == null) {
675             /* NOTE: we rely on the fact here that the sorting algorithm
676                only ever checks whether k<0, i.e., whether x<y. So we
677                invoke the rich comparison function with _lt ('<'), and
678                return -1 when it returns true and 0 when it returns
679                false. */

680             return x._lt(y).__nonzero__();
681         }
682
683         PyObject ret = compare.__call__(x, y);
684
685         if (ret instanceof PyInteger) {
686             int v = ((PyInteger)ret).getValue();
687             return v < 0;
688         }
689         throw Py.TypeError("comparision function must return int");
690     }
691
692
693     void reverse_slice(int lo, int hi) {
694         --hi;
695         while (lo < hi) {
696             PyObject t = data[lo];
697             data[lo] = data[hi];
698             data[hi] = t;
699             ++lo;
700             --hi;
701         }
702     }
703
704
705
706     /*
707      * binarysort is the best method for sorting small arrays: it does
708      * few compares, but can do data movement quadratic in the number of
709      * elements.
710      * [lo, hi) is a contiguous slice of a list, and is sorted via
711      * binary insertion. This sort is stable.
712      * On entry, must have lo <= start <= hi, and that [lo, start) is already
713      * sorted (pass start == lo if you don't know!).
714      * If islt() complains return -1, else 0.
715      * Even in case of error, the output slice will be some permutation of
716      * the input (nothing is lost or duplicated).
717      */

718     void binarysort(int lo, int hi, int start) {
719         //debug("binarysort: lo:" + lo + " hi:" + hi + " start:" + start);
720
int p;
721
722         //assert_(lo <= start && start <= hi);
723
/* assert [lo, start) is sorted */
724         if (lo == start)
725                 ++start;
726         for (; start < hi; ++start) {
727             /* set l to where *start belongs */
728             int l = lo;
729             int r = start;
730             PyObject pivot = data[r];
731             // Invariants:
732
// pivot >= all in [lo, l).
733
// pivot < all in [r, start).
734
// The second is vacuously true at the start.
735
//assert_(l < r);
736
do {
737                 p = l + ((r - l) >> 1);
738                 if (iflt(pivot, data[p]))
739                     r = p;
740                 else
741                     l = p+1;
742             } while (l < r);
743             //assert_(l == r);
744
// The invariants still hold, so pivot >= all in [lo, l) and
745
// pivot < all in [l, start), so pivot belongs at l. Note
746
// that if there are elements equal to pivot, l points to the
747
// first slot after them -- that's why this sort is stable.
748
// Slide over to make room.
749
for (p = start; p > l; --p)
750                 data[p] = data[p - 1];
751             data[l] = pivot;
752         }
753         //dump_data("binsort", lo, hi - lo);
754
}
755
756
757     private void dump_data(String JavaDoc txt, int lo, int n) {
758 /*
759         System.out.print(txt + ":");
760         for (int i = 0; i < n; i++)
761             System.out.print(data[lo + i] + " ");
762         System.out.println();
763 */

764     }
765
766     private void debug(String JavaDoc str) {
767         //System.out.println(str);
768
}
769 }
770
Popular Tags