/[escript]/branches/intelc_win32/paso/src/qsort.c
ViewVC logotype

Contents of /branches/intelc_win32/paso/src/qsort.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 752 - (show annotations)
Mon Jun 26 02:25:41 2006 UTC (13 years ago) by woo409
File MIME type: text/plain
File size: 14408 byte(s)
+ Added a qsort.c file which contains a drop in replacement for qsort (call it as qsortG). This one appears to be a stable implementation and the test .msh files on windows have been set up to be the same as unix again except for the exponent digits (3 instead of 2).
With ALL the qsorts replaced with qsortG only two tests fail now on win32:
test_normal_onFunctionOnContactOne
test_normal_onFunctionOnContactZero

Both give wrong result errors.

I will check this same code on the altix (including the use of qsortG and see if Altix has the same problem.
1 /* qsort.c
2 * (c) 1998 Gareth McCaughan
3 *
4 * This is a drop-in replacement for the C library's |qsort()| routine.
5 *
6 * Features:
7 * - Median-of-three pivoting (and more)
8 * - Truncation and final polishing by a single insertion sort
9 * - Early truncation when no swaps needed in pivoting step
10 * - Explicit recursion, guaranteed not to overflow
11 * - A few little wrinkles stolen from the GNU |qsort()|.
12 * - separate code for non-aligned / aligned / word-size objects
13 *
14 * This code may be reproduced freely provided
15 * - this file is retained unaltered apart from minor
16 * changes for portability and efficiency
17 * - no changes are made to this comment
18 * - any changes that *are* made are clearly flagged
19 * - the _ID string below is altered by inserting, after
20 * the date, the string " altered" followed at your option
21 * by other material. (Exceptions: you may change the name
22 * of the exported routine without changing the ID string.
23 * You may change the values of the macros TRUNC_* and
24 * PIVOT_THRESHOLD without changing the ID string, provided
25 * they remain constants with TRUNC_nonaligned, TRUNC_aligned
26 * and TRUNC_words/WORD_BYTES between 8 and 24, and
27 * PIVOT_THRESHOLD between 32 and 200.)
28 *
29 * You may use it in anything you like; you may make money
30 * out of it; you may distribute it in object form or as
31 * part of an executable without including source code;
32 * you don't have to credit me. (But it would be nice if
33 * you did.)
34 *
35 * If you find problems with this code, or find ways of
36 * making it significantly faster, please let me know!
37 * My e-mail address, valid as of early 1998 and certainly
38 * OK for at least the next 18 months, is
39 * gjm11@dpmms.cam.ac.uk
40 * Thanks!
41 *
42 * Gareth McCaughan Peterhouse Cambridge 1998
43 */
44
45 #include <assert.h>
46 #include <stdlib.h>
47 #include <string.h>
48
49 static char _ID[]="<qsort.c gjm 1.12 1998-03-19>";
50
51 /* How many bytes are there per word? (Must be a power of 2,
52 * and must in fact equal sizeof(int).)
53 */
54 #define WORD_BYTES sizeof(int)
55
56 /* How big does our stack need to be? Answer: one entry per
57 * bit in a |size_t|.
58 */
59 #define STACK_SIZE (8*sizeof(size_t))
60
61 /* Different situations have slightly different requirements,
62 * and we make life epsilon easier by using different truncation
63 * points for the three different cases.
64 * So far, I have tuned TRUNC_words and guessed that the same
65 * value might work well for the other two cases. Of course
66 * what works well on my machine might work badly on yours.
67 */
68 #define TRUNC_nonaligned 12
69 #define TRUNC_aligned 12
70 #define TRUNC_words 12*WORD_BYTES /* nb different meaning */
71
72 /* We use a simple pivoting algorithm for shortish sub-arrays
73 * and a more complicated one for larger ones. The threshold
74 * is PIVOT_THRESHOLD.
75 */
76 #define PIVOT_THRESHOLD 40
77
78 typedef struct { char * first; char * last; } stack_entry;
79 #define pushLeft {stack[stacktop].first=ffirst;stack[stacktop++].last=last;}
80 #define pushRight {stack[stacktop].first=first;stack[stacktop++].last=llast;}
81 #define doLeft {first=ffirst;llast=last;continue;}
82 #define doRight {ffirst=first;last=llast;continue;}
83 #define pop {if (--stacktop<0) break;\
84 first=ffirst=stack[stacktop].first;\
85 last=llast=stack[stacktop].last;\
86 continue;}
87
88 /* Some comments on the implementation.
89 * 1. When we finish partitioning the array into "low"
90 * and "high", we forget entirely about short subarrays,
91 * because they'll be done later by insertion sort.
92 * Doing lots of little insertion sorts might be a win
93 * on large datasets for locality-of-reference reasons,
94 * but it makes the code much nastier and increases
95 * bookkeeping overhead.
96 * 2. We always save the shorter and get to work on the
97 * longer. This guarantees that every time we push
98 * an item onto the stack its size is <= 1/2 of that
99 * of its parent; so the stack can't need more than
100 * log_2(max-array-size) entries.
101 * 3. We choose a pivot by looking at the first, last
102 * and middle elements. We arrange them into order
103 * because it's easy to do that in conjunction with
104 * choosing the pivot, and it makes things a little
105 * easier in the partitioning step. Anyway, the pivot
106 * is the middle of these three. It's still possible
107 * to construct datasets where the algorithm takes
108 * time of order n^2, but it simply never happens in
109 * practice.
110 * 3' Newsflash: On further investigation I find that
111 * it's easy to construct datasets where median-of-3
112 * simply isn't good enough. So on large-ish subarrays
113 * we do a more sophisticated pivoting: we take three
114 * sets of 3 elements, find their medians, and then
115 * take the median of those.
116 * 4. We copy the pivot element to a separate place
117 * because that way we can always do our comparisons
118 * directly against a pointer to that separate place,
119 * and don't have to wonder "did we move the pivot
120 * element?". This makes the inner loop better.
121 * 5. It's possible to make the pivoting even more
122 * reliable by looking at more candidates when n
123 * is larger. (Taking this to its logical conclusion
124 * results in a variant of quicksort that doesn't
125 * have that n^2 worst case.) However, the overhead
126 * from the extra bookkeeping means that it's just
127 * not worth while.
128 * 6. This is pretty clean and portable code. Here are
129 * all the potential portability pitfalls and problems
130 * I know of:
131 * - In one place (the insertion sort) I construct
132 * a pointer that points just past the end of the
133 * supplied array, and assume that (a) it won't
134 * compare equal to any pointer within the array,
135 * and (b) it will compare equal to a pointer
136 * obtained by stepping off the end of the array.
137 * These might fail on some segmented architectures.
138 * - I assume that there are 8 bits in a |char| when
139 * computing the size of stack needed. This would
140 * fail on machines with 9-bit or 16-bit bytes.
141 * - I assume that if |((int)base&(sizeof(int)-1))==0|
142 * and |(size&(sizeof(int)-1))==0| then it's safe to
143 * get at array elements via |int*|s, and that if
144 * actually |size==sizeof(int)| as well then it's
145 * safe to treat the elements as |int|s. This might
146 * fail on systems that convert pointers to integers
147 * in non-standard ways.
148 * - I assume that |8*sizeof(size_t)<=INT_MAX|. This
149 * would be false on a machine with 8-bit |char|s,
150 * 16-bit |int|s and 4096-bit |size_t|s. :-)
151 */
152
153 /* The recursion logic is the same in each case: */
154 #define Recurse(Trunc) \
155 { size_t l=last-ffirst,r=llast-first; \
156 if (l<Trunc) { \
157 if (r>=Trunc) doRight \
158 else pop \
159 } \
160 else if (l<=r) { pushLeft; doRight } \
161 else if (r>=Trunc) { pushRight; doLeft }\
162 else doLeft \
163 }
164
165 /* and so is the pivoting logic: */
166 #define Pivot(swapper,sz) \
167 if (last-first>PIVOT_THRESHOLD*sz) mid=pivot_big(first,mid,last,sz,compare);\
168 else { \
169 if (compare(first,mid)<0) { \
170 if (compare(mid,last)>0) { \
171 swapper(mid,last); \
172 if (compare(first,mid)>0) swapper(first,mid);\
173 } \
174 } \
175 else { \
176 if (compare(mid,last)>0) swapper(first,last)\
177 else { \
178 swapper(first,mid); \
179 if (compare(mid,last)>0) swapper(mid,last);\
180 } \
181 } \
182 first+=sz; last-=sz; \
183 }
184
185 #ifdef DEBUG_QSORT
186 #include <stdio.h>
187 #endif
188
189 /* and so is the partitioning logic: */
190 #define Partition(swapper,sz) { \
191 int swapped=0; \
192 do { \
193 while (compare(first,pivot)<0) first+=sz; \
194 while (compare(pivot,last)<0) last-=sz; \
195 if (first<last) { \
196 swapper(first,last); swapped=1; \
197 first+=sz; last-=sz; } \
198 else if (first==last) { first+=sz; last-=sz; break; }\
199 } while (first<=last); \
200 if (!swapped) pop \
201 }
202
203 /* and so is the pre-insertion-sort operation of putting
204 * the smallest element into place as a sentinel.
205 * Doing this makes the inner loop nicer. I got this
206 * idea from the GNU implementation of qsort().
207 */
208 #define PreInsertion(swapper,limit,sz) \
209 first=base; \
210 last=first + (nmemb>limit ? limit : nmemb-1)*sz;\
211 while (last!=base) { \
212 if (compare(first,last)>0) first=last; \
213 last-=sz; } \
214 if (first!=base) swapper(first,(char*)base);
215
216 /* and so is the insertion sort, in the first two cases: */
217 #define Insertion(swapper) \
218 last=((char*)base)+nmemb*size; \
219 for (first=((char*)base)+size;first!=last;first+=size) { \
220 char *test; \
221 /* Find the right place for |first|. \
222 * My apologies for var reuse. */ \
223 for (test=first-size;compare(test,first)>0;test-=size) ; \
224 test+=size; \
225 if (test!=first) { \
226 /* Shift everything in [test,first) \
227 * up by one, and place |first| \
228 * where |test| is. */ \
229 memcpy(pivot,first,size); \
230 memmove(test+size,test,first-test); \
231 memcpy(test,pivot,size); \
232 } \
233 }
234
235 #define SWAP_nonaligned(a,b) { \
236 register char *aa=(a),*bb=(b); \
237 register size_t sz=size; \
238 do { register char t=*aa; *aa++=*bb; *bb++=t; } while (--sz); }
239
240 #define SWAP_aligned(a,b) { \
241 register int *aa=(int*)(a),*bb=(int*)(b); \
242 register size_t sz=size; \
243 do { register int t=*aa;*aa++=*bb; *bb++=t; } while (sz-=WORD_BYTES); }
244
245 #define SWAP_words(a,b) { \
246 register int t=*((int*)a); *((int*)a)=*((int*)b); *((int*)b)=t; }
247
248 /* ---------------------------------------------------------------------- */
249
250 static char * pivot_big(char *first, char *mid, char *last, size_t size,
251 int compare(const void *, const void *)) {
252 int d=(((last-first)/size)>>3)*size;
253 char *m1,*m2,*m3;
254 { char *a=first, *b=first+d, *c=first+2*d;
255 #ifdef DEBUG_QSORT
256 fprintf(stderr,"< %d %d %d\n",*(int*)a,*(int*)b,*(int*)c);
257 #endif
258 m1 = compare(a,b)<0 ?
259 (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
260 : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
261 }
262 { char *a=mid-d, *b=mid, *c=mid+d;
263 #ifdef DEBUG_QSORT
264 fprintf(stderr,". %d %d %d\n",*(int*)a,*(int*)b,*(int*)c);
265 #endif
266 m2 = compare(a,b)<0 ?
267 (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
268 : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
269 }
270 { char *a=last-2*d, *b=last-d, *c=last;
271 #ifdef DEBUG_QSORT
272 fprintf(stderr,"> %d %d %d\n",*(int*)a,*(int*)b,*(int*)c);
273 #endif
274 m3 = compare(a,b)<0 ?
275 (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
276 : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
277 }
278 #ifdef DEBUG_QSORT
279 fprintf(stderr,"-> %d %d %d\n",*(int*)m1,*(int*)m2,*(int*)m3);
280 #endif
281 return compare(m1,m2)<0 ?
282 (compare(m2,m3)<0 ? m2 : (compare(m1,m3)<0 ? m3 : m1))
283 : (compare(m1,m3)<0 ? m1 : (compare(m2,m3)<0 ? m3 : m2));
284 }
285
286 /* ---------------------------------------------------------------------- */
287
288 static void qsort_nonaligned(void *base, size_t nmemb, size_t size,
289 int (*compare)(const void *, const void *)) {
290
291 stack_entry stack[STACK_SIZE];
292 int stacktop=0;
293 char *first,*last;
294 char *pivot=malloc(size);
295 size_t trunc=TRUNC_nonaligned*size;
296 assert(pivot!=0);
297
298 first=(char*)base; last=first+(nmemb-1)*size;
299
300 if (last-first>trunc) {
301 char *ffirst=first, *llast=last;
302 while (1) {
303 /* Select pivot */
304 { char * mid=first+size*((last-first)/size >> 1);
305 Pivot(SWAP_nonaligned,size);
306 memcpy(pivot,mid,size);
307 }
308 /* Partition. */
309 Partition(SWAP_nonaligned,size);
310 /* Prepare to recurse/iterate. */
311 Recurse(trunc)
312 }
313 }
314 PreInsertion(SWAP_nonaligned,TRUNC_nonaligned,size);
315 Insertion(SWAP_nonaligned);
316 free(pivot);
317 }
318
319 static void qsort_aligned(void *base, size_t nmemb, size_t size,
320 int (*compare)(const void *, const void *)) {
321
322 stack_entry stack[STACK_SIZE];
323 int stacktop=0;
324 char *first,*last;
325 char *pivot=malloc(size);
326 size_t trunc=TRUNC_aligned*size;
327 assert(pivot!=0);
328
329 first=(char*)base; last=first+(nmemb-1)*size;
330
331 if (last-first>trunc) {
332 char *ffirst=first,*llast=last;
333 while (1) {
334 /* Select pivot */
335 { char * mid=first+size*((last-first)/size >> 1);
336 Pivot(SWAP_aligned,size);
337 memcpy(pivot,mid,size);
338 }
339 /* Partition. */
340 Partition(SWAP_aligned,size);
341 /* Prepare to recurse/iterate. */
342 Recurse(trunc)
343 }
344 }
345 PreInsertion(SWAP_aligned,TRUNC_aligned,size);
346 Insertion(SWAP_aligned);
347 free(pivot);
348 }
349
350 static void qsort_words(void *base, size_t nmemb,
351 int (*compare)(const void *, const void *)) {
352
353 stack_entry stack[STACK_SIZE];
354 int stacktop=0;
355 char *first,*last;
356 char *pivot=malloc(WORD_BYTES);
357 assert(pivot!=0);
358
359 first=(char*)base; last=first+(nmemb-1)*WORD_BYTES;
360
361 if (last-first>TRUNC_words) {
362 char *ffirst=first, *llast=last;
363 while (1) {
364 #ifdef DEBUG_QSORT
365 fprintf(stderr,"Doing %d:%d: ",
366 (first-(char*)base)/WORD_BYTES,
367 (last-(char*)base)/WORD_BYTES);
368 #endif
369 /* Select pivot */
370 { char * mid=first+WORD_BYTES*((last-first) / (2*WORD_BYTES));
371 Pivot(SWAP_words,WORD_BYTES);
372 *(int*)pivot=*(int*)mid;
373 }
374 #ifdef DEBUG_QSORT
375 fprintf(stderr,"pivot=%d\n",*(int*)pivot);
376 #endif
377 /* Partition. */
378 Partition(SWAP_words,WORD_BYTES);
379 /* Prepare to recurse/iterate. */
380 Recurse(TRUNC_words)
381 }
382 }
383 PreInsertion(SWAP_words,(TRUNC_words/WORD_BYTES),WORD_BYTES);
384 /* Now do insertion sort. */
385 last=((char*)base)+nmemb*WORD_BYTES;
386 for (first=((char*)base)+WORD_BYTES;first!=last;first+=WORD_BYTES) {
387 /* Find the right place for |first|. My apologies for var reuse */
388 int *pl=(int*)(first-WORD_BYTES),*pr=(int*)first;
389 *(int*)pivot=*(int*)first;
390 for (;compare(pl,pivot)>0;pr=pl,--pl) {
391 *pr=*pl; }
392 if (pr!=(int*)first) *pr=*(int*)pivot;
393 }
394 free(pivot);
395 }
396
397 /* ---------------------------------------------------------------------- */
398
399 extern void qsortG(void *base, size_t nmemb, size_t size,
400 int (*compare)(const void *, const void *)) {
401
402 if (nmemb<=1) return;
403 if (((int)base|size)&(WORD_BYTES-1))
404 qsort_nonaligned(base,nmemb,size,compare);
405 else if (size!=WORD_BYTES)
406 qsort_aligned(base,nmemb,size,compare);
407 else
408 qsort_words(base,nmemb,compare);
409 }

  ViewVC Help
Powered by ViewVC 1.1.26