/*1Simple DirectMedia Layer2Copyright (C) 1997-2025 Sam Lantinga <[email protected]>34This software is provided 'as-is', without any express or implied5warranty. In no event will the authors be held liable for any damages6arising from the use of this software.78Permission is granted to anyone to use this software for any purpose,9including commercial applications, and to alter it and redistribute it10freely, subject to the following restrictions:11121. The origin of this software must not be misrepresented; you must not13claim that you wrote the original software. If you use this software14in a product, an acknowledgment in the product documentation would be15appreciated but is not required.162. Altered source versions must be plainly marked as such, and must not be17misrepresented as being the original software.183. This notice may not be removed or altered from any source distribution.19*/20#include "SDL_internal.h"2122// SDL3 always uses its own internal qsort implementation, below, so23// it can guarantee stable sorts across platforms and not have to24// tapdance to support the various qsort_r interfaces, or bridge from25// the C runtime's non-SDLCALL compare functions.2627#ifdef assert28#undef assert29#endif30#define assert SDL_assert31#ifdef malloc32#undef malloc33#endif34#define malloc SDL_malloc35#ifdef free36#undef free37#endif38#define free SDL_free39#ifdef memcpy40#undef memcpy41#endif42#define memcpy SDL_memcpy43#ifdef memmove44#undef memmove45#endif46#define memmove SDL_memmove4748/*49This code came from Gareth McCaughan, under the zlib license.50Specifically this: https://www.mccaughan.org.uk/software/qsort.c-1.165152Everything below this comment until the HAVE_QSORT #endif was from Gareth53(any minor changes will be noted inline).5455Thank you to Gareth for relicensing this code under the zlib license for our56benefit!5758Update for SDL3: we have modified this from a qsort function to qsort_r.5960--ryan.61*/6263/* This is a drop-in replacement for the C library's |qsort()| routine.64*65* It is intended for use where you know or suspect that your66* platform's qsort is bad. If that isn't the case, then you67* should probably use the qsort your system gives you in preference68* to mine -- it will likely have been tested and tuned better.69*70* Features:71* - Median-of-three pivoting (and more)72* - Truncation and final polishing by a single insertion sort73* - Early truncation when no swaps needed in pivoting step74* - Explicit recursion, guaranteed not to overflow75* - A few little wrinkles stolen from the GNU |qsort()|.76* (For the avoidance of doubt, no code was stolen, only77* broad ideas.)78* - separate code for non-aligned / aligned / word-size objects79*80* Earlier releases of this code used an idiosyncratic licence81* I wrote myself, because I'm an idiot. The code is now released82* under the "zlib/libpng licence"; you will find the actual83* terms in the next comment. I request (but do not require)84* that if you make any changes beyond the name of the exported85* routine and reasonable tweaks to the TRUNC_* and86* PIVOT_THRESHOLD values, you modify the _ID string so as87* to make it clear that you have changed the code.88*89* If you find problems with this code, or find ways of90* making it significantly faster, please let me know!91* My e-mail address, valid as of early 2016 and for the92* foreseeable future, is93* [email protected]94* Thanks!95*96* Gareth McCaughan97*/9899/* Copyright (c) 1998-2021 Gareth McCaughan100*101* This software is provided 'as-is', without any express or implied102* warranty. In no event will the authors be held liable for any103* damages arising from the use of this software.104*105* Permission is granted to anyone to use this software for any purpose,106* including commercial applications, and to alter it and redistribute it107* freely, subject to the following restrictions:108*109* 1. The origin of this software must not be misrepresented;110* you must not claim that you wrote the original software.111* If you use this software in a product, an acknowledgment112* in the product documentation would be appreciated but113* is not required.114*115* 2. Altered source versions must be plainly marked as such,116* and must not be misrepresented as being the original software.117*118* 3. This notice may not be removed or altered from any source119* distribution.120*/121122/* Revision history since release:123* 1998-03-19 v1.12 First release I have any records of.124* 2007-09-02 v1.13 Fix bug kindly reported by Dan Bodoh125* (premature termination of recursion).126* Add a few clarifying comments.127* Minor improvements to debug output.128* 2016-02-21 v1.14 Replace licence with 2-clause BSD,129* and clarify a couple of things in130* comments. No code changes.131* 2016-03-10 v1.15 Fix bug kindly reported by Ryan Gordon132* (pre-insertion-sort messed up).133* Disable DEBUG_QSORT by default.134* Tweak comments very slightly.135* 2021-02-20 v1.16 Fix bug kindly reported by Ray Gardner136* (error in recursion leading to possible137* stack overflow).138* When checking alignment, avoid casting139* pointer to possibly-smaller integer.140*/141142/* BEGIN SDL CHANGE ... commented this out with an #if 0 block. --ryan. */143#if 0144#include <assert.h>145#include <stdint.h>146#include <stdlib.h>147#include <string.h>148149#undef DEBUG_QSORT150151static char _ID[]="<qsort.c gjm WITH CHANGES FOR SDL3 1.16 2021-02-20>";152#endif153/* END SDL CHANGE ... commented this out with an #if 0 block. --ryan. */154155/* How many bytes are there per word? (Must be a power of 2,156* and must in fact equal sizeof(int).)157*/158#define WORD_BYTES sizeof(int)159160/* How big does our stack need to be? Answer: one entry per161* bit in a |size_t|. (Actually, a bit less because we don't162* recurse all the way down to size-1 subarrays.)163*/164#define STACK_SIZE (8*sizeof(size_t))165166/* Different situations have slightly different requirements,167* and we make life epsilon easier by using different truncation168* points for the three different cases.169* So far, I have tuned TRUNC_words and guessed that the same170* value might work well for the other two cases. Of course171* what works well on my machine might work badly on yours.172*/173#define TRUNC_nonaligned 12174#define TRUNC_aligned 12175#define TRUNC_words 12*WORD_BYTES /* nb different meaning */176177/* We use a simple pivoting algorithm for shortish sub-arrays178* and a more complicated one for larger ones. The threshold179* is PIVOT_THRESHOLD.180*/181#define PIVOT_THRESHOLD 40182183typedef struct { char * first; char * last; } stack_entry;184#define pushLeft {stack[stacktop].first=ffirst;stack[stacktop++].last=last;}185#define pushRight {stack[stacktop].first=first;stack[stacktop++].last=llast;}186#define doLeft {first=ffirst;llast=last;continue;}187#define doRight {ffirst=first;last=llast;continue;}188#define pop {if (--stacktop<0) break;\189first=ffirst=stack[stacktop].first;\190last=llast=stack[stacktop].last;\191continue;}192193/* Some comments on the implementation.194* 1. When we finish partitioning the array into "low"195* and "high", we forget entirely about short subarrays,196* because they'll be done later by insertion sort.197* Doing lots of little insertion sorts might be a win198* on large datasets for locality-of-reference reasons,199* but it makes the code much nastier and increases200* bookkeeping overhead.201* 2. We always save the longer and get to work on the202* shorter. This guarantees that whenever we push203* a k'th entry onto the stack we are about to get204* working on something of size <= N/2^k where N is205* the original array size; so the stack can't need206* more than log_2(max-array-size) entries.207* 3. We choose a pivot by looking at the first, last208* and middle elements. We arrange them into order209* because it's easy to do that in conjunction with210* choosing the pivot, and it makes things a little211* easier in the partitioning step. Anyway, the pivot212* is the middle of these three. It's still possible213* to construct datasets where the algorithm takes214* time of order n^2, but it simply never happens in215* practice.216* 3' Newsflash: On further investigation I find that217* it's easy to construct datasets where median-of-3218* simply isn't good enough. So on large-ish subarrays219* we do a more sophisticated pivoting: we take three220* sets of 3 elements, find their medians, and then221* take the median of those.222* 4. We copy the pivot element to a separate place223* because that way we can always do our comparisons224* directly against a pointer to that separate place,225* and don't have to wonder "did we move the pivot226* element?". This makes the inner loop better.227* 5. It's possible to make the pivoting even more228* reliable by looking at more candidates when n229* is larger. (Taking this to its logical conclusion230* results in a variant of quicksort that doesn't231* have that n^2 worst case.) However, the overhead232* from the extra bookkeeping means that it's just233* not worth while.234* 6. This is pretty clean and portable code. Here are235* all the potential portability pitfalls and problems236* I know of:237* - In one place (the insertion sort) I construct238* a pointer that points just past the end of the239* supplied array, and assume that (a) it won't240* compare equal to any pointer within the array,241* and (b) it will compare equal to a pointer242* obtained by stepping off the end of the array.243* These might fail on some segmented architectures.244* - I assume that there are 8 bits in a |char| when245* computing the size of stack needed. This would246* fail on machines with 9-bit or 16-bit bytes.247* - I assume that if |((int)base&(sizeof(int)-1))==0|248* and |(size&(sizeof(int)-1))==0| then it's safe to249* get at array elements via |int*|s, and that if250* actually |size==sizeof(int)| as well then it's251* safe to treat the elements as |int|s. This might252* fail on systems that convert pointers to integers253* in non-standard ways.254* - I assume that |8*sizeof(size_t)<=INT_MAX|. This255* would be false on a machine with 8-bit |char|s,256* 16-bit |int|s and 4096-bit |size_t|s. :-)257*/258259/* The recursion logic is the same in each case.260* We keep chopping up until we reach subarrays of size261* strictly less than Trunc; we leave these unsorted. */262#define Recurse(Trunc) \263{ size_t l=last-ffirst,r=llast-first; \264if (l<Trunc) { \265if (r>=Trunc) doRight \266else pop \267} \268else if (l<=r) { pushRight; doLeft } \269else if (r>=Trunc) { pushLeft; doRight }\270else doLeft \271}272273/* and so is the pivoting logic (note: last is inclusive): */274#define Pivot(swapper,sz) \275if ((size_t)(last-first)>PIVOT_THRESHOLD*sz) mid=pivot_big(first,mid,last,sz,compare,userdata);\276else { \277if (compare(userdata,first,mid)<0) { \278if (compare(userdata,mid,last)>0) { \279swapper(mid,last); \280if (compare(userdata,first,mid)>0) swapper(first,mid);\281} \282} \283else { \284if (compare(userdata,mid,last)>0) swapper(first,last)\285else { \286swapper(first,mid); \287if (compare(userdata,mid,last)>0) swapper(mid,last);\288} \289} \290first+=sz; last-=sz; \291}292293#ifdef DEBUG_QSORT294#include <stdio.h>295#endif296297/* and so is the partitioning logic: */298#define Partition(swapper,sz) { \299do { \300while (compare(userdata,first,pivot)<0) first+=sz; \301while (compare(userdata,pivot,last)<0) last-=sz; \302if (first<last) { \303swapper(first,last); \304first+=sz; last-=sz; } \305else if (first==last) { first+=sz; last-=sz; break; }\306} while (first<=last); \307}308309/* and so is the pre-insertion-sort operation of putting310* the smallest element into place as a sentinel.311* Doing this makes the inner loop nicer. I got this312* idea from the GNU implementation of qsort().313* We find the smallest element from the first |nmemb|,314* or the first |limit|, whichever is smaller;315* therefore we must have ensured that the globally smallest316* element is in the first |limit| (because our317* quicksort recursion bottoms out only once we318* reach subarrays smaller than |limit|).319*/320#define PreInsertion(swapper,limit,sz) \321first=base; \322last=first + ((nmemb>limit ? limit : nmemb)-1)*sz;\323while (last!=base) { \324if (compare(userdata,first,last)>0) first=last; \325last-=sz; } \326if (first!=base) swapper(first,(char*)base);327328/* and so is the insertion sort, in the first two cases: */329#define Insertion(swapper) \330last=((char*)base)+nmemb*size; \331for (first=((char*)base)+size;first!=last;first+=size) { \332char *test; \333/* Find the right place for |first|. \334* My apologies for var reuse. */ \335for (test=first-size;compare(userdata,test,first)>0;test-=size) ; \336test+=size; \337if (test!=first) { \338/* Shift everything in [test,first) \339* up by one, and place |first| \340* where |test| is. */ \341memcpy(pivot,first,size); \342memmove(test+size,test,first-test); \343memcpy(test,pivot,size); \344} \345}346347#define SWAP_nonaligned(a,b) { \348register char *aa=(a),*bb=(b); \349register size_t sz=size; \350do { register char t=*aa; *aa++=*bb; *bb++=t; } while (--sz); }351352#define SWAP_aligned(a,b) { \353register int *aa=(int*)(a),*bb=(int*)(b); \354register size_t sz=size; \355do { register int t=*aa;*aa++=*bb; *bb++=t; } while (sz-=WORD_BYTES); }356357#define SWAP_words(a,b) { \358register int t=*((int*)a); *((int*)a)=*((int*)b); *((int*)b)=t; }359360/* ---------------------------------------------------------------------- */361362static char * pivot_big(char *first, char *mid, char *last, size_t size,363int (SDLCALL *compare)(void *, const void *, const void *), void *userdata) {364size_t d=(((last-first)/size)>>3)*size;365#ifdef DEBUG_QSORT366fprintf(stderr, "pivot_big: first=%p last=%p size=%lu n=%lu\n", first, (unsigned long)last, size, (unsigned long)((last-first+1)/size));367#endif368char *m1,*m2,*m3;369{ char *a=first, *b=first+d, *c=first+2*d;370#ifdef DEBUG_QSORT371fprintf(stderr,"< %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);372#endif373m1 = compare(userdata,a,b)<0 ?374(compare(userdata,b,c)<0 ? b : (compare(userdata,a,c)<0 ? c : a))375: (compare(userdata,a,c)<0 ? a : (compare(userdata,b,c)<0 ? c : b));376}377{ char *a=mid-d, *b=mid, *c=mid+d;378#ifdef DEBUG_QSORT379fprintf(stderr,". %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);380#endif381m2 = compare(userdata,a,b)<0 ?382(compare(userdata,b,c)<0 ? b : (compare(userdata,a,c)<0 ? c : a))383: (compare(userdata,a,c)<0 ? a : (compare(userdata,b,c)<0 ? c : b));384}385{ char *a=last-2*d, *b=last-d, *c=last;386#ifdef DEBUG_QSORT387fprintf(stderr,"> %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);388#endif389m3 = compare(userdata,a,b)<0 ?390(compare(userdata,b,c)<0 ? b : (compare(userdata,a,c)<0 ? c : a))391: (compare(userdata,a,c)<0 ? a : (compare(userdata,b,c)<0 ? c : b));392}393#ifdef DEBUG_QSORT394fprintf(stderr,"-> %d %d %d @ %p %p %p\n",*(int*)m1,*(int*)m2,*(int*)m3, m1,m2,m3);395#endif396return compare(userdata,m1,m2)<0 ?397(compare(userdata,m2,m3)<0 ? m2 : (compare(userdata,m1,m3)<0 ? m3 : m1))398: (compare(userdata,m1,m3)<0 ? m1 : (compare(userdata,m2,m3)<0 ? m3 : m2));399}400401/* ---------------------------------------------------------------------- */402403static void qsort_r_nonaligned(void *base, size_t nmemb, size_t size,404int (SDLCALL *compare)(void *, const void *, const void *), void *userdata) {405406stack_entry stack[STACK_SIZE];407int stacktop=0;408char *first,*last;409char *pivot=malloc(size);410size_t trunc=TRUNC_nonaligned*size;411assert(pivot != NULL);412413first=(char*)base; last=first+(nmemb-1)*size;414415if ((size_t)(last-first)>=trunc) {416char *ffirst=first, *llast=last;417while (1) {418/* Select pivot */419{ char * mid=first+size*((last-first)/size >> 1);420Pivot(SWAP_nonaligned,size);421memcpy(pivot,mid,size);422}423/* Partition. */424Partition(SWAP_nonaligned,size);425/* Prepare to recurse/iterate. */426Recurse(trunc)427}428}429PreInsertion(SWAP_nonaligned,TRUNC_nonaligned,size);430Insertion(SWAP_nonaligned);431free(pivot);432}433434static void qsort_r_aligned(void *base, size_t nmemb, size_t size,435int (SDLCALL *compare)(void *,const void *, const void *), void *userdata) {436437stack_entry stack[STACK_SIZE];438int stacktop=0;439char *first,*last;440char *pivot=malloc(size);441size_t trunc=TRUNC_aligned*size;442assert(pivot != NULL);443444first=(char*)base; last=first+(nmemb-1)*size;445446if ((size_t)(last-first)>=trunc) {447char *ffirst=first,*llast=last;448while (1) {449/* Select pivot */450{ char * mid=first+size*((last-first)/size >> 1);451Pivot(SWAP_aligned,size);452memcpy(pivot,mid,size);453}454/* Partition. */455Partition(SWAP_aligned,size);456/* Prepare to recurse/iterate. */457Recurse(trunc)458}459}460PreInsertion(SWAP_aligned,TRUNC_aligned,size);461Insertion(SWAP_aligned);462free(pivot);463}464465static void qsort_r_words(void *base, size_t nmemb,466int (SDLCALL *compare)(void *,const void *, const void *), void *userdata) {467468stack_entry stack[STACK_SIZE];469int stacktop=0;470char *first,*last;471char *pivot=malloc(WORD_BYTES);472assert(pivot != NULL);473474first=(char*)base; last=first+(nmemb-1)*WORD_BYTES;475476if (last-first>=TRUNC_words) {477char *ffirst=first, *llast=last;478while (1) {479#ifdef DEBUG_QSORT480fprintf(stderr,"Doing %d:%d: ",481(first-(char*)base)/WORD_BYTES,482(last-(char*)base)/WORD_BYTES);483#endif484/* Select pivot */485{ char * mid=first+WORD_BYTES*((last-first) / (2*WORD_BYTES));486Pivot(SWAP_words,WORD_BYTES);487*(int*)pivot=*(int*)mid;488#ifdef DEBUG_QSORT489fprintf(stderr,"pivot = %p = #%lu = %d\n", mid, (unsigned long)(((int*)mid)-((int*)base)), *(int*)mid);490#endif491}492/* Partition. */493Partition(SWAP_words,WORD_BYTES);494#ifdef DEBUG_QSORT495fprintf(stderr, "after partitioning first=#%lu last=#%lu\n", (first-(char*)base)/4lu, (last-(char*)base)/4lu);496#endif497/* Prepare to recurse/iterate. */498Recurse(TRUNC_words)499}500}501PreInsertion(SWAP_words,TRUNC_words/WORD_BYTES,WORD_BYTES);502/* Now do insertion sort. */503last=((char*)base)+nmemb*WORD_BYTES;504for (first=((char*)base)+WORD_BYTES;first!=last;first+=WORD_BYTES) {505/* Find the right place for |first|. My apologies for var reuse */506int *pl=(int*)(first-WORD_BYTES),*pr=(int*)first;507*(int*)pivot=*(int*)first;508for (;compare(userdata,pl,pivot)>0;pr=pl,--pl) {509*pr=*pl; }510if (pr!=(int*)first) *pr=*(int*)pivot;511}512free(pivot);513}514515/* ---------------------------------------------------------------------- */516517void SDL_qsort_r(void *base, size_t nmemb, size_t size,518SDL_CompareCallback_r compare, void *userdata) {519520if (nmemb<=1) return;521if (((uintptr_t)base|size)&(WORD_BYTES-1))522qsort_r_nonaligned(base,nmemb,size,compare,userdata);523else if (size!=WORD_BYTES)524qsort_r_aligned(base,nmemb,size,compare,userdata);525else526qsort_r_words(base,nmemb,compare,userdata);527}528529static int SDLCALL qsort_non_r_bridge(void *userdata, const void *a, const void *b)530{531int (SDLCALL *compare)(const void *, const void *) = (int (SDLCALL *)(const void *, const void *)) userdata;532return compare(a, b);533}534535void SDL_qsort(void *base, size_t nmemb, size_t size, SDL_CompareCallback compare)536{537SDL_qsort_r(base, nmemb, size, qsort_non_r_bridge, compare);538}539540// Don't use the C runtime for such a simple function, since we want to allow SDLCALL callbacks and userdata.541// SDL's replacement: Taken from the Public Domain C Library (PDCLib):542// Permission is granted to use, modify, and / or redistribute at will.543void *SDL_bsearch_r(const void *key, const void *base, size_t nmemb, size_t size, SDL_CompareCallback_r compare, void *userdata)544{545const void *pivot;546size_t corr;547int rc;548549while (nmemb) {550/* algorithm needs -1 correction if remaining elements are an even number. */551corr = nmemb % 2;552nmemb /= 2;553pivot = (const char *)base + (nmemb * size);554rc = compare(userdata, key, pivot);555556if (rc > 0) {557base = (const char *)pivot + size;558/* applying correction */559nmemb -= (1 - corr);560} else if (rc == 0) {561return (void *)pivot;562}563}564565return NULL;566}567568void *SDL_bsearch(const void *key, const void *base, size_t nmemb, size_t size, SDL_CompareCallback compare)569{570// qsort_non_r_bridge just happens to match calling conventions, so reuse it.571return SDL_bsearch_r(key, base, nmemb, size, qsort_non_r_bridge, compare);572}573574575576