#ifndef search_sort_util_h__
#define search_sort_util_h__

#include "straighten_core.h"


//##########################################
//# PRINT FUNCTIONS
//##########################################


/*
 * print_2d_array(data, size1, size2)
 *
 * Type-generic front end for the print_2d_<type>() routines: the function is
 * chosen at compile time from the type of *data, then called with the same
 * three arguments.  Supported element types are exactly the ones listed in
 * the association list below; any other type is a compile error.
 *
 * The arguments are parenthesized so that expressions such as `buf + off`
 * select on the element type rather than on the promoted type of `*buf + off`.
 */
#define print_2d_array(data, size1, size2)                      \
  _Generic((*(data)),                                           \
            uint8_t: print_2d_uint8_t,                          \
            __mpz_struct*: print_2d_mpz_t,                      \
            int32_t: print_2d_int32_t,                          \
            int64_t: print_2d_int64_t,                          \
            uint32_t: print_2d_uint32_t)((data), (size1), (size2))

/*
 * print_2d_array_compact(data, size1, size2)
 *
 * Type-generic front end for the print_2d_compact_<type>() routines.  The
 * callee is selected from the type of *data (uint8_t or uint32_t only) and
 * invoked with the same three arguments.
 *
 * The arguments are parenthesized so that pointer expressions such as
 * `buf + off` dispatch on the element type, not on a promoted integer type.
 */
#define print_2d_array_compact(data, size1, size2)              \
  _Generic((*(data)),                                           \
            uint8_t: print_2d_compact_uint8_t,                  \
            uint32_t: print_2d_compact_uint32_t)((data), (size1), (size2))

/*
 * print_array(data, size)
 *
 * Type-generic front end for print_uint8_t / print_int32_t / print_uint32_t.
 * The callee is selected from the type of *data and invoked with (data, size).
 *
 * The arguments are parenthesized so that pointer expressions such as
 * `buf + off` dispatch on the element type, not on a promoted integer type.
 */
#define print_array(data, size)                     \
  _Generic((*(data)),                               \
            uint8_t: print_uint8_t,                 \
            int32_t: print_int32_t,                 \
            uint32_t: print_uint32_t)((data), (size))

/*
 * Typed print routines (prototypes only).  The print_array, print_2d_array
 * and print_2d_array_compact macros dispatch to these according to the
 * element type of `data`; they may also be called directly.
 */

/* One-dimensional printers: `data` points at the elements, `size` is passed
 * through from print_array(). */
void print_uint8_t(const uint8_t* data, size_t size);

void print_uint32_t(const uint32_t* data, size_t size);

void print_int32_t(const int32_t* data, size_t size);

/* Two-dimensional printers.
 * NOTE(review): size1/size2 are presumably the two dimensions of a flat
 * array -- confirm order (rows vs. columns) against the implementation. */
void print_2d_uint8_t(const uint8_t* data, size_t size1, size_t size2);

void print_2d_uint32_t(const uint32_t* data, size_t size1, size_t size2);

void print_2d_int32_t(const int32_t* data, size_t size1, size_t size2);

void print_2d_int64_t(const int64_t* data, size_t size1, size_t size2);

void print_2d_mpz_t(const mpz_t* data, size_t size1, size_t size2);

/* Targets of print_2d_array_compact(). */
void print_2d_compact_uint32_t(const uint32_t* data, size_t size1, size_t size2);

void print_2d_compact_uint8_t(const uint8_t* data, size_t size1, size_t size2);

/* Printers for tableau structures.  The meaning of coeff, topbar, bottombar,
 * index_only and tableau_display_mode is defined by the implementation and
 * is not visible from this header. */
void print_tableau(struct tableau * t, struct shape_data_c * s_data, int coeff, int topbar, int bottombar, int tableau_display_mode);

void print_packed_tableau(struct packed_tableau * t, struct shape_data_c * s_data, int topbar, int bottombar, int tableau_display_mode);

void print_straighten_result_int64_t(int64_t * straighten_result, struct sstd_data_c * sstd_data, struct shape_data_c * s_data, int index_only, int tableau_display_mode);

//void print_straighten_result_bigint(mpz_t * straighten_result, struct sstd_data_c * sstd_data, struct shape_data_c * s_data, int pretty_print);


//##########################################
//# TABLEAU MANIPULATION AND UTILITY FUNCTIONS
//##########################################

/* Reallocation helper taking both the old and the new size of the buffer.
 * NOTE(review): the name suggests that bytes added beyond oldSize are
 * zero-filled -- confirm against the implementation. */
void* realloc_zero(void* pBuffer, size_t oldSize, size_t newSize);

/*
 * Read one 4-bit value from a nibble-packed byte array.
 *
 * d   - packed array, two values per byte
 * pos - index of the 4-bit value; even indices live in the high nibble of
 *       byte pos/2, odd indices in the low nibble
 *
 * Returns the value (0..15).
 */
inline int8_t get_packed(const uint8_t * d, const uint8_t pos) {
    const uint8_t byte = d[pos >> 1];

    if (pos & 1) {
        return byte & 0x0F;
    }
    return byte >> 4;
}

/*
 * Store one 4-bit value into a nibble-packed byte array.
 *
 * d   - packed array, two values per byte
 * pos - index of the 4-bit slot; even indices are the high nibble of byte
 *       pos/2, odd indices the low nibble
 * val - value to store; only its low four bits are used
 *
 * The other nibble of the affected byte is preserved.
 */
inline void set_packed(uint8_t * d, const uint8_t pos, const uint8_t val) {
    uint8_t * const cell = &d[pos >> 1];
    const uint8_t nibble = val & 0xF;

    if (pos & 1) {
        *cell = (uint8_t)((*cell & 0xF0) | nibble);
    } else {
        *cell = (uint8_t)((nibble << 4) | (*cell & 0x0F));
    }
}

/*
 * Pack `len` byte values from s into d, two per output byte.
 *
 * Each output byte is (s[2k] << 4) + s[2k+1], truncated to 8 bits.  When len
 * is odd, the final output byte holds the last source value in its high
 * nibble and zero in its low nibble.
 *
 * NOTE(review): source values are not masked, so this presumably expects
 * every s[i] to fit in four bits -- confirm with callers.
 */
FORCE_INLINE void set_packed_mult(uint8_t * d, const uint8_t * s, const int len) {
    uint8_t *out = d;
    const uint8_t * const full_end = d + (len >> 1);

    /* Whole pairs first. */
    while (out < full_end) {
        *out = ((s[0] << 4) + s[1]);
        s += 2;
        ++out;
    }

    /* Odd length: one value is left over for the high nibble. */
    if (len & 1) {
        *out = (s[0] << 4);
    }
}

/*
 * Assign a signed long long to a GMP integer in two 32-bit steps:
 * the signed upper half is loaded and shifted left by 32 bits, then the
 * unsigned lower half is added.
 */
inline void mpz_set_sll(mpz_t n, long long sll) {
    const int upper = (int)(sll >> 32);
    const unsigned int lower = (unsigned int)sll;

    mpz_set_si(n, upper);      /* n = upper half, sign included */
    mpz_mul_2exp(n, n, 32);    /* n <<= 32 */
    mpz_add_ui(n, n, lower);   /* n += lower half */
}

/*
 * Extract the low 64 bits of a GMP integer as an unsigned long long.
 *
 * The value is reduced modulo 2^64 into a temporary, which is then read out
 * as two 32-bit halves (mpz_get_ui result truncated to unsigned int).
 */
inline unsigned long long mpz_get_ull(mpz_t n) {
    mpz_t scratch;
    unsigned int low_half, high_half;

    mpz_init(scratch);
    mpz_mod_2exp(scratch, n, 64);           /* scratch = lower 64 bits of n */

    low_half = mpz_get_ui(scratch);         /* bits 0..31 */
    mpz_div_2exp(scratch, scratch, 32);     /* scratch >>= 32 */
    high_half = mpz_get_ui(scratch);        /* bits 32..63 */

    mpz_clear(scratch);

    return (((unsigned long long)high_half) << 32) + low_half;
}

/*
 * Signed counterpart of mpz_get_ull(): fetches the low 64 bits and
 * reinterprets them as a long long.
 */
inline long long mpz_get_sll(mpz_t n) {
    const unsigned long long low_bits = mpz_get_ull(n);
    return (long long)low_bits;
}

/*
 * Returns x! computed in plain int arithmetic.
 * Any x below 2 (including negative values) yields 1.
 * The result is not range-checked; with a 32-bit int it overflows for x > 12.
 */
inline int factorial(int x) {
    int product = 1;
    int factor = 2;

    while (factor <= x) {
        product *= factor;
        ++factor;
    }
    return product;
}

/*
 * Prototypes for permutation generation, bulk tableau storage management and
 * string/file conversion.  Only the declarations are visible here; parameter
 * semantics, ownership of the pointer-to-pointer outputs and the meaning of
 * the int32_t return codes are defined by the implementations.
 */
void straighten_gen_permutations(uint8_t * perm, size_t size, uint8_t * storage);

/* Bulk set/copy/realloc helpers for arrays of `size` tableaux.
 * NOTE(review): `contiguous` presumably selects whether the entry storage of
 * the array is one contiguous allocation -- confirm in the implementation. */
void set_tableau_bulk(struct tableau * tableau_array, size_t size, struct shape_data_c * s_data);

void copy_tableau_bulk(struct tableau * dest, struct tableau * src, size_t size, struct shape_data_c * s_data, int contiguous);

void set_packed_tableau_bulk(struct packed_tableau * tableau_array, size_t size, struct shape_data_c * s_data);

void copy_packed_tableau_bulk(struct packed_tableau * dest, struct packed_tableau * src, size_t size, struct shape_data_c * s_data, int contiguous);

void realloc_packed_tableau_bulk_contiguous(struct packed_tableau ** tableau_array, size_t old_size, size_t new_size, struct shape_data_c * s_data);

void realloc_tableau_bulk_contiguous(struct tableau ** tableau_array, size_t old_size, size_t new_size, struct shape_data_c * s_data);

/* Conversions between strings/files and shapes, contents and tableaux. */
int32_t shape_to_string(uint32_t * shape, uint32_t shape_length, char * shape_string);

int32_t string_to_content(uint8_t ** content, uint8_t * content_length, char * shape_string);

int32_t string_to_shape(uint32_t ** shape, uint32_t * shape_length, char * shape_string);

int32_t string_to_tableau(struct tableau ** tableau_arr, uint32_t * arr_max_length, uint32_t * arr_pos, char * shape_string, struct shape_data_c * s_data);

int32_t file_to_tableau(struct tableau ** tableau_arr, uint32_t * arr_max_length, uint32_t * arr_pos, char * filename, struct shape_data_c * s_data);

int32_t get_tableau_content(uint8_t ** content, uint8_t * content_length, struct tableau * tab, struct shape_data_c * s_data);

int32_t tableau_to_string(struct tableau * tableau_arr, char * shape_string, int32_t coeff, struct shape_data_c * s_data, int row_word);

int32_t packed_tableau_to_string(struct packed_tableau * tableau_arr, char * shape_string, struct shape_data_c * s_data);


//##########################################
//# COMPARISON FUNCTIONS
//##########################################


/*
 * Lexicographic comparison of two byte arrays of equal length.
 *
 * Returns x[i] - y[i] for the first index i at which the arrays differ
 * (negative if x sorts first, positive if y sorts first), or 0 when the
 * first `len` bytes are identical.
 */
inline int compare_uint8_lex(const uint8_t* x, const uint8_t* y, const size_t len) {
    for (size_t i = 0; i < len; ++i) {
        const int diff = (int)x[i] - (int)y[i];
        if (diff != 0) {
            return diff;
        }
    }
    return 0;
}

/*
 * Three-way comparison of two single bytes, with a qsort/bsearch-style
 * signature: both arguments point at a uint8_t.
 * Returns -1, 0 or 1.
 */
inline int compare_uint8(const void* x, const void* y)
{
    const uint8_t lhs = *(const uint8_t*)x;
    const uint8_t rhs = *(const uint8_t*)y;

    return (lhs > rhs) - (lhs < rhs);
}

/*
 * Context-first variant of the lexicographic byte-array comparison.
 *
 * arg - the array length, carried in the pointer value itself (cast to size_t)
 * lhs - first byte array
 * rhs - second byte array
 *
 * Returns the difference of the first pair of bytes that differ, or 0.
 * NOTE(review): the (context, a, b) argument order presumably matches the
 * sort routine this is passed to -- confirm at the call site.
 */
inline int compare_uint8_lex_s(void *arg, const void *lhs, const void *rhs) {
    const size_t count = (size_t)arg;
    const uint8_t *a = (const uint8_t *)lhs;
    const uint8_t *b = (const uint8_t *)rhs;

    for (size_t i = 0; i < count; ++i) {
        const int diff = (int)a[i] - (int)b[i];
        if (diff != 0) {
            return diff;
        }
    }
    return 0;
}

/*
 * Context-last variant of the lexicographic byte-array comparison.
 *
 * lhs - first byte array
 * rhs - second byte array
 * arg - the array length, carried in the pointer value itself (cast to size_t)
 *
 * Returns the difference of the first pair of bytes that differ, or 0.
 * The (a, b, context) order is the one _straighten_bsearch_r() expects of
 * its comparator.
 */
inline int compare_uint8_lex_r(const void *lhs, const void *rhs, void *arg) {
    const size_t count = (size_t)arg;
    const uint8_t *a = (const uint8_t *)lhs;
    const uint8_t *b = (const uint8_t *)rhs;

    for (size_t i = 0; i < count; ++i) {
        const int diff = (int)a[i] - (int)b[i];
        if (diff != 0) {
            return diff;
        }
    }
    return 0;
}

inline int sstd_tableau_colword_cmp_s(void *arg, const void *lhs, const void *rhs) {
    struct shape_data_c *s_data = (struct shape_data_c *)arg;
    struct tableau *left = (struct tableau *)lhs;
    struct tableau *right = (struct tableau *)rhs; 
    
    for(int box = 0; box < s_data[0].num_boxes; box++) {
    	if(left[0].entries[box] < right[0].entries[box]) {
    		return -1;
    	}
    	if(left[0].entries[box] > right[0].entries[box]) {
    		return 1;
    	}
    }
    return 0;
}

inline int sstd_tableau_colword_cmp_r(const void *lhs, const void *rhs, void *arg) {
    struct shape_data_c *s_data = (struct shape_data_c *)arg;
    struct tableau *left = (struct tableau *)lhs;
    struct tableau *right = (struct tableau *)rhs; 
    
    for(int box = 0; box < s_data[0].num_boxes; box++) {
        if(left[0].entries[box] < right[0].entries[box]) {
            return -1;
        }
        if(left[0].entries[box] > right[0].entries[box]) {
            return 1;
        }
    }
    return 0;
}

inline int sstd_tableau_rowword_cmp_rev_s(void *arg, const void *lhs, const void *rhs) {
    struct shape_data_c *s_data = (struct shape_data_c *)arg;
    struct tableau *left = (struct tableau *)lhs;
    struct tableau *right = (struct tableau *)rhs; 
    
    for(int box = 0; box < s_data[0].num_boxes; box++) {
    	if(left[0].entries[s_data[0].roworder_to_box[box]] < right[0].entries[s_data[0].roworder_to_box[box]]) {
    		return 1;
    	}
    	if(left[0].entries[s_data[0].roworder_to_box[box]] > right[0].entries[s_data[0].roworder_to_box[box]]) {
    		return -1;
    	}
    }
    return 0;
}

inline int sstd_tableau_rowword_cmp_rev_r(const void *lhs, const void *rhs, void *arg) {
    struct shape_data_c *s_data = (struct shape_data_c *)arg;
    struct tableau *left = (struct tableau *)lhs;
    struct tableau *right = (struct tableau *)rhs; 
    
    for(int box = 0; box < s_data[0].num_boxes; box++) {
    	if(left[0].entries[s_data[0].roworder_to_box[box]] < right[0].entries[s_data[0].roworder_to_box[box]]) {
    		return 1;
    	}
    	if(left[0].entries[s_data[0].roworder_to_box[box]] > right[0].entries[s_data[0].roworder_to_box[box]]) {
    		return -1;
    	}
    }
    return 0;
}

inline int sstd_tableau_rowword_single_cmp_rev_r(const void *arg1, const void *arg2, void *arg) {
    uint8_t *key = (uint8_t *)arg1;
    struct tableau *entry = (struct tableau *)arg2; 
    struct shape_data_c *s_data = (struct shape_data_c *)arg;
    
    for(int box = 0; box < s_data[0].num_boxes; box++) {
    	if(key[box] < entry[0].entries[s_data[0].roworder_to_box[box]]) {
    		return 1;
    	}
    	if(key[box] > entry[0].entries[s_data[0].roworder_to_box[box]]) {
    		return -1;
    	}
    }
    return 0;
}

/*
 * Binary search with a caller-supplied context pointer.
 *
 * key    - object to look for; passed as the first comparator argument
 * base   - start of an array of nmemb elements, each `size` bytes, ordered
 *          consistently with compar
 * compar - compar(key, element, arg): negative if key sorts before element,
 *          positive if after, zero on a match
 * arg    - passed through unchanged to every compar call
 *
 * Returns a pointer to a matching element, or NULL if there is none
 * (including when nmemb is 0).
 *
 * The search window is the half-open index range [lo, hi).  Keeping the
 * upper bound exclusive means it never has to go below zero, so an empty
 * array or a key smaller than element 0 cannot make the unsigned bound wrap
 * around to SIZE_MAX.  The probe index is chosen exactly as in a closed
 * [lo, hi-1] search, so the sequence of comparisons is unchanged for every
 * input that previously terminated correctly.
 */
inline void * _straighten_bsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
                 int (*compar) (const void *, const void *, void *), void *arg) {
        size_t lo = 0;
        size_t hi = nmemb;

        while (lo < hi) {
                const size_t idx = lo + (hi - 1 - lo) / 2;
                const void *p = ((const char *) base) + (idx * size);
                const int comparison = compar(key, p, arg);

                if (comparison < 0)
                        hi = idx;           /* continue in [lo, idx) */
                else if (comparison > 0)
                        lo = idx + 1;       /* continue in [idx+1, hi) */
                else
                        return (void *)p;
        }
        return NULL;
}

/*
 * Index-returning wrapper around _straighten_bsearch_r().
 *
 * Takes the same arguments, and converts the returned element pointer into
 * an element index by dividing its byte offset from `base` by `size`.
 * Returns that index, or -1 when no element matches.
 */
inline int straighten_bsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
                  int (*compar) (const void *, const void *, void *), void *arg) {
    const int8_t * const hit = (const int8_t *)_straighten_bsearch_r(key, base, nmemb, size, compar, arg);

    if (hit == NULL) {
        return -1;
    }

    int64_t index = hit - (const int8_t *)base;   /* byte offset of the match */
    index = index / size;                         /* bytes -> elements */
    return index;
}


//##########################################
//# SORT FUNCTIONS
//##########################################

/*
 * Sorts d[0..1] ascending in place and returns the number of exchanges
 * performed (0 or 1).  Equal elements are not counted as an exchange.
 */
inline int signed_sort2_fast(uint8_t * d) {
#define CMPSWAP_COUNT(i, j, cnt) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; (cnt) += swap_; } while (0)
    int exchanges = 0;

    CMPSWAP_COUNT(0, 1, exchanges);

    return exchanges;
#undef CMPSWAP_COUNT
}

/*
 * Sorts d[0..2] ascending in place with a fixed sequence of three
 * compare-exchange steps, and returns how many of those steps actually
 * exchanged two elements (equal elements are not counted).
 */
inline int signed_sort3_fast(uint8_t * d) {
#define CMPSWAP_COUNT(i, j, cnt) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; (cnt) += swap_; } while (0)
    int exchanges = 0;

    CMPSWAP_COUNT(1, 2, exchanges);
    CMPSWAP_COUNT(0, 2, exchanges);
    CMPSWAP_COUNT(0, 1, exchanges);

    return exchanges;
#undef CMPSWAP_COUNT
}

/*
 * Sorts d[0..3] ascending in place with a fixed sequence of five
 * compare-exchange steps, and returns how many of those steps actually
 * exchanged two elements (equal elements are not counted).
 */
inline int signed_sort4_fast(uint8_t * d) {
#define CMPSWAP_COUNT(i, j, cnt) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; (cnt) += swap_; } while (0)
    int exchanges = 0;

    CMPSWAP_COUNT(0, 1, exchanges);  CMPSWAP_COUNT(2, 3, exchanges);
    CMPSWAP_COUNT(0, 2, exchanges);  CMPSWAP_COUNT(1, 3, exchanges);
    CMPSWAP_COUNT(1, 2, exchanges);

    return exchanges;
#undef CMPSWAP_COUNT
}

/*
 * Sorts d[0..4] ascending in place with a fixed sequence of nine
 * compare-exchange steps, and returns how many of those steps actually
 * exchanged two elements (equal elements are not counted).
 */
inline int signed_sort5_fast(uint8_t * d) {
#define CMPSWAP_COUNT(i, j, cnt) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; (cnt) += swap_; } while (0)
    int exchanges = 0;

    CMPSWAP_COUNT(0, 1, exchanges);  CMPSWAP_COUNT(3, 4, exchanges);
    CMPSWAP_COUNT(2, 4, exchanges);
    CMPSWAP_COUNT(2, 3, exchanges);
    CMPSWAP_COUNT(0, 3, exchanges);
    CMPSWAP_COUNT(0, 2, exchanges);
    CMPSWAP_COUNT(1, 4, exchanges);
    CMPSWAP_COUNT(1, 3, exchanges);
    CMPSWAP_COUNT(1, 2, exchanges);

    return exchanges;
#undef CMPSWAP_COUNT
}

/*
 * Sorts d[0..5] ascending in place with a fixed sequence of twelve
 * compare-exchange steps, and returns how many of those steps actually
 * exchanged two elements (equal elements are not counted).
 */
inline int signed_sort6_fast(uint8_t * d) {
#define CMPSWAP_COUNT(i, j, cnt) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; (cnt) += swap_; } while (0)
    int exchanges = 0;

    CMPSWAP_COUNT(1, 2, exchanges);  CMPSWAP_COUNT(4, 5, exchanges);
    CMPSWAP_COUNT(0, 2, exchanges);  CMPSWAP_COUNT(3, 5, exchanges);
    CMPSWAP_COUNT(0, 1, exchanges);  CMPSWAP_COUNT(3, 4, exchanges);
    CMPSWAP_COUNT(1, 4, exchanges);  CMPSWAP_COUNT(0, 3, exchanges);
    CMPSWAP_COUNT(2, 5, exchanges);
    CMPSWAP_COUNT(1, 3, exchanges);  CMPSWAP_COUNT(2, 4, exchanges);
    CMPSWAP_COUNT(2, 3, exchanges);

    return exchanges;
#undef CMPSWAP_COUNT
}

/*
 * Sorts d[0..6] ascending in place with a fixed sequence of sixteen
 * compare-exchange steps, and returns how many of those steps actually
 * exchanged two elements (equal elements are not counted).
 */
inline int signed_sort7_fast(uint8_t * d) {
#define CMPSWAP_COUNT(i, j, cnt) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; (cnt) += swap_; } while (0)
    int exchanges = 0;

    CMPSWAP_COUNT(1, 2, exchanges);
    CMPSWAP_COUNT(0, 2, exchanges);
    CMPSWAP_COUNT(0, 1, exchanges);
    CMPSWAP_COUNT(3, 4, exchanges);  CMPSWAP_COUNT(5, 6, exchanges);
    CMPSWAP_COUNT(3, 5, exchanges);  CMPSWAP_COUNT(4, 6, exchanges);
    CMPSWAP_COUNT(4, 5, exchanges);
    CMPSWAP_COUNT(0, 4, exchanges);
    CMPSWAP_COUNT(0, 3, exchanges);
    CMPSWAP_COUNT(1, 5, exchanges);  CMPSWAP_COUNT(2, 6, exchanges);
    CMPSWAP_COUNT(2, 5, exchanges);
    CMPSWAP_COUNT(1, 3, exchanges);  CMPSWAP_COUNT(2, 4, exchanges);
    CMPSWAP_COUNT(2, 3, exchanges);

    return exchanges;
#undef CMPSWAP_COUNT
}

/*
 * Dispatches to the fixed-size signed sort matching `len`.
 *
 * d     - byte array
 * start - offset of the first element of the run to sort
 * len   - number of elements in the run; 1..7 are supported
 *
 * Returns the exchange count reported by signed_sort<len>_fast(), or 0 for a
 * single element.  Any other length prints a fatal error and terminates the
 * process with EXIT_FAILURE.
 */
inline int signed_sort_fast(uint8_t * d, size_t start, uint8_t len) {
    switch (len) {
    case 1:
        return 0;
    case 2:
        return signed_sort2_fast(d + start);
    case 3:
        return signed_sort3_fast(d + start);
    case 4:
        return signed_sort4_fast(d + start);
    case 5:
        return signed_sort5_fast(d + start);
    case 6:
        return signed_sort6_fast(d + start);
    case 7:
        return signed_sort7_fast(d + start);
    default:
        printf("Fatal Error: Can not do signed sort for lengths greater than 7.");
        exit(EXIT_FAILURE);
    }
}

/*
 * Sorts d[0..1] ascending in place with a single compare-exchange step.
 */
inline void sort2_fast(uint8_t * d) {
#define CMPSWAP(i, j) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; } while (0)
    CMPSWAP(0, 1);
#undef CMPSWAP
}

/*
 * Sorts d[0..2] ascending in place using a fixed sequence of three
 * compare-exchange steps (each step leaves the smaller value at the lower
 * index).
 */
inline void sort3_fast(uint8_t * d) {
#define CMPSWAP(i, j) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; } while (0)
    CMPSWAP(1, 2);
    CMPSWAP(0, 2);
    CMPSWAP(0, 1);
#undef CMPSWAP
}

/*
 * Sorts d[0..3] ascending in place using a fixed sequence of five
 * compare-exchange steps (each step leaves the smaller value at the lower
 * index).
 */
inline void sort4_fast(uint8_t * d) {
#define CMPSWAP(i, j) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; } while (0)
    CMPSWAP(0, 1);  CMPSWAP(2, 3);
    CMPSWAP(0, 2);  CMPSWAP(1, 3);
    CMPSWAP(1, 2);
#undef CMPSWAP
}

/*
 * Sorts d[0..4] ascending in place using a fixed sequence of nine
 * compare-exchange steps (each step leaves the smaller value at the lower
 * index).
 */
inline void sort5_fast(uint8_t * d) {
#define CMPSWAP(i, j) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; } while (0)
    CMPSWAP(0, 1);  CMPSWAP(3, 4);
    CMPSWAP(2, 4);
    CMPSWAP(2, 3);
    CMPSWAP(0, 3);
    CMPSWAP(0, 2);
    CMPSWAP(1, 4);
    CMPSWAP(1, 3);
    CMPSWAP(1, 2);
#undef CMPSWAP
}

/*
 * In-place sort of d[0..5] using a fixed sequence of twelve compare-exchange
 * steps; each step leaves the smaller of the two values at the lower index.
 */
inline void sort6_fast(uint8_t * d) {
#define CMPSWAP(i, j) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; } while (0)
    CMPSWAP(1, 2);  CMPSWAP(4, 5);
    CMPSWAP(0, 2);  CMPSWAP(3, 5);
    CMPSWAP(0, 1);  CMPSWAP(3, 4);
    CMPSWAP(1, 4);  CMPSWAP(0, 3);
    CMPSWAP(2, 5);
    CMPSWAP(1, 3);  CMPSWAP(2, 4);
    CMPSWAP(2, 3);
#undef CMPSWAP
}

/*
 * In-place sort of d[0..6] using a fixed sequence of sixteen compare-exchange
 * steps; each step leaves the smaller of the two values at the lower index.
 */
inline void sort7_fast(uint8_t * d) {
#define CMPSWAP(i, j) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; } while (0)
    CMPSWAP(1, 2);
    CMPSWAP(0, 2);
    CMPSWAP(0, 1);
    CMPSWAP(3, 4);  CMPSWAP(5, 6);
    CMPSWAP(3, 5);  CMPSWAP(4, 6);
    CMPSWAP(4, 5);
    CMPSWAP(0, 4);
    CMPSWAP(0, 3);
    CMPSWAP(1, 5);  CMPSWAP(2, 6);
    CMPSWAP(2, 5);
    CMPSWAP(1, 3);  CMPSWAP(2, 4);
    CMPSWAP(2, 3);
#undef CMPSWAP
}

/*
 * In-place sort of d[0..7] using a fixed sequence of nineteen
 * compare-exchange steps; each step leaves the smaller of the two values at
 * the lower index.
 */
inline void sort8_fast(uint8_t * d) {
#define CMPSWAP(i, j) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; } while (0)
    CMPSWAP(0, 4);  CMPSWAP(1, 5);  CMPSWAP(2, 6);  CMPSWAP(3, 7);
    CMPSWAP(0, 2);  CMPSWAP(1, 3);  CMPSWAP(4, 6);  CMPSWAP(5, 7);
    CMPSWAP(2, 4);  CMPSWAP(3, 5);
    CMPSWAP(0, 1);  CMPSWAP(2, 3);  CMPSWAP(4, 5);  CMPSWAP(6, 7);
    CMPSWAP(1, 4);  CMPSWAP(3, 6);
    CMPSWAP(1, 2);  CMPSWAP(3, 4);  CMPSWAP(5, 6);
#undef CMPSWAP
}

/*
 * In-place sort of d[0..8] using a fixed sequence of 26 compare-exchange
 * steps; each step leaves the smaller of the two values at the lower index.
 */
inline void sort9_fast(uint8_t * d) {
#define CMPSWAP(i, j) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; } while (0)
    CMPSWAP(0, 8);
    CMPSWAP(0, 4);  CMPSWAP(1, 5);  CMPSWAP(2, 6);  CMPSWAP(3, 7);
    CMPSWAP(4, 8);
    CMPSWAP(0, 2);  CMPSWAP(1, 3);  CMPSWAP(4, 6);  CMPSWAP(5, 7);
    CMPSWAP(2, 8);
    CMPSWAP(2, 4);  CMPSWAP(3, 5);  CMPSWAP(6, 8);
    CMPSWAP(0, 1);  CMPSWAP(2, 3);  CMPSWAP(4, 5);  CMPSWAP(6, 7);
    CMPSWAP(1, 8);
    CMPSWAP(1, 4);  CMPSWAP(3, 6);  CMPSWAP(5, 8);
    CMPSWAP(1, 2);  CMPSWAP(3, 4);  CMPSWAP(5, 6);  CMPSWAP(7, 8);
#undef CMPSWAP
}

/*
 * In-place sort of d[0..9] using a fixed sequence of 31 compare-exchange
 * steps; each step leaves the smaller of the two values at the lower index.
 */
inline void sort10_fast(uint8_t * d) {
#define CMPSWAP(i, j) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; } while (0)
    CMPSWAP(0, 8);  CMPSWAP(1, 9);
    CMPSWAP(0, 4);  CMPSWAP(1, 5);  CMPSWAP(2, 6);  CMPSWAP(3, 7);
    CMPSWAP(4, 8);  CMPSWAP(5, 9);
    CMPSWAP(0, 2);  CMPSWAP(1, 3);  CMPSWAP(4, 6);  CMPSWAP(5, 7);
    CMPSWAP(2, 8);  CMPSWAP(3, 9);
    CMPSWAP(2, 4);  CMPSWAP(3, 5);  CMPSWAP(6, 8);  CMPSWAP(7, 9);
    CMPSWAP(0, 1);  CMPSWAP(2, 3);  CMPSWAP(4, 5);  CMPSWAP(6, 7);  CMPSWAP(8, 9);
    CMPSWAP(1, 8);
    CMPSWAP(1, 4);  CMPSWAP(3, 6);  CMPSWAP(5, 8);
    CMPSWAP(1, 2);  CMPSWAP(3, 4);  CMPSWAP(5, 6);  CMPSWAP(7, 8);
#undef CMPSWAP
}

/*
 * In-place sort of d[0..10] using a fixed sequence of 37 compare-exchange
 * steps; each step leaves the smaller of the two values at the lower index.
 */
inline void sort11_fast(uint8_t * d) {
#define CMPSWAP(i, j) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; } while (0)
    CMPSWAP(0, 8);  CMPSWAP(1, 9);  CMPSWAP(2, 10);
    CMPSWAP(0, 4);  CMPSWAP(1, 5);  CMPSWAP(2, 6);  CMPSWAP(3, 7);
    CMPSWAP(4, 8);  CMPSWAP(5, 9);  CMPSWAP(6, 10);
    CMPSWAP(0, 2);  CMPSWAP(1, 3);  CMPSWAP(4, 6);  CMPSWAP(5, 7);  CMPSWAP(8, 10);
    CMPSWAP(2, 8);  CMPSWAP(3, 9);
    CMPSWAP(2, 4);  CMPSWAP(3, 5);  CMPSWAP(6, 8);  CMPSWAP(7, 9);
    CMPSWAP(0, 1);  CMPSWAP(2, 3);  CMPSWAP(4, 5);  CMPSWAP(6, 7);  CMPSWAP(8, 9);
    CMPSWAP(1, 8);  CMPSWAP(3, 10);
    CMPSWAP(1, 4);  CMPSWAP(3, 6);  CMPSWAP(5, 8);  CMPSWAP(7, 10);
    CMPSWAP(1, 2);  CMPSWAP(3, 4);  CMPSWAP(5, 6);  CMPSWAP(7, 8);  CMPSWAP(9, 10);
#undef CMPSWAP
}

/*
 * In-place sort of d[0..11] using a fixed sequence of 41 compare-exchange
 * steps; each step leaves the smaller of the two values at the lower index.
 */
inline void sort12_fast(uint8_t * d) {
#define CMPSWAP(i, j) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; } while (0)
    CMPSWAP(0, 8);  CMPSWAP(1, 9);  CMPSWAP(2, 10);  CMPSWAP(3, 11);
    CMPSWAP(0, 4);  CMPSWAP(1, 5);  CMPSWAP(2, 6);   CMPSWAP(3, 7);
    CMPSWAP(4, 8);  CMPSWAP(5, 9);  CMPSWAP(6, 10);  CMPSWAP(7, 11);
    CMPSWAP(0, 2);  CMPSWAP(1, 3);  CMPSWAP(4, 6);   CMPSWAP(5, 7);
    CMPSWAP(8, 10); CMPSWAP(9, 11);
    CMPSWAP(2, 8);  CMPSWAP(3, 9);
    CMPSWAP(2, 4);  CMPSWAP(3, 5);  CMPSWAP(6, 8);   CMPSWAP(7, 9);
    CMPSWAP(0, 1);  CMPSWAP(2, 3);  CMPSWAP(4, 5);   CMPSWAP(6, 7);
    CMPSWAP(8, 9);  CMPSWAP(10, 11);
    CMPSWAP(1, 8);  CMPSWAP(3, 10);
    CMPSWAP(1, 4);  CMPSWAP(3, 6);  CMPSWAP(5, 8);   CMPSWAP(7, 10);
    CMPSWAP(1, 2);  CMPSWAP(3, 4);  CMPSWAP(5, 6);   CMPSWAP(7, 8);
    CMPSWAP(9, 10);
#undef CMPSWAP
}


/*
 * In-place sort of d[0..12] using a fixed sequence of 45 compare-exchange
 * steps; each step leaves the smaller of the two values at the lower index.
 * The step order below is significant and must not be rearranged.
 */
inline void sort13_fast(uint8_t * d) {
#define CMPSWAP(i, j) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; } while (0)
    CMPSWAP(1, 7);   CMPSWAP(9, 11);  CMPSWAP(3, 4);
    CMPSWAP(5, 8);   CMPSWAP(0, 12);  CMPSWAP(2, 6);
    CMPSWAP(0, 1);   CMPSWAP(2, 3);   CMPSWAP(4, 6);
    CMPSWAP(8, 11);  CMPSWAP(7, 12);  CMPSWAP(5, 9);
    CMPSWAP(0, 2);   CMPSWAP(3, 7);   CMPSWAP(10, 11);
    CMPSWAP(1, 4);   CMPSWAP(6, 12);
    CMPSWAP(7, 8);   CMPSWAP(11, 12); CMPSWAP(4, 9);
    CMPSWAP(6, 10);
    CMPSWAP(3, 4);   CMPSWAP(5, 6);   CMPSWAP(8, 9);
    CMPSWAP(10, 11); CMPSWAP(1, 7);
    CMPSWAP(2, 6);   CMPSWAP(9, 11);
    CMPSWAP(1, 3);   CMPSWAP(4, 7);   CMPSWAP(8, 10);
    CMPSWAP(0, 5);
    CMPSWAP(2, 5);   CMPSWAP(6, 8);   CMPSWAP(9, 10);
    CMPSWAP(1, 2);   CMPSWAP(3, 5);   CMPSWAP(7, 8);
    CMPSWAP(4, 6);
    CMPSWAP(2, 3);   CMPSWAP(4, 5);   CMPSWAP(6, 7);
    CMPSWAP(8, 9);
    CMPSWAP(3, 4);   CMPSWAP(5, 6);
#undef CMPSWAP
}

/*
 * In-place sort of d[0..13] using a fixed sequence of 53 compare-exchange
 * steps; each step leaves the smaller of the two values at the lower index.
 */
inline void sort14_fast(uint8_t * d) {
#define CMPSWAP(i, j) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; } while (0)
    CMPSWAP(0, 8);   CMPSWAP(1, 9);   CMPSWAP(2, 10);  CMPSWAP(3, 11);
    CMPSWAP(4, 12);  CMPSWAP(5, 13);
    CMPSWAP(0, 4);   CMPSWAP(1, 5);   CMPSWAP(2, 6);   CMPSWAP(3, 7);
    CMPSWAP(8, 12);  CMPSWAP(9, 13);
    CMPSWAP(4, 8);   CMPSWAP(5, 9);   CMPSWAP(6, 10);  CMPSWAP(7, 11);
    CMPSWAP(0, 2);   CMPSWAP(1, 3);   CMPSWAP(4, 6);   CMPSWAP(5, 7);
    CMPSWAP(8, 10);  CMPSWAP(9, 11);
    CMPSWAP(2, 8);   CMPSWAP(3, 9);   CMPSWAP(6, 12);  CMPSWAP(7, 13);
    CMPSWAP(2, 4);   CMPSWAP(3, 5);   CMPSWAP(6, 8);   CMPSWAP(7, 9);
    CMPSWAP(10, 12); CMPSWAP(11, 13);
    CMPSWAP(0, 1);   CMPSWAP(2, 3);   CMPSWAP(4, 5);   CMPSWAP(6, 7);
    CMPSWAP(8, 9);   CMPSWAP(10, 11); CMPSWAP(12, 13);
    CMPSWAP(1, 8);   CMPSWAP(3, 10);  CMPSWAP(5, 12);
    CMPSWAP(1, 4);   CMPSWAP(3, 6);   CMPSWAP(5, 8);   CMPSWAP(7, 10);
    CMPSWAP(9, 12);
    CMPSWAP(1, 2);   CMPSWAP(3, 4);   CMPSWAP(5, 6);   CMPSWAP(7, 8);
    CMPSWAP(9, 10);  CMPSWAP(11, 12);
#undef CMPSWAP
}

/*
 * In-place sort of d[0..14] using a fixed sequence of 59 compare-exchange
 * steps; each step leaves the smaller of the two values at the lower index.
 */
inline void sort15_fast(uint8_t * d) {
#define CMPSWAP(i, j) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; } while (0)
    CMPSWAP(0, 8);   CMPSWAP(1, 9);   CMPSWAP(2, 10);  CMPSWAP(3, 11);
    CMPSWAP(4, 12);  CMPSWAP(5, 13);  CMPSWAP(6, 14);
    CMPSWAP(0, 4);   CMPSWAP(1, 5);   CMPSWAP(2, 6);   CMPSWAP(3, 7);
    CMPSWAP(8, 12);  CMPSWAP(9, 13);  CMPSWAP(10, 14);
    CMPSWAP(4, 8);   CMPSWAP(5, 9);   CMPSWAP(6, 10);  CMPSWAP(7, 11);
    CMPSWAP(0, 2);   CMPSWAP(1, 3);   CMPSWAP(4, 6);   CMPSWAP(5, 7);
    CMPSWAP(8, 10);  CMPSWAP(9, 11);  CMPSWAP(12, 14);
    CMPSWAP(2, 8);   CMPSWAP(3, 9);   CMPSWAP(6, 12);  CMPSWAP(7, 13);
    CMPSWAP(2, 4);   CMPSWAP(3, 5);   CMPSWAP(6, 8);   CMPSWAP(7, 9);
    CMPSWAP(10, 12); CMPSWAP(11, 13);
    CMPSWAP(0, 1);   CMPSWAP(2, 3);   CMPSWAP(4, 5);   CMPSWAP(6, 7);
    CMPSWAP(8, 9);   CMPSWAP(10, 11); CMPSWAP(12, 13);
    CMPSWAP(1, 8);   CMPSWAP(3, 10);  CMPSWAP(5, 12);  CMPSWAP(7, 14);
    CMPSWAP(1, 4);   CMPSWAP(3, 6);   CMPSWAP(5, 8);   CMPSWAP(7, 10);
    CMPSWAP(9, 12);  CMPSWAP(11, 14);
    CMPSWAP(1, 2);   CMPSWAP(3, 4);   CMPSWAP(5, 6);   CMPSWAP(7, 8);
    CMPSWAP(9, 10);  CMPSWAP(11, 12); CMPSWAP(13, 14);
#undef CMPSWAP
}

/*
 * In-place sort of d[0..15] using a fixed sequence of 63 compare-exchange
 * steps; each step leaves the smaller of the two values at the lower index.
 */
inline void sort16_fast(uint8_t * d) {
#define CMPSWAP(i, j) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; } while (0)
    CMPSWAP(0, 8);   CMPSWAP(1, 9);   CMPSWAP(2, 10);  CMPSWAP(3, 11);
    CMPSWAP(4, 12);  CMPSWAP(5, 13);  CMPSWAP(6, 14);  CMPSWAP(7, 15);
    CMPSWAP(0, 4);   CMPSWAP(1, 5);   CMPSWAP(2, 6);   CMPSWAP(3, 7);
    CMPSWAP(8, 12);  CMPSWAP(9, 13);  CMPSWAP(10, 14); CMPSWAP(11, 15);
    CMPSWAP(4, 8);   CMPSWAP(5, 9);   CMPSWAP(6, 10);  CMPSWAP(7, 11);
    CMPSWAP(0, 2);   CMPSWAP(1, 3);   CMPSWAP(4, 6);   CMPSWAP(5, 7);
    CMPSWAP(8, 10);  CMPSWAP(9, 11);  CMPSWAP(12, 14); CMPSWAP(13, 15);
    CMPSWAP(2, 8);   CMPSWAP(3, 9);   CMPSWAP(6, 12);  CMPSWAP(7, 13);
    CMPSWAP(2, 4);   CMPSWAP(3, 5);   CMPSWAP(6, 8);   CMPSWAP(7, 9);
    CMPSWAP(10, 12); CMPSWAP(11, 13);
    CMPSWAP(0, 1);   CMPSWAP(2, 3);   CMPSWAP(4, 5);   CMPSWAP(6, 7);
    CMPSWAP(8, 9);   CMPSWAP(10, 11); CMPSWAP(12, 13); CMPSWAP(14, 15);
    CMPSWAP(1, 8);   CMPSWAP(3, 10);  CMPSWAP(5, 12);  CMPSWAP(7, 14);
    CMPSWAP(1, 4);   CMPSWAP(3, 6);   CMPSWAP(5, 8);   CMPSWAP(7, 10);
    CMPSWAP(9, 12);  CMPSWAP(11, 14);
    CMPSWAP(1, 2);   CMPSWAP(3, 4);   CMPSWAP(5, 6);   CMPSWAP(7, 8);
    CMPSWAP(9, 10);  CMPSWAP(11, 12); CMPSWAP(13, 14);
#undef CMPSWAP
}

/*
 * In-place sort of d[0..16] using a fixed sequence of 74 compare-exchange
 * steps; each step leaves the smaller of the two values at the lower index.
 */
inline void sort17_fast(uint8_t * d) {
#define CMPSWAP(i, j) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; } while (0)
    CMPSWAP(0, 16);
    CMPSWAP(0, 8);   CMPSWAP(1, 9);   CMPSWAP(2, 10);  CMPSWAP(3, 11);
    CMPSWAP(4, 12);  CMPSWAP(5, 13);  CMPSWAP(6, 14);  CMPSWAP(7, 15);
    CMPSWAP(8, 16);
    CMPSWAP(0, 4);   CMPSWAP(1, 5);   CMPSWAP(2, 6);   CMPSWAP(3, 7);
    CMPSWAP(8, 12);  CMPSWAP(9, 13);  CMPSWAP(10, 14); CMPSWAP(11, 15);
    CMPSWAP(4, 16);
    CMPSWAP(4, 8);   CMPSWAP(5, 9);   CMPSWAP(6, 10);  CMPSWAP(7, 11);
    CMPSWAP(12, 16);
    CMPSWAP(0, 2);   CMPSWAP(1, 3);   CMPSWAP(4, 6);   CMPSWAP(5, 7);
    CMPSWAP(8, 10);  CMPSWAP(9, 11);  CMPSWAP(12, 14); CMPSWAP(13, 15);
    CMPSWAP(2, 16);
    CMPSWAP(2, 8);   CMPSWAP(3, 9);   CMPSWAP(6, 12);  CMPSWAP(7, 13);
    CMPSWAP(10, 16);
    CMPSWAP(2, 4);   CMPSWAP(3, 5);   CMPSWAP(6, 8);   CMPSWAP(7, 9);
    CMPSWAP(10, 12); CMPSWAP(11, 13);
    CMPSWAP(14, 16);
    CMPSWAP(0, 1);   CMPSWAP(2, 3);   CMPSWAP(4, 5);   CMPSWAP(6, 7);
    CMPSWAP(8, 9);   CMPSWAP(10, 11); CMPSWAP(12, 13); CMPSWAP(14, 15);
    CMPSWAP(1, 16);
    CMPSWAP(1, 8);   CMPSWAP(3, 10);  CMPSWAP(5, 12);  CMPSWAP(7, 14);
    CMPSWAP(9, 16);
    CMPSWAP(1, 4);   CMPSWAP(3, 6);   CMPSWAP(5, 8);   CMPSWAP(7, 10);
    CMPSWAP(9, 12);  CMPSWAP(11, 14);
    CMPSWAP(13, 16);
    CMPSWAP(1, 2);   CMPSWAP(3, 4);   CMPSWAP(5, 6);   CMPSWAP(7, 8);
    CMPSWAP(9, 10);  CMPSWAP(11, 12); CMPSWAP(13, 14);
    CMPSWAP(15, 16);
#undef CMPSWAP
}

/*
 * In-place sort of d[0..17] using a fixed sequence of 82 compare-exchange
 * steps; each step leaves the smaller of the two values at the lower index.
 */
inline void sort18_fast(uint8_t * d) {
#define CMPSWAP(i, j) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; } while (0)
    CMPSWAP(0, 16);  CMPSWAP(1, 17);
    CMPSWAP(0, 8);   CMPSWAP(1, 9);   CMPSWAP(2, 10);  CMPSWAP(3, 11);
    CMPSWAP(4, 12);  CMPSWAP(5, 13);  CMPSWAP(6, 14);  CMPSWAP(7, 15);
    CMPSWAP(8, 16);  CMPSWAP(9, 17);
    CMPSWAP(0, 4);   CMPSWAP(1, 5);   CMPSWAP(2, 6);   CMPSWAP(3, 7);
    CMPSWAP(8, 12);  CMPSWAP(9, 13);  CMPSWAP(10, 14); CMPSWAP(11, 15);
    CMPSWAP(4, 16);  CMPSWAP(5, 17);
    CMPSWAP(4, 8);   CMPSWAP(5, 9);   CMPSWAP(6, 10);  CMPSWAP(7, 11);
    CMPSWAP(12, 16); CMPSWAP(13, 17);
    CMPSWAP(0, 2);   CMPSWAP(1, 3);   CMPSWAP(4, 6);   CMPSWAP(5, 7);
    CMPSWAP(8, 10);  CMPSWAP(9, 11);  CMPSWAP(12, 14); CMPSWAP(13, 15);
    CMPSWAP(2, 16);  CMPSWAP(3, 17);
    CMPSWAP(2, 8);   CMPSWAP(3, 9);   CMPSWAP(6, 12);  CMPSWAP(7, 13);
    CMPSWAP(10, 16); CMPSWAP(11, 17);
    CMPSWAP(2, 4);   CMPSWAP(3, 5);   CMPSWAP(6, 8);   CMPSWAP(7, 9);
    CMPSWAP(10, 12); CMPSWAP(11, 13);
    CMPSWAP(14, 16); CMPSWAP(15, 17);
    CMPSWAP(0, 1);   CMPSWAP(2, 3);   CMPSWAP(4, 5);   CMPSWAP(6, 7);
    CMPSWAP(8, 9);   CMPSWAP(10, 11); CMPSWAP(12, 13); CMPSWAP(14, 15);
    CMPSWAP(16, 17);
    CMPSWAP(1, 16);
    CMPSWAP(1, 8);   CMPSWAP(3, 10);  CMPSWAP(5, 12);  CMPSWAP(7, 14);
    CMPSWAP(9, 16);
    CMPSWAP(1, 4);   CMPSWAP(3, 6);   CMPSWAP(5, 8);   CMPSWAP(7, 10);
    CMPSWAP(9, 12);  CMPSWAP(11, 14);
    CMPSWAP(13, 16);
    CMPSWAP(1, 2);   CMPSWAP(3, 4);   CMPSWAP(5, 6);   CMPSWAP(7, 8);
    CMPSWAP(9, 10);  CMPSWAP(11, 12); CMPSWAP(13, 14);
    CMPSWAP(15, 16);
#undef CMPSWAP
}

/*
 * In-place sort of d[0..18] using a fixed sequence of 91 compare-exchange
 * steps; each step leaves the smaller of the two values at the lower index.
 */
inline void sort19_fast(uint8_t * d) {
#define CMPSWAP(i, j) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; } while (0)
    CMPSWAP(0, 16);  CMPSWAP(1, 17);  CMPSWAP(2, 18);
    CMPSWAP(0, 8);   CMPSWAP(1, 9);   CMPSWAP(2, 10);  CMPSWAP(3, 11);
    CMPSWAP(4, 12);  CMPSWAP(5, 13);  CMPSWAP(6, 14);  CMPSWAP(7, 15);
    CMPSWAP(8, 16);  CMPSWAP(9, 17);  CMPSWAP(10, 18);
    CMPSWAP(0, 4);   CMPSWAP(1, 5);   CMPSWAP(2, 6);   CMPSWAP(3, 7);
    CMPSWAP(8, 12);  CMPSWAP(9, 13);  CMPSWAP(10, 14); CMPSWAP(11, 15);
    CMPSWAP(4, 16);  CMPSWAP(5, 17);  CMPSWAP(6, 18);
    CMPSWAP(4, 8);   CMPSWAP(5, 9);   CMPSWAP(6, 10);  CMPSWAP(7, 11);
    CMPSWAP(12, 16); CMPSWAP(13, 17); CMPSWAP(14, 18);
    CMPSWAP(0, 2);   CMPSWAP(1, 3);   CMPSWAP(4, 6);   CMPSWAP(5, 7);
    CMPSWAP(8, 10);  CMPSWAP(9, 11);  CMPSWAP(12, 14); CMPSWAP(13, 15);
    CMPSWAP(16, 18);
    CMPSWAP(2, 16);  CMPSWAP(3, 17);
    CMPSWAP(2, 8);   CMPSWAP(3, 9);   CMPSWAP(6, 12);  CMPSWAP(7, 13);
    CMPSWAP(10, 16); CMPSWAP(11, 17);
    CMPSWAP(2, 4);   CMPSWAP(3, 5);   CMPSWAP(6, 8);   CMPSWAP(7, 9);
    CMPSWAP(10, 12); CMPSWAP(11, 13);
    CMPSWAP(14, 16); CMPSWAP(15, 17);
    CMPSWAP(0, 1);   CMPSWAP(2, 3);   CMPSWAP(4, 5);   CMPSWAP(6, 7);
    CMPSWAP(8, 9);   CMPSWAP(10, 11); CMPSWAP(12, 13); CMPSWAP(14, 15);
    CMPSWAP(16, 17);
    CMPSWAP(1, 16);  CMPSWAP(3, 18);
    CMPSWAP(1, 8);   CMPSWAP(3, 10);  CMPSWAP(5, 12);  CMPSWAP(7, 14);
    CMPSWAP(9, 16);  CMPSWAP(11, 18);
    CMPSWAP(1, 4);   CMPSWAP(3, 6);   CMPSWAP(5, 8);   CMPSWAP(7, 10);
    CMPSWAP(9, 12);  CMPSWAP(11, 14);
    CMPSWAP(13, 16); CMPSWAP(15, 18);
    CMPSWAP(1, 2);   CMPSWAP(3, 4);   CMPSWAP(5, 6);   CMPSWAP(7, 8);
    CMPSWAP(9, 10);  CMPSWAP(11, 12); CMPSWAP(13, 14); CMPSWAP(15, 16);
    CMPSWAP(17, 18);
#undef CMPSWAP
}

/*
 * In-place sort of d[0..19] using a fixed sequence of 97 compare-exchange
 * steps; each step leaves the smaller of the two values at the lower index.
 */
inline void sort20_fast(uint8_t * d) {
#define CMPSWAP(i, j) do { const uint8_t a_ = d[i], b_ = d[j]; const int swap_ = (b_ < a_); d[i] = swap_ ? b_ : a_; d[j] = swap_ ? a_ : b_; } while (0)
    CMPSWAP(0, 16);  CMPSWAP(1, 17);  CMPSWAP(2, 18);  CMPSWAP(3, 19);
    CMPSWAP(0, 8);   CMPSWAP(1, 9);   CMPSWAP(2, 10);  CMPSWAP(3, 11);
    CMPSWAP(4, 12);  CMPSWAP(5, 13);  CMPSWAP(6, 14);  CMPSWAP(7, 15);
    CMPSWAP(8, 16);  CMPSWAP(9, 17);  CMPSWAP(10, 18); CMPSWAP(11, 19);
    CMPSWAP(0, 4);   CMPSWAP(1, 5);   CMPSWAP(2, 6);   CMPSWAP(3, 7);
    CMPSWAP(8, 12);  CMPSWAP(9, 13);  CMPSWAP(10, 14); CMPSWAP(11, 15);
    CMPSWAP(4, 16);  CMPSWAP(5, 17);  CMPSWAP(6, 18);  CMPSWAP(7, 19);
    CMPSWAP(4, 8);   CMPSWAP(5, 9);   CMPSWAP(6, 10);  CMPSWAP(7, 11);
    CMPSWAP(12, 16); CMPSWAP(13, 17); CMPSWAP(14, 18); CMPSWAP(15, 19);
    CMPSWAP(0, 2);   CMPSWAP(1, 3);   CMPSWAP(4, 6);   CMPSWAP(5, 7);
    CMPSWAP(8, 10);  CMPSWAP(9, 11);  CMPSWAP(12, 14); CMPSWAP(13, 15);
    CMPSWAP(16, 18); CMPSWAP(17, 19);
    CMPSWAP(2, 16);  CMPSWAP(3, 17);
    CMPSWAP(2, 8);   CMPSWAP(3, 9);   CMPSWAP(6, 12);  CMPSWAP(7, 13);
    CMPSWAP(10, 16); CMPSWAP(11, 17);
    CMPSWAP(2, 4);   CMPSWAP(3, 5);   CMPSWAP(6, 8);   CMPSWAP(7, 9);
    CMPSWAP(10, 12); CMPSWAP(11, 13); CMPSWAP(14, 16); CMPSWAP(15, 17);
    CMPSWAP(0, 1);   CMPSWAP(2, 3);   CMPSWAP(4, 5);   CMPSWAP(6, 7);
    CMPSWAP(8, 9);   CMPSWAP(10, 11); CMPSWAP(12, 13); CMPSWAP(14, 15);
    CMPSWAP(16, 17); CMPSWAP(18, 19);
    CMPSWAP(1, 16);  CMPSWAP(3, 18);
    CMPSWAP(1, 8);   CMPSWAP(3, 10);  CMPSWAP(5, 12);  CMPSWAP(7, 14);
    CMPSWAP(9, 16);  CMPSWAP(11, 18);
    CMPSWAP(1, 4);   CMPSWAP(3, 6);   CMPSWAP(5, 8);   CMPSWAP(7, 10);
    CMPSWAP(9, 12);  CMPSWAP(11, 14);
    CMPSWAP(13, 16); CMPSWAP(15, 18);
    CMPSWAP(1, 2);   CMPSWAP(3, 4);   CMPSWAP(5, 6);   CMPSWAP(7, 8);
    CMPSWAP(9, 10);  CMPSWAP(11, 12); CMPSWAP(13, 14); CMPSWAP(15, 16);
    CMPSWAP(17, 18);
#undef CMPSWAP
}

inline void sort21_fast(uint8_t * d) {
#define min(a,b) ((a<b) ? a : b )
#define max(a,b) ((a<b) ? b : a )
#define SWAP(x,y) { int tmp = min(d[x], d[y]); d[y] = max(d[x], d[y]); d[x] = tmp; }
//#define SWAP(x,y) { int dx = d[x], dy = d[y], tmp; tmp = d[x] = ((dx < dy) ? dx : dy); d[y] ^= dx ^ tmp; }
    SWAP(0, 16);
    SWAP(1, 17);
    SWAP(2, 18);
    SWAP(3, 19);
    SWAP(4, 20);
    SWAP(0, 8);
    SWAP(1, 9);
    SWAP(2, 10);
    SWAP(3, 11);
    SWAP(4, 12);
    SWAP(5, 13);
    SWAP(6, 14);
    SWAP(7, 15);
    SWAP(8, 16);
    SWAP(9, 17);
    SWAP(10, 18);
    SWAP(11, 19);
    SWAP(12, 20);
    SWAP(0, 4);
    SWAP(1, 5);
    SWAP(2, 6);
    SWAP(3, 7);
    SWAP(8, 12);
    SWAP(9, 13);
    SWAP(10, 14);
    SWAP(11, 15);
    SWAP(16, 20);
    SWAP(4, 16);
    SWAP(5, 17);
    SWAP(6, 18);
    SWAP(7, 19);
    SWAP(4, 8);
    SWAP(5, 9);
    SWAP(6, 10);
    SWAP(7, 11);
    SWAP(12, 16);
    SWAP(13, 17);
    SWAP(14, 18);
    SWAP(15, 19);
    SWAP(0, 2);
    SWAP(1, 3);
    SWAP(4, 6);
    SWAP(5, 7);
    SWAP(8, 10);
    SWAP(9, 11);
    SWAP(12, 14);
    SWAP(13, 15);
    SWAP(16, 18);
    SWAP(17, 19);
    SWAP(2, 16);
    SWAP(3, 17);
    SWAP(6, 20);
    SWAP(2, 8);
    SWAP(3, 9);
    SWAP(6, 12);
    SWAP(7, 13);
    SWAP(10, 16);
    SWAP(11, 17);
    SWAP(14, 20);
    SWAP(2, 4);
    SWAP(3, 5);
    SWAP(6, 8);
    SWAP(7, 9);
    SWAP(10, 12);
    SWAP(11, 13);
    SWAP(14, 16);
    SWAP(15, 17);
    SWAP(18, 20);
    SWAP(0, 1);
    SWAP(2, 3);
    SWAP(4, 5);
    SWAP(6, 7);
    SWAP(8, 9);
    SWAP(10, 11);
    SWAP(12, 13);
    SWAP(14, 15);
    SWAP(16, 17);
    SWAP(18, 19);
    SWAP(1, 16);
    SWAP(3, 18);
    SWAP(5, 20);
    SWAP(1, 8);
    SWAP(3, 10);
    SWAP(5, 12);
    SWAP(7, 14);
    SWAP(9, 16);
    SWAP(11, 18);
    SWAP(13, 20);
    SWAP(1, 4);
    SWAP(3, 6);
    SWAP(5, 8);
    SWAP(7, 10);
    SWAP(9, 12);
    SWAP(11, 14);
    SWAP(13, 16);
    SWAP(15, 18);
    SWAP(17, 20);
    SWAP(1, 2);
    SWAP(3, 4);
    SWAP(5, 6);
    SWAP(7, 8);
    SWAP(9, 10);
    SWAP(11, 12);
    SWAP(13, 14);
    SWAP(15, 16);
    SWAP(17, 18);
    SWAP(19, 20);
    
#undef min
#undef max
#undef SWAP
}

inline void sort22_fast(uint8_t * d) {
#define min(a,b) ((a<b) ? a : b )
#define max(a,b) ((a<b) ? b : a )
#define SWAP(x,y) { int tmp = min(d[x], d[y]); d[y] = max(d[x], d[y]); d[x] = tmp; }
//#define SWAP(x,y) { int dx = d[x], dy = d[y], tmp; tmp = d[x] = ((dx < dy) ? dx : dy); d[y] ^= dx ^ tmp; }
    SWAP(0, 16);
    SWAP(1, 17);
    SWAP(2, 18);
    SWAP(3, 19);
    SWAP(4, 20);
    SWAP(5, 21);
    SWAP(0, 8);
    SWAP(1, 9);
    SWAP(2, 10);
    SWAP(3, 11);
    SWAP(4, 12);
    SWAP(5, 13);
    SWAP(6, 14);
    SWAP(7, 15);
    SWAP(8, 16);
    SWAP(9, 17);
    SWAP(10, 18);
    SWAP(11, 19);
    SWAP(12, 20);
    SWAP(13, 21);
    SWAP(0, 4);
    SWAP(1, 5);
    SWAP(2, 6);
    SWAP(3, 7);
    SWAP(8, 12);
    SWAP(9, 13);
    SWAP(10, 14);
    SWAP(11, 15);
    SWAP(16, 20);
    SWAP(17, 21);
    SWAP(4, 16);
    SWAP(5, 17);
    SWAP(6, 18);
    SWAP(7, 19);
    SWAP(4, 8);
    SWAP(5, 9);
    SWAP(6, 10);
    SWAP(7, 11);
    SWAP(12, 16);
    SWAP(13, 17);
    SWAP(14, 18);
    SWAP(15, 19);
    SWAP(0, 2);
    SWAP(1, 3);
    SWAP(4, 6);
    SWAP(5, 7);
    SWAP(8, 10);
    SWAP(9, 11);
    SWAP(12, 14);
    SWAP(13, 15);
    SWAP(16, 18);
    SWAP(17, 19);
    SWAP(2, 16);
    SWAP(3, 17);
    SWAP(6, 20);
    SWAP(7, 21);
    SWAP(2, 8);
    SWAP(3, 9);
    SWAP(6, 12);
    SWAP(7, 13);
    SWAP(10, 16);
    SWAP(11, 17);
    SWAP(14, 20);
    SWAP(15, 21);
    SWAP(2, 4);
    SWAP(3, 5);
    SWAP(6, 8);
    SWAP(7, 9);
    SWAP(10, 12);
    SWAP(11, 13);
    SWAP(14, 16);
    SWAP(15, 17);
    SWAP(18, 20);
    SWAP(19, 21);
    SWAP(0, 1);
    SWAP(2, 3);
    SWAP(4, 5);
    SWAP(6, 7);
    SWAP(8, 9);
    SWAP(10, 11);
    SWAP(12, 13);
    SWAP(14, 15);
    SWAP(16, 17);
    SWAP(18, 19);
    SWAP(20, 21);
    SWAP(1, 16);
    SWAP(3, 18);
    SWAP(5, 20);
    SWAP(1, 8);
    SWAP(3, 10);
    SWAP(5, 12);
    SWAP(7, 14);
    SWAP(9, 16);
    SWAP(11, 18);
    SWAP(13, 20);
    SWAP(1, 4);
    SWAP(3, 6);
    SWAP(5, 8);
    SWAP(7, 10);
    SWAP(9, 12);
    SWAP(11, 14);
    SWAP(13, 16);
    SWAP(15, 18);
    SWAP(17, 20);
    SWAP(1, 2);
    SWAP(3, 4);
    SWAP(5, 6);
    SWAP(7, 8);
    SWAP(9, 10);
    SWAP(11, 12);
    SWAP(13, 14);
    SWAP(15, 16);
    SWAP(17, 18);
    SWAP(19, 20);

#undef min
#undef max
#undef SWAP
}

/*
 * sort23_fast .. sort30_fast: only prototypes appear at this point of the
 * header (no inline body). sort_fast() below dispatches to them for
 * len 23..30.
 * NOTE(review): presumably each sorts exactly N bytes of d in place like the
 * inline sortN_fast variants, and is defined out of line -- confirm against
 * the implementation file.
 */
void sort23_fast(uint8_t * d);

void sort24_fast(uint8_t * d);

void sort25_fast(uint8_t * d);

void sort26_fast(uint8_t * d);

void sort27_fast(uint8_t * d);

void sort28_fast(uint8_t * d);

void sort29_fast(uint8_t * d);

void sort30_fast(uint8_t * d);

/*
 * Sort the len bytes starting at d[start] in place.
 *
 * d     - byte buffer containing the range to sort
 * start - index of the first byte of the range
 * len   - number of bytes in the range
 *
 * Lengths 2..30 are dispatched to the matching fixed-size sortN_fast routine;
 * anything longer falls back to qsort() with compare_uint8. Ranges with fewer
 * than two bytes are already sorted and are left untouched; this also keeps a
 * zero or negative len from being converted to a huge element count for
 * qsort().
 */
inline void sort_fast(uint8_t * d, int start, int len) {
    uint8_t *base;

    if (len < 2) {
        return;
    }

    base = d + start;
    switch (len) {
    case 2:  sort2_fast(base);  break;
    case 3:  sort3_fast(base);  break;
    case 4:  sort4_fast(base);  break;
    case 5:  sort5_fast(base);  break;
    case 6:  sort6_fast(base);  break;
    case 7:  sort7_fast(base);  break;
    case 8:  sort8_fast(base);  break;
    case 9:  sort9_fast(base);  break;
    case 10: sort10_fast(base); break;
    case 11: sort11_fast(base); break;
    case 12: sort12_fast(base); break;
    case 13: sort13_fast(base); break;
    case 14: sort14_fast(base); break;
    case 15: sort15_fast(base); break;
    case 16: sort16_fast(base); break;
    case 17: sort17_fast(base); break;
    case 18: sort18_fast(base); break;
    case 19: sort19_fast(base); break;
    case 20: sort20_fast(base); break;
    case 21: sort21_fast(base); break;
    case 22: sort22_fast(base); break;
    case 23: sort23_fast(base); break;
    case 24: sort24_fast(base); break;
    case 25: sort25_fast(base); break;
    case 26: sort26_fast(base); break;
    case 27: sort27_fast(base); break;
    case 28: sort28_fast(base); break;
    case 29: sort29_fast(base); break;
    case 30: sort30_fast(base); break;
    default:
        /* len > 30 here, so the conversion to size_t is value-preserving. */
        qsort(base, (size_t)len, sizeof(uint8_t), compare_uint8);
        break;
    }
}

inline int comp_fast1(const uint8_t * lhs, const uint8_t * rhs) {
  return (*lhs > *rhs) - (*lhs < *rhs);
}

inline int comp_fast2(const uint8_t * lhs, const uint8_t * rhs) {
  const uint16_t*nl1 = (uint16_t*)lhs;
  const uint16_t*nr1 = (uint16_t*)rhs;
  const uint16_t v1 = __builtin_bswap16(*nl1);
  const uint16_t v2 = __builtin_bswap16(*nr1);
  return (v1 > v2) - (v1 < v2);
}

inline int comp_fast3(const uint8_t * lhs, const uint8_t * rhs) {
  const uint32_t*nl1 = (uint32_t*)lhs;
  const uint32_t*nr1 = (uint32_t*)rhs;
  const uint32_t v1 = __builtin_bswap32(*nl1)>>4;
  const uint32_t v2 = __builtin_bswap32(*nr1)>>4;
  return (v1 > v2) - (v1 < v2);
}

inline int comp_fast4(const uint8_t * lhs, const uint8_t * rhs) {
  const uint32_t*nl1 = (uint32_t*)lhs;
  const uint32_t*nr1 = (uint32_t*)rhs;
  const uint32_t v1 = __builtin_bswap32(*nl1);
  const uint32_t v2 = __builtin_bswap32(*nr1);
  return (v1 > v2) - (v1 < v2);
}

inline int comp_fast5(const uint8_t * lhs, const uint8_t * rhs) {
  const uint32_t*nl1 = (uint32_t*)lhs;
  const uint32_t*nr1 = (uint32_t*)rhs;

  const uint32_t v1 = __builtin_bswap32(*nl1);
  const uint32_t v2 = __builtin_bswap32(*nr1);
  if (v1 > v2) return 1;
  if (v1 < v2) return -1;

  nl1++;nr1++;
  
  const uint8_t* nl2 = (uint8_t*)nl1;
  const uint8_t* nr2 = (uint8_t*)nr1;

  return (*nl2 > *nr2) - (*nl2 < *nr2);
}

inline int comp_fast6(const uint8_t * lhs, const uint8_t * rhs) {
  const uint32_t*nl1 = (uint32_t*)lhs;
  const uint32_t*nr1 = (uint32_t*)rhs;

  const uint32_t v1 = __builtin_bswap32(*nl1);
  const uint32_t v2 = __builtin_bswap32(*nr1);
  if (v1 > v2) return 1;
  if (v1 < v2) return -1;

  nl1++;nr1++;
  
  const uint16_t* nl2 = (uint16_t*)nl1;
  const uint16_t* nr2 = (uint16_t*)nr1;
  const uint16_t v3 = __builtin_bswap16(*nl2);
  const uint16_t v4 = __builtin_bswap16(*nr2);
  
  return (v3 > v4) - (v3 < v4);
}

/*
 * Compare the len-byte blocks at lhs and rhs by forwarding to the
 * length-specific comparator comp_fast1 .. comp_fast6 and returning its
 * result.
 *
 * Only len values 1..6 are supported; for any other value a fatal-error
 * message is printed and the process exits with EXIT_FAILURE.
 */
inline int comp_fast(const uint8_t * lhs, const uint8_t * rhs, const int len) {
    switch (len) {
    case 1: return comp_fast1(lhs, rhs);
    case 2: return comp_fast2(lhs, rhs);
    case 3: return comp_fast3(lhs, rhs);
    case 4: return comp_fast4(lhs, rhs);
    case 5: return comp_fast5(lhs, rhs);
    case 6: return comp_fast6(lhs, rhs);
    default:
        break;
    }

    /* Unsupported element length: report and terminate. */
    printf("Fatal Error: Fast array comparison function only takes arrays of length at most 6.");
    exit(EXIT_FAILURE);
}

inline void swap_blocks_with_loop( uint8_t* a, uint8_t* b, const size_t n)
{
  uint8_t* p;
  uint8_t* q;
  const uint8_t* sentry = a + n;

  for ( p = a, q = b; p < sentry; ++p, ++q ) {
     const uint8_t t = *p;
     *p = *q;
     *q = t;
  }
}

/*
 * Put 2 consecutive elements of size_of_element bytes each, starting at d,
 * in order in place. Elements are compared with comp_fast() (which accepts
 * element sizes 1..6 only) and exchanged with swap_blocks_with_loop().
 */
inline void sort2_fast_array(uint8_t * d, size_t size_of_element) {
/* Compare-exchange of elements x and y: swap them when element x compares greater. */
#define SWAP(x, y) do { \
        uint8_t *const first_ = d + (x) * size_of_element; \
        uint8_t *const second_ = d + (y) * size_of_element; \
        if (comp_fast(first_, second_, size_of_element) > 0) { \
            swap_blocks_with_loop(first_, second_, size_of_element); \
        } \
    } while (0)

    SWAP(0, 1);

#undef SWAP
}

/*
 * Sort 3 consecutive elements of size_of_element bytes each, starting at d,
 * in place with a fixed sequence of compare-exchanges. Elements are compared
 * with comp_fast() (element sizes 1..6 only) and exchanged with
 * swap_blocks_with_loop().
 */
inline void sort3_fast_array(uint8_t * d, size_t size_of_element) {
/* Compare-exchange of elements x and y: swap them when element x compares greater. */
#define SWAP(x, y) do { \
        uint8_t *const first_ = d + (x) * size_of_element; \
        uint8_t *const second_ = d + (y) * size_of_element; \
        if (comp_fast(first_, second_, size_of_element) > 0) { \
            swap_blocks_with_loop(first_, second_, size_of_element); \
        } \
    } while (0)

    SWAP(1, 2); SWAP(0, 2); SWAP(0, 1);

#undef SWAP
}

/*
 * Sort 4 consecutive elements of size_of_element bytes each, starting at d,
 * in place with a fixed sequence of compare-exchanges. Elements are compared
 * with comp_fast() (element sizes 1..6 only) and exchanged with
 * swap_blocks_with_loop().
 */
inline void sort4_fast_array(uint8_t * d, size_t size_of_element) {
/* Compare-exchange of elements x and y: swap them when element x compares greater. */
#define SWAP(x, y) do { \
        uint8_t *const first_ = d + (x) * size_of_element; \
        uint8_t *const second_ = d + (y) * size_of_element; \
        if (comp_fast(first_, second_, size_of_element) > 0) { \
            swap_blocks_with_loop(first_, second_, size_of_element); \
        } \
    } while (0)

    SWAP(0, 1); SWAP(2, 3);
    SWAP(0, 2); SWAP(1, 3);
    SWAP(1, 2);

#undef SWAP
}

/*
 * Sort 5 consecutive elements of size_of_element bytes each, starting at d,
 * in place with a fixed sequence of 9 compare-exchanges. Elements are compared
 * with comp_fast() (element sizes 1..6 only) and exchanged with
 * swap_blocks_with_loop(). The comparator order must be preserved.
 */
inline void sort5_fast_array(uint8_t * d, size_t size_of_element) {
/* Compare-exchange of elements x and y: swap them when element x compares greater. */
#define SWAP(x, y) do { \
        uint8_t *const first_ = d + (x) * size_of_element; \
        uint8_t *const second_ = d + (y) * size_of_element; \
        if (comp_fast(first_, second_, size_of_element) > 0) { \
            swap_blocks_with_loop(first_, second_, size_of_element); \
        } \
    } while (0)

    SWAP(0, 1); SWAP(3, 4); SWAP(2, 4);
    SWAP(2, 3); SWAP(0, 3); SWAP(0, 2);
    SWAP(1, 4); SWAP(1, 3); SWAP(1, 2);

#undef SWAP
}

/*
 * Sort 6 consecutive elements of size_of_element bytes each, starting at d,
 * in place with a fixed sequence of 12 compare-exchanges. Elements are
 * compared with comp_fast() (element sizes 1..6 only) and exchanged with
 * swap_blocks_with_loop(). The comparator order must be preserved.
 */
inline void sort6_fast_array(uint8_t * d, size_t size_of_element) {
/* Compare-exchange of elements x and y: swap them when element x compares greater. */
#define SWAP(x, y) do { \
        uint8_t *const first_ = d + (x) * size_of_element; \
        uint8_t *const second_ = d + (y) * size_of_element; \
        if (comp_fast(first_, second_, size_of_element) > 0) { \
            swap_blocks_with_loop(first_, second_, size_of_element); \
        } \
    } while (0)

    SWAP(1, 2); SWAP(4, 5);
    SWAP(0, 2); SWAP(3, 5);
    SWAP(0, 1); SWAP(3, 4);
    SWAP(1, 4); SWAP(0, 3); SWAP(2, 5);
    SWAP(1, 3); SWAP(2, 4);
    SWAP(2, 3);

#undef SWAP
}

/*
 * Sort 7 consecutive elements of size_of_element bytes each, starting at d,
 * in place with a fixed sequence of 16 compare-exchanges. Elements are
 * compared with comp_fast() (element sizes 1..6 only) and exchanged with
 * swap_blocks_with_loop(). The comparator order must be preserved.
 */
inline void sort7_fast_array(uint8_t * d, size_t size_of_element) {
/* Compare-exchange of elements x and y: swap them when element x compares greater. */
#define SWAP(x, y) do { \
        uint8_t *const first_ = d + (x) * size_of_element; \
        uint8_t *const second_ = d + (y) * size_of_element; \
        if (comp_fast(first_, second_, size_of_element) > 0) { \
            swap_blocks_with_loop(first_, second_, size_of_element); \
        } \
    } while (0)

    SWAP(1, 2); SWAP(0, 2); SWAP(0, 1);
    SWAP(3, 4); SWAP(5, 6);
    SWAP(3, 5); SWAP(4, 6);
    SWAP(4, 5);
    SWAP(0, 4); SWAP(0, 3);
    SWAP(1, 5); SWAP(2, 6); SWAP(2, 5);
    SWAP(1, 3); SWAP(2, 4);
    SWAP(2, 3);

#undef SWAP
}

/*
 * Sort 8 consecutive elements of size_of_element bytes each, starting at d,
 * in place with a fixed sequence of 19 compare-exchanges. Elements are
 * compared with comp_fast() (element sizes 1..6 only) and exchanged with
 * swap_blocks_with_loop().
 *
 * Comparators are grouped by the distance between the two element indices;
 * the order of the groups must be preserved.
 */
inline void sort8_fast_array(uint8_t * d, size_t size_of_element) {
/* Compare-exchange of elements x and y: swap them when element x compares greater. */
#define SWAP(x, y) do { \
        uint8_t *const first_ = d + (x) * size_of_element; \
        uint8_t *const second_ = d + (y) * size_of_element; \
        if (comp_fast(first_, second_, size_of_element) > 0) { \
            swap_blocks_with_loop(first_, second_, size_of_element); \
        } \
    } while (0)

    /* distance 4 */
    SWAP(0, 4); SWAP(1, 5); SWAP(2, 6); SWAP(3, 7);
    /* distance 2 */
    SWAP(0, 2); SWAP(1, 3); SWAP(4, 6); SWAP(5, 7);
    SWAP(2, 4); SWAP(3, 5);
    /* distance 1 */
    SWAP(0, 1); SWAP(2, 3); SWAP(4, 5); SWAP(6, 7);
    /* distance 3 */
    SWAP(1, 4); SWAP(3, 6);
    /* distance 1 */
    SWAP(1, 2); SWAP(3, 4); SWAP(5, 6);

#undef SWAP
}

/*
 * Sort 9 consecutive elements of size_of_element bytes each, starting at d,
 * in place with a fixed sequence of 26 compare-exchanges. Elements are
 * compared with comp_fast() (element sizes 1..6 only) and exchanged with
 * swap_blocks_with_loop().
 *
 * Comparators are grouped by the distance between the two element indices;
 * the order of the groups must be preserved.
 */
inline void sort9_fast_array(uint8_t * d, size_t size_of_element) {
/* Compare-exchange of elements x and y: swap them when element x compares greater. */
#define SWAP(x, y) do { \
        uint8_t *const first_ = d + (x) * size_of_element; \
        uint8_t *const second_ = d + (y) * size_of_element; \
        if (comp_fast(first_, second_, size_of_element) > 0) { \
            swap_blocks_with_loop(first_, second_, size_of_element); \
        } \
    } while (0)

    /* distance 8 */
    SWAP(0, 8);
    /* distance 4 */
    SWAP(0, 4); SWAP(1, 5); SWAP(2, 6); SWAP(3, 7);
    SWAP(4, 8);
    /* distance 2 */
    SWAP(0, 2); SWAP(1, 3); SWAP(4, 6); SWAP(5, 7);
    /* distance 6 */
    SWAP(2, 8);
    /* distance 2 */
    SWAP(2, 4); SWAP(3, 5); SWAP(6, 8);
    /* distance 1 */
    SWAP(0, 1); SWAP(2, 3); SWAP(4, 5); SWAP(6, 7);
    /* distance 7 */
    SWAP(1, 8);
    /* distance 3 */
    SWAP(1, 4); SWAP(3, 6); SWAP(5, 8);
    /* distance 1 */
    SWAP(1, 2); SWAP(3, 4); SWAP(5, 6); SWAP(7, 8);

#undef SWAP
}

/*
 * Sort 10 consecutive elements of size_of_element bytes each, starting at d,
 * in place with a fixed sequence of 31 compare-exchanges. Elements are
 * compared with comp_fast() (element sizes 1..6 only) and exchanged with
 * swap_blocks_with_loop().
 *
 * Comparators are grouped by the distance between the two element indices;
 * the order of the groups must be preserved.
 */
inline void sort10_fast_array(uint8_t * d, size_t size_of_element) {
/* Compare-exchange of elements x and y: swap them when element x compares greater. */
#define SWAP(x, y) do { \
        uint8_t *const first_ = d + (x) * size_of_element; \
        uint8_t *const second_ = d + (y) * size_of_element; \
        if (comp_fast(first_, second_, size_of_element) > 0) { \
            swap_blocks_with_loop(first_, second_, size_of_element); \
        } \
    } while (0)

    /* distance 8 */
    SWAP(0, 8); SWAP(1, 9);
    /* distance 4 */
    SWAP(0, 4); SWAP(1, 5); SWAP(2, 6); SWAP(3, 7);
    SWAP(4, 8); SWAP(5, 9);
    /* distance 2 */
    SWAP(0, 2); SWAP(1, 3); SWAP(4, 6); SWAP(5, 7);
    /* distance 6 */
    SWAP(2, 8); SWAP(3, 9);
    /* distance 2 */
    SWAP(2, 4); SWAP(3, 5); SWAP(6, 8); SWAP(7, 9);
    /* distance 1 */
    SWAP(0, 1); SWAP(2, 3); SWAP(4, 5); SWAP(6, 7);
    SWAP(8, 9);
    /* distance 7 */
    SWAP(1, 8);
    /* distance 3 */
    SWAP(1, 4); SWAP(3, 6); SWAP(5, 8);
    /* distance 1 */
    SWAP(1, 2); SWAP(3, 4); SWAP(5, 6); SWAP(7, 8);

#undef SWAP
}

/*
 * Sort 11 consecutive elements of size_of_element bytes each, starting at d,
 * in place with a fixed sequence of 37 compare-exchanges. Elements are
 * compared with comp_fast() (element sizes 1..6 only) and exchanged with
 * swap_blocks_with_loop().
 *
 * Comparators are grouped by the distance between the two element indices;
 * the order of the groups must be preserved.
 */
inline void sort11_fast_array(uint8_t * d, size_t size_of_element) {
/* Compare-exchange of elements x and y: swap them when element x compares greater. */
#define SWAP(x, y) do { \
        uint8_t *const first_ = d + (x) * size_of_element; \
        uint8_t *const second_ = d + (y) * size_of_element; \
        if (comp_fast(first_, second_, size_of_element) > 0) { \
            swap_blocks_with_loop(first_, second_, size_of_element); \
        } \
    } while (0)

    /* distance 8 */
    SWAP(0, 8); SWAP(1, 9); SWAP(2, 10);
    /* distance 4 */
    SWAP(0, 4); SWAP(1, 5); SWAP(2, 6); SWAP(3, 7);
    SWAP(4, 8); SWAP(5, 9); SWAP(6, 10);
    /* distance 2 */
    SWAP(0, 2); SWAP(1, 3); SWAP(4, 6); SWAP(5, 7);
    SWAP(8, 10);
    /* distance 6 */
    SWAP(2, 8); SWAP(3, 9);
    /* distance 2 */
    SWAP(2, 4); SWAP(3, 5); SWAP(6, 8); SWAP(7, 9);
    /* distance 1 */
    SWAP(0, 1); SWAP(2, 3); SWAP(4, 5); SWAP(6, 7);
    SWAP(8, 9);
    /* distance 7 */
    SWAP(1, 8); SWAP(3, 10);
    /* distance 3 */
    SWAP(1, 4); SWAP(3, 6); SWAP(5, 8); SWAP(7, 10);
    /* distance 1 */
    SWAP(1, 2); SWAP(3, 4); SWAP(5, 6); SWAP(7, 8);
    SWAP(9, 10);

#undef SWAP
}

/*
 * Sort 12 consecutive elements of size_of_element bytes each, starting at d,
 * in place with a fixed sequence of 41 compare-exchanges. Elements are
 * compared with comp_fast() (element sizes 1..6 only) and exchanged with
 * swap_blocks_with_loop().
 *
 * Comparators are grouped by the distance between the two element indices;
 * the order of the groups must be preserved.
 */
inline void sort12_fast_array(uint8_t * d, size_t size_of_element) {
/* Compare-exchange of elements x and y: swap them when element x compares greater. */
#define SWAP(x, y) do { \
        uint8_t *const first_ = d + (x) * size_of_element; \
        uint8_t *const second_ = d + (y) * size_of_element; \
        if (comp_fast(first_, second_, size_of_element) > 0) { \
            swap_blocks_with_loop(first_, second_, size_of_element); \
        } \
    } while (0)

    /* distance 8 */
    SWAP(0, 8); SWAP(1, 9); SWAP(2, 10); SWAP(3, 11);
    /* distance 4 */
    SWAP(0, 4); SWAP(1, 5); SWAP(2, 6); SWAP(3, 7);
    SWAP(4, 8); SWAP(5, 9); SWAP(6, 10); SWAP(7, 11);
    /* distance 2 */
    SWAP(0, 2); SWAP(1, 3); SWAP(4, 6); SWAP(5, 7);
    SWAP(8, 10); SWAP(9, 11);
    /* distance 6 */
    SWAP(2, 8); SWAP(3, 9);
    /* distance 2 */
    SWAP(2, 4); SWAP(3, 5); SWAP(6, 8); SWAP(7, 9);
    /* distance 1 */
    SWAP(0, 1); SWAP(2, 3); SWAP(4, 5); SWAP(6, 7);
    SWAP(8, 9); SWAP(10, 11);
    /* distance 7 */
    SWAP(1, 8); SWAP(3, 10);
    /* distance 3 */
    SWAP(1, 4); SWAP(3, 6); SWAP(5, 8); SWAP(7, 10);
    /* distance 1 */
    SWAP(1, 2); SWAP(3, 4); SWAP(5, 6); SWAP(7, 8);
    SWAP(9, 10);

#undef SWAP
}


/*
 * Sort 13 consecutive elements of size_of_element bytes each, starting at d,
 * in place with a fixed sequence of 45 compare-exchanges. Elements are
 * compared with comp_fast() (element sizes 1..6 only) and exchanged with
 * swap_blocks_with_loop().
 *
 * The comparators are laid out in 10 groups; within a group no element index
 * appears twice. The order of the groups must be preserved.
 */
inline void sort13_fast_array(uint8_t * d, size_t size_of_element) {
/* Compare-exchange of elements x and y: swap them when element x compares greater. */
#define SWAP(x, y) do { \
        uint8_t *const first_ = d + (x) * size_of_element; \
        uint8_t *const second_ = d + (y) * size_of_element; \
        if (comp_fast(first_, second_, size_of_element) > 0) { \
            swap_blocks_with_loop(first_, second_, size_of_element); \
        } \
    } while (0)

    /* group 1 */
    SWAP(1, 7); SWAP(9, 11); SWAP(3, 4); SWAP(5, 8); SWAP(0, 12); SWAP(2, 6);
    /* group 2 */
    SWAP(0, 1); SWAP(2, 3); SWAP(4, 6); SWAP(8, 11); SWAP(7, 12); SWAP(5, 9);
    /* group 3 */
    SWAP(0, 2); SWAP(3, 7); SWAP(10, 11); SWAP(1, 4); SWAP(6, 12);
    /* group 4 */
    SWAP(7, 8); SWAP(11, 12); SWAP(4, 9); SWAP(6, 10);
    /* group 5 */
    SWAP(3, 4); SWAP(5, 6); SWAP(8, 9); SWAP(10, 11); SWAP(1, 7);
    /* group 6 */
    SWAP(2, 6); SWAP(9, 11); SWAP(1, 3); SWAP(4, 7); SWAP(8, 10); SWAP(0, 5);
    /* group 7 */
    SWAP(2, 5); SWAP(6, 8); SWAP(9, 10);
    /* group 8 */
    SWAP(1, 2); SWAP(3, 5); SWAP(7, 8); SWAP(4, 6);
    /* group 9 */
    SWAP(2, 3); SWAP(4, 5); SWAP(6, 7); SWAP(8, 9);
    /* group 10 */
    SWAP(3, 4); SWAP(5, 6);

#undef SWAP
}

/*
 * Sort 14 consecutive elements of size_of_element bytes each, starting at d,
 * in place with a fixed sequence of 53 compare-exchanges. Elements are
 * compared with comp_fast() (element sizes 1..6 only) and exchanged with
 * swap_blocks_with_loop().
 *
 * Comparators are grouped by the distance between the two element indices;
 * the order of the groups must be preserved.
 */
inline void sort14_fast_array(uint8_t * d, size_t size_of_element) {
/* Compare-exchange of elements x and y: swap them when element x compares greater. */
#define SWAP(x, y) do { \
        uint8_t *const first_ = d + (x) * size_of_element; \
        uint8_t *const second_ = d + (y) * size_of_element; \
        if (comp_fast(first_, second_, size_of_element) > 0) { \
            swap_blocks_with_loop(first_, second_, size_of_element); \
        } \
    } while (0)

    /* distance 8 */
    SWAP(0, 8); SWAP(1, 9); SWAP(2, 10); SWAP(3, 11);
    SWAP(4, 12); SWAP(5, 13);
    /* distance 4 */
    SWAP(0, 4); SWAP(1, 5); SWAP(2, 6); SWAP(3, 7);
    SWAP(8, 12); SWAP(9, 13);
    SWAP(4, 8); SWAP(5, 9); SWAP(6, 10); SWAP(7, 11);
    /* distance 2 */
    SWAP(0, 2); SWAP(1, 3); SWAP(4, 6); SWAP(5, 7);
    SWAP(8, 10); SWAP(9, 11);
    /* distance 6 */
    SWAP(2, 8); SWAP(3, 9); SWAP(6, 12); SWAP(7, 13);
    /* distance 2 */
    SWAP(2, 4); SWAP(3, 5); SWAP(6, 8); SWAP(7, 9);
    SWAP(10, 12); SWAP(11, 13);
    /* distance 1 */
    SWAP(0, 1); SWAP(2, 3); SWAP(4, 5); SWAP(6, 7);
    SWAP(8, 9); SWAP(10, 11); SWAP(12, 13);
    /* distance 7 */
    SWAP(1, 8); SWAP(3, 10); SWAP(5, 12);
    /* distance 3 */
    SWAP(1, 4); SWAP(3, 6); SWAP(5, 8); SWAP(7, 10);
    SWAP(9, 12);
    /* distance 1 */
    SWAP(1, 2); SWAP(3, 4); SWAP(5, 6); SWAP(7, 8);
    SWAP(9, 10); SWAP(11, 12);

#undef SWAP
}

/*
 * Sort 15 consecutive elements of size_of_element bytes each, starting at d,
 * in place with a fixed sequence of 59 compare-exchanges. Elements are
 * compared with comp_fast() (element sizes 1..6 only) and exchanged with
 * swap_blocks_with_loop().
 *
 * Comparators are grouped by the distance between the two element indices;
 * the order of the groups must be preserved.
 */
inline void sort15_fast_array(uint8_t * d, size_t size_of_element) {
/* Compare-exchange of elements x and y: swap them when element x compares greater. */
#define SWAP(x, y) do { \
        uint8_t *const first_ = d + (x) * size_of_element; \
        uint8_t *const second_ = d + (y) * size_of_element; \
        if (comp_fast(first_, second_, size_of_element) > 0) { \
            swap_blocks_with_loop(first_, second_, size_of_element); \
        } \
    } while (0)

    /* distance 8 */
    SWAP(0, 8); SWAP(1, 9); SWAP(2, 10); SWAP(3, 11);
    SWAP(4, 12); SWAP(5, 13); SWAP(6, 14);
    /* distance 4 */
    SWAP(0, 4); SWAP(1, 5); SWAP(2, 6); SWAP(3, 7);
    SWAP(8, 12); SWAP(9, 13); SWAP(10, 14);
    SWAP(4, 8); SWAP(5, 9); SWAP(6, 10); SWAP(7, 11);
    /* distance 2 */
    SWAP(0, 2); SWAP(1, 3); SWAP(4, 6); SWAP(5, 7);
    SWAP(8, 10); SWAP(9, 11); SWAP(12, 14);
    /* distance 6 */
    SWAP(2, 8); SWAP(3, 9); SWAP(6, 12); SWAP(7, 13);
    /* distance 2 */
    SWAP(2, 4); SWAP(3, 5); SWAP(6, 8); SWAP(7, 9);
    SWAP(10, 12); SWAP(11, 13);
    /* distance 1 */
    SWAP(0, 1); SWAP(2, 3); SWAP(4, 5); SWAP(6, 7);
    SWAP(8, 9); SWAP(10, 11); SWAP(12, 13);
    /* distance 7 */
    SWAP(1, 8); SWAP(3, 10); SWAP(5, 12); SWAP(7, 14);
    /* distance 3 */
    SWAP(1, 4); SWAP(3, 6); SWAP(5, 8); SWAP(7, 10);
    SWAP(9, 12); SWAP(11, 14);
    /* distance 1 */
    SWAP(1, 2); SWAP(3, 4); SWAP(5, 6); SWAP(7, 8);
    SWAP(9, 10); SWAP(11, 12); SWAP(13, 14);

#undef SWAP
}

/*
 * Sort 16 consecutive elements of size_of_element bytes each, starting at d,
 * in place with a fixed sequence of 63 compare-exchanges. Elements are
 * compared with comp_fast() (element sizes 1..6 only) and exchanged with
 * swap_blocks_with_loop().
 *
 * Comparators are grouped by the distance between the two element indices;
 * the order of the groups must be preserved.
 */
inline void sort16_fast_array(uint8_t * d, size_t size_of_element) {
/* Compare-exchange of elements x and y: swap them when element x compares greater. */
#define SWAP(x, y) do { \
        uint8_t *const first_ = d + (x) * size_of_element; \
        uint8_t *const second_ = d + (y) * size_of_element; \
        if (comp_fast(first_, second_, size_of_element) > 0) { \
            swap_blocks_with_loop(first_, second_, size_of_element); \
        } \
    } while (0)

    /* distance 8 */
    SWAP(0, 8); SWAP(1, 9); SWAP(2, 10); SWAP(3, 11);
    SWAP(4, 12); SWAP(5, 13); SWAP(6, 14); SWAP(7, 15);
    /* distance 4 */
    SWAP(0, 4); SWAP(1, 5); SWAP(2, 6); SWAP(3, 7);
    SWAP(8, 12); SWAP(9, 13); SWAP(10, 14); SWAP(11, 15);
    SWAP(4, 8); SWAP(5, 9); SWAP(6, 10); SWAP(7, 11);
    /* distance 2 */
    SWAP(0, 2); SWAP(1, 3); SWAP(4, 6); SWAP(5, 7);
    SWAP(8, 10); SWAP(9, 11); SWAP(12, 14); SWAP(13, 15);
    /* distance 6 */
    SWAP(2, 8); SWAP(3, 9); SWAP(6, 12); SWAP(7, 13);
    /* distance 2 */
    SWAP(2, 4); SWAP(3, 5); SWAP(6, 8); SWAP(7, 9);
    SWAP(10, 12); SWAP(11, 13);
    /* distance 1 */
    SWAP(0, 1); SWAP(2, 3); SWAP(4, 5); SWAP(6, 7);
    SWAP(8, 9); SWAP(10, 11); SWAP(12, 13); SWAP(14, 15);
    /* distance 7 */
    SWAP(1, 8); SWAP(3, 10); SWAP(5, 12); SWAP(7, 14);
    /* distance 3 */
    SWAP(1, 4); SWAP(3, 6); SWAP(5, 8); SWAP(7, 10);
    SWAP(9, 12); SWAP(11, 14);
    /* distance 1 */
    SWAP(1, 2); SWAP(3, 4); SWAP(5, 6); SWAP(7, 8);
    SWAP(9, 10); SWAP(11, 12); SWAP(13, 14);

#undef SWAP
}

/*
 * Sort 17 consecutive elements of size_of_element bytes each, starting at d,
 * in place with a fixed sequence of 74 compare-exchanges. Elements are
 * compared with comp_fast() (element sizes 1..6 only) and exchanged with
 * swap_blocks_with_loop().
 *
 * Comparators are grouped by the distance between the two element indices;
 * the order of the groups must be preserved.
 */
inline void sort17_fast_array(uint8_t * d, size_t size_of_element) {
/* Compare-exchange of elements x and y: swap them when element x compares greater. */
#define SWAP(x, y) do { \
        uint8_t *const first_ = d + (x) * size_of_element; \
        uint8_t *const second_ = d + (y) * size_of_element; \
        if (comp_fast(first_, second_, size_of_element) > 0) { \
            swap_blocks_with_loop(first_, second_, size_of_element); \
        } \
    } while (0)

    /* distance 16 */
    SWAP(0, 16);
    /* distance 8 */
    SWAP(0, 8); SWAP(1, 9); SWAP(2, 10); SWAP(3, 11);
    SWAP(4, 12); SWAP(5, 13); SWAP(6, 14); SWAP(7, 15);
    SWAP(8, 16);
    /* distance 4 */
    SWAP(0, 4); SWAP(1, 5); SWAP(2, 6); SWAP(3, 7);
    SWAP(8, 12); SWAP(9, 13); SWAP(10, 14); SWAP(11, 15);
    /* distance 12 */
    SWAP(4, 16);
    /* distance 4 */
    SWAP(4, 8); SWAP(5, 9); SWAP(6, 10); SWAP(7, 11);
    SWAP(12, 16);
    /* distance 2 */
    SWAP(0, 2); SWAP(1, 3); SWAP(4, 6); SWAP(5, 7);
    SWAP(8, 10); SWAP(9, 11); SWAP(12, 14); SWAP(13, 15);
    /* distance 14 */
    SWAP(2, 16);
    /* distance 6 */
    SWAP(2, 8); SWAP(3, 9); SWAP(6, 12); SWAP(7, 13);
    SWAP(10, 16);
    /* distance 2 */
    SWAP(2, 4); SWAP(3, 5); SWAP(6, 8); SWAP(7, 9);
    SWAP(10, 12); SWAP(11, 13); SWAP(14, 16);
    /* distance 1 */
    SWAP(0, 1); SWAP(2, 3); SWAP(4, 5); SWAP(6, 7);
    SWAP(8, 9); SWAP(10, 11); SWAP(12, 13); SWAP(14, 15);
    /* distance 15 */
    SWAP(1, 16);
    /* distance 7 */
    SWAP(1, 8); SWAP(3, 10); SWAP(5, 12); SWAP(7, 14);
    SWAP(9, 16);
    /* distance 3 */
    SWAP(1, 4); SWAP(3, 6); SWAP(5, 8); SWAP(7, 10);
    SWAP(9, 12); SWAP(11, 14); SWAP(13, 16);
    /* distance 1 */
    SWAP(1, 2); SWAP(3, 4); SWAP(5, 6); SWAP(7, 8);
    SWAP(9, 10); SWAP(11, 12); SWAP(13, 14); SWAP(15, 16);

#undef SWAP
}

/*
 * Sort 18 consecutive elements of size_of_element bytes each, starting at d,
 * in place with a fixed sequence of 82 compare-exchanges. Elements are
 * compared with comp_fast() (element sizes 1..6 only) and exchanged with
 * swap_blocks_with_loop().
 *
 * Comparators are grouped by the distance between the two element indices;
 * the order of the groups must be preserved.
 */
inline void sort18_fast_array(uint8_t * d, size_t size_of_element) {
/* Compare-exchange of elements x and y: swap them when element x compares greater. */
#define SWAP(x, y) do { \
        uint8_t *const first_ = d + (x) * size_of_element; \
        uint8_t *const second_ = d + (y) * size_of_element; \
        if (comp_fast(first_, second_, size_of_element) > 0) { \
            swap_blocks_with_loop(first_, second_, size_of_element); \
        } \
    } while (0)

    /* distance 16 */
    SWAP(0, 16); SWAP(1, 17);
    /* distance 8 */
    SWAP(0, 8); SWAP(1, 9); SWAP(2, 10); SWAP(3, 11);
    SWAP(4, 12); SWAP(5, 13); SWAP(6, 14); SWAP(7, 15);
    SWAP(8, 16); SWAP(9, 17);
    /* distance 4 */
    SWAP(0, 4); SWAP(1, 5); SWAP(2, 6); SWAP(3, 7);
    SWAP(8, 12); SWAP(9, 13); SWAP(10, 14); SWAP(11, 15);
    /* distance 12 */
    SWAP(4, 16); SWAP(5, 17);
    /* distance 4 */
    SWAP(4, 8); SWAP(5, 9); SWAP(6, 10); SWAP(7, 11);
    SWAP(12, 16); SWAP(13, 17);
    /* distance 2 */
    SWAP(0, 2); SWAP(1, 3); SWAP(4, 6); SWAP(5, 7);
    SWAP(8, 10); SWAP(9, 11); SWAP(12, 14); SWAP(13, 15);
    /* distance 14 */
    SWAP(2, 16); SWAP(3, 17);
    /* distance 6 */
    SWAP(2, 8); SWAP(3, 9); SWAP(6, 12); SWAP(7, 13);
    SWAP(10, 16); SWAP(11, 17);
    /* distance 2 */
    SWAP(2, 4); SWAP(3, 5); SWAP(6, 8); SWAP(7, 9);
    SWAP(10, 12); SWAP(11, 13); SWAP(14, 16); SWAP(15, 17);
    /* distance 1 */
    SWAP(0, 1); SWAP(2, 3); SWAP(4, 5); SWAP(6, 7);
    SWAP(8, 9); SWAP(10, 11); SWAP(12, 13); SWAP(14, 15);
    SWAP(16, 17);
    /* distance 15 */
    SWAP(1, 16);
    /* distance 7 */
    SWAP(1, 8); SWAP(3, 10); SWAP(5, 12); SWAP(7, 14);
    SWAP(9, 16);
    /* distance 3 */
    SWAP(1, 4); SWAP(3, 6); SWAP(5, 8); SWAP(7, 10);
    SWAP(9, 12); SWAP(11, 14); SWAP(13, 16);
    /* distance 1 */
    SWAP(1, 2); SWAP(3, 4); SWAP(5, 6); SWAP(7, 8);
    SWAP(9, 10); SWAP(11, 12); SWAP(13, 14); SWAP(15, 16);

#undef SWAP
}

/*
 * Sort len consecutive elements of size_of_element bytes each, starting at d,
 * in place.
 *
 * d               - first byte of the first element
 * size_of_element - size of one element in bytes
 * len             - number of elements
 *
 * A single element needs no work. Counts 2..18 use the matching fixed-size
 * sortN_fast_array routine. Every other count (0, or more than 18) is handed
 * to the library sort: qsort_s with compare_uint8_lex_s when
 * _STRAIGHTEN_USE_QSORT_S is defined, otherwise qsort_r with
 * compare_uint8_lex_r; the element size is passed to the comparator as
 * context.
 */
inline void sort_fast_array(uint8_t * d, size_t size_of_element, uint8_t len) {
    switch (len) {
    case 1:  /* already sorted */                     break;
    case 2:  sort2_fast_array(d, size_of_element);   break;
    case 3:  sort3_fast_array(d, size_of_element);   break;
    case 4:  sort4_fast_array(d, size_of_element);   break;
    case 5:  sort5_fast_array(d, size_of_element);   break;
    case 6:  sort6_fast_array(d, size_of_element);   break;
    case 7:  sort7_fast_array(d, size_of_element);   break;
    case 8:  sort8_fast_array(d, size_of_element);   break;
    case 9:  sort9_fast_array(d, size_of_element);   break;
    case 10: sort10_fast_array(d, size_of_element);  break;
    case 11: sort11_fast_array(d, size_of_element);  break;
    case 12: sort12_fast_array(d, size_of_element);  break;
    case 13: sort13_fast_array(d, size_of_element);  break;
    case 14: sort14_fast_array(d, size_of_element);  break;
    case 15: sort15_fast_array(d, size_of_element);  break;
    case 16: sort16_fast_array(d, size_of_element);  break;
    case 17: sort17_fast_array(d, size_of_element);  break;
    case 18: sort18_fast_array(d, size_of_element);  break;
    default:
#ifdef _STRAIGHTEN_USE_QSORT_S
        qsort_s(d, len, size_of_element * sizeof(uint8_t), compare_uint8_lex_s, &size_of_element);
#else
        qsort_r(d, len, size_of_element * sizeof(uint8_t), compare_uint8_lex_r, &size_of_element);
#endif
        break;
    }
}

#endif  // search_sort_util_h__