#include "plethysm.h"

extern int sstd_index_order_column_factorial_r(const void *arg1, const void *arg2, void *arg);

extern int sstd_index_order_column_factorial_s(void *arg, const void *arg1, const void *arg2);

/*
 * Reports whether an array of signed 64-bit integers has any non-zero entry.
 *
 * arr  - array to scan
 * size - number of entries of arr to inspect
 *
 * Returns 1 as soon as a non-zero entry is found, 0 if all `size` entries are
 * zero (including when size is 0).
 */
int array_nonzero(int64_t * arr, size_t size) {
	size_t position = 0;
	size_t remaining = size;

	while(remaining > 0) {
		if(arr[position]) {
			return 1;
		}
		position++;
		remaining--;
	}
	return 0;
}

/*
 * Serialises an isotypic-basis record into str using the single-line format
 *
 *     ([s0,s1,...],dimension,lower_bound,[i0,i1,...])\n
 *
 * str                      - destination buffer; the caller must make it large
 *                            enough (each number needs at most 10 digits plus a
 *                            separator), no bounds checking is done here
 * shape, shape_length      - partition entries written into the first bracket
 * dimension                - number of entries read from isotypic_basis_index
 * optimal_sstd_lower_bound - written verbatim as the third field
 * isotypic_basis_index     - indices written into the trailing bracket
 *
 * Separators are written between entries instead of appending a comma and then
 * stepping the write position back, so an empty shape produces "[]" rather than
 * overwriting the opening bracket.
 */
void isotypic_to_string(char * str, uint32_t * shape, uint32_t shape_length, uint32_t dimension, uint32_t optimal_sstd_lower_bound, uint32_t * isotypic_basis_index) {
	int index = 0;
	index += sprintf(&str[index], "([");
	for(uint32_t entry = 0; entry < shape_length; entry++) {
		if(entry != 0) {
			index += sprintf(&str[index], ",");
		}
		index += sprintf(&str[index], "%u", shape[entry]);
	}
	index += sprintf(&str[index], "],");
	index += sprintf(&str[index], "%u,", dimension);
	index += sprintf(&str[index], "%u,[", optimal_sstd_lower_bound);
	for(uint32_t entry = 0; entry < dimension; entry++) {
		if(entry != 0) {
			index += sprintf(&str[index], ",");
		}
		index += sprintf(&str[index], "%u", isotypic_basis_index[entry]);
	}
	sprintf(&str[index], "])\n");
}

/*
 * Writes one isotypic-basis record (see isotypic_to_string) to
 *
 *     HWV_Isotypic/PlethysmCOMB<outer>//Inner<inner>/[shape].txt
 *
 * creating each directory level if stat() reports it missing. The file name is
 * built from the first s_data[0].conjugate[0] entries of s_data[0].shape, while
 * the record itself uses s_data[0].shape_length entries.
 *
 * The path construction (including the doubled slash and the trailing-comma
 * removal) is kept exactly as before so that names stay in sync with the
 * reader in load_isotypic.
 *
 * The record text is built in a heap buffer sized from shape_length and
 * dimension; a fixed 4096-byte stack buffer overflowed for large dimensions.
 *
 * Exits the process if the buffer cannot be allocated or the file cannot be
 * opened.
 */
void store_isotypic(uint32_t outer, uint32_t inner, struct shape_data_c * s_data, uint32_t dimension, uint32_t optimal_sstd_lower_bound, uint32_t * isotypic_basis_index) {
	char filename[128];
	char shape_str[128];
	int index = 0;
	int s_index = 0;

	index += sprintf(filename, "HWV_Isotypic");
	struct stat st = {0};
	if (stat(filename, &st) == -1) {
		#ifdef _STRAIGHTEN_USING_LINUX
		mkdir(filename, 0700);
		#else
		mkdir(filename);
		#endif
	}

	index += sprintf(&filename[index], "/PlethysmCOMB%d/", outer);
	if (stat(filename, &st) == -1) {
		#ifdef _STRAIGHTEN_USING_LINUX
		mkdir(filename, 0700);
		#else
		mkdir(filename);
		#endif
	}

	index += sprintf(&filename[index], "/Inner%d/", inner);
	if (stat(filename, &st) == -1) {
		#ifdef _STRAIGHTEN_USING_LINUX
		mkdir(filename, 0700);
		#else
		mkdir(filename);
		#endif
	}

	s_index += sprintf(&shape_str[s_index], "[");
	for(int entry = 0; entry < s_data[0].conjugate[0]; entry++) {
		s_index += sprintf(&shape_str[s_index], "%u,", s_data[0].shape[entry]);
	}
	s_index--; //removes the trailing comma
	s_index += sprintf(&shape_str[s_index], "]");

	index += sprintf(&filename[index], "%s.txt", shape_str);

	// A uint32_t prints as at most 10 digits and each entry carries at most one
	// separator, so 11 bytes per entry plus slack for the brackets, the two
	// scalar fields, the newline and the terminator is always sufficient.
	size_t buffer_size = 64 + 11 * ((size_t) s_data[0].shape_length + (size_t) dimension);
	char * buffer = (char*) calloc(buffer_size, sizeof(char));
	if(buffer == NULL) {
		printf("Fatal Error: Could not allocate memory to store the isotypic basis for file %s.\n", filename);
		exit(EXIT_FAILURE);
	}

	FILE *out_file = fopen(filename, "w");
	if(out_file == NULL) {
		free(buffer);
		printf("Fatal Error: Could not open file %s.\n", filename);
		exit(EXIT_FAILURE);
	}

	isotypic_to_string(&buffer[0], s_data[0].shape, s_data[0].shape_length, dimension, optimal_sstd_lower_bound, isotypic_basis_index);

	fprintf(out_file, "%s", buffer);
	fclose(out_file);
	free(buffer);
}

/*
 * Serialises an isotypic basis together with its tableaux into str as
 *
 *     ([s0,s1,...],dimension,[<tableau 0>,<tableau 1>,...])\n
 *
 * str                 - destination buffer; no bounds checking is done here, so
 *                       the caller must provide enough room
 * shape, shape_length - partition entries written into the first bracket
 * dimension           - number of tableaux read from isotypic_basis
 * isotypic_basis      - array of `dimension` tableaux, each rendered through
 *                       tableau_to_string(…, 0, s_data, 1)
 * s_data              - shape data forwarded to tableau_to_string
 *
 * The return value of tableau_to_string is added to the write position, i.e. it
 * is treated as the number of characters written.
 *
 * Separators are written between shape entries rather than appending a comma
 * and stepping back, so an empty shape yields "[]" instead of overwriting the
 * opening bracket.
 */
void isotypic_to_string_full(char * str, uint32_t * shape, uint32_t shape_length, uint32_t dimension, struct tableau * isotypic_basis, struct shape_data_c * s_data) {
	int index = 0;
	index += sprintf(&str[index], "([");
	for(uint32_t entry = 0; entry < shape_length; entry++) {
		if(entry != 0) {
			index += sprintf(&str[index], ",");
		}
		index += sprintf(&str[index], "%u", shape[entry]);
	}
	index += sprintf(&str[index], "],");
	index += sprintf(&str[index], "%u,[", dimension);

	for(uint32_t tab = 0; tab < dimension; tab++) {
		if(tab != 0) {
			index += sprintf(&str[index], ",");
		}
		index += tableau_to_string(isotypic_basis + tab, &str[index], 0, s_data, 1);
	}
	sprintf(&str[index], "])\n");
}

/*
 * Writes an isotypic basis including its tableaux (see isotypic_to_string_full)
 * to
 *
 *     HWV_Isotypic/PlethysmDB<outer>//Inner<inner>/[shape].txt
 *
 * creating each directory level if stat() reports it missing. The file name is
 * built from the first s_data[0].conjugate[0] entries of s_data[0].shape.
 *
 * The record is rendered into a fixed 1,280,000-byte heap buffer; the rendered
 * size depends on tableau_to_string and is not checked against that capacity.
 *
 * Exits the process if the buffer cannot be allocated or the file cannot be
 * opened.
 */
void store_isotypic_full(uint32_t outer, uint32_t inner, struct shape_data_c * s_data, uint32_t dimension, struct tableau * isotypic_basis) {
	char filename[128];
	char shape_str[128];
	int index = 0;
	int s_index = 0;

	char * buffer = (char*) calloc(1280000, sizeof(char));
	if(buffer == NULL) {
		// previously a failed allocation was written through unconditionally
		printf("Fatal Error: Could not allocate memory to store the full isotypic basis.\n");
		exit(EXIT_FAILURE);
	}

	index += sprintf(filename, "HWV_Isotypic");
	struct stat st = {0};
	if (stat(filename, &st) == -1) {
		#ifdef _STRAIGHTEN_USING_LINUX
		mkdir(filename, 0700);
		#else
		mkdir(filename);
		#endif
	}

	index += sprintf(&filename[index], "/PlethysmDB%d/", outer);
	if (stat(filename, &st) == -1) {
		#ifdef _STRAIGHTEN_USING_LINUX
		mkdir(filename, 0700);
		#else
		mkdir(filename);
		#endif
	}

	index += sprintf(&filename[index], "/Inner%d/", inner);
	if (stat(filename, &st) == -1) {
		#ifdef _STRAIGHTEN_USING_LINUX
		mkdir(filename, 0700);
		#else
		mkdir(filename);
		#endif
	}

	s_index += sprintf(&shape_str[s_index], "[");
	for(int entry = 0; entry < s_data[0].conjugate[0]; entry++) {
		s_index += sprintf(&shape_str[s_index], "%u,", s_data[0].shape[entry]);
	}
	s_index--; //removes the trailing comma
	s_index += sprintf(&shape_str[s_index], "]");

	index += sprintf(&filename[index], "%s.txt", shape_str);

	FILE *out_file = fopen(filename, "w");
	if(out_file == NULL) {
		free(buffer);
		printf("Fatal Error: Could not open file %s.\n", filename);
		exit(EXIT_FAILURE);
	}

	isotypic_to_string_full(&buffer[0], s_data[0].shape, s_data[0].shape_length, dimension, isotypic_basis, s_data);

	fprintf(out_file, "%s", buffer);
	fclose(out_file);
	free(buffer);
}

/*
 * Parses one record of the form written by isotypic_to_string:
 *
 *     ([s0,s1,...],dimension,lower_bound,[i0,i1,...])\n
 *
 * str                      - record text; it is tokenised in place with
 *                            strtok_r and therefore modified
 * shape                    - receives a newly allocated array with the shape
 *                            entries (at most 128 are read); caller frees
 * shape_length             - receives the number of shape entries read
 * dimension                - receives the second field
 * optimal_sstd_lower_bound - receives the third field
 * isotypic_basis_index     - receives a newly allocated array of `dimension`
 *                            entries (zero-filled where the record has fewer);
 *                            caller frees
 *
 * Malformed or truncated input never writes outside the allocated arrays:
 * outputs that could not be parsed are left at 0 / NULL, surplus shape entries
 * beyond 128 and surplus basis indices beyond `dimension` are ignored.
 */
void string_to_isotypic(char * str, uint32_t ** shape, uint32_t * shape_length, uint32_t * dimension, uint32_t * optimal_sstd_lower_bound, uint32_t ** isotypic_basis_index) {
	static const char outer_delimiters[] = "([])\n";
	static const char inner_delimiters[] = "([,])\n";
	const uint32_t max_shape_length = 128;
	char * outer_rest = NULL;
	char * inner_rest = NULL;
	char * outer_token;
	char * inner_token;
	uint32_t entry = 0;

	// defined results for every early return below
	shape[0] = (uint32_t*) calloc(max_shape_length, sizeof(uint32_t));
	shape_length[0] = 0;
	dimension[0] = 0;
	optimal_sstd_lower_bound[0] = 0;
	isotypic_basis_index[0] = NULL;

	if(str == NULL || shape[0] == NULL) {
		return;
	}

	// first bracket: the shape
	outer_token = strtok_r(str, outer_delimiters, &outer_rest);
	if(outer_token == NULL) {
		return;
	}
	inner_token = strtok_r(outer_token, inner_delimiters, &inner_rest);
	while(inner_token != NULL && entry < max_shape_length) {
		shape[0][entry] = (uint32_t) strtoul(inner_token, NULL, 10);
		entry++;
		inner_token = strtok_r(NULL, inner_delimiters, &inner_rest);
	}
	shape_length[0] = entry;
	if(entry > 0) {
		// shrinking only; if it fails the original (larger) block is still valid
		uint32_t * shrunk = (uint32_t*) realloc(shape[0], entry * sizeof(uint32_t));
		if(shrunk != NULL) {
			shape[0] = shrunk;
		}
	}

	// between the brackets: ",dimension,lower_bound,"
	outer_token = strtok_r(NULL, outer_delimiters, &outer_rest);
	if(outer_token == NULL) {
		return;
	}
	inner_token = strtok_r(outer_token, inner_delimiters, &inner_rest);
	if(inner_token == NULL) {
		return;
	}
	dimension[0] = (uint32_t) strtoul(inner_token, NULL, 10);
	inner_token = strtok_r(NULL, inner_delimiters, &inner_rest);
	if(inner_token != NULL) {
		optimal_sstd_lower_bound[0] = (uint32_t) strtoul(inner_token, NULL, 10);
	}

	// second bracket: the basis indices (absent as a token when it is empty)
	isotypic_basis_index[0] = (uint32_t*) calloc(dimension[0], sizeof(uint32_t));
	outer_token = strtok_r(NULL, outer_delimiters, &outer_rest);
	if(outer_token == NULL || isotypic_basis_index[0] == NULL) {
		return;
	}
	entry = 0;
	inner_token = strtok_r(outer_token, inner_delimiters, &inner_rest);
	while(inner_token != NULL && entry < dimension[0]) {
		isotypic_basis_index[0][entry] = (uint32_t) strtoul(inner_token, NULL, 10);
		entry++;
		inner_token = strtok_r(NULL, inner_delimiters, &inner_rest);
	}
}

int32_t load_isotypic(uint32_t outer, uint32_t inner, struct shape_data_c * s_data, uint32_t * dimension, uint32_t * optimal_sstd_lower_bound, uint32_t ** isotypic_basis_index) {
	char filename[128];
	int index = 0;
	index += sprintf(filename, "HWV_Isotypic/PlethysmCOMB%d/Inner%d/[", outer, inner);
	for(int entry = 0; entry < s_data[0].conjugate[0]; entry++) {
		index += sprintf(&filename[index], "%u,", s_data[0].shape[entry]);
	}
	index--; //removes the trailing comma
	index += sprintf(&filename[index], "].txt");
	filename[index] = '\0';

	FILE *in_file = fopen(filename, "r");
	if(in_file == NULL) {
		straighten_log(STRAIGHTEN_VVINFO, "Could not open file %s.", filename);
		return -1;
    }
	
	char line[2048];
	uint32_t * shape;
	uint32_t shape_length;

	fgets(line, sizeof(line), in_file);
	string_to_isotypic(&line[0], &shape, &shape_length, dimension, optimal_sstd_lower_bound, isotypic_basis_index);
	free(shape);

	fclose(in_file);
	return 1;
}

/*
 * Reads the partitions and plethysm coefficients for (inner, outer) from
 * HWV_Isotypic/FoulkesAll/FoulkesAll<inner>.txt.
 *
 * The file is scanned for a line equal to "m = <inner>, n = <outer>\n". The
 * line after it is skipped, and every following line up to the first empty line
 * (or the end of the file) is parsed as a list of integers: all but the last go
 * into the partition row, the last one is the plethysm coefficient.
 *
 * partitions - receives a newly allocated array with `outer` entries per
 *              partition (rows shorter than `outer` are zero padded, surplus
 *              entries on a line are ignored); caller frees
 * plethysms  - receives a newly allocated array with one coefficient per
 *              partition; caller frees
 *
 * Returns the number of partitions read. Exits the process if the file cannot
 * be opened, memory cannot be allocated, or the section is not present.
 */
uint32_t load_foulkes(uint32_t outer, uint32_t inner, uint32_t ** partitions, uint32_t ** plethysms) {
	char filename[128];
	char target[128];
	sprintf(filename, "HWV_Isotypic/FoulkesAll/FoulkesAll%d.txt", inner);
	sprintf(target, "m = %d, n = %d\n", inner, outer);

	FILE *in_file = fopen(filename, "r");

	if(in_file == NULL) {
		printf("Fatal Error: No database for outer plethysm value %u.\n", outer);
		exit(EXIT_FAILURE);
	}

	char line[2048];
	uint32_t max_partitions = 1000;
	uint32_t num_partitions = 0;
	partitions[0] = (uint32_t*) calloc(max_partitions * outer, sizeof(uint32_t));
	plethysms[0] = (uint32_t*) calloc(max_partitions, sizeof(uint32_t));
	if(plethysms[0] == NULL || (outer != 0 && partitions[0] == NULL)) {
		fclose(in_file);
		printf("Fatal Error: Could not allocate memory for the plethysm database.\n");
		exit(EXIT_FAILURE);
	}

	char * token;
	char * rest = NULL;
	uint32_t entry = 0;
	uint32_t value = 0;

	int32_t success = 0;
	uint32_t read_in = 0;
	while (fgets(line, sizeof(line), in_file)) {
		if(!read_in) {
			if(strcmp(line,target)==0) {
				success = 1;
				read_in = 1;
				// skip the line that follows the section title
				if(fgets(line, sizeof(line), in_file) == NULL) {
					break;
				}
			}
			continue;
		}

		if(strcmp(line,"\n")==0) {
			// an empty line terminates the section
			break;
		}

		entry = 0;
		token = strtok_r(line,"([,])\n", &rest);
		while (token != NULL) {
			value = atoi(token);
			token = strtok_r(NULL, "([,])\n", &rest);
			// every token except the last belongs to the partition; never
			// write past the `outer` slots reserved for this row
			if(token != NULL && entry < outer) {
				partitions[0][num_partitions * outer + entry] = value;
				entry++;
			}
		}
		plethysms[0][num_partitions] = value;
		num_partitions++;
		if(num_partitions == max_partitions) {
			partitions[0] = realloc_zero(partitions[0], max_partitions * outer * sizeof(uint32_t),(max_partitions + 1000) * outer * sizeof(uint32_t));
			plethysms[0] = realloc_zero(plethysms[0], max_partitions * sizeof(uint32_t), (max_partitions + 1000) * sizeof(uint32_t));
			max_partitions += 1000;
		}
	}
	fclose(in_file);

	if(success == 0) {
		straighten_log(STRAIGHTEN_FATAL, "The database for outer plethysm value %u does not contain any coefficients for inner plethysm value %u.", outer, inner);
		exit(EXIT_FAILURE);
	}

	// Trim both arrays to what was actually read. This also covers a section
	// that runs to the end of the file without a terminating empty line. The
	// arrays only shrink here, so a failed realloc leaves a valid (larger) block.
	if(num_partitions > 0) {
		if(outer > 0) {
			uint32_t * trimmed_partitions = (uint32_t*) realloc(partitions[0], num_partitions * outer * sizeof(uint32_t));
			if(trimmed_partitions != NULL) {
				partitions[0] = trimmed_partitions;
			}
		}
		uint32_t * trimmed_plethysms = (uint32_t*) realloc(plethysms[0], num_partitions * sizeof(uint32_t));
		if(trimmed_plethysms != NULL) {
			plethysms[0] = trimmed_plethysms;
		}
	}
	max_partitions = num_partitions;

	straighten_log(STRAIGHTEN_INFO, "Read in %d partitions and associated plethysm coefficients.", max_partitions);
	return max_partitions;
}

void construct_isotypic_basis_all(uint32_t outer, uint32_t inner, int use_lower_bound) {
	uint32_t * partitions;
	uint32_t * plethysms;
	uint32_t max_partitions = load_foulkes(outer, inner, &partitions, &plethysms);

	struct shape_data_c s_data = {};
    struct sstd_data_c sstd_data = {};
    struct sstd_data_c_options options = {};


    uint8_t max_filling_value = outer;
    uint8_t * content = (uint8_t*) calloc(max_filling_value, sizeof(uint8_t));
    for(int c = 0; c < max_filling_value; c++) {
    	content[c] = inner;
    }

	uint32_t shape_length;
	uint32_t optimal_sstd_lower_bound = 0;
    uint32_t dimension;
    uint32_t * isotypic_basis_index = NULL;
    uint32_t * optimal_rcoeff_sstd_index = NULL;
    //char shape_str[128];
    //int32_t s_index;
	for(uint32_t partition_index = 0; partition_index < max_partitions; partition_index++) {
		shape_length = 0;
		for(int s = 0; s < outer; s++) {
			if(partitions[(partition_index*outer) + s] != 0) {
				shape_length++;
			}
		}
		
		optimal_sstd_lower_bound = 0;
	    dimension = plethysms[partition_index];

	    construct_shape_data_c(&s_data, &partitions[(partition_index*outer)], shape_length);
    	
	    memset(&options, 0, sizeof(struct sstd_data_c_options));
	    if(dimension > 0) {	    	
	    	options.set_sstd = 1;
		    options.set_sstd_column_factorial = 1;
		    options.set_rowcontent = 1;
		    options.set_dictionary = 1;
		    options.set_dictionary_hash = 1;
		    options.dictionary_hash_copy_data = 0;
		    options.set_cache = 1;	
		    if(use_lower_bound) {
		    	options.use_lower_bound = 1;
				options.lower_bound_fraction = 0.75f; 	
			}
	    }

		if(load_isotypic(outer, inner, &s_data, &dimension, &optimal_sstd_lower_bound, &isotypic_basis_index) == -1) {
			straighten_log(STRAIGHTEN_VVINFO, "Constructing isotypic basis for %s.", s_data.shape_string);
	    	if(dimension > 0) {
	    		isotypic_basis_index = (uint32_t*) calloc(dimension, sizeof(uint32_t));
	    		optimal_rcoeff_sstd_index = (uint32_t*) calloc(dimension, sizeof(uint32_t));
	    		construct_sstd_data_c(&sstd_data, content, max_filling_value, &s_data, &options);
	    		straighten_timing_macro(optimal_sstd_lower_bound = construct_isotypic_basis(&sstd_data, &s_data, dimension, optimal_rcoeff_sstd_index, isotypic_basis_index), "Finding isotypic basis");
	    	
	    		/*
	    		uint32_t * column_repeats = NULL;

			    for(int bv = 0; bv < dimension; bv++) {
			        set_sstd_column_repeat_data(&sstd_data, &s_data, optimal_rcoeff_sstd_index[bv], &column_repeats);
			        s_printf("Basis SSTD Tableau %d of %d (%u): ", bv+1, dimension, optimal_rcoeff_sstd_index[bv]);
			        print_2d_array_compact(column_repeats, s_data.unique_columns[0].length, sstd_data.max_filling_value);
			        printf("\n");
			        print_tableau(&sstd_data.all_sstd_tableau[optimal_rcoeff_sstd_index[bv]], &s_data, 0, 0, 0, 0);
			        free(column_repeats);
			    }
			    printf("=================================\n");*/

	    	}
	    	straighten_log(STRAIGHTEN_INFO, "Storing isotypic basis for %s.", s_data.shape_string);
	    	store_isotypic(outer, inner, &s_data, dimension, optimal_sstd_lower_bound, isotypic_basis_index);
	    }
	    else {
	    	straighten_log(STRAIGHTEN_INFO, "Isotypic basis for %s already exists in the database.", s_data.shape_string);
	    }

    	destruct_shape_data_c(&s_data);
    	destruct_sstd_data_c(&sstd_data);
		
		if(isotypic_basis_index != NULL) {
    		free(isotypic_basis_index);
    		isotypic_basis_index = NULL;
    	}
    	if(optimal_rcoeff_sstd_index != NULL) {
    		free(optimal_rcoeff_sstd_index);
    		optimal_rcoeff_sstd_index = NULL;
    	}
	}
}

uint32_t construct_isotypic_basis_investigate(struct sstd_data_c * sstd_data, struct shape_data_c * s_data, uint32_t dimension, uint32_t * optimal_rcoeff_sstd_index, uint32_t * isotypic_basis_index) {
	uint32_t lower_bound_increment;
	if(sstd_data[0].num_sstd_tableau < 10) {
		lower_bound_increment = sstd_data[0].num_sstd_tableau;
	}
	else {
		lower_bound_increment = (uint32_t)(0.25f * (double)sstd_data[0].num_sstd_tableau);
	} 
	sstd_data[0].lower_bound_sstd = sstd_data[0].num_sstd_tableau - lower_bound_increment;
	
	//uint8_t inner = sstd_data[0].content[0];
	uint8_t outer = sstd_data[0].max_filling_value;
	int64_t precision_loss_upperbound = 9007199254740991; //2^53 - 1
	int64_t precision_loss_lowerbound = -9007199254740993; //-2^53 + 1

	int64_t * straighten_result = (int64_t*) calloc(_STRAIGHTEN_NUM_THREADS * sstd_data[0].num_sstd_tableau, sizeof(int64_t));
	int64_t * isotypic_basis = (int64_t*) calloc(dimension * sstd_data[0].num_sstd_tableau, sizeof(int64_t));
	
	/* Initialize stuff needed for Lapack methods */
	double * g_elim_double = (double*) calloc(dimension*sstd_data[0].num_sstd_tableau, sizeof(double));
    double * g_elim_double_sing_vals = (double*) calloc(dimension, sizeof(double));
    int32_t * iwork = (int32_t*) calloc(8*dimension, sizeof(int32_t));
    double wkopt;
    double * work;
    int32_t g_elim_double_m = sstd_data[0].num_sstd_tableau;
    int32_t g_elim_double_n = dimension, info, lwork;
    /* end lapack stuff */

	uint32_t * all_sstd_tableau_index = (uint32_t*) calloc(sstd_data[0].num_sstd_tableau, sizeof(uint32_t));
	uint8_t * index_visited = (uint8_t*) calloc(sstd_data[0].num_sstd_tableau, sizeof(uint32_t));
	for(int index = 0; index < sstd_data[0].num_sstd_tableau; index++) {
		all_sstd_tableau_index[index] = index;
	}

    //#ifdef _STRAIGHTEN_USE_QSORT_S
    //qsort_s(all_sstd_tableau_index, sstd_data[0].num_sstd_tableau, sizeof(uint32_t), sstd_index_order_column_factorial_s, sstd_data[0].sstd_column_factorial);
    //#else
    //qsort_r(all_sstd_tableau_index, sstd_data[0].num_sstd_tableau, sizeof(uint32_t), sstd_index_order_column_factorial_r, sstd_data[0].sstd_column_factorial);
    //#endif

    uint32_t outer_fac = factorial(outer);
    uint8_t * id = (uint8_t*) calloc(outer, sizeof(uint8_t));
    uint8_t * perm_storage = (uint8_t*) calloc(outer * outer_fac, sizeof(uint8_t));
    
    for(int val = 0; val < outer; val++) {
        id[val]=val+1;
    }

    // this stores all permutations of id in perm_storage
    straighten_gen_permutations(id, outer, perm_storage);
    
    struct tableau * generator_tableau = (struct tableau*) calloc(outer_fac, sizeof(struct tableau));
    set_tableau_bulk(generator_tableau, outer_fac, s_data);
    uint8_t * generator_tableau_entries_ptr = generator_tableau[0].entries;

    //openblas_set_num_threads(1);

    int sstd_index = 0;
    int rank = 0;
    uint32_t num_computed=0;
    //uint32_t rank_incremented=0;
    uint32_t offset;
    uint32_t increment=0;
    uint32_t no_increase = 0;
    uint32_t no_increase_bound = lower_bound_increment;
    

    while(rank < dimension) {
	    while(num_computed < sstd_data[0].num_sstd_tableau && rank < dimension) {
	    	index_visited[sstd_index] = 1;
	    	num_computed++;
	    	no_increase++;
	    	if((num_computed % 100) == 0) {
	    		straighten_log(STRAIGHTEN_VINFO, "Semistandard tableau %u processed.", num_computed);
	    	}
	    	else {
	    		straighten_log(STRAIGHTEN_VVINFO, "Semistandard tableau %u processed.", num_computed);
	    	}
	    	for(int perm = 0; perm < outer_fac; perm++) {
	    		for(int box = 0; box < s_data[0].num_boxes; box++){
	    			generator_tableau[perm].entries[box] = perm_storage[(perm * outer) + sstd_data[0].all_sstd_tableau[all_sstd_tableau_index[sstd_index]].entries[box] - 1];
	    		}

	    		generator_tableau[perm].coefficient = 1;
	    		dictionary_straighten(&generator_tableau[perm], s_data);
	    	}   

	        tableau_coeff_simplify(generator_tableau, outer_fac, s_data);

	        //straighten the tableau
	        straighten_array_to_sstd_basis_threaded_cached_int64(generator_tableau, outer_fac, sstd_data, s_data, straighten_result, 0, 0);

	        //rank_incremented = 0;
	        if(array_nonzero(straighten_result, sstd_data[0].num_sstd_tableau)) {
	        	if(rank == 0) {
	        		memcpy(isotypic_basis, straighten_result, sstd_data[0].num_sstd_tableau * sizeof(int64_t));
	        		rank = 1;
	        		isotypic_basis_index[0] = all_sstd_tableau_index[sstd_index];

	        		//rank_incremented = 1;

	        		straighten_log(STRAIGHTEN_VINFO, "Adding %u to isotypic basis. Rank is now %d.", all_sstd_tableau_index[sstd_index], rank);

	        		no_increase = 0;
	        	}
	        	else {
	        		g_elim_double_m = sstd_data[0].num_sstd_tableau - sstd_data[0].lower_bound_sstd;

	        		for(int row = 0; row < rank; row++) {
	        			for(int col = sstd_data[0].lower_bound_sstd; col < sstd_data[0].num_sstd_tableau; col++) {
	        				if(isotypic_basis[row*sstd_data[0].num_sstd_tableau + col] < precision_loss_upperbound && isotypic_basis[row*sstd_data[0].num_sstd_tableau + col] > precision_loss_lowerbound) {
	        					g_elim_double[row*g_elim_double_m + (col-sstd_data[0].lower_bound_sstd)] = (double) isotypic_basis[row*sstd_data[0].num_sstd_tableau + col];
	        				}
	        				else {
	        					straighten_log(STRAIGHTEN_FATAL, "Value in int64_t array too large to fit in a double without loss of precision.");
	                            exit(EXIT_FAILURE);
	        				}
	        			}
	        		}
	        		for(int col = sstd_data[0].lower_bound_sstd; col < sstd_data[0].num_sstd_tableau; col++) {
	        			if(straighten_result[col] < precision_loss_upperbound && straighten_result[col] > precision_loss_lowerbound) {
	    					g_elim_double[rank*g_elim_double_m + (col-sstd_data[0].lower_bound_sstd)] = (double) straighten_result[col];
	    				}
	    				else {
	    					straighten_log(STRAIGHTEN_FATAL, "Value in int64_t array too large to fit in a double without loss of precision.");
	                        exit(EXIT_FAILURE);
	    				}
	        		}
					
					// call the lapack svd to get the rank of g_elim double
	                // note that the accuracy here is not 100% guaranteed! this is why we perform an integer only rank calculation at the end to verify
	                g_elim_double_n = rank+1;

	                // NOTE: Important! The LAPACK routines expect things to be stored in column major (so this is why m=sstd_data[0].num_sstd_tableau - sstd_data[0].lower_bound_sstd, n=rank+1)
	                lwork = -1;
	                LAPACK_dgesdd("N", &g_elim_double_m, &g_elim_double_n, g_elim_double, &g_elim_double_m, g_elim_double_sing_vals, NULL, &g_elim_double_m, NULL, &g_elim_double_n, &wkopt, &lwork, iwork, &info);
	                lwork = (int32_t)wkopt;
	                work = (double*)calloc(lwork, sizeof(double));
	                LAPACK_dgesdd("N", &g_elim_double_m, &g_elim_double_n, g_elim_double, &g_elim_double_m, g_elim_double_sing_vals, NULL, &g_elim_double_m, NULL, &g_elim_double_n, work, &lwork, iwork, &info);
	                free(work);

	                if(round(g_elim_double_sing_vals[rank]) != 0 && 0) {
	                	memcpy(isotypic_basis + (rank*sstd_data[0].num_sstd_tableau), straighten_result, sstd_data[0].num_sstd_tableau * sizeof(int64_t));
	                	isotypic_basis_index[rank] = all_sstd_tableau_index[sstd_index];
	                	rank++;
	                	
	                	/*if(rank==20) {
	                		openblas_set_num_threads(2);
	                	}
	                	if(rank==50) {
	                		openblas_set_num_threads(4);
	                	}
	                	if(rank==80) {
	                		openblas_set_num_threads(8);
	                	} */
	        			straighten_log(STRAIGHTEN_VINFO, "Adding %u to isotypic basis. Rank is now %d.", all_sstd_tableau_index[sstd_index], rank);

	        			//rank_incremented = 1;
	        			no_increase = 0;
	                }
	                else if(round(g_elim_double_sing_vals[rank]) != 0) {
	                	printf("Tableau %d and %d are lin indep\n", all_sstd_tableau_index[sstd_index], isotypic_basis_index[0]);
	                }
	                else {
	                	printf("Tableau %d and %d are lin dep\n", all_sstd_tableau_index[sstd_index], isotypic_basis_index[0]);
	                	print_tableau(&sstd_data[0].all_sstd_tableau[isotypic_basis_index[0]], s_data, 1,1,1,0);
	                	print_tableau(&sstd_data[0].all_sstd_tableau[all_sstd_tableau_index[sstd_index]], s_data, 1,1,1,0);
	                	print_tableau(&sstd_data[0].all_sstd_tableau[229], s_data, 1,1,1,0);
	                	print_tableau(&sstd_data[0].all_sstd_tableau[297], s_data, 1,1,1,0);
	                	print_tableau(&sstd_data[0].all_sstd_tableau[298], s_data, 1,1,1,0);
	                	print_tableau(&sstd_data[0].all_sstd_tableau[299], s_data, 1,1,1,0);
	                	//printf("%d %d %d %d \n", isotypic_basis[229], isotypic_basis[297], isotypic_basis[298], isotypic_basis[299]);
	                	//printf("%d %d %d %d \n", straighten_result[229], straighten_result[297], straighten_result[298], straighten_result[299]);
	                	getchar();
	                }
	        	}
	        	memset(straighten_result, 0, sstd_data[0].num_sstd_tableau * sizeof(int64_t));
	        }
	        else {
	        	printf("SSTD tableau %d results in zero.\n", all_sstd_tableau_index[sstd_index]);
	        	print_tableau(&sstd_data[0].all_sstd_tableau[all_sstd_tableau_index[sstd_index]], s_data, 1,1,1,0);
	        }

	        increment = 1;
	        // attempt to increment sstd_index after a rank increase to a index where the column factorial is different
	        /*
	        if(rank_incremented) {
	        	offset = 1;
	        	while(offset < sstd_data[0].num_sstd_tableau) {
	        		if(index_visited[(sstd_index + offset) % sstd_data[0].num_sstd_tableau] == 0 && sstd_data[0].sstd_column_factorial[all_sstd_tableau_index[sstd_index]] != sstd_data[0].sstd_column_factorial[all_sstd_tableau_index[(sstd_index + offset) % sstd_data[0].num_sstd_tableau]]) {
	        			sstd_index = (sstd_index + offset) % sstd_data[0].num_sstd_tableau;
	        			offset = sstd_data[0].num_sstd_tableau;
	        			increment = 0;
	        		}
	        		offset++;
	        	}
	        }*/
	        
	        // this will always increment sstd_index if the rank was not increased - or if the increment after a rank increase was unsuccessful
	        if(increment) {
		        offset = 1;
		        while(offset < sstd_data[0].num_sstd_tableau) {
		        	if(index_visited[(sstd_index + offset) % sstd_data[0].num_sstd_tableau] == 0) {
		        		sstd_index = (sstd_index + offset) % sstd_data[0].num_sstd_tableau;
		        		offset = sstd_data[0].num_sstd_tableau;	        		
		        	}
		        	offset++;
		        }
	    	}

	    	// if loop has gone for too long without finding a new linearly independent vector then we should break out of the loop and decrease the lower bound
	    	/*
	    	if(sstd_data[0].num_sstd_tableau > 8000 && (no_increase > no_increase_bound && sstd_data[0].lower_bound_sstd != 0)) {
	    		num_computed = sstd_data[0].num_sstd_tableau;
	    		no_increase_bound = (uint32_t)(((double)no_increase_bound) * 1.5f);
	    		straighten_log(STRAIGHTEN_WARNING, "No new linearly independent vectors found in %d rounds, decreasing lower bound.",no_increase);
	    	}*/
	    }

	    if(rank < dimension) {
	    	if(straighten_unlikely(sstd_data[0].lower_bound_sstd == 0 || 1)) {
	    		straighten_log(STRAIGHTEN_FATAL, "Could not find %d linearly independent basis vectors for the isotypic subspace.", dimension);
	    		exit(EXIT_FAILURE);
	    	}

	    	if(sstd_data[0].lower_bound_sstd < (2*lower_bound_increment)) {
	    		sstd_data[0].lower_bound_sstd = 0;
	    	}
	    	else {
	    		sstd_data[0].lower_bound_sstd -= lower_bound_increment;
	    	}

	    	straighten_log(STRAIGHTEN_WARNING, "Reducing the semistandard lower bound to %d and recomputing.", sstd_data[0].lower_bound_sstd);
	    	sstd_index = 0;
	    	rank = 0;
	    	num_computed=0;
	    	memset(index_visited, 0, sstd_data[0].num_sstd_tableau * sizeof(uint32_t));
	    	clear_straightening_cache(sstd_data, 0);
	    }
	}
	
	straighten_log(STRAIGHTEN_VVINFO, "Current semistandard lower bound is %d.", sstd_data[0].lower_bound_sstd);
	int interim_rank = rank;
	int32_t l,u;
	while(interim_rank > 0) {
		l=sstd_data[0].lower_bound_sstd;
		u=sstd_data[0].num_sstd_tableau-1;
		while(l < (u-1)) {
			sstd_data[0].lower_bound_sstd = l + (u - l) / 2;
			g_elim_double_m = sstd_data[0].num_sstd_tableau - sstd_data[0].lower_bound_sstd;

			for(int row = 0; row < rank; row++) {
				for(int col = sstd_data[0].lower_bound_sstd; col < sstd_data[0].num_sstd_tableau; col++) {
					g_elim_double[row*g_elim_double_m + (col-sstd_data[0].lower_bound_sstd)] = (double) isotypic_basis[row*sstd_data[0].num_sstd_tableau + col];
				}
			}
			
			// call the lapack svd to get the rank of g_elim double
	        // note that the accuracy here is not 100% guaranteed! this is why we perform an integer only rank calculation at the end to verify
	        g_elim_double_n = rank;

	        lwork = -1;
	        LAPACK_dgesdd("N", &g_elim_double_m, &g_elim_double_n, g_elim_double, &g_elim_double_m, g_elim_double_sing_vals, NULL, &g_elim_double_m, NULL, &g_elim_double_n, &wkopt, &lwork, iwork, &info);
	        lwork = (int32_t)wkopt;
	        work = (double*)calloc(lwork, sizeof(double));
	        LAPACK_dgesdd("N", &g_elim_double_m, &g_elim_double_n, g_elim_double, &g_elim_double_m, g_elim_double_sing_vals, NULL, &g_elim_double_m, NULL, &g_elim_double_n, work, &lwork, iwork, &info);
	        free(work);

	        if(round(g_elim_double_sing_vals[interim_rank-1]) == 0) {
	        	u = sstd_data[0].lower_bound_sstd;
	        }
	        else {
	        	l = sstd_data[0].lower_bound_sstd;
	        }
		}

		if(u==(sstd_data[0].num_sstd_tableau-1) && interim_rank == 1) {
			int zero = 1;
			for(int row = 0; row < rank; row++) {
				if(isotypic_basis[row*sstd_data[0].num_sstd_tableau + (sstd_data[0].num_sstd_tableau-1)] != 0) {
					zero = 0;
				}
			}
			if(!zero) {
				sstd_data[0].lower_bound_sstd = u;
			}
			else {
				sstd_data[0].lower_bound_sstd = l;
			}

		}
		else {
			sstd_data[0].lower_bound_sstd = l;
		}
		optimal_rcoeff_sstd_index[rank - interim_rank] = sstd_data[0].lower_bound_sstd;
		straighten_log(STRAIGHTEN_WARNING, "Optimal semistandard lower bound is %d.", sstd_data[0].lower_bound_sstd);
		sstd_data[0].lower_bound_sstd++;
		interim_rank--;
	}
	sstd_data[0].lower_bound_sstd = optimal_rcoeff_sstd_index[0];
	/*
	int32_t * display = (int32_t*) calloc(dimension * dimension, sizeof(int32_t));
	for(int row = 0; row < rank; row++) {
		for(int col=0; col < rank; col++) {
			display[(row * rank) + col] = isotypic_basis[row*sstd_data[0].num_sstd_tableau + optimal_rcoeff_sstd_index[col]];
		}
	}
	print_2d_array(display, rank, rank);*/

	clear_straightening_cache(sstd_data, 0);	
	load_check_isotypic_basis(sstd_data, s_data, dimension, isotypic_basis_index, isotypic_basis);
	clear_straightening_cache(sstd_data, 0);

	//free data
	free(straighten_result);
	free(isotypic_basis);
	free(g_elim_double);
    free(g_elim_double_sing_vals);
    free(iwork);
    free(all_sstd_tableau_index);
    free(id);
    free(perm_storage);
    free(generator_tableau_entries_ptr);
    free(generator_tableau);

	return sstd_data[0].lower_bound_sstd;
}

uint32_t construct_isotypic_basis(struct sstd_data_c * sstd_data, struct shape_data_c * s_data, uint32_t dimension, uint32_t * optimal_rcoeff_sstd_index, uint32_t * isotypic_basis_index) {
	uint32_t lower_bound_increment;
	if(sstd_data[0].num_sstd_tableau < 10) {
		lower_bound_increment = sstd_data[0].num_sstd_tableau;
	}
	else {
		lower_bound_increment = (uint32_t)(0.25f * (double)sstd_data[0].num_sstd_tableau);
	} 
	sstd_data[0].lower_bound_sstd = sstd_data[0].num_sstd_tableau - lower_bound_increment;

	//uint8_t inner = sstd_data[0].content[0];
	uint8_t outer = sstd_data[0].max_filling_value;
	int64_t precision_loss_upperbound = 9007199254740991; //2^53 - 1
	int64_t precision_loss_lowerbound = -9007199254740993; //-2^53 + 1

	int64_t * straighten_result = (int64_t*) calloc(_STRAIGHTEN_NUM_THREADS * sstd_data[0].num_sstd_tableau, sizeof(int64_t));
	int64_t * isotypic_basis = (int64_t*) calloc(dimension * sstd_data[0].num_sstd_tableau, sizeof(int64_t));
	
	/* Initialize stuff needed for Lapack methods */
	double * g_elim_double = (double*) calloc(dimension*sstd_data[0].num_sstd_tableau, sizeof(double));
    double * g_elim_double_sing_vals = (double*) calloc(dimension, sizeof(double));
    int32_t * iwork = (int32_t*) calloc(8*dimension, sizeof(int32_t));
    double wkopt;
    double * work;
    int32_t g_elim_double_m = sstd_data[0].num_sstd_tableau;
    int32_t g_elim_double_n = dimension, info, lwork;
    /* end lapack stuff */

	uint32_t * all_sstd_tableau_index = (uint32_t*) calloc(sstd_data[0].num_sstd_tableau, sizeof(uint32_t));
	uint8_t * index_visited = (uint8_t*) calloc(sstd_data[0].num_sstd_tableau, sizeof(uint32_t));
	for(int index = 0; index < sstd_data[0].num_sstd_tableau; index++) {
		all_sstd_tableau_index[index] = index;
	}

    #ifdef _STRAIGHTEN_USE_QSORT_S
    qsort_s(all_sstd_tableau_index, sstd_data[0].num_sstd_tableau, sizeof(uint32_t), sstd_index_order_column_factorial_s, sstd_data[0].sstd_column_factorial);
    #else
    qsort_r(all_sstd_tableau_index, sstd_data[0].num_sstd_tableau, sizeof(uint32_t), sstd_index_order_column_factorial_r, sstd_data[0].sstd_column_factorial);
    #endif

    uint32_t outer_fac = factorial(outer);
    uint8_t * id = (uint8_t*) calloc(outer, sizeof(uint8_t));
    uint8_t * perm_storage = (uint8_t*) calloc(outer * outer_fac, sizeof(uint8_t));
    
    for(int val = 0; val < outer; val++) {
        id[val]=val+1;
    }

    // this stores all permutations of id in perm_storage
    straighten_gen_permutations(id, outer, perm_storage);
    
    struct tableau * generator_tableau = (struct tableau*) calloc(outer_fac, sizeof(struct tableau));
    set_tableau_bulk(generator_tableau, outer_fac, s_data);
    uint8_t * generator_tableau_entries_ptr = generator_tableau[0].entries;

    //openblas_set_num_threads(1);

    int sstd_index = 0;
    int rank = 0;
    uint32_t num_computed=0;
    uint32_t rank_incremented=0;
    uint32_t offset;
    uint32_t increment=0;
    uint32_t no_increase = 0;
    uint32_t no_increase_bound = lower_bound_increment;
    

    while(rank < dimension) {
	    while(num_computed < sstd_data[0].num_sstd_tableau && rank < dimension) {
	    	index_visited[sstd_index] = 1;
	    	num_computed++;
	    	no_increase++;
	    	if((num_computed % 100) == 0) {
	    		straighten_log(STRAIGHTEN_VINFO, "Semistandard tableau %u processed.", num_computed);
	    	}
	    	else {
	    		straighten_log(STRAIGHTEN_VVINFO, "Semistandard tableau %u processed.", num_computed);
	    	}
	    	for(int perm = 0; perm < outer_fac; perm++) {
	    		for(int box = 0; box < s_data[0].num_boxes; box++){
	    			generator_tableau[perm].entries[box] = perm_storage[(perm * outer) + sstd_data[0].all_sstd_tableau[all_sstd_tableau_index[sstd_index]].entries[box] - 1];
	    		}

	    		generator_tableau[perm].coefficient = 1;
	    		dictionary_straighten(&generator_tableau[perm], s_data);
	    	}   

	        tableau_coeff_simplify(generator_tableau, outer_fac, s_data);

	        //straighten the tableau
	        straighten_array_to_sstd_basis_threaded_cached_int64(generator_tableau, outer_fac, sstd_data, s_data, straighten_result, 0, 0);

	        rank_incremented = 0;
	        if(array_nonzero(straighten_result, sstd_data[0].num_sstd_tableau)) {
	        	if(rank == 0) {
	        		memcpy(isotypic_basis, straighten_result, sstd_data[0].num_sstd_tableau * sizeof(int64_t));
	        		rank = 1;
	        		isotypic_basis_index[0] = all_sstd_tableau_index[sstd_index];

	        		rank_incremented = 1;

	        		straighten_log(STRAIGHTEN_VINFO, "Adding %u to isotypic basis. Rank is now %d.", all_sstd_tableau_index[sstd_index], rank);

	        		no_increase = 0;
	        	}
	        	else {
	        		g_elim_double_m = sstd_data[0].num_sstd_tableau - sstd_data[0].lower_bound_sstd;

	        		for(int row = 0; row < rank; row++) {
	        			for(int col = sstd_data[0].lower_bound_sstd; col < sstd_data[0].num_sstd_tableau; col++) {
	        				if(isotypic_basis[row*sstd_data[0].num_sstd_tableau + col] < precision_loss_upperbound && isotypic_basis[row*sstd_data[0].num_sstd_tableau + col] > precision_loss_lowerbound) {
	        					g_elim_double[row*g_elim_double_m + (col-sstd_data[0].lower_bound_sstd)] = (double) isotypic_basis[row*sstd_data[0].num_sstd_tableau + col];
	        				}
	        				else {
	        					straighten_log(STRAIGHTEN_FATAL, "Value in int64_t array too large to fit in a double without loss of precision.");
	                            exit(EXIT_FAILURE);
	        				}
	        			}
	        		}
	        		for(int col = sstd_data[0].lower_bound_sstd; col < sstd_data[0].num_sstd_tableau; col++) {
	        			if(straighten_result[col] < precision_loss_upperbound && straighten_result[col] > precision_loss_lowerbound) {
	    					g_elim_double[rank*g_elim_double_m + (col-sstd_data[0].lower_bound_sstd)] = (double) straighten_result[col];
	    				}
	    				else {
	    					straighten_log(STRAIGHTEN_FATAL, "Value in int64_t array too large to fit in a double without loss of precision.");
	                        exit(EXIT_FAILURE);
	    				}
	        		}
					
					// call the lapack svd to get the rank of g_elim double
	                // note that the accuracy here is not 100% guaranteed! this is why we perform an integer only rank calculation at the end to verify
	                g_elim_double_n = rank+1;

	                // NOTE: Important! The LAPACK routines expect things to be stored in column major (so this is why m=sstd_data[0].num_sstd_tableau - sstd_data[0].lower_bound_sstd, n=rank+1)
	                lwork = -1;
	                LAPACK_dgesdd("N", &g_elim_double_m, &g_elim_double_n, g_elim_double, &g_elim_double_m, g_elim_double_sing_vals, NULL, &g_elim_double_m, NULL, &g_elim_double_n, &wkopt, &lwork, iwork, &info);
	                lwork = (int32_t)wkopt;
	                work = (double*)calloc(lwork, sizeof(double));
	                LAPACK_dgesdd("N", &g_elim_double_m, &g_elim_double_n, g_elim_double, &g_elim_double_m, g_elim_double_sing_vals, NULL, &g_elim_double_m, NULL, &g_elim_double_n, work, &lwork, iwork, &info);
	                free(work);

	                if(round(g_elim_double_sing_vals[rank]) != 0) {
	                	memcpy(isotypic_basis + (rank*sstd_data[0].num_sstd_tableau), straighten_result, sstd_data[0].num_sstd_tableau * sizeof(int64_t));
	                	isotypic_basis_index[rank] = all_sstd_tableau_index[sstd_index];
	                	rank++;

	        			straighten_log(STRAIGHTEN_VINFO, "Adding %u to isotypic basis. Rank is now %d.", all_sstd_tableau_index[sstd_index], rank);

	        			rank_incremented = 1;
	        			no_increase = 0;
	                }
	        	}
	        	memset(straighten_result, 0, sstd_data[0].num_sstd_tableau * sizeof(int64_t));
	        }

	        increment = 1;
	        // attempt to increment sstd_index after a rank increase to a index where the column factorial is different
	        
	        if(rank_incremented) {
	        	offset = 1;
	        	while(offset < sstd_data[0].num_sstd_tableau) {
	        		if(index_visited[(sstd_index + offset) % sstd_data[0].num_sstd_tableau] == 0 && sstd_data[0].sstd_column_factorial[all_sstd_tableau_index[sstd_index]] != sstd_data[0].sstd_column_factorial[all_sstd_tableau_index[(sstd_index + offset) % sstd_data[0].num_sstd_tableau]]) {
	        			sstd_index = (sstd_index + offset) % sstd_data[0].num_sstd_tableau;
	        			offset = sstd_data[0].num_sstd_tableau;
	        			increment = 0;
	        		}
	        		offset++;
	        	}
	        }
	        
	        // this will always increment sstd_index if the rank was not increased - or if the increment after a rank increase was unsuccessful
	        if(increment) {
		        offset = 1;
		        while(offset < sstd_data[0].num_sstd_tableau) {
		        	if(index_visited[(sstd_index + offset) % sstd_data[0].num_sstd_tableau] == 0) {
		        		sstd_index = (sstd_index + offset) % sstd_data[0].num_sstd_tableau;
		        		offset = sstd_data[0].num_sstd_tableau;	        		
		        	}
		        	offset++;
		        }
	    	}

	    	// if loop has gone for too long without finding a new linearly independent vector then we should break out of the loop and decrease the lower bound
	    	
	    	if(sstd_data[0].num_sstd_tableau > 8000 && (no_increase > no_increase_bound && sstd_data[0].lower_bound_sstd != 0)) {
	    		num_computed = sstd_data[0].num_sstd_tableau;
	    		no_increase_bound = (uint32_t)(((double)no_increase_bound) * 1.5f);
	    		straighten_log(STRAIGHTEN_WARNING, "No new linearly independent vectors found in %d rounds, decreasing lower bound.",no_increase);
	    	}
	    }

	    if(rank < dimension) {
	    	if(straighten_unlikely(sstd_data[0].lower_bound_sstd == 0)) {
	    		straighten_log(STRAIGHTEN_FATAL, "Could not find %d linearly independent basis vectors for the isotypic subspace.", dimension);
	    		exit(EXIT_FAILURE);
	    	}

	    	if(sstd_data[0].lower_bound_sstd < (2*lower_bound_increment)) {
	    		sstd_data[0].lower_bound_sstd = 0;
	    	}
	    	else {
	    		sstd_data[0].lower_bound_sstd -= lower_bound_increment;
	    	}

	    	straighten_log(STRAIGHTEN_WARNING, "Reducing the semistandard lower bound to %d and recomputing.", sstd_data[0].lower_bound_sstd);
	    	sstd_index = 0;
	    	rank = 0;
	    	num_computed=0;
	    	memset(index_visited, 0, sstd_data[0].num_sstd_tableau * sizeof(uint32_t));
	    	clear_straightening_cache(sstd_data, 0);
	    }
	}
	
	straighten_log(STRAIGHTEN_VVINFO, "Current semistandard lower bound is %d.", sstd_data[0].lower_bound_sstd);
	int interim_rank = rank;
	int32_t l,u;
	while(interim_rank > 0) {
		l=sstd_data[0].lower_bound_sstd;
		u=sstd_data[0].num_sstd_tableau-1;
		while(l < (u-1)) {
			sstd_data[0].lower_bound_sstd = l + (u - l) / 2;
			g_elim_double_m = sstd_data[0].num_sstd_tableau - sstd_data[0].lower_bound_sstd;

			for(int row = 0; row < rank; row++) {
				for(int col = sstd_data[0].lower_bound_sstd; col < sstd_data[0].num_sstd_tableau; col++) {
					g_elim_double[row*g_elim_double_m + (col-sstd_data[0].lower_bound_sstd)] = (double) isotypic_basis[row*sstd_data[0].num_sstd_tableau + col];
				}
			}
			
			// call the lapack svd to get the rank of g_elim double
	        // note that the accuracy here is not 100% guaranteed! this is why we perform an integer only rank calculation at the end to verify
	        g_elim_double_n = rank;

	        lwork = -1;
	        LAPACK_dgesdd("N", &g_elim_double_m, &g_elim_double_n, g_elim_double, &g_elim_double_m, g_elim_double_sing_vals, NULL, &g_elim_double_m, NULL, &g_elim_double_n, &wkopt, &lwork, iwork, &info);
	        lwork = (int32_t)wkopt;
	        work = (double*)calloc(lwork, sizeof(double));
	        LAPACK_dgesdd("N", &g_elim_double_m, &g_elim_double_n, g_elim_double, &g_elim_double_m, g_elim_double_sing_vals, NULL, &g_elim_double_m, NULL, &g_elim_double_n, work, &lwork, iwork, &info);
	        free(work);

	        if(round(g_elim_double_sing_vals[interim_rank-1]) == 0) {
	        	u = sstd_data[0].lower_bound_sstd;
	        }
	        else {
	        	l = sstd_data[0].lower_bound_sstd;
	        }
		}

		if(u==(sstd_data[0].num_sstd_tableau-1) && interim_rank == 1) {
			int zero = 1;
			for(int row = 0; row < rank; row++) {
				if(isotypic_basis[row*sstd_data[0].num_sstd_tableau + (sstd_data[0].num_sstd_tableau-1)] != 0) {
					zero = 0;
				}
			}
			if(!zero) {
				sstd_data[0].lower_bound_sstd = u;
			}
			else {
				sstd_data[0].lower_bound_sstd = l;
			}

		}
		else {
			sstd_data[0].lower_bound_sstd = l;
		}
		optimal_rcoeff_sstd_index[rank - interim_rank] = sstd_data[0].lower_bound_sstd;
		straighten_log(STRAIGHTEN_VINFO, "Optimal semistandard lower bound is %d.", sstd_data[0].lower_bound_sstd);
		sstd_data[0].lower_bound_sstd++;
		interim_rank--;
	}
	sstd_data[0].lower_bound_sstd = optimal_rcoeff_sstd_index[0];

	clear_straightening_cache(sstd_data, 0);	
	load_check_isotypic_basis(sstd_data, s_data, dimension, isotypic_basis_index, isotypic_basis);
	clear_straightening_cache(sstd_data, 0);

	//free data
	free(straighten_result);
	free(isotypic_basis);
	free(g_elim_double);
    free(g_elim_double_sing_vals);
    free(iwork);
    free(all_sstd_tableau_index);
    free(id);
    free(perm_storage);
    free(generator_tableau_entries_ptr);
    free(generator_tableau);

	return sstd_data[0].lower_bound_sstd;
}


void load_check_isotypic_basis(struct sstd_data_c * sstd_data, struct shape_data_c * s_data, int dimension, uint32_t * isotypic_basis_index, int64_t * isotypic_basis) {
	int old_lower_bound_sstd = sstd_data[0].lower_bound_sstd;
	sstd_data[0].lower_bound_sstd = 0;
	uint8_t outer = sstd_data[0].max_filling_value;
	uint32_t outer_fac = factorial(outer);
    uint8_t * id = (uint8_t*) calloc(outer, sizeof(uint8_t));
    uint8_t * perm_storage = (uint8_t*) calloc(outer * outer_fac, sizeof(uint8_t));
    
    for(int val = 0; val < outer; val++) {
        id[val]=val+1;
    }

    int64_t * straighten_result = (int64_t*) calloc(_STRAIGHTEN_NUM_THREADS * sstd_data[0].num_sstd_tableau, sizeof(int64_t));

    // this stores all permutations of id in perm_storage
    straighten_gen_permutations(id, outer, perm_storage);
    
    struct tableau * generator_tableau = (struct tableau*) calloc(outer_fac, sizeof(struct tableau));
    set_tableau_bulk(generator_tableau, outer_fac, s_data);
    uint8_t * generator_tableau_entries_ptr = generator_tableau[0].entries;

    for(int tab = 0; tab < dimension; tab++) {
    	straighten_log(STRAIGHTEN_VINFO, "Loading candidate basis vector %d for integer Gaussian elimination.", tab);
    	for(int perm = 0; perm < outer_fac; perm++) {
    		for(int box = 0; box < s_data[0].num_boxes; box++){
    			generator_tableau[perm].entries[box] = perm_storage[(perm * outer) + sstd_data[0].all_sstd_tableau[isotypic_basis_index[tab]].entries[box] - 1];
    		}

    		generator_tableau[perm].coefficient = 1;
    		dictionary_straighten(&generator_tableau[perm], s_data);
    	}   

        tableau_coeff_simplify(generator_tableau, outer_fac, s_data);

        //straighten the tableau
        straighten_array_to_sstd_basis_threaded_cached_int64(generator_tableau, outer_fac, sstd_data, s_data, straighten_result, 0, 0);

        memcpy(isotypic_basis + (tab*sstd_data[0].num_sstd_tableau), straighten_result, sstd_data[0].num_sstd_tableau * sizeof(int64_t));
        memset(straighten_result, 0, sstd_data[0].num_sstd_tableau * sizeof(int64_t));	    
    }


	mpz_t* g_elim = (mpz_t*) calloc(dimension*sstd_data[0].num_sstd_tableau, sizeof(mpz_t));
    for(int init = 0; init < dimension*sstd_data[0].num_sstd_tableau; init++) {
        mpz_init2(g_elim[init], 64);
    }

    int64_t val = 0;
    for(int row = 0; row < dimension; row++) {
    	for(int col = 0; col < sstd_data[0].num_sstd_tableau; col++) {
    		val = isotypic_basis[(row * sstd_data[0].num_sstd_tableau) + col];
    		if(val > INT_MAX || val < INT_MIN) {
    			mpz_set_sll(g_elim[(col * dimension) + row], val);
    		}
    		else {
    			mpz_set_si(g_elim[(col * dimension) + row], val);
    		}
    	}
    }

    barreis_gauss_elim_mpz_inplace(g_elim, sstd_data[0].num_sstd_tableau, dimension);

    if(mpz_cmp_si(g_elim[(dimension * dimension) - 1],0) == 0) {
    	//the last pivot is zero, this means the supplied isotypic "basis" is not linearly independent (fatal error)
    	straighten_log(STRAIGHTEN_FATAL, "The supplied basis was found to be not linearly independent when performing Gaussian elimination over the integers.");
		exit(EXIT_FAILURE);
    }
    else {
    	straighten_log(STRAIGHTEN_INFO, "The supplied basis was found to be linearly independent when performing Gaussian elimination over the integers.");
    }

    //print_2d_array(g_elim, dimension, dimension);

    sstd_data[0].lower_bound_sstd = old_lower_bound_sstd;

    //free data
	free(straighten_result);
    free(id);
    free(perm_storage);
    free(generator_tableau_entries_ptr);
    free(generator_tableau);
    for(int init = 0; init < dimension*sstd_data[0].num_sstd_tableau; init++) {
        mpz_clear(g_elim[init]);
    }
    free(g_elim);
}


/*
 * In-place fraction-free (Bareiss-style) Gaussian elimination on a matrix of
 * GMP integers.
 *
 * g_elim is a row-major g_rows x g_cols matrix: entry (r, c) lives at
 * g_elim[r*g_cols + c]. Columns are scanned left to right; for every column
 * that has a nonzero entry at or below the current pivot row, that entry is
 * swapped into the pivot row and every row below it is updated with the
 * cross-multiplication step, divided exactly by the previous pivot.
 * Pivot search stops once the pivot row reaches the last row of the matrix.
 */
void barreis_gauss_elim_mpz_inplace(mpz_t* g_elim, int g_rows, int g_cols) {
    mpz_t prev_pivot;
    mpz_t lhs;
    mpz_t rhs;
    mpz_init(prev_pivot);
    mpz_init(lhs);
    mpz_init(rhs);

    int pivot_row = 0;

    // the pivot row never decreases, so once it hits the last row no later column can do any work
    for(int k = 0; k < g_cols && pivot_row < (g_rows - 1); k++) {
        // first row at or below pivot_row with a nonzero entry in column k
        int found = pivot_row;
        while((found < g_rows) && (mpz_cmp_si(g_elim[found*g_cols + k], 0) == 0)) {
            found++;
        }

        // no pivot in this column: leave it alone and try the next one
        if(found >= g_rows) {
            continue;
        }

        // bring the pivot into place
        if(found > pivot_row) {
            for(int c = 0; c < g_cols; c++) {
                mpz_swap(g_elim[pivot_row*g_cols + c], g_elim[found*g_cols + c]);
            }
        }

        mpz_t * pivot_line = g_elim + pivot_row*g_cols;

        // fraction-free update of every row below the pivot
        for(int r = pivot_row + 1; r < g_rows; r++) {
            mpz_t * line = g_elim + r*g_cols;

            for(int c = k + 1; c < g_cols; c++) {
                mpz_mul(lhs, pivot_line[k], line[c]);
                mpz_mul(rhs, line[k], pivot_line[c]);
                mpz_sub(line[c], lhs, rhs);
                // the very first pivot has no predecessor to divide by
                if(pivot_row > 0) {
                    mpz_divexact(line[c], line[c], prev_pivot);
                }
            }

            // clear the entries from the pivot row's diagonal position up to and including column k
            for(int c = pivot_row; c <= k; c++) {
                mpz_set_si(line[c], 0);
            }
        }

        // this pivot becomes the exact divisor for the next elimination step
        mpz_set(prev_pivot, pivot_line[k]);
        pivot_row++;
    }

    // zero every row strictly below the row following the last pivot
    for(int r = pivot_row + 1; r < g_rows; r++) {
        for(int c = 0; c < g_cols; c++) {
            mpz_set_si(g_elim[r*g_cols + c], 0);
        }
    }

    mpz_clear(prev_pivot);
    mpz_clear(lhs);
    mpz_clear(rhs);
}