/*
 * Demo entry point.
 *
 * Picks a random element count in the range 0..10, creates an int array for
 * it, passes the array to fill_int_array() and print_int_array(), and prints
 * the array's size in bytes, sizeof(int), and the element count.
 *
 * argc/argv are accepted but not used.  Always returns 0.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    srand((unsigned)time(NULL));

    /* Requested number of elements; 0 is a possible outcome. */
    int array_size = rand() % 11;

    /*
     * A variable-length array must have a strictly positive length (a zero
     * length is undefined behaviour), so always reserve at least one slot
     * while still reporting the requested size below.
     */
    int random_array[array_size > 0 ? array_size : 1];

    /* sizeof yields size_t, which has to be printed with %zu rather than %i. */
    printf("%zu, %zu\n", (size_t)array_size * sizeof(int), sizeof(int));

    /*
     * NOTE(review): both helpers receive only the array pointer; how they
     * learn the element count is not visible from here -- confirm against
     * their definitions.
     */
    fill_int_array(random_array);
    printf("The length of the array is %i\n", array_size);
    print_int_array(random_array);

    return 0;
}
/*
 * Scan seq for candidate tRNA structures described by the spec *t.
 *
 * For every admissible pair of (aa stem left start, aa stem right end) the
 * aa stem is scored; if it is good enough the tu stem, d stem and ac stem
 * are searched in turn.  Each stem is scored by summing base_pair_score[]
 * over its paired positions, and every combination that passes all of the
 * thresholds held in *t is stored in (*r)[*nmatch].
 *
 * seq, seq_length      sequence to search; both are copied into each result
 * user_start, user_end search range; each is decremented by one before being
 *                      used as an index into seq
 * t                    length limits and minimum scores for the search
 * r                    address of the result array; it is grown through
 *                      realloc_trna() whenever *nmatch reaches the current
 *                      capacity (initially MAX_TRNA)
 * nmatch               out: number of results stored (reset to 0 on entry)
 * max_total_bp_score   in/out: raised to the best total base pair score
 *                      seen; it is NOT initialised here
 *
 * Returns 0 on success, or -1 if realloc_trna() fails.
 */
int do_trna_search ( char seq[],
                     int seq_length,
                     int user_start,
                     int user_end,
                     TrnaSpec *t,
                     TrnaRes ***r,
                     int *nmatch,
                     int *max_total_bp_score)
{
    /* capacity of the per-aa-stem list of tu stem candidates */
    enum { TU_MATCH_CAPACITY = 10 };

    int aa_left, aa_right, max_aa_start, aa_left_start, min_aa_end, max_aa_end;
    int aa_right_end, aa_score;
    int tu_number, tu_right, tu_left, tu_score;
    int tu_left_match[TU_MATCH_CAPACITY], tu_match_score[TU_MATCH_CAPACITY];
    int tu_right_match=0, tu_match_number;
    int i,j,start,end,intron_length;
    int ac_min_start, ac_max_start, ac_left, d_left, d_right,d_score;
    int ac_right_start, ac_right, lac, rac, ac_score, ac_right_end;
    int base_pair_score [ 25 ];
    int total_base_pair;
    int max_trna = MAX_TRNA;

    *nmatch = 0;

    /*
     * Pair score table, indexed by
     *     char_lookup[left base] + 5 * char_lookup[right base].
     * NOTE(review): presumably the entries scoring 2 are Watson-Crick pairs
     * and those scoring 1 are G-T/U wobble pairs; that depends on the
     * char_lookup encoding, which is not visible here -- confirm.
     * NOTE(review): seq[] is plain char, so on a signed-char platform a byte
     * >= 0x80 would give a negative char_lookup index -- confirm the input
     * is always 7-bit.
     */
    fill_int_array ( base_pair_score, 25, 0 );
    base_pair_score [3] = 2;
    base_pair_score [7] = 2;
    base_pair_score [11] = 2;
    base_pair_score [13] = 1;
    base_pair_score [15] = 2;
    base_pair_score [17] = 1;

    start = user_start - 1;
    end = user_end - 1;

    /* loop for all aa stem left starts */
    max_aa_start = end - ( t->min_trna_length - 1 );

    for ( aa_left_start = start; aa_left_start <= max_aa_start; aa_left_start++ ) {

        /* loop for all aa stem right ends */
        min_aa_end = aa_left_start + t->min_trna_length - 1;
        max_aa_end = MIN ( aa_left_start + t->max_trna_length + t->max_intron_length - 1, end);

        for ( aa_right_end = min_aa_end; aa_right_end <= max_aa_end; aa_right_end++ ) {

            /* get the aa score (7 pairs, walking inwards from both ends) */
            for ( aa_left = aa_left_start, aa_right = aa_right_end, aa_score = 0, i=0;
                  i<7;
                  aa_left++, aa_right--, i++ ) {
                aa_score += base_pair_score [ char_lookup [ seq [ aa_left ]] + char_lookup [ seq [ aa_right ] ] * 5 ];
            }

            if ( aa_score >= t->min_aa_score ) {

                /*
                 * do the tu loop: one candidate per permitted tu loop length.
                 * The candidate arrays hold TU_MATCH_CAPACITY entries, so stop
                 * collecting once they are full instead of writing past the
                 * end when the spec allows a wider range of loop lengths.
                 */
                for ( i = t->min_tu_loop_length, tu_number = 0;
                      i <= t->max_tu_loop_length && tu_number < TU_MATCH_CAPACITY;
                      i++ ) {
                    tu_right = aa_right;
                    tu_left = aa_right - 9 - i;
                    tu_score = 0;
                    for ( j=0; j<5; j++, tu_left++, tu_right-- ) {
                        tu_score += base_pair_score [ char_lookup [ seq [ tu_left ]] + char_lookup [ seq [ tu_right ] ] * 5 ];
                    }
                    if ( tu_score >= t->min_tu_score ) {
                        /* tu_left was advanced 5 times above; step back to the stem start */
                        tu_left_match [ tu_number ] = tu_left - 5;
                        tu_match_score [ tu_number ] = tu_score;
                        tu_right_match = aa_right;
                        tu_number++;
                    }
                }

                /* loop for all tu stems to find ac stem */
                for ( tu_match_number = 0; tu_match_number < tu_number; tu_match_number++ ) {

                    /* try all ac left starts */
                    ac_min_start = aa_left_start + t->min_aa_to_ac_length;
                    ac_max_start = MIN ( ( tu_left_match [ tu_match_number ] - t->min_aa_to_ac_length ),
                                         ( aa_left_start + t->max_aa_to_ac_length ) );

                    for ( ac_left = ac_min_start; ac_left <= ac_max_start; ac_left++ ) {

                        /* do the d stem first */
                        d_left = aa_left_start + 8;
                        d_right = ac_left -1;
                        for ( i=0, d_score = 0; i<5; i++ ) {
                            d_left++;
                            d_right--;
                            d_score += base_pair_score [ char_lookup [ seq [ d_left ]] + char_lookup [ seq [ d_right ] ] * 5 ];
                        }

                        if ( d_score >= t->min_d_score ) {

                            /* try all ac right end positions */
                            ac_right_start = MAX ( ( ac_left + t->min_acs_to_ace_length ),
                                                   ( tu_left_match [ tu_match_number ] - t->max_var_loop_length ));
                            ac_right_end = MIN ( ( ac_left + t->min_acs_to_ace_length + t->max_intron_length),
                                                 ( tu_left_match [ tu_match_number ] - 4 ));

                            for ( ac_right = ac_right_start; ac_right <= ac_right_end; ac_right++ ) {

                                lac = ac_left - 1;
                                rac = ac_right + 1;
                                for ( i=0, ac_score = 0; i<5; i++ ) {
                                    lac++;
                                    rac--;
                                    ac_score += base_pair_score [ char_lookup [ seq [ lac ]] + char_lookup [ seq [ rac ] ] * 5 ];
                                }

                                if ( ac_score >= t->min_ac_score ) {

                                    /* we have got all stems !!! */
                                    /* intron length sensible ? */
                                    intron_length = ac_right - ac_left - 16;

                                    if ( ( ( intron_length == 0 ) || ( intron_length >= t->min_intron_length ) )
                                         && ( ( aa_right_end - aa_left_start + 1 - intron_length ) <= t->max_trna_length )) {

                                        /* high enough overall base pairing score ? */
                                        total_base_pair = aa_score + ac_score + d_score + tu_match_score [ tu_match_number ];

                                        if ( total_base_pair >= t->min_total_bp_score ) {

                                            /* fudge factors to fit fortran
                                             * r->aa_right += 1;
                                             * r->ac_left += 4;
                                             * r->ac_right -= 4;
                                             * r->tu_right -= 4;
                                             * r->tu_left += 4;
                                             */
                                            (*r)[*nmatch]->seq = seq;
                                            (*r)[*nmatch]->seq_length = seq_length;
                                            (*r)[*nmatch]->aa_right = aa_right_end + 1;
                                            (*r)[*nmatch]->aa_left = aa_left_start;
                                            (*r)[*nmatch]->ac_left = ac_left + 4;
                                            (*r)[*nmatch]->ac_right = ac_right - 4;
                                            (*r)[*nmatch]->tu_right = tu_right_match - 4;
                                            (*r)[*nmatch]->tu_left = tu_left_match[tu_match_number] + 4;

                                            /* do conserved base search in an odd place ! */
                                            if ( t->min_total_cb_score ) {
                                                trna_base_scores ( (*r)[*nmatch], t );
                                                /*
                                                 * Rejected: move on to the next ac_right.
                                                 * *nmatch is not advanced, so this slot is
                                                 * reused by the next candidate.
                                                 */
                                                if ( (*r)[*nmatch]->total_cb_score < t->min_total_cb_score )
                                                    continue;
                                            }

                                            (*r)[*nmatch]->intron_length = intron_length;
                                            (*r)[*nmatch]->aa_score = aa_score;
                                            (*r)[*nmatch]->ac_score = ac_score;
                                            (*r)[*nmatch]->tu_score = tu_match_score[tu_match_number];
                                            (*r)[*nmatch]->d_score = d_score;
                                            (*r)[*nmatch]->total_bp_score = total_base_pair;

                                            if ((*r)[*nmatch]->total_bp_score > *max_total_bp_score) {
                                                *max_total_bp_score = (*r)[*nmatch]->total_bp_score;
                                            }

                                            (*nmatch)++;

                                            /* result array full: grow it before the next store */
                                            if (*nmatch >= max_trna) {
#ifdef DEBUG
                                                printf("REALLOC nmatch %d max_trna %d\n", *nmatch, max_trna);
#endif
                                                if (-1 == realloc_trna(r, &max_trna))
                                                    return -1;
                                            }

                                            /* really we need to store up the results and return them.
                                               Then trna_draw is not called from here */
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    return 0;
}
// Condensed-nearest-neighbour style reduction of a dataset.
//
// Starts with one randomly chosen instance per class in the subset S, then
// repeatedly walks over the remaining instances (in shuffled order) and adds
// to S every instance whose class differs from the majority class of its
// (up to) n_neighbors nearest members of S.  Stops after a full pass in
// which nothing was added.
//
// ds          - input dataset (read only through ds.X, ds.y, ds.n_features,
//               ds.n_instances)
// n_neighbors - number of neighbours that vote on each instance
//
// Returns a dataset obtained from alloc_dataset() holding copies of the
// selected instances; releasing it is up to the caller.
//
// Note: reseeds the C library generator with srand(time(NULL)) on each call.
Dataset cnn_reduce(Dataset ds, int n_neighbors) {
  int i, j, k, l;
  int n_classes;
  int* class_labels = NULL;
  int* S = malloc(sizeof(int) * ds.n_instances);
  int* S_copy = malloc(sizeof(int) * ds.n_instances);
  int* non_S = malloc(sizeof(int) * ds.n_instances);
  // last_train_S_size[x]: how many entries of S had already been considered
  // as neighbours of instance x the last time it was classified correctly
  int* last_train_S_size = calloc(ds.n_instances, sizeof(int));
  int S_size = 0;
  int non_S_size = 0;
  int S_index;
  // nearest[x * n_neighbors + k]: instance index of the k-th closest member
  // of S found so far for instance x, or -1 if that slot is still empty
  int* nearest = malloc(sizeof(int) * ds.n_instances * n_neighbors);
  int* votes = NULL;
  int neighbor_majority_class;
  int neighbor_majority_class_count;
  bool whole_non_S_classified_correctly = FALSE;
  Dataset ds_reduced;

  fill_int_array(nearest, ds.n_instances * n_neighbors, -1);
  count_classes(ds, &n_classes, &class_labels);
  votes = malloc(sizeof(int) * n_classes);

  // Add one random instance from each class to S
  srand(time(NULL));
  for (i = 0; i < n_classes; i++)
    while (1) {
      int pick = rand() % ds.n_instances;
      if (ds.y[pick] == class_labels[i]) {
        S[S_size++] = pick;
        break;
      }
    }

  while (!whole_non_S_classified_correctly) {
    whole_non_S_classified_correctly = TRUE;

    // copy S to auxiliary array and sort it (S itself must keep its
    // insertion order because last_train_S_size stores positions in S)
    memcpy(S_copy, S, sizeof(int) * S_size);
    qsort(S_copy, S_size, sizeof(int), compare_ints);

    // Find all instances not in S
    S_index = 0;
    non_S_size = 0;
    for (i = 0; i < ds.n_instances; i++)
      if (S_index == S_size || i < S_copy[S_index])
        non_S[non_S_size++] = i;
      else
        S_index++;

    shuffle_ints(non_S_size, non_S);

    for (i = 0; i < non_S_size; i++) {
      int query = non_S[i];
      int* nearest_for_i = nearest + query * n_neighbors;

      // update nearest neighbors for non_S[i] with the members of S that
      // were appended since this instance was last examined
      for (j = last_train_S_size[query]; j < S_size; j++) {
        // j is a position in S; the neighbour itself is the instance S[j].
        // (Using j directly would compare against, and vote with, an
        // unrelated instance of the dataset.)
        int candidate = S[j];

        for (k = 0; k < n_neighbors; k++) {
          if (nearest_for_i[k] < 0) {
            nearest_for_i[k] = candidate;
            break;
          }
          if (squared_dist(ds.n_features,
                           ds.X + ds.n_features * nearest_for_i[k],
                           ds.X + ds.n_features * query) >
              squared_dist(ds.n_features,
                           ds.X + ds.n_features * query,
                           ds.X + ds.n_features * candidate)) {
            // move all farther neighbors to the right, dropping the last
            for (l = n_neighbors - 1; l >= k + 1; l--)
              nearest_for_i[l] = nearest_for_i[l - 1];
            nearest_for_i[k] = candidate;
            break;
          }
        }
      }

      // count votes for non_S[i]
      memset(votes, 0, n_classes * sizeof(int));
      for (j = 0; j < n_neighbors; j++) {
        int current_neighbor = nearest_for_i[j];
        if (current_neighbor >= 0) {
          int current_class = -1;
          for (k = 0; k < n_classes; k++)
            if (ds.y[current_neighbor] == class_labels[k]) {
              current_class = k;
              break;
            }
          votes[current_class]++;
        } else
          break;  // slots are filled left to right, so the rest are empty
      }

      // find out the majority class of non_S[i] (ties go to the class that
      // comes first in class_labels)
      neighbor_majority_class = class_labels[0];
      neighbor_majority_class_count = votes[0];
      for (j = 1; j < n_classes; j++)
        if (votes[j] > neighbor_majority_class_count) {
          neighbor_majority_class_count = votes[j];
          neighbor_majority_class = class_labels[j];
        }

      // based on the majority class either add non_S[i] to S
      // or remember the S_size used to classify non_S[i]
      if (ds.y[query] != neighbor_majority_class) {
        S[S_size++] = query;
        whole_non_S_classified_correctly = FALSE;
      } else
        last_train_S_size[query] = S_size;
    }
  }

  // form a new dataset with only selected instances
  ds_reduced = alloc_dataset(ds.n_features, S_size);
  for (i = 0; i < S_size; i++) {
    memcpy(ds_reduced.X + ds.n_features * i,
           ds.X + ds.n_features * S[i],
           sizeof(flpoint) * ds.n_features);
    ds_reduced.y[i] = ds.y[S[i]];
  }

  free(class_labels);
  free(S);
  free(S_copy);
  free(non_S);
  free(nearest);
  free(last_train_S_size);
  free(votes);

  return ds_reduced;
}
void find_classes_centroids_in_data(const Dataset ds, int n_classes, int* class_labels, int* indices) { int i, j; flpoint* centroids = calloc(n_classes * ds.n_features, sizeof(flpoint)); int* class_instance_count = calloc(n_classes, sizeof(int)); flpoint* min_squared_dists = NULL; int* closest_to_centroids = NULL; // add each instance to the sum of instances of the corresponding // class for (i = 0; i < ds.n_instances; i++) { int current_class = -1; for (j = 0; j < n_classes; j++) if (ds.y[i] == class_labels[j]) { current_class = j; break; } for (j = 0; j < ds.n_features; j++) centroids[current_class * ds.n_features + j] += ds.X[i * ds.n_features + j]; class_instance_count[current_class] += 1; } // divide all sums by the number of instances in the respective class for (i = 0; i < n_classes; i++) { flpoint norm = 1. / class_instance_count[i]; for (j = 0; j < ds.n_features; j++) centroids[i * ds.n_features + j] *= norm; } // find instances in the dataset closest to centroids computed above min_squared_dists = malloc(sizeof(flpoint) * n_classes); closest_to_centroids = malloc(sizeof(int) * n_classes); fill_int_array(closest_to_centroids, n_classes, -1); for (i = 0; i < n_classes; i++) min_squared_dists[i] = -1; for (i = 0; i < ds.n_instances; i++) { int current_class = -1; flpoint current_squared_dist; for (j = 0; j < n_classes; j++) if (ds.y[i] == class_labels[j]) { current_class = j; break; } current_squared_dist = squared_dist(ds.n_features, centroids + current_class * ds.n_features, ds.X + i * ds.n_features); if (min_squared_dists[current_class] < 0 || current_squared_dist < min_squared_dists[current_class]) { min_squared_dists[current_class] = current_squared_dist; closest_to_centroids[current_class] = i; } } for (i = 0; i < n_classes; i++) indices[i] = closest_to_centroids[i]; free(centroids); free(class_instance_count); free(min_squared_dists); free(closest_to_centroids); }
// Reduce a dataset to a subset of its instances.
//
// The subset is seeded with the instances returned by
// find_classes_centroids_in_data() (one per class).  Each round then:
//   1. merges the newly chosen instances into the subset,
//   2. refreshes, for every instance outside the subset, its list of up to
//      n_neighbors closest subset members,
//   3. lets those members vote on the instance's class, and
//   4. for every misvoted instance, offers it as "representative" to each of
//      its neighbours, which keeps the closest such offer.
// The representatives collected in a round are the instances added in the
// next one; the process ends when a round yields none.
//
// ds          - input dataset
// n_neighbors - number of neighbours that vote on each instance
//
// Returns a dataset obtained from alloc_dataset() holding copies of the
// selected instances.
Dataset fcnn_reduce(Dataset ds, int n_neighbors) {
  int n_classes;
  int* class_labels = NULL;
  int* selected = malloc(sizeof(int) * ds.n_instances);
  int* fresh = malloc(sizeof(int) * ds.n_instances);
  int* remaining = malloc(sizeof(int) * ds.n_instances);
  int selected_count = 0;
  int fresh_count = 0;
  int remaining_count = 0;
  // knn[x * n_neighbors + s]: s-th closest subset member found so far for
  // instance x, or -1 while the slot is unused
  int* knn = malloc(sizeof(int) * ds.n_instances * n_neighbors);
  int* representative = NULL;
  int* tally = NULL;
  Dataset ds_reduced;
  int inst, pos, d, slot, shift, c;

  count_classes(ds, &n_classes, &class_labels);
  fill_int_array(knn, ds.n_instances * n_neighbors, -1);

  // seed: one instance per class
  fresh_count = n_classes;
  find_classes_centroids_in_data(ds, n_classes, class_labels, fresh);

  representative = malloc(sizeof(int) * ds.n_instances);
  tally = malloc(sizeof(int) * n_classes);

  while (fresh_count > 0) {
    int cursor;

    // append the new arrivals to the subset and keep it ordered
    memcpy(selected + selected_count, fresh, sizeof(int) * fresh_count);
    selected_count += fresh_count;
    qsort(selected, selected_count, sizeof(int), compare_ints);

    fill_int_array(representative, ds.n_instances, -1);

    // list every instance that is not in the (sorted) subset
    cursor = 0;
    remaining_count = 0;
    for (inst = 0; inst < ds.n_instances; inst++) {
      if (cursor != selected_count && !(inst < selected[cursor])) {
        cursor++;
        continue;
      }
      remaining[remaining_count++] = inst;
    }

    for (pos = 0; pos < remaining_count; pos++) {
      const int query = remaining[pos];
      int* const knn_row = knn + query * n_neighbors;
      int best_label;
      int best_votes;

      // fold the new arrivals into this instance's neighbour list
      for (d = 0; d < fresh_count; d++) {
        const int candidate = fresh[d];

        for (slot = 0; slot < n_neighbors; slot++) {
          if (knn_row[slot] >= 0) {
            // occupied slot: only displace it if it is strictly farther
            if (!(squared_dist(ds.n_features,
                               ds.X + ds.n_features * knn_row[slot],
                               ds.X + ds.n_features * query) >
                  squared_dist(ds.n_features,
                               ds.X + ds.n_features * query,
                               ds.X + ds.n_features * candidate)))
              continue;

            // make room: push this and all farther entries one slot right
            for (shift = n_neighbors - 1; shift > slot; shift--)
              knn_row[shift] = knn_row[shift - 1];
          }
          knn_row[slot] = candidate;
          break;
        }
      }

      // let the neighbours vote for their own classes
      memset(tally, 0, sizeof(int) * n_classes);
      for (slot = 0; slot < n_neighbors; slot++) {
        const int neighbor = knn_row[slot];
        int neighbor_class = -1;

        if (neighbor < 0)
          break;

        for (c = 0; c < n_classes; c++)
          if (class_labels[c] == ds.y[neighbor]) {
            neighbor_class = c;
            break;
          }
        tally[neighbor_class]++;
      }

      // winner of the vote; on a tie the earlier class label wins
      best_label = class_labels[0];
      best_votes = tally[0];
      for (c = 1; c < n_classes; c++) {
        if (tally[c] <= best_votes)
          continue;
        best_votes = tally[c];
        best_label = class_labels[c];
      }

      // nothing more to do when the vote agrees with the true label
      if (ds.y[query] == best_label)
        continue;

      // the vote was wrong: each neighbour keeps the closest such instance
      // as its representative
      for (slot = 0; slot < n_neighbors; slot++) {
        const int neighbor = knn_row[slot];

        if (neighbor < 0)
          break;

        if (representative[neighbor] < 0 ||
            squared_dist(ds.n_features,
                         ds.X + ds.n_features * neighbor,
                         ds.X + ds.n_features * query) <
            squared_dist(ds.n_features,
                         ds.X + ds.n_features * neighbor,
                         ds.X + ds.n_features * representative[neighbor]))
          representative[neighbor] = query;
      }
    }

    // next round's arrivals: the distinct representatives of subset members
    fresh_count = 0;
    for (pos = 0; pos < selected_count; pos++) {
      const int proposed = representative[selected[pos]];
      bool already_listed = FALSE;

      if (proposed < 0)
        continue;

      for (d = 0; d < fresh_count; d++)
        if (proposed == fresh[d]) {
          already_listed = TRUE;
          break;
        }
      if (!already_listed)
        fresh[fresh_count++] = proposed;
    }
  }

  // copy the chosen instances into a new dataset
  ds_reduced = alloc_dataset(ds.n_features, selected_count);
  for (pos = 0; pos < selected_count; pos++) {
    memcpy(ds_reduced.X + ds.n_features * pos,
           ds.X + ds.n_features * selected[pos],
           sizeof(flpoint) * ds.n_features);
    ds_reduced.y[pos] = ds.y[selected[pos]];
  }

  free(class_labels);
  free(selected);
  free(fresh);
  free(remaining);
  free(knn);
  free(representative);
  free(tally);

  return ds_reduced;
}