Пример #1
0
/*
 * Demo entry point: builds an int array whose length is chosen at random
 * (1..10 elements), reports its size, and hands it to fill_int_array() and
 * print_int_array().
 *
 * argc and argv are not used. Always returns 0.
 */
int main(int argc, char *argv[]) {
	(void)argc;
	(void)argv;

	srand(time(NULL));

	/* A variable-length array must have a size greater than zero, so the
	 * length is drawn from 1..10; rand() % 11 could yield 0. */
	int random_array[rand() % 10 + 1];
	int array_size = (int)(sizeof(random_array) / sizeof(random_array[0]));

	/* sizeof yields size_t, which must be printed with %zu. */
	printf("%zu, %zu\n", sizeof(random_array), sizeof(int));

	/* NOTE(review): both helpers receive only the array pointer, not its
	 * length -- confirm how they determine where the array ends. */
	fill_int_array(random_array);
	printf("The length of the array is %i\n", array_size);
	print_int_array(random_array);

	return 0;
}
Пример #2
0
/*
 * Exhaustively search a sequence for four-stem structures (the "aa", "tu",
 * "d" and "ac" stems -- presumably the aminoacyl, T-psi-C, D and anticodon
 * stems of a tRNA cloverleaf) that satisfy the limits held in *t.
 *
 * seq                - sequence to search; each character is mapped through
 *                      char_lookup before scoring
 * seq_length         - only copied into each result; it is not used to
 *                      bounds-check any index in this function
 * user_start,
 * user_end           - search range; both are decremented by one before use
 *                      (presumably 1-based input, 0-based indexing)
 * t                  - length limits and minimum scores for every stem
 * r                  - address of an array of TrnaRes pointers. Entries are
 *                      written through (*r)[*nmatch] without being allocated
 *                      here, so they must already exist; the array is grown
 *                      with realloc_trna() when it fills up
 * nmatch             - output: number of results stored (reset to 0 on entry)
 * max_total_bp_score - raised to the largest total_bp_score stored; it is
 *                      not initialised here, so the caller must do that
 *
 * Returns 0 when the search completes, -1 if realloc_trna() returns -1.
 *
 * NOTE(review): seq is declared as plain char; if char is signed, characters
 * above 127 would index char_lookup with a negative value -- confirm the
 * input alphabet or the lookup table's handling.
 */
int do_trna_search ( char seq[], int seq_length, int user_start, int user_end,
		     TrnaSpec *t, TrnaRes ***r, int *nmatch, 
		     int *max_total_bp_score) {

    int aa_left, aa_right, max_aa_start, aa_left_start, min_aa_end, max_aa_end;
    int aa_right_end, aa_score;
    int tu_number, tu_right, tu_left, tu_score, tu_left_match[10], tu_match_score[10];
    int tu_right_match=0, tu_match_number;
    int i,j,start,end,intron_length;
    int ac_min_start, ac_max_start, ac_left, d_left, d_right,d_score;
    int ac_right_start, ac_right, lac, rac, ac_score, ac_right_end;
    int base_pair_score [ 25 ];
    int total_base_pair;
    /* current capacity of *r; assumes the caller allocated MAX_TRNA entries */
    int max_trna = MAX_TRNA;

    *nmatch = 0;

    /*
     * Pair score table, indexed by
     *   char_lookup[left base] + 5 * char_lookup[right base].
     * Six combinations score 2 or 1, every other entry is set by
     * fill_int_array (presumably to 0).  Presumably the 2-point entries are
     * Watson-Crick pairs and the 1-point entries G-U pairs -- confirm
     * against the char_lookup encoding.
     */
    fill_int_array ( base_pair_score, 25, 0 );
    base_pair_score [3] = 2;
    base_pair_score [7] = 2;
    base_pair_score [11] = 2;
    base_pair_score [13] = 1;
    base_pair_score [15] = 2;
    base_pair_score [17] = 1;

    start = user_start - 1;
    end   = user_end - 1;

    /* loop for all aa stem left starts */

    /* last start position that still leaves room for the shortest structure */
    max_aa_start = end - ( t->min_trna_length - 1 );


    for ( aa_left_start = start; aa_left_start <= max_aa_start; aa_left_start++ ) {

	/* loop for all aa stem right ends */

	min_aa_end = aa_left_start + t->min_trna_length - 1;
	max_aa_end = MIN ( aa_left_start + t->max_trna_length + t->max_intron_length - 1, end);

	for ( aa_right_end = min_aa_end; aa_right_end <= max_aa_end; aa_right_end++ ) {

	    /* get the aa score */
	    /* 7 pairs, walking inwards from both ends.  When the loop exits,
	       aa_left and aa_right have moved 7 positions inwards; aa_right
	       is reused below as the start of the tu stem's right arm. */
	    for ( aa_left = aa_left_start, 
		 aa_right = aa_right_end, 
		 aa_score = 0,
		 i=0; i<7; 
		 aa_left++, aa_right--, i++ )  {

		aa_score += base_pair_score [ char_lookup [ seq [ aa_left ]] 
					     + char_lookup [ seq [ aa_right ] ] * 5 ];
	    }

	    if ( aa_score >= t->min_aa_score ) {

		/* do the tu loop */

		/* Try every permitted tu loop length i and score the 5 pairs
		   of the stem around it; each stem that scores high enough
		   is recorded.
		   NOTE(review): tu_left_match and tu_match_score hold 10
		   entries and tu_number is not bounds-checked, so this is
		   only safe while max_tu_loop_length - min_tu_loop_length + 1
		   does not exceed 10 -- confirm against TrnaSpec setup. */

		for ( i = t->min_tu_loop_length, tu_number = 0; i <= t->max_tu_loop_length; i++ ) {
		    tu_right = aa_right;
		    tu_left  = aa_right - 9 - i;
		    tu_score = 0;
		    for ( j=0; j<5; j++, tu_left++, tu_right-- ) {

			tu_score += base_pair_score [ char_lookup [ seq [ tu_left ]] 
					     + char_lookup [ seq [ tu_right ] ] * 5 ];
		    }

		    if ( tu_score >= t->min_tu_score ) {

			/* the scoring loop advanced tu_left by 5; store
			   the position it started from */
			tu_left_match [ tu_number ] = tu_left - 5;
			tu_match_score [ tu_number ] = tu_score;
			tu_right_match = aa_right;
			tu_number++;
		    }
		}

		/* loop for all tu stems to find ac stem */

		for ( tu_match_number = 0; tu_match_number < tu_number; tu_match_number++ ) {

		    /* try all ac left starts */

		    ac_min_start = aa_left_start + t->min_aa_to_ac_length;
		    ac_max_start = MIN ( ( tu_left_match [ tu_match_number ] -
					t->min_aa_to_ac_length ), ( aa_left_start + t->max_aa_to_ac_length ) );

		    for ( ac_left = ac_min_start; ac_left <= ac_max_start; ac_left++ ) {

			/* do the d stem first */

			/* 5 pairs; the indices are stepped before use, so the
			   first pair is (aa_left_start + 9, ac_left - 2) */
			d_left = aa_left_start + 8;
			d_right = ac_left -1;
			for ( i=0, d_score = 0; i<5; i++ ) {
			    d_left++;
			    d_right--;

			    d_score += base_pair_score [ char_lookup [ seq [ d_left ]] 
					     + char_lookup [ seq [ d_right ] ] * 5 ];
			}

			if ( d_score >= t->min_d_score ) {

			    /* try all ac right end positions */

			    ac_right_start = MAX ( ( ac_left + t->min_acs_to_ace_length ),
						 ( tu_left_match [ tu_match_number ] -
						  t->max_var_loop_length ));
			    ac_right_end = MIN ( ( ac_left + t->min_acs_to_ace_length + t->max_intron_length),
						( tu_left_match [ tu_match_number ] - 4 ));

			    for ( ac_right = ac_right_start; ac_right <= ac_right_end; ac_right++ ) {
				/* 5 pairs; indices are stepped before use, so
				   the first pair is (ac_left, ac_right) */
				lac = ac_left - 1;
				rac = ac_right + 1;
				for ( i=0, ac_score = 0; i<5; i++ ) {
				    lac++;
				    rac--;

				    ac_score += base_pair_score [ char_lookup [ seq [ lac ]] 
					     + char_lookup [ seq [ rac ] ] * 5 ];
				}

				if ( ac_score >= t->min_ac_score ) {

				    /* we have got all stems !!! */

				    /* intron length sensible ? */

				    intron_length = ac_right - ac_left - 16;

				    /* accept either no intron or one of at
				       least the minimum length, and require
				       the structure without its intron to
				       fit within max_trna_length */
				    if ( ( ( intron_length == 0 ) ||
					 ( intron_length >= t->min_intron_length ) ) &&
					 ( ( aa_right_end - aa_left_start + 1 - intron_length ) <=
					  t->max_trna_length )) {

					/* high enough overall base pairing score ? */
					total_base_pair = aa_score + ac_score + 
					    d_score + tu_match_score [ tu_match_number ];
					if ( total_base_pair >= t->min_total_bp_score ) {
					    /* fudge factors to fit fortran 

					     *  r->aa_right += 1;
					     *  r->ac_left  += 4;
					     *  r->ac_right -= 4;
					     *  r->tu_right -= 4;
					     *  r->tu_left  += 4;
					     */

					    (*r)[*nmatch]->seq = seq;
					    (*r)[*nmatch]->seq_length = seq_length;
					    (*r)[*nmatch]->aa_right = aa_right_end + 1;
					    (*r)[*nmatch]->aa_left = aa_left_start;
					    (*r)[*nmatch]->ac_left = ac_left + 4;
					    (*r)[*nmatch]->ac_right = ac_right - 4;
					    (*r)[*nmatch]->tu_right = tu_right_match - 4;
					    (*r)[*nmatch]->tu_left = tu_left_match[tu_match_number] + 4;

					    /* do conserved base search in an odd place ! */

					    /* Only run when a minimum conserved
					       base score is configured.  A
					       candidate below it is dropped by
					       moving on to the next ac_right;
					       *nmatch is not advanced, so the
					       slot filled in above is reused by
					       the next candidate. */
					    if ( t->min_total_cb_score ) {
					      trna_base_scores ( (*r)[*nmatch], t );
					      if ( (*r)[*nmatch]->total_cb_score < t->min_total_cb_score ) continue;
					    }
					    (*r)[*nmatch]->intron_length = intron_length;
					    (*r)[*nmatch]->aa_score = aa_score;
					    (*r)[*nmatch]->ac_score = ac_score;
					    (*r)[*nmatch]->tu_score = tu_match_score[tu_match_number];
					    (*r)[*nmatch]->d_score  = d_score;
					    (*r)[*nmatch]->total_bp_score = total_base_pair;

					    /* track the best total score seen */
					    if ((*r)[*nmatch]->total_bp_score >
						*max_total_bp_score) {
						*max_total_bp_score = (*r)[*nmatch]->total_bp_score;
					    }
					    (*nmatch)++;

					    /* result array full: grow it before
					       the next slot is written
					       (realloc_trna presumably updates
					       max_trna -- confirm) */
					    if (*nmatch >= max_trna) {
#ifdef DEBUG
						printf("REALLOC nmatch %d max_trna %d\n",
						       *nmatch, max_trna);
#endif

						if (-1 == realloc_trna(r, &max_trna))
						    return -1;

					    }
					    /* really we need to store up the results 
					    and return them. Then trna_draw is not
					    called from here */
					}
				    }
				}
			    }
			}
		    }	
		}
	    }
	}
    }
    return 0;
}
/*
 * Reduce a dataset with a condensed-nearest-neighbour style selection.
 *
 * A subset S is seeded with one randomly chosen instance per class.  The
 * instances outside S are then visited in shuffled order; each one is
 * classified by a majority vote among its (up to) n_neighbors nearest
 * members of S, and every misclassified instance is appended to S.  Passes
 * are repeated until a whole pass adds nothing to S.
 *
 * ds          - input dataset; reads n_instances, n_features, X (n_features
 *               consecutive values per instance) and y (labels)
 * n_neighbors - number of neighbours that vote
 *
 * Returns a dataset obtained from alloc_dataset() holding copies of the
 * selected instances, in the order they were added to S.
 *
 * Side effect: reseeds the C library random generator with srand(time(NULL)).
 */
Dataset cnn_reduce(Dataset ds, int n_neighbors)
{
    int i, j, k, l;
    int n_classes;
    int* class_labels = NULL;
    int* S = malloc(sizeof(int) * ds.n_instances);
    int* S_copy = malloc(sizeof(int) * ds.n_instances);
    int* non_S = malloc(sizeof(int) * ds.n_instances);
    // for every instance: how many entries of S its neighbour list has
    // already been compared against
    int* last_train_S_size = calloc(ds.n_instances, sizeof(int));
    int S_size = 0;
    int non_S_size = 0;
    int S_index;
    // n_neighbors dataset indices per instance, nearest first, -1 = empty
    int* nearest = malloc(sizeof(int) * ds.n_instances * n_neighbors);
    int* votes = NULL;
    int neighbor_majority_class;
    int neighbor_majority_class_count;
    bool whole_non_S_classified_correctly = FALSE;
    Dataset ds_reduced;

    fill_int_array(nearest, ds.n_instances * n_neighbors, -1);

    count_classes(ds, &n_classes, &class_labels);
    votes = malloc(sizeof(int) * n_classes);

    // Add one random instance from each class to S
    srand(time(NULL));
    for (i = 0; i < n_classes; i++)
        while (1)
        {
            int pick = rand() % ds.n_instances;
            if (ds.y[pick] == class_labels[i])
            {
                S[S_size++] = pick;
                break;
            }
        }

    while (!whole_non_S_classified_correctly)
    {
        whole_non_S_classified_correctly = TRUE;
        // copy S to auxiliary array and sort it; S itself must keep its
        // insertion order because last_train_S_size refers to positions in S
        memcpy(S_copy, S, sizeof(int) * S_size);
        qsort(S_copy, S_size, sizeof(int), compare_ints);

        // Find all instances not in S (relies on S_copy being sorted in
        // ascending order)
        S_index = 0;
        non_S_size = 0;
        for (i = 0; i < ds.n_instances; i++)
            if (S_index == S_size || i < S_copy[S_index])
                non_S[non_S_size++] = i;
            else
                S_index++;

        shuffle_ints(non_S_size, non_S);

        for (i = 0; i < non_S_size; i++)
        {
            int* nearest_for_i = nearest + non_S[i] * n_neighbors;

            // update nearest neighbors for non_S[i] with the members of S
            // added since this instance was last classified
            for (j = last_train_S_size[non_S[i]]; j < S_size; j++)
            {
                // j is a position within S; the neighbour list, the
                // distance computations and the vote below all need the
                // dataset index S[j]
                int candidate = S[j];

                for (k = 0; k < n_neighbors; k++)
                {
                    if (nearest_for_i[k] < 0)
                    {
                        nearest_for_i[k] = candidate;
                        break;
                    }
                    if (squared_dist(ds.n_features,
                                ds.X + ds.n_features * nearest_for_i[k],
                                ds.X + ds.n_features * non_S[i]) >
                            squared_dist(ds.n_features,
                                ds.X + ds.n_features * non_S[i],
                                ds.X + ds.n_features * candidate))
                    {
                        // move all farther neighbors to the right
                        for (l = n_neighbors - 1; l >= k + 1; l--)
                            nearest_for_i[l] = nearest_for_i[l - 1];
                        nearest_for_i[k] = candidate;
                        break;
                    }
                }
            }

            // count votes for non_S[i]
            memset(votes, 0, n_classes * sizeof(int));
            for (j = 0; j < n_neighbors; j++)
            {
                int current_neighbor = nearest_for_i[j];
                if (current_neighbor >= 0)
                {
                    int current_class = -1;
                    for (k = 0; k < n_classes; k++)
                        if (ds.y[current_neighbor] == class_labels[k])
                        {
                            current_class = k;
                            break;
                        }
                    votes[current_class]++;
                }
                else break;
            }

            // find out the majority class of non_S[i]; ties go to the
            // class that appears first in class_labels
            neighbor_majority_class = class_labels[0];
            neighbor_majority_class_count = votes[0];
            for (j = 1; j < n_classes; j++)
                if (votes[j] > neighbor_majority_class_count)
                {
                    neighbor_majority_class_count = votes[j];
                    neighbor_majority_class = class_labels[j];
                }

            // based on the majority class either add non_S[i] to S
            // or remember the S_size used to classify non_S[i]
            if (ds.y[non_S[i]] != neighbor_majority_class)
            {
                S[S_size++] = non_S[i];
                whole_non_S_classified_correctly = FALSE;
            }
            else
                last_train_S_size[non_S[i]] = S_size;
        }
    }

    // form a new dataset with only selected instances
    ds_reduced = alloc_dataset(ds.n_features, S_size);
    for (i = 0; i < S_size; i++)
    {
        memcpy(ds_reduced.X + ds.n_features * i,
                ds.X + ds.n_features * S[i], sizeof(flpoint) * ds.n_features);
        ds_reduced.y[i] = ds.y[S[i]];
    }

    free(class_labels);
    free(S);
    free(S_copy);
    free(non_S);
    free(nearest);
    free(last_train_S_size);
    free(votes);

    return ds_reduced;
}
/*
 * For every class, find the dataset instance that lies closest to the
 * centroid of that class.
 *
 * The centroid of class c is the per-feature mean of all instances whose
 * label ds.y[i] equals class_labels[c]; closeness is measured with
 * squared_dist().
 *
 * ds           - dataset; reads n_instances, n_features, X (n_features
 *                consecutive values per instance) and y (labels)
 * n_classes    - number of entries in class_labels and in indices
 * class_labels - label value of each class
 * indices      - output array of n_classes entries: indices[c] receives the
 *                index of the instance of class c nearest to its centroid,
 *                or -1 when no instance carries that label
 *
 * Instances whose label does not appear in class_labels are ignored.
 */
void find_classes_centroids_in_data(const Dataset ds, int n_classes,
        int* class_labels, int* indices)
{
    int i, j;
    flpoint* centroids = calloc(n_classes * ds.n_features, sizeof(flpoint));
    int* class_instance_count = calloc(n_classes, sizeof(int));
    flpoint* min_squared_dists = NULL;
    int* closest_to_centroids = NULL;

    // add each instance to the sum of instances of the corresponding
    // class
    for (i = 0; i < ds.n_instances; i++)
    {
        int current_class = -1;
        for (j = 0; j < n_classes; j++)
            if (ds.y[i] == class_labels[j])
            {
                current_class = j;
                break;
            }

        // a label missing from class_labels would otherwise index the
        // per-class arrays with -1
        if (current_class < 0)
            continue;

        for (j = 0; j < ds.n_features; j++)
            centroids[current_class * ds.n_features + j] +=
                ds.X[i * ds.n_features + j];
        class_instance_count[current_class] += 1;
    }

    // divide all sums by the number of instances in the respective class
    for (i = 0; i < n_classes; i++)
    {
        flpoint norm;

        // a class without instances has nothing to average; skipping it
        // avoids dividing by zero
        if (class_instance_count[i] == 0)
            continue;

        norm = 1. / class_instance_count[i];
        for (j = 0; j < ds.n_features; j++)
            centroids[i * ds.n_features + j] *= norm;
    }

    // find instances in the dataset closest to centroids computed above;
    // a negative minimum distance marks "no instance seen yet"
    min_squared_dists = malloc(sizeof(flpoint) * n_classes);
    closest_to_centroids = malloc(sizeof(int) * n_classes);
    fill_int_array(closest_to_centroids, n_classes, -1);
    for (i = 0; i < n_classes; i++)
        min_squared_dists[i] = -1;

    for (i = 0; i < ds.n_instances; i++)
    {
        int current_class = -1;
        flpoint current_squared_dist;

        for (j = 0; j < n_classes; j++)
            if (ds.y[i] == class_labels[j])
            {
                current_class = j;
                break;
            }

        // same guard as in the accumulation loop
        if (current_class < 0)
            continue;

        current_squared_dist = squared_dist(ds.n_features,
                centroids + current_class * ds.n_features,
                ds.X + i * ds.n_features);
        if (min_squared_dists[current_class] < 0 ||
                current_squared_dist < min_squared_dists[current_class])
        {
            min_squared_dists[current_class] = current_squared_dist;
            closest_to_centroids[current_class] = i;
        }
    }

    for (i = 0; i < n_classes; i++)
        indices[i] = closest_to_centroids[i];

    free(centroids);
    free(class_instance_count);
    free(min_squared_dists);
    free(closest_to_centroids);
}
/*
 * Reduce a dataset with an FCNN-style incremental selection.
 *
 * The selected set starts from the indices that
 * find_classes_centroids_in_data() reports for each class.  In every round
 * the newly chosen instances are merged into the selected set, and each
 * instance outside the set is classified by a majority vote among its (up
 * to) n_neighbors nearest selected instances.  Every misclassified instance
 * competes to become the "representative" of each of its neighbours (the
 * closest misclassified instance wins); the distinct representatives form
 * the next round's additions.  The process stops once a round produces no
 * representative.
 *
 * ds          - input dataset; reads n_instances, n_features, X and y
 * n_neighbors - number of neighbours that vote
 *
 * Returns a dataset obtained from alloc_dataset() holding copies of the
 * selected instances, in the order left by the last qsort() of the set.
 */
Dataset fcnn_reduce(Dataset ds, int n_neighbors)
{
    int n_classes;
    int* class_labels = NULL;
    int* selected = malloc(sizeof(int) * ds.n_instances);
    int* fresh = malloc(sizeof(int) * ds.n_instances);
    int* outside = malloc(sizeof(int) * ds.n_instances);
    int n_selected = 0;
    int n_fresh = 0;
    int n_outside = 0;
    /* n_neighbors dataset indices per instance, nearest first, -1 = empty */
    int* nearest = malloc(sizeof(int) * ds.n_instances * n_neighbors);
    int* rep = NULL;
    int* votes = NULL;
    Dataset ds_reduced;
    int inst, pos, slot, cls, f;

    count_classes(ds, &n_classes, &class_labels);

    fill_int_array(nearest, ds.n_instances * n_neighbors, -1);

    /* first batch: one instance per class, as chosen by the centroid helper */
    n_fresh = n_classes;
    find_classes_centroids_in_data(ds, n_classes, class_labels, fresh);

    rep = malloc(sizeof(int) * ds.n_instances);
    votes = malloc(sizeof(int) * n_classes);

    while (n_fresh > 0)
    {
        int cursor = 0;

        /* append the new batch to the selected set and keep the set sorted */
        memcpy(selected + n_selected, fresh, sizeof(int) * n_fresh);
        n_selected += n_fresh;
        qsort(selected, n_selected, sizeof(int), compare_ints);

        fill_int_array(rep, ds.n_instances, -1);

        /* list every instance that is not selected; the single forward
         * scan relies on the selected set being sorted in ascending order */
        n_outside = 0;
        for (inst = 0; inst < ds.n_instances; inst++)
        {
            if (cursor != n_selected && inst >= selected[cursor])
                cursor++;
            else
                outside[n_outside++] = inst;
        }

        for (pos = 0; pos < n_outside; pos++)
        {
            const int query = outside[pos];
            const int base = query * n_neighbors;
            int winner_label;
            int winner_votes;

            /* offer every instance of the new batch to the query's
             * neighbour list, which is kept ordered nearest first */
            for (f = 0; f < n_fresh; f++)
            {
                const int candidate = fresh[f];

                for (slot = 0; slot < n_neighbors; slot++)
                {
                    int shift;

                    if (nearest[base + slot] < 0)
                    {
                        /* free slot: the list is not full yet */
                        nearest[base + slot] = candidate;
                        break;
                    }
                    if (squared_dist(ds.n_features,
                                ds.X + ds.n_features * nearest[base + slot],
                                ds.X + ds.n_features * query) >
                            squared_dist(ds.n_features,
                                ds.X + ds.n_features * query,
                                ds.X + ds.n_features * candidate))
                    {
                        /* candidate is closer: push the rest one slot down,
                         * dropping the last entry */
                        for (shift = n_neighbors - 1; shift > slot; shift--)
                            nearest[base + shift] = nearest[base + shift - 1];
                        nearest[base + slot] = candidate;
                        break;
                    }
                }
            }

            /* tally the class of every neighbour found so far */
            memset(votes, 0, sizeof(int) * n_classes);
            for (slot = 0; slot < n_neighbors; slot++)
            {
                const int neighbor = nearest[base + slot];
                int neighbor_class = -1;

                if (neighbor < 0)
                    break;

                for (cls = 0; cls < n_classes; cls++)
                {
                    if (class_labels[cls] == ds.y[neighbor])
                    {
                        neighbor_class = cls;
                        break;
                    }
                }
                votes[neighbor_class]++;
            }

            /* majority label; ties go to the class listed first */
            winner_label = class_labels[0];
            winner_votes = votes[0];
            for (cls = 1; cls < n_classes; cls++)
            {
                if (votes[cls] > winner_votes)
                {
                    winner_votes = votes[cls];
                    winner_label = class_labels[cls];
                }
            }

            /* correctly classified instances nominate nobody */
            if (ds.y[query] == winner_label)
                continue;

            /* a misclassified query becomes the representative of each of
             * its neighbours unless that neighbour already has a closer one */
            for (slot = 0; slot < n_neighbors; slot++)
            {
                const int neighbor = nearest[base + slot];

                if (neighbor < 0)
                    break;

                if (rep[neighbor] < 0 ||
                        squared_dist(ds.n_features,
                            ds.X + ds.n_features * neighbor,
                            ds.X + ds.n_features * query) <
                        squared_dist(ds.n_features,
                            ds.X + ds.n_features * neighbor,
                            ds.X + ds.n_features * rep[neighbor]))
                {
                    rep[neighbor] = query;
                }
            }
        }

        /* next batch: the distinct representatives of the selected
         * instances, in order of the selected set */
        n_fresh = 0;
        for (pos = 0; pos < n_selected; pos++)
        {
            const int nominee = rep[selected[pos]];

            if (nominee < 0)
                continue;

            f = 0;
            while (f < n_fresh && fresh[f] != nominee)
                f++;
            if (f == n_fresh)
                fresh[n_fresh++] = nominee;
        }
    }

    /* copy the selected rows and labels into the result */
    ds_reduced = alloc_dataset(ds.n_features, n_selected);
    for (pos = 0; pos < n_selected; pos++)
    {
        const int src = selected[pos];

        memcpy(ds_reduced.X + ds.n_features * pos,
                ds.X + ds.n_features * src, sizeof(flpoint) * ds.n_features);
        ds_reduced.y[pos] = ds.y[src];
    }

    free(votes);
    free(rep);
    free(nearest);
    free(outside);
    free(fresh);
    free(selected);
    free(class_labels);

    return ds_reduced;
}