Ejemplo n.º 1
0
/*
 * feature_vs_label: reads a training file and prints, for every row of the
 * feature matrix, the indices stored for that row (one output line per row).
 *
 * argv[1] is the path of the training file.
 * Returns 0 on success and 1 on a usage error, when the file cannot be
 * opened, or when one of the matrices could not be allocated.
 */
int main (int argc, char *argv[]) {
	if (argc != 2) {
		printf( "Usage:\n  feature_vs_label [TRAIN FILE]\n\n");
		return 1;
	}

	//------------------------------------------------------------------

	// Open the input first so that a bad path fails before anything is allocated.
	FILE *train = fopen(argv[1], "r");
	if (train == NULL) {
		perror(argv[1]);
		return 1;
	}

	csr *labels = csr_malloc(TRAIN_NROWS, TRAIN_NLABELDATA);
	csr *features = csr_malloc(TRAIN_NROWS, TRAIN_NFEATUREDATA);
	// NOTE(review): assumes csr_malloc reports failure by returning NULL -- confirm.
	if (labels == NULL || features == NULL) {
		fprintf(stderr, "feature_vs_label: could not allocate the matrices\n");
		if (labels != NULL) csr_free(labels);
		if (features != NULL) csr_free(features);
		fclose(train);
		return 1;
	}

	read_train(train, labels, features);
	fclose(train);

	int i, j;

	// The entries of row i occupy idx[ptr[i]] .. idx[ptr[i+1]-1].
	for (i = 0; i < features->nrows; ++i) {
		for (j = features->ptr[i]; j < features->ptr[i+1]; ++j) {
			printf("%d ", features->idx[j]);
		}
		printf("\n");
	}

	csr_free(labels);
	csr_free(features);
	return 0;
}
Ejemplo n.º 2
0
/*
 * Loads a train description, prints it, asks the user for a station name
 * and passes that name to next_station().
 *
 * Returns EXIT_SUCCESS normally and EXIT_FAILURE when no station name could
 * be read from standard input.
 */
int main(void) {
    struct train tr = {};
    read_train(&tr);
    printf("\n===========================================================================\n");
    print_train(tr);
    printf("\n===========================================================================\n");
    printf("Введите название станции, для которой\nнеобходимо найти ближайший поезд: ");

    /* Read one whole line with a bounded call; gets() cannot limit its
     * input and was removed from the language in C11. */
    char line[300] = "";
    if (fgets(line, sizeof line, stdin) == NULL) {
        fprintf(stderr, "no station name could be read from standard input\n");
        free(tr.stations_arr);
        return EXIT_FAILURE;
    }
    /* fgets keeps the line terminator; drop it so the name matches as typed. */
    line[strcspn(line, "\r\n")] = '\0';

    /* Copy into the fixed-size name buffer. snprintf always terminates the
     * result and never writes past the buffer; longer names are cut to
     * sizeof required_station - 1 bytes. */
    char required_station[20] = "";
    snprintf(required_station, sizeof required_station, "%s", line);

    next_station(&tr, required_station);

    /* Release memory */
    free(tr.stations_arr);

	return EXIT_SUCCESS;
}
/*
 * Predicts the grid one step earlier than stop_grid and stores it in
 * start_grid.
 *
 * start_grid       - output; cells [2..21][2..21] are overwritten
 * stop_grid        - the known later grid
 * vote_case        - table giving the predicted cell value per pattern index
 * probability_case - table giving a probability per pattern index
 *
 * First every interior cell receives its table vote. Cells whose probability
 * lies within 0.125 of 0.5 are then revisited one at a time: the cell is
 * passed to flip_point(), and unless difference_forward() becomes strictly
 * smaller, flip_point() is applied a second time (presumably undoing the
 * change -- confirm against flip_point).
 */
void reverse_step (int start_grid[][24], int stop_grid[][24], int vote_case[], double probability_case[]) {
	FLIP_STACK candidates = {.size = 0};
	int row, col;

	// Pass 1: table vote for every interior cell, collecting the uncertain ones.
	for (row = 2; row < 22; ++row) {
		for (col = 2; col < 22; ++col) {
			const int pattern = get_pattern_idx(stop_grid, row, col);
			start_grid[row][col] = vote_case[pattern];

			// distance of the probability from the undecided value 0.5
			const double distance = fabs(0.5-probability_case[pattern]);
			if (distance < 0.125) {
				push_flip(&candidates, row, col, distance);
			}
		}
	}

	// Order the candidates before they are popped (ordering is defined by quicksort_flips).
	quicksort_flips(candidates.items, candidates.size);

	// Pass 2: try each candidate and keep the change only on a strict improvement.
	int remaining = candidates.size;
	while (remaining > 0) {
		const double before = difference_forward(start_grid, stop_grid);
		const FLIP_POINT cell = pop_flip(&candidates);

		flip_point(cell.i, cell.j, start_grid);
		const double after = difference_forward(start_grid, stop_grid);

		if (before <= after) {
			flip_point(cell.i, cell.j, start_grid);
		}
		--remaining;
	}
}

////////////////////////////////////////////////////////////////////////

/*
 * Statistical tables, all indexed as [step][pattern].
 *   step    - 0..4; main() uses the forward-step counter (0 .. delta-1) here
 *   pattern - 0..2097151; reverse_step() obtains it from get_pattern_idx()
 *
 * count_case_0 / count_case_1 - tallies updated by vote_step()
 * vote_case        - 1 where count_case_1 > count_case_0, otherwise 0
 * probability_case - count_case_1 / (count_case_0 + count_case_1),
 *                    or 0 when both tallies are zero
 *
 * Each table holds 5 * 2^21 = 10,485,760 elements.
 */
int count_case_0[5][2097152], count_case_1[5][2097152]; // 2097152 = pow(2, 21)
int vote_case[5][2097152];
double probability_case[5][2097152];

/*
 * Builds the per-step pattern tables from train.csv, then replays the
 * training set: each grid is stepped forward delta times, stepped back with
 * reverse_step(), written to pred.csv and compared with the original grid.
 * The mean difference is printed as the training set score.
 *
 * Returns 0 on success. Returns 1 when a file cannot be opened, when a
 * record's delta exceeds the number of per-step tables, or when pred.csv
 * could not be closed cleanly.
 */
int main () {
	
	// utility parameters
	int n, i, c;
	int rc = 0;
	// number of per-step tables; a record's delta must not exceed it
	const int max_delta = (int)(sizeof vote_case / sizeof vote_case[0]);
	
	clean_array((int*)count_case_0, 2097152*5);
	clean_array((int*)count_case_1, 2097152*5);
	
	// read the train.csv data
	FILE *train;
	train = fopen ("train.csv", "r");
	if (train == NULL) {
		perror("train.csv");
		return 1;
	}
	// skip the header line; also stops at EOF so an empty file cannot hang the loop
	while ((c = fgetc(train)) != EOF && c != '\n') ;

	// id and delta start at 0 so they hold defined values even if a read stores nothing
	int id = 0, delta = 0, initial_grid[24][24], start_grid[24][24], stop_grid[24][24];
	clean_array((int*)initial_grid, 24*24);
	clean_array((int*)start_grid, 24*24);
	clean_array((int*)stop_grid, 24*24);
	
	// read all the train set and make statistical table for them
	for (n = 0; n < 50000; ++n) {
		read_train(train, &id, &delta, (int*)start_grid);
		// delta selects a table row below; reject values that would index past the tables
		if (delta > max_delta) {
			fprintf(stderr, "train.csv: record %d has delta %d (maximum supported is %d)\n", n, delta, max_delta);
			fclose(train);
			return 1;
		}
		for (i = 0; i < delta; ++i) {
			conway_step(start_grid, stop_grid);
			vote_step(start_grid, stop_grid, count_case_0[i], count_case_1[i]);
			copy_grid(start_grid, stop_grid);
		}
	}
	
	// vote for statistical table
	for (i = 0; i < max_delta; ++i) {
		for (n = 0; n < 2097152; ++n) {
			if (count_case_0[i][n] < count_case_1[i][n]) vote_case[i][n] = 1;
			else vote_case[i][n] = 0;
			
			// for patterns that were never observed, we assume the original center is definitely 0.
			if (count_case_0[i][n]+count_case_1[i][n] == 0) probability_case[i][n] = 0;
			else probability_case[i][n] = (double)count_case_1[i][n] / (double)(count_case_0[i][n]+count_case_1[i][n]);
		}
	}
	
	//------------------------------------------------------------------
	
	// check prediction accuracy
	///*
	// second pass over the same file: rewind the open stream instead of
	// opening train.csv again, which used to leak the first handle
	rewind(train);
	while ((c = fgetc(train)) != EOF && c != '\n') ;  // skip the header line
	
	FILE *pred;
	pred = fopen ("pred.csv", "w");
	if (pred == NULL) {
		perror("pred.csv");
		fclose(train);
		return 1;
	}
	fprintf(pred, "id,");
	for (n = 1; n < 401; ++n) fprintf(pred, "start.%d,", n);
	for (n = 1; n < 400; ++n) fprintf(pred, "diff.%d,", n);
	fprintf(pred, "diff.400\n");
	
	double difference_all = 0;
	for (n = 0; n < 50000; ++n) {
		read_train (train, &id, &delta, (int*)initial_grid);
		if (delta > max_delta) {
			fprintf(stderr, "train.csv: record %d has delta %d (maximum supported is %d)\n", n, delta, max_delta);
			fclose(pred);
			fclose(train);
			return 1;
		}
		copy_grid(start_grid, initial_grid);

		// run the game forward delta steps ...
		for (i = 0; i < delta; ++i) {
			conway_step(start_grid, stop_grid);
			copy_grid(start_grid, stop_grid);
		}
		
		// ... then walk back one step at a time with the table of that step
		for (i = delta-1; i >= 0; --i) {
			reverse_step (start_grid, stop_grid, vote_case[i], probability_case[i]);
			copy_grid(stop_grid, start_grid);
		}
		write_pred(pred, id, initial_grid, start_grid);
		difference_all += difference_grids(initial_grid, start_grid);
	}
	printf("training set score: %f\n", difference_all/50000);
	// fclose flushes buffered output, so a failure here means pred.csv is incomplete
	if (fclose(pred) != 0) {
		perror("pred.csv");
		rc = 1;
	}
	//*/

	fclose(train);
	
	//------------------------------------------------------------------
	
	// make submission file
	/*
	FILE *test;
	test = fopen ("test.csv", "r");
	if (test == NULL) {
		perror("test.csv");
		return 1;
	}
	while ((c = fgetc(test)) != EOF && c != '\n') ;  // skip the header line of test.csv (train is already closed)
	
	FILE *submission;
	submission = fopen ("submission.csv", "w");
	if (submission == NULL) {
		perror("submission.csv");
		fclose(test);
		return 1;
	}
	fprintf(submission, "id,");
	for (n = 1; n < 400; ++n) fprintf(submission, "start.%d,", n);
	fprintf(submission, "start.400\n");
	
	for (n = 0; n < 50000; ++n) {
		read_test(test, &id, &delta, stop_grid);
		if (delta > max_delta) {
			fprintf(stderr, "test.csv: record %d has delta %d (maximum supported is %d)\n", n, delta, max_delta);
			fclose(submission);
			fclose(test);
			return 1;
		}
		
		for (i = delta-1; i >= 0; --i) {
			reverse_step (start_grid, stop_grid, vote_case[i], probability_case[i]);
			copy_grid(stop_grid, start_grid);
		}
		write_submission(submission, id, start_grid);
	}
	
	fclose(test);
	if (fclose(submission) != 0) {
		perror("submission.csv");
		rc = 1;
	}
	*/
	
	return rc;
	
	// Notes on the reverse check (everything after the vote_case assignment in reverse_step).
	// The improvement seems really tiny.
	// For the cutoff of points which need to be checked in reverse,
	// I used all the predicted data (50000+50000) and got the following table.
	// threshold | score
	//     0     | 0.128226
	//     0.05  | 0.127253
	//     0.1   | 0.126986
	//     0.12  | 0.126851
	//     0.125 | 0.126803  <--
	//     0.127 | 0.126826
	//     0.13  | 0.126824
	//     0.135 | 0.126889
	//     0.14  | 0.126914
	//     0.15  | 0.127000
	
	// So I used 0.127 (should have used 0.125) to predict the test set.
	// predicted set score: 0.12957 (-0.0003)
	// The improvement is even smaller than I thought; I expected around 0.0014.
	// :-(
	
	// The improvement on the training set is probably because we use the training set
	// itself to calculate the scores, which gives bias.
	
	//---------------------
	// Then used 0.125, and set the probability to 0.5 if the pattern never appears.
	// predicted set score: 0.13034 (+0.0004)  -- it even gets worse
	
	//---------------------
	// Ruling out the infrequent patterns doesn't seem to help.
	//     0.125 | 0.127049 (rule out < 2)
	//     0.125 | 0.127095 (rule out < 4)
}
int main(int argc, char **argv[])
{
	string name;
	vector<Mat>Images(100), TestImages(50);
	vector<Mat> Descriptor(100), TestDescriptor(50), TestPcafeature(50);
	vector<vector<KeyPoint>>Keypoints(100), TestKeypoint(50);
	Mat histogram = Mat::zeros(100, Cluster, CV_32F);
	Mat Testhistogram = Mat::zeros(50, Cluster, CV_32F);
	Mat Keyword = Mat::zeros(Cluster, 20, CV_32F);
	Mat full_Descriptor, Pcafeature, Pcaduplicate, clusteridx, trainlabels(100, 1, CV_32F);
	vector<vector<DMatch>> matches(50);
	Mat predicted(Testhistogram.rows, 1, CV_32F);

	// Read Training Images.
	read_train(Images, name);

	//Calculate SIFT features for the Training Images.
	calculate_SIFT(Images,Keypoints,Descriptor);
	merge_descriptor(full_Descriptor,Descriptor);

	//Compute PCA for all the features across all Images.
	PCA pca;
	perform_PCA(full_Descriptor, Pcafeature, pca);
	
	//Perform K-Means on all the PCA reduced features.
	Pcafeature.convertTo(Pcaduplicate, CV_32F);
	calculate_Kmeans(Pcaduplicate, clusteridx);

	//Calculate the Keywords in the Feature Space.
	make_dictionary(clusteridx, Pcaduplicate, Keyword);

	//Get the Histogram for each Training Image.
	hist(Descriptor, clusteridx, histogram);

	//Read Test Image
	read_test(TestImages, name);

	//Calculate the SIFT feature for all the test Images.
	calculate_SIFT(TestImages, TestKeypoint, TestDescriptor);

	//Project the SIFT feature of each feature on the lower dimensional PCA plane calculated above. 
	pca_testProject(TestDescriptor, TestPcafeature, pca);

	//Find the Label by searching for keywords closest to current feature.
	get_matches(TestPcafeature,Keyword,matches);

	//Calculate Histogram for each test Image.
	hist_test(TestDescriptor, matches, Testhistogram);
	
	//Perform classification through Knn Classifier. 
	train_labels(trainlabels);
	KNearest knn;
	train_classifier(histogram, trainlabels, knn);
	test_classify(Testhistogram,predicted,knn);

	//Calculate Accuracy for each class.
	calculate_accuracy(predicted);
	
	getchar();
	return 0;
}
/*
 * Predicts the grid one step earlier than stop_grid and stores it in
 * start_grid.
 *
 * start_grid - output; cells [2..21][2..21] are overwritten
 * stop_grid  - the known later grid
 * vote_case  - table of codes per pattern index: code % 2 is the predicted
 *              cell value, and a code of 2 or more marks the cell for the
 *              trial pass below
 *
 * Each marked cell is passed to flip_point(); unless difference_forward()
 * becomes strictly smaller, flip_point() is applied a second time
 * (presumably undoing the change -- confirm against flip_point).
 */
void reverse_step (int start_grid[][24], int stop_grid[][24], int vote_case[]) {
	FLIP_STACK pending = {.size = 0};
	int row, col;

	// Pass 1: decode the table entry of every interior cell.
	for (row = 2; row < 22; ++row) {
		for (col = 2; col < 22; ++col) {
			const int pattern = get_pattern_idx(stop_grid, row, col);
			const int code = vote_case[pattern];

			start_grid[row][col] = code%2;
			if (code >= 2) {
				push_flip(&pending, row, col);
			}
		}
	}

	// Pass 2: try each marked cell and keep the change only on a strict improvement.
	int remaining = pending.size;
	while (remaining > 0) {
		const double before = difference_forward(start_grid, stop_grid);
		const FLIP_POINT cell = pop_flip(&pending);

		flip_point(cell.i, cell.j, start_grid);
		const double after = difference_forward(start_grid, stop_grid);

		if (before <= after) {
			flip_point(cell.i, cell.j, start_grid);
		}
		--remaining;
	}
}

////////////////////////////////////////////////////////////////////////

/*
 * Statistical tables, all indexed as [step][pattern].
 *   step    - 0..4; main() uses the forward-step counter (0 .. delta-1) here
 *   pattern - 0..2097151; reverse_step() obtains it from get_pattern_idx()
 *
 * count_case_0 / count_case_1 - tallies updated by vote_step()
 * vote_case - code returned by vote_pattern() for each entry; reverse_step()
 *             uses code % 2 as the predicted cell and code >= 2 as the
 *             marker for cells it should trial-flip
 *
 * Each table holds 5 * 2^21 = 10,485,760 elements.
 */
unsigned int count_case_0[5][2097152], count_case_1[5][2097152]; // 2097152 = pow(2, 21)
int vote_case[5][2097152];
// the probability table is disabled in this version:
//double probability_case[5][2097152];

/*
 * Builds the per-step pattern tables from train.csv, then reads test.csv,
 * steps every test grid back delta times with reverse_step() and writes the
 * result to submission.csv. The training-set accuracy check is disabled.
 *
 * Returns 0 on success. Returns 1 when a file cannot be opened, when a
 * record's delta exceeds the number of per-step tables, or when
 * submission.csv could not be closed cleanly.
 */
int main () {
	
	// utility parameters
	int n, i, c;
	int rc = 0;
	// number of per-step tables; a record's delta must not exceed it
	const int max_delta = (int)(sizeof vote_case / sizeof vote_case[0]);
	
	clean_array((int*)count_case_0, 2097152*5);
	clean_array((int*)count_case_1, 2097152*5);
	
	// read the train.csv data
	FILE *train;
	train = fopen ("train.csv", "r");
	if (train == NULL) {
		perror("train.csv");
		return 1;
	}
	// skip the header line; also stops at EOF so an empty file cannot hang the loop
	while ((c = fgetc(train)) != EOF && c != '\n') ;

	// id and delta start at 0 so they hold defined values even if a read stores nothing
	int id = 0, delta = 0, initial_grid[24][24], start_grid[24][24], stop_grid[24][24];
	clean_array((int*)initial_grid, 24*24);
	clean_array((int*)start_grid, 24*24);
	clean_array((int*)stop_grid, 24*24);
	
	// read all the train set and make statistical table for them
	for (n = 0; n < 50000; ++n) {
		read_train(train, &id, &delta, (int*)start_grid);
		// delta selects a table row below; reject values that would index past the tables
		if (delta > max_delta) {
			fprintf(stderr, "train.csv: record %d has delta %d (maximum supported is %d)\n", n, delta, max_delta);
			fclose(train);
			return 1;
		}
		for (i = 0; i < delta; ++i) {
			conway_step(start_grid, stop_grid);
			vote_step(start_grid, stop_grid, count_case_0[i], count_case_1[i]);
			copy_grid(start_grid, stop_grid);
		}
	}
	
	// vote for statistical table
	for (i = 0; i < max_delta; ++i) {
		for (n = 0; n < 2097152; ++n) {
			vote_case[i][n] = vote_pattern(n, count_case_0[i][n], count_case_1[i][n], i);
		}
	}
	
	//------------------------------------------------------------------
	
	// check prediction accuracy
	/*
	// second pass over the same file: rewind the open stream instead of
	// opening train.csv again, which would leak the first handle
	rewind(train);
	while ((c = fgetc(train)) != EOF && c != '\n') ;  // skip the header line
	
	FILE *pred;
	pred = fopen ("pred.csv", "w");
	if (pred == NULL) {
		perror("pred.csv");
		fclose(train);
		return 1;
	}
	fprintf(pred, "id,");
	for (n = 1; n < 401; ++n) fprintf(pred, "start.%d,", n);
	for (n = 1; n < 400; ++n) fprintf(pred, "diff.%d,", n);
	fprintf(pred, "diff.400\n");
	
	double difference_all = 0;
	for (n = 0; n < 50000; ++n) {
		read_train (train, &id, &delta, (int*)initial_grid);
		if (delta > max_delta) {
			fprintf(stderr, "train.csv: record %d has delta %d (maximum supported is %d)\n", n, delta, max_delta);
			fclose(pred);
			fclose(train);
			return 1;
		}
		copy_grid(start_grid, initial_grid);

		for (i = 0; i < delta; ++i) {
			conway_step(start_grid, stop_grid);
			copy_grid(start_grid, stop_grid);
		}
		
		for (i = delta-1; i >= 0; --i) {
			reverse_step (start_grid, stop_grid, vote_case[i]);
			copy_grid(stop_grid, start_grid);
		}
		write_pred(pred, id, initial_grid, start_grid);
		difference_all += difference_grids(initial_grid, start_grid);
	}
	printf("training set score: %f\n", difference_all/50000);
	if (fclose(pred) != 0) {
		perror("pred.csv");
		rc = 1;
	}
	*/

	fclose(train);
	
	//------------------------------------------------------------------
	
	// make submission file
	///*
	FILE *test;
	test = fopen ("test.csv", "r");
	if (test == NULL) {
		perror("test.csv");
		return 1;
	}
	// skip the header line of test.csv; the loop used to read from the
	// already-closed train stream, leaving the header in place
	while ((c = fgetc(test)) != EOF && c != '\n') ;
	
	FILE *submission;
	submission = fopen ("submission.csv", "w");
	if (submission == NULL) {
		perror("submission.csv");
		fclose(test);
		return 1;
	}
	fprintf(submission, "id,");
	for (n = 1; n < 400; ++n) fprintf(submission, "start.%d,", n);
	fprintf(submission, "start.400\n");
	
	for (n = 0; n < 50000; ++n) {
		read_test(test, &id, &delta, stop_grid);
		// vote_case[delta-1] is used below; reject values past the tables
		if (delta > max_delta) {
			fprintf(stderr, "test.csv: record %d has delta %d (maximum supported is %d)\n", n, delta, max_delta);
			fclose(submission);
			fclose(test);
			return 1;
		}
		
		// walk back one step at a time with the table of that step
		for (i = delta-1; i >= 0; --i) {
			reverse_step (start_grid, stop_grid, vote_case[i]);
			copy_grid(stop_grid, start_grid);
		}
		write_submission(submission, id, start_grid);
	}
	
	fclose(test);
	// fclose flushes buffered output, so a failure here means submission.csv is incomplete
	if (fclose(submission) != 0) {
		perror("submission.csv");
		rc = 1;
	}
	//*/
	
	return rc;
	// training set score: 0.124948 (-0.00054)
	// real score: 0.12685 (-0.00002)
	// The actual improvement is really tiny compared to the training set one.
}
/*
 * Builds the per-step pattern tables from train.csv, measures how well
 * reverse_step() recovers the training grids (written to pred.csv, score
 * printed), then produces submission.csv from test.csv.
 *
 * Returns 0 on success. Returns 1 when a file cannot be opened, when a
 * record's delta exceeds the number of per-step tables, or when an output
 * file could not be closed cleanly.
 */
int main () {
	
	// utility parameters
	int n, i, c;
	int rc = 0;
	// number of per-step tables; a record's delta must not exceed it
	const int max_delta = (int)(sizeof vote_case / sizeof vote_case[0]);
	
	clean_array((int*)count_case_0, 2097152*5);
	clean_array((int*)count_case_1, 2097152*5);
	
	// read the train.csv data
	FILE *train;
	train = fopen ("train.csv", "r");
	if (train == NULL) {
		perror("train.csv");
		return 1;
	}
	// skip the header line; also stops at EOF so an empty file cannot hang the loop
	while ((c = fgetc(train)) != EOF && c != '\n') ;

	// id and delta start at 0 so they hold defined values even if a read stores nothing
	int id = 0, delta = 0, initial_grid[24][24], start_grid[24][24], stop_grid[24][24];
	clean_array((int*)initial_grid, 24*24);
	clean_array((int*)start_grid, 24*24);
	clean_array((int*)stop_grid, 24*24);
	
	// read all the train set and make statistical table for them
	for (n = 0; n < 50000; ++n) {
		read_train(train, &id, &delta, (int*)start_grid);
		// delta selects a table row below; reject values that would index past the tables
		if (delta > max_delta) {
			fprintf(stderr, "train.csv: record %d has delta %d (maximum supported is %d)\n", n, delta, max_delta);
			fclose(train);
			return 1;
		}
		for (i = 0; i < delta; ++i) {
			conway_step(start_grid, stop_grid);
			vote_step(start_grid, stop_grid, count_case_0[i], count_case_1[i]);
			copy_grid(start_grid, stop_grid);
		}
	}
	
	// vote for statistical table
	for (i = 0; i < max_delta; ++i) {
		for (n = 0; n < 2097152; ++n) 
			vote_case[i][n] = vote_pattern(n, count_case_0[i][n], count_case_1[i][n], i);
	}
	
	//------------------------------------------------------------------
	
	// check prediction accuracy
	///*
	// second pass over the same file: rewind the open stream instead of
	// opening train.csv again, which used to leak the first handle
	rewind(train);
	while ((c = fgetc(train)) != EOF && c != '\n') ;  // skip the header line
	
	FILE *pred;
	pred = fopen ("pred.csv", "w");
	if (pred == NULL) {
		perror("pred.csv");
		fclose(train);
		return 1;
	}
	fprintf(pred, "id,");
	for (n = 1; n < 401; ++n) fprintf(pred, "start.%d,", n);
	for (n = 1; n < 400; ++n) fprintf(pred, "diff.%d,", n);
	fprintf(pred, "diff.400\n");
	
	double difference_all = 0;
	for (n = 0; n < 50000; ++n) {
		read_train (train, &id, &delta, (int*)initial_grid);
		if (delta > max_delta) {
			fprintf(stderr, "train.csv: record %d has delta %d (maximum supported is %d)\n", n, delta, max_delta);
			fclose(pred);
			fclose(train);
			return 1;
		}
		copy_grid(start_grid, initial_grid);

		// run the game forward delta steps ...
		for (i = 0; i < delta; ++i) {
			conway_step(start_grid, stop_grid);
			copy_grid(start_grid, stop_grid);
		}
		
		// ... then walk back one step at a time with the table of that step
		for (i = delta-1; i >= 0; --i) {
			reverse_step (start_grid, stop_grid, vote_case[i]);
			copy_grid(stop_grid, start_grid);
		}
		write_pred(pred, id, initial_grid, start_grid);
		difference_all += difference_grid(initial_grid, start_grid);
	}
	printf("training set score: %f\n", difference_all/50000);
	// fclose flushes buffered output, so a failure here means pred.csv is incomplete
	if (fclose(pred) != 0) {
		perror("pred.csv");
		rc = 1;
	}
	//*/

	fclose(train);
	
	//------------------------------------------------------------------
	
	// make submission file
	///*
	FILE *test;
	test = fopen ("test.csv", "r");
	if (test == NULL) {
		perror("test.csv");
		return 1;
	}
	// skip the header line of test.csv; the loop used to read from the
	// already-closed train stream, leaving the header in place
	while ((c = fgetc(test)) != EOF && c != '\n') ;
	
	FILE *submission;
	submission = fopen ("submission.csv", "w");
	if (submission == NULL) {
		perror("submission.csv");
		fclose(test);
		return 1;
	}
	fprintf(submission, "id,");
	for (n = 1; n < 400; ++n) fprintf(submission, "start.%d,", n);
	fprintf(submission, "start.400\n");
	
	for (n = 0; n < 50000; ++n) {
		read_test(test, &id, &delta, stop_grid);
		// vote_case[delta-1] is used below; reject values past the tables
		if (delta > max_delta) {
			fprintf(stderr, "test.csv: record %d has delta %d (maximum supported is %d)\n", n, delta, max_delta);
			fclose(submission);
			fclose(test);
			return 1;
		}
		
		for (i = delta-1; i >= 0; --i) {
			reverse_step (start_grid, stop_grid, vote_case[i]);
			copy_grid(stop_grid, start_grid);
		}
		write_submission(submission, id, start_grid);
	}
	
	fclose(test);
	// fclose flushes buffered output, so a failure here means submission.csv is incomplete
	if (fclose(submission) != 0) {
		perror("submission.csv");
		rc = 1;
	}
	//*/
	
	return rc;
	// training set score: 0.12549
	// real score: 0.12687 	

	// I also tried setting the vote_case table cutoff based on the delta number.
	// It may help (tuning the parameters for the entire table takes a long time),
	// but a naive setup of the final step (5->6) to 0.5 makes everything worse.
}