int main (int argc, char *argv[]) { if (argc != 2) { printf( "Usage:\n feature_vs_label [TRAIN FILE]\n\n"); return 1; } //------------------------------------------------------------------ csr *labels = csr_malloc(TRAIN_NROWS, TRAIN_NLABELDATA); csr *features = csr_malloc(TRAIN_NROWS, TRAIN_NFEATUREDATA); FILE *train; train = fopen(argv[1], "r"); read_train(train, labels, features); fclose(train); int i, j; for (i = 0; i < features->nrows; ++i) { for (j = features->ptr[i]; j < features->ptr[i+1]; ++j) { printf("%d ", features->idx[j]); } printf("\n"); } csr_free(labels); csr_free(features); return 0; }
/* Read the train table, print it, then ask the user for a station name and
 * report the next train serving it. */
int main(void)
{
    struct train tr = {};
    read_train(&tr);

    printf("\n===========================================================================\n");
    print_train(tr);
    printf("\n===========================================================================\n");

    printf("Введите название станции, для которой\nнеобходимо найти ближайший поезд: ");

    /* fix: gets() is unsafe and removed in C11, and the original strncat()
     * appended to an UNINITIALIZED required_station (undefined behavior).
     * Read a bounded line with fgets and copy with guaranteed NUL termination. */
    char required_station[20];
    char c[300] = "";
    if (fgets(c, sizeof c, stdin) != NULL)
        c[strcspn(c, "\n")] = '\0';   /* strip the trailing newline, if any */
    snprintf(required_station, sizeof required_station, "%s", c);

    next_station(&tr, required_station);

    /* release memory */
    free(tr.stations_arr);
    return EXIT_SUCCESS;
}
void reverse_step (int start_grid[][24], int stop_grid[][24], int vote_case[], double probability_case[]) { int i, j, n, idx; //int count_ambiguity[4] = {0, 0, 0, 0}; FLIP_STACK fs = {.size = 0}; for (i = 2; i < 22; ++i) { for (j = 2; j < 22; ++j) { idx = get_pattern_idx(stop_grid, i, j); start_grid[i][j] = vote_case[idx]; //if ((0.15 < probability_case[idx]) && (probability_case[idx] < 0.85)) ++count_ambiguity[0]; //if ((0.3 < probability_case[idx]) && (probability_case[idx] < 0.7)) ++count_ambiguity[1]; //if ((0.4 < probability_case[idx]) && (probability_case[idx] < 0.6)) ++count_ambiguity[2]; //if ((0.45 < probability_case[idx]) && (probability_case[idx] < 0.55)) ++count_ambiguity[3]; // put all the points with median probability into a stack if (fabs(0.5-probability_case[idx]) < 0.125) push_flip(&fs, i, j, fabs(0.5-probability_case[idx])); } } //printf("%d,%d,%d,%d,%d,\n", count_1s_grid(stop_grid), // count_ambiguity[0], count_ambiguity[1], count_ambiguity[2], count_ambiguity[3]); //printf("%d %d %f\n", imax, jmax, probability_max); quicksort_flips(fs.items, fs.size); // sort the stack FLIP_POINT ijp; double score_start, score_flip; for (n = fs.size; n > 0; --n) { score_start = difference_forward(start_grid, stop_grid); ijp = pop_flip(&fs); flip_point(ijp.i, ijp.j, start_grid); score_flip = difference_forward(start_grid, stop_grid); if (score_start <= score_flip) flip_point(ijp.i, ijp.j, start_grid); } } //////////////////////////////////////////////////////////////////////// int count_case_0[5][2097152], count_case_1[5][2097152]; // 2097152 = pow(2, 21) int vote_case[5][2097152]; double probability_case[5][2097152]; int main () { // utility parameters int n, i; clean_array((int*)count_case_0, 2097152*5); clean_array((int*)count_case_1, 2097152*5); // read the train.csv data FILE *train; train = fopen ("train.csv", "r"); while (fgetc(train) != '\n') ; // skip the head line int id, delta, initial_grid[24][24], start_grid[24][24], stop_grid[24][24]; 
clean_array((int*)initial_grid, 24*24); clean_array((int*)start_grid, 24*24); clean_array((int*)stop_grid, 24*24); // read all the train set and make statistical table for them for (n = 0; n < 50000; ++n) { read_train(train, &id, &delta, (int*)start_grid); for (i = 0; i < delta; ++i) { conway_step(start_grid, stop_grid); vote_step(start_grid, stop_grid, count_case_0[i], count_case_1[i]); copy_grid(start_grid, stop_grid); } } // vote for statistical table for (i = 0; i < 5; ++i) { for (n = 0; n < 2097152; ++n) { if (count_case_0[i][n] < count_case_1[i][n]) vote_case[i][n] = 1; else vote_case[i][n] = 0; if (count_case_0[i][n]+count_case_1[i][n] == 0) probability_case[i][n] = 0; // for the patterns happen really less frequently, we assume the original center is definitely 0. else probability_case[i][n] = (double)count_case_1[i][n] / (double)(count_case_0[i][n]+count_case_1[i][n]); } } //------------------------------------------------------------------ // check prediction accuracy ///* train = fopen ("train.csv", "r"); while (fgetc(train) != '\n') ; // skip the head line FILE *pred; pred = fopen ("pred.csv", "w"); fprintf(pred, "id,"); for (n = 1; n < 401; ++n) fprintf(pred, "start.%d,", n); for (n = 1; n < 400; ++n) fprintf(pred, "diff.%d,", n); fprintf(pred, "diff.400\n"); double difference_all = 0; for (n = 0; n < 50000; ++n) { read_train (train, &id, &delta, (int*)initial_grid); copy_grid(start_grid, initial_grid); for (i = 0; i < delta; ++i) { //for (i = 0; i < 1; ++i) { conway_step(start_grid, stop_grid); copy_grid(start_grid, stop_grid); } for (i = delta-1; i >= 0; --i) { //for (i = 0; i >= 0; --i) { reverse_step (start_grid, stop_grid, vote_case[i], probability_case[i]); copy_grid(stop_grid, start_grid); } write_pred(pred, id, initial_grid, start_grid); difference_all += difference_grids(initial_grid, start_grid); } printf("training set score: %f\n", difference_all/50000); fclose(pred); //*/ fclose(train); 
//------------------------------------------------------------------ // make submission file /* FILE *test; test = fopen ("test.csv", "r"); while (fgetc(train) != '\n') ; // skip the head line FILE *submission; submission = fopen ("submission.csv", "w"); fprintf(submission, "id,"); for (n = 1; n < 400; ++n) fprintf(submission, "start.%d,", n); fprintf(submission, "start.400\n"); for (n = 0; n < 50000; ++n) { read_test(test, &id, &delta, stop_grid); for (i = delta-1; i >= 0; --i) { reverse_step (start_grid, stop_grid, vote_case[i], probability_case[i]); copy_grid(stop_grid, start_grid); } write_submission(submission, id, start_grid); } fclose(test); fclose(submission); */ return 0; // goes the reverse check (see everything after vote_case in reverse_step). // however, the imporvement seems really tiny. // for the cutoff of points which need to be checked reversely, // I use all the predicted data (50000+50000) and got the following table. // threshold | score // 0 | 0.128226 // 0.05 | 0.127253 // 0.1 | 0.126986 // 0.12 | 0.126851 // 0.125 | 0.126803 <-- // 0.127 | 0.126826 // 0.13 | 0.126824 // 0.135 | 0.126889 // 0.14 | 0.126914 // 0.15 | 0.127000 // so I use 0.127 (should use 0.125) to predict the test set. // predicted set score: 0.12957 (-0.0003) // the improvement is even smaller than I thought, I think it should be around 0.0014. // :-( // probably the imporvement of the training set, is because we use the training set // itself to calculate the scores, which gives bias. //--------------------- // then use 0.125, and set the probably to be 0.5 if the pattern never appear. // predicted set score: 0.13034 (+0.0004) -- even go worse~~ //--------------------- // if ruling out the unfrequent patterns, it doesn't seem help. // 0.125 | 0.127049 (rule out < 2) // 0.125 | 0.127095 (rule out < 4) }
int main(int argc, char **argv[]) { string name; vector<Mat>Images(100), TestImages(50); vector<Mat> Descriptor(100), TestDescriptor(50), TestPcafeature(50); vector<vector<KeyPoint>>Keypoints(100), TestKeypoint(50); Mat histogram = Mat::zeros(100, Cluster, CV_32F); Mat Testhistogram = Mat::zeros(50, Cluster, CV_32F); Mat Keyword = Mat::zeros(Cluster, 20, CV_32F); Mat full_Descriptor, Pcafeature, Pcaduplicate, clusteridx, trainlabels(100, 1, CV_32F); vector<vector<DMatch>> matches(50); Mat predicted(Testhistogram.rows, 1, CV_32F); // Read Training Images. read_train(Images, name); //Calculate SIFT features for the Training Images. calculate_SIFT(Images,Keypoints,Descriptor); merge_descriptor(full_Descriptor,Descriptor); //Compute PCA for all the features across all Images. PCA pca; perform_PCA(full_Descriptor, Pcafeature, pca); //Perform K-Means on all the PCA reduced features. Pcafeature.convertTo(Pcaduplicate, CV_32F); calculate_Kmeans(Pcaduplicate, clusteridx); //Calculate the Keywords in the Feature Space. make_dictionary(clusteridx, Pcaduplicate, Keyword); //Get the Histogram for each Training Image. hist(Descriptor, clusteridx, histogram); //Read Test Image read_test(TestImages, name); //Calculate the SIFT feature for all the test Images. calculate_SIFT(TestImages, TestKeypoint, TestDescriptor); //Project the SIFT feature of each feature on the lower dimensional PCA plane calculated above. pca_testProject(TestDescriptor, TestPcafeature, pca); //Find the Label by searching for keywords closest to current feature. get_matches(TestPcafeature,Keyword,matches); //Calculate Histogram for each test Image. hist_test(TestDescriptor, matches, Testhistogram); //Perform classification through Knn Classifier. train_labels(trainlabels); KNearest knn; train_classifier(histogram, trainlabels, knn); test_classify(Testhistogram,predicted,knn); //Calculate Accuracy for each class. calculate_accuracy(predicted); getchar(); return 0; }
void reverse_step (int start_grid[][24], int stop_grid[][24], int vote_case[]) { int i, j, n, idx; FLIP_STACK fs = {.size = 0}; for (i = 2; i < 22; ++i) { for (j = 2; j < 22; ++j) { idx = get_pattern_idx(stop_grid, i, j); start_grid[i][j] = vote_case[idx]%2; if (vote_case[idx] >= 2) push_flip(&fs, i, j); } } FLIP_POINT ijp; double score_start, score_flip; for (n = fs.size; n > 0; --n) { score_start = difference_forward(start_grid, stop_grid); ijp = pop_flip(&fs); flip_point(ijp.i, ijp.j, start_grid); score_flip = difference_forward(start_grid, stop_grid); if (score_start <= score_flip) flip_point(ijp.i, ijp.j, start_grid); } } //////////////////////////////////////////////////////////////////////// unsigned int count_case_0[5][2097152], count_case_1[5][2097152]; // 2097152 = pow(2, 21) int vote_case[5][2097152]; //double probability_case[5][2097152]; int main () { // utility parameters int n, i; clean_array((int*)count_case_0, 2097152*5); clean_array((int*)count_case_1, 2097152*5); // read the train.csv data FILE *train; train = fopen ("train.csv", "r"); while (fgetc(train) != '\n') ; // skip the head line int id, delta, initial_grid[24][24], start_grid[24][24], stop_grid[24][24]; clean_array((int*)initial_grid, 24*24); clean_array((int*)start_grid, 24*24); clean_array((int*)stop_grid, 24*24); // read all the train set and make statistical table for them for (n = 0; n < 50000; ++n) { read_train(train, &id, &delta, (int*)start_grid); for (i = 0; i < delta; ++i) { conway_step(start_grid, stop_grid); vote_step(start_grid, stop_grid, count_case_0[i], count_case_1[i]); copy_grid(start_grid, stop_grid); } } // vote for statistical table for (i = 0; i < 5; ++i) { for (n = 0; n < 2097152; ++n) { vote_case[i][n] = vote_pattern(n, count_case_0[i][n], count_case_1[i][n], i); //if (count_case_0[i][n]+count_case_1[i][n] == 0) probability_case[i][n] = 0; //else probability_case[i][n] = (double)count_case_1[i][n] / (double)(count_case_0[i][n]+count_case_1[i][n]); } } 
//------------------------------------------------------------------ // check prediction accuracy /* train = fopen ("train.csv", "r"); while (fgetc(train) != '\n') ; // skip the head line FILE *pred; pred = fopen ("pred.csv", "w"); fprintf(pred, "id,"); for (n = 1; n < 401; ++n) fprintf(pred, "start.%d,", n); for (n = 1; n < 400; ++n) fprintf(pred, "diff.%d,", n); fprintf(pred, "diff.400\n"); double difference_all = 0; for (n = 0; n < 50000; ++n) { read_train (train, &id, &delta, (int*)initial_grid); copy_grid(start_grid, initial_grid); for (i = 0; i < delta; ++i) { conway_step(start_grid, stop_grid); copy_grid(start_grid, stop_grid); } for (i = delta-1; i >= 0; --i) { reverse_step (start_grid, stop_grid, vote_case[i]); copy_grid(stop_grid, start_grid); } write_pred(pred, id, initial_grid, start_grid); difference_all += difference_grids(initial_grid, start_grid); } printf("training set score: %f\n", difference_all/50000); fclose(pred); */ fclose(train); //------------------------------------------------------------------ // make submission file ///* FILE *test; test = fopen ("test.csv", "r"); while (fgetc(train) != '\n') ; // skip the head line FILE *submission; submission = fopen ("submission.csv", "w"); fprintf(submission, "id,"); for (n = 1; n < 400; ++n) fprintf(submission, "start.%d,", n); fprintf(submission, "start.400\n"); for (n = 0; n < 50000; ++n) { read_test(test, &id, &delta, stop_grid); for (i = delta-1; i >= 0; --i) { reverse_step (start_grid, stop_grid, vote_case[i]); copy_grid(stop_grid, start_grid); } write_submission(submission, id, start_grid); } fclose(test); fclose(submission); //*/ return 0; // training set score: 0.124948 (-0.00054) // real score: 0.12685 (-0.00002) // actual improvement is really tiny compare to the training set one. }
int main () { // utility parameters int n, i; clean_array((int*)count_case_0, 2097152*5); clean_array((int*)count_case_1, 2097152*5); // read the train.csv data FILE *train; train = fopen ("train.csv", "r"); while (fgetc(train) != '\n') ; // skip the head line int id, delta, initial_grid[24][24], start_grid[24][24], stop_grid[24][24]; clean_array((int*)initial_grid, 24*24); clean_array((int*)start_grid, 24*24); clean_array((int*)stop_grid, 24*24); // read all the train set and make statistical table for them for (n = 0; n < 50000; ++n) { read_train(train, &id, &delta, (int*)start_grid); for (i = 0; i < delta; ++i) { conway_step(start_grid, stop_grid); vote_step(start_grid, stop_grid, count_case_0[i], count_case_1[i]); copy_grid(start_grid, stop_grid); } } // vote for statistical table for (i = 0; i < 5; ++i) { for (n = 0; n < 2097152; ++n) vote_case[i][n] = vote_pattern(n, count_case_0[i][n], count_case_1[i][n], i); } //------------------------------------------------------------------ // check prediction accuracy ///* train = fopen ("train.csv", "r"); while (fgetc(train) != '\n') ; // skip the head line FILE *pred; pred = fopen ("pred.csv", "w"); fprintf(pred, "id,"); for (n = 1; n < 401; ++n) fprintf(pred, "start.%d,", n); for (n = 1; n < 400; ++n) fprintf(pred, "diff.%d,", n); fprintf(pred, "diff.400\n"); double difference_all = 0; for (n = 0; n < 50000; ++n) { read_train (train, &id, &delta, (int*)initial_grid); copy_grid(start_grid, initial_grid); for (i = 0; i < delta; ++i) { conway_step(start_grid, stop_grid); copy_grid(start_grid, stop_grid); } for (i = delta-1; i >= 0; --i) { reverse_step (start_grid, stop_grid, vote_case[i]); copy_grid(stop_grid, start_grid); } write_pred(pred, id, initial_grid, start_grid); difference_all += difference_grid(initial_grid, start_grid); } printf("training set score: %f\n", difference_all/50000); fclose(pred); //*/ fclose(train); //------------------------------------------------------------------ // make submission file 
///* FILE *test; test = fopen ("test.csv", "r"); while (fgetc(train) != '\n') ; // skip the head line FILE *submission; submission = fopen ("submission.csv", "w"); fprintf(submission, "id,"); for (n = 1; n < 400; ++n) fprintf(submission, "start.%d,", n); fprintf(submission, "start.400\n"); for (n = 0; n < 50000; ++n) { read_test(test, &id, &delta, stop_grid); for (i = delta-1; i >= 0; --i) { reverse_step (start_grid, stop_grid, vote_case[i]); copy_grid(stop_grid, start_grid); } write_submission(submission, id, start_grid); } fclose(test); fclose(submission); //*/ return 0; // training set score: 0.12549 // real score: 0.12687 // I also tried set up vote_case table cutoff based on different delta number. // it may helps (the entire table parameter need a long time to be fixed), // but a naive setup of the final step (5->6) to 0.5 makes everything goes worse. }