/*--------------------------------------------------------*/ void AzsSvrg::_train_test() { if (rseed > 0) { srand(rseed); /* initialize the random seed */ } /*--- initialization ---*/ int dim = m_trn_x->rowNum(); reset_weights(dim); /*--- iterate ... ---*/ AzTimeLog::print("--- Training begins ... ", log_out); AzsSvrgData_fast prev_fast; AzsSvrgData_compact prev_compact; int ite; for (ite = 0; ite < ite_num; ++ite) { if (do_show_timing) AzTimeLog::print("--- iteration#", ite+1, log_out); if (doing_svrg(ite) && (ite-sgd_ite) % svrg_interval == 0) { if (do_show_timing) AzTimeLog::print("Computing gradient average ... ", log_out); if (do_compact) get_avg_gradient_compact(&prev_compact); else get_avg_gradient_fast(&prev_fast); } if (do_show_timing) AzTimeLog::print("Updating weights ... ", log_out); AzIntArr ia_dxs; const int *dxs = gen_seq(dataSize(), ia_dxs); int ix; for (ix = 0; ix < dataSize(); ++ix) { int dx = dxs[ix]; /* data point index */ AzDvect v_deriv(class_num); get_deriv(dx, &v_deriv); /* compute the derivatives */ if (doing_svrg(ite)) { if (do_compact) updateDelta_svrg_compact(dx, &v_deriv, prev_compact); else updateDelta_svrg_fast(dx, &v_deriv, prev_fast); } else { updateDelta_sgd(dx, &v_deriv); } flushDelta(); } show_perf(ite); } if (do_show_timing) AzTimeLog::print("--- End of training ... ", log_out); /*--- write predictions to a file if requested ---*/ if (s_pred_fn.length() > 0) { AzTimeLog::print("Writing predictions to ", s_pred_fn.c_str(), log_out); write_pred(m_tst_x, s_pred_fn.c_str()); } }
void reverse_step (int start_grid[][24], int stop_grid[][24], int vote_case[], double probability_case[]) { int i, j, n, idx; //int count_ambiguity[4] = {0, 0, 0, 0}; FLIP_STACK fs = {.size = 0}; for (i = 2; i < 22; ++i) { for (j = 2; j < 22; ++j) { idx = get_pattern_idx(stop_grid, i, j); start_grid[i][j] = vote_case[idx]; //if ((0.15 < probability_case[idx]) && (probability_case[idx] < 0.85)) ++count_ambiguity[0]; //if ((0.3 < probability_case[idx]) && (probability_case[idx] < 0.7)) ++count_ambiguity[1]; //if ((0.4 < probability_case[idx]) && (probability_case[idx] < 0.6)) ++count_ambiguity[2]; //if ((0.45 < probability_case[idx]) && (probability_case[idx] < 0.55)) ++count_ambiguity[3]; // put all the points with median probability into a stack if (fabs(0.5-probability_case[idx]) < 0.125) push_flip(&fs, i, j, fabs(0.5-probability_case[idx])); } } //printf("%d,%d,%d,%d,%d,\n", count_1s_grid(stop_grid), // count_ambiguity[0], count_ambiguity[1], count_ambiguity[2], count_ambiguity[3]); //printf("%d %d %f\n", imax, jmax, probability_max); quicksort_flips(fs.items, fs.size); // sort the stack FLIP_POINT ijp; double score_start, score_flip; for (n = fs.size; n > 0; --n) { score_start = difference_forward(start_grid, stop_grid); ijp = pop_flip(&fs); flip_point(ijp.i, ijp.j, start_grid); score_flip = difference_forward(start_grid, stop_grid); if (score_start <= score_flip) flip_point(ijp.i, ijp.j, start_grid); } } //////////////////////////////////////////////////////////////////////// int count_case_0[5][2097152], count_case_1[5][2097152]; // 2097152 = pow(2, 21) int vote_case[5][2097152]; double probability_case[5][2097152]; int main () { // utility parameters int n, i; clean_array((int*)count_case_0, 2097152*5); clean_array((int*)count_case_1, 2097152*5); // read the train.csv data FILE *train; train = fopen ("train.csv", "r"); while (fgetc(train) != '\n') ; // skip the head line int id, delta, initial_grid[24][24], start_grid[24][24], stop_grid[24][24]; 
clean_array((int*)initial_grid, 24*24); clean_array((int*)start_grid, 24*24); clean_array((int*)stop_grid, 24*24); // read all the train set and make statistical table for them for (n = 0; n < 50000; ++n) { read_train(train, &id, &delta, (int*)start_grid); for (i = 0; i < delta; ++i) { conway_step(start_grid, stop_grid); vote_step(start_grid, stop_grid, count_case_0[i], count_case_1[i]); copy_grid(start_grid, stop_grid); } } // vote for statistical table for (i = 0; i < 5; ++i) { for (n = 0; n < 2097152; ++n) { if (count_case_0[i][n] < count_case_1[i][n]) vote_case[i][n] = 1; else vote_case[i][n] = 0; if (count_case_0[i][n]+count_case_1[i][n] == 0) probability_case[i][n] = 0; // for the patterns happen really less frequently, we assume the original center is definitely 0. else probability_case[i][n] = (double)count_case_1[i][n] / (double)(count_case_0[i][n]+count_case_1[i][n]); } } //------------------------------------------------------------------ // check prediction accuracy ///* train = fopen ("train.csv", "r"); while (fgetc(train) != '\n') ; // skip the head line FILE *pred; pred = fopen ("pred.csv", "w"); fprintf(pred, "id,"); for (n = 1; n < 401; ++n) fprintf(pred, "start.%d,", n); for (n = 1; n < 400; ++n) fprintf(pred, "diff.%d,", n); fprintf(pred, "diff.400\n"); double difference_all = 0; for (n = 0; n < 50000; ++n) { read_train (train, &id, &delta, (int*)initial_grid); copy_grid(start_grid, initial_grid); for (i = 0; i < delta; ++i) { //for (i = 0; i < 1; ++i) { conway_step(start_grid, stop_grid); copy_grid(start_grid, stop_grid); } for (i = delta-1; i >= 0; --i) { //for (i = 0; i >= 0; --i) { reverse_step (start_grid, stop_grid, vote_case[i], probability_case[i]); copy_grid(stop_grid, start_grid); } write_pred(pred, id, initial_grid, start_grid); difference_all += difference_grids(initial_grid, start_grid); } printf("training set score: %f\n", difference_all/50000); fclose(pred); //*/ fclose(train); 
//------------------------------------------------------------------ // make submission file /* FILE *test; test = fopen ("test.csv", "r"); while (fgetc(train) != '\n') ; // skip the head line FILE *submission; submission = fopen ("submission.csv", "w"); fprintf(submission, "id,"); for (n = 1; n < 400; ++n) fprintf(submission, "start.%d,", n); fprintf(submission, "start.400\n"); for (n = 0; n < 50000; ++n) { read_test(test, &id, &delta, stop_grid); for (i = delta-1; i >= 0; --i) { reverse_step (start_grid, stop_grid, vote_case[i], probability_case[i]); copy_grid(stop_grid, start_grid); } write_submission(submission, id, start_grid); } fclose(test); fclose(submission); */ return 0; // goes the reverse check (see everything after vote_case in reverse_step). // however, the imporvement seems really tiny. // for the cutoff of points which need to be checked reversely, // I use all the predicted data (50000+50000) and got the following table. // threshold | score // 0 | 0.128226 // 0.05 | 0.127253 // 0.1 | 0.126986 // 0.12 | 0.126851 // 0.125 | 0.126803 <-- // 0.127 | 0.126826 // 0.13 | 0.126824 // 0.135 | 0.126889 // 0.14 | 0.126914 // 0.15 | 0.127000 // so I use 0.127 (should use 0.125) to predict the test set. // predicted set score: 0.12957 (-0.0003) // the improvement is even smaller than I thought, I think it should be around 0.0014. // :-( // probably the imporvement of the training set, is because we use the training set // itself to calculate the scores, which gives bias. //--------------------- // then use 0.125, and set the probably to be 0.5 if the pattern never appear. // predicted set score: 0.13034 (+0.0004) -- even go worse~~ //--------------------- // if ruling out the unfrequent patterns, it doesn't seem help. // 0.125 | 0.127049 (rule out < 2) // 0.125 | 0.127095 (rule out < 4) }
/**
   Write a statement (triple, optionally in a named graph) to the writer.

   @param writer    Writer to emit to.
   @param flags     SerdStatementFlags controlling abbreviation
                    (anonymous nodes, list continuations, etc.).
   @param graph     Graph node, or NULL.
   @param subject   Statement subject (must be a resource with a buffer).
   @param predicate Statement predicate (must be a resource with a buffer).
   @param object    Statement object.
   @param datatype  Literal datatype of @p object, or NULL.
   @param lang      Literal language of @p object, or NULL.
   @return SERD_SUCCESS, SERD_ERR_BAD_ARG on invalid nodes, or
           SERD_ERR_UNKNOWN if writing a node fails.

   For SERD_NTRIPLES the statement is written verbatim; otherwise (Turtle)
   the writer's context is consulted to abbreviate repeated subjects and
   predicates, and anonymous/list nodes push a new context on anon_stack.
*/
SERD_API
SerdStatus
serd_writer_write_statement(SerdWriter*        writer,
                            SerdStatementFlags flags,
                            const SerdNode*    graph,
                            const SerdNode*    subject,
                            const SerdNode*    predicate,
                            const SerdNode*    object,
                            const SerdNode*    datatype,
                            const SerdNode*    lang)
{
	if (!subject || !predicate || !object
	    || !subject->buf || !predicate->buf || !object->buf
	    || !is_resource(subject) || !is_resource(predicate)) {
		return SERD_ERR_BAD_ARG;
	}

	/* BUGFIX: wrapped in do { } while (0) so TRY(x); expands to a single
	   statement and is safe inside unbraced if/else (dangling-else hazard). */
	#define TRY(write_result) \
	do { \
		if (!write_result) { \
			return SERD_ERR_UNKNOWN; \
		} \
	} while (0)

	switch (writer->syntax) {
	case SERD_NTRIPLES:
		/* NTriples: no abbreviation, one statement per line. */
		TRY(write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags));
		sink(" ", 1, writer);
		TRY(write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags));
		sink(" ", 1, writer);
		TRY(write_node(writer, object, datatype, lang, FIELD_OBJECT, flags));
		sink(" .\n", 3, writer);
		return SERD_SUCCESS;
	default:
		break;
	}

	if ((flags & SERD_LIST_CONT)) {
		if (write_list_obj(writer, flags, predicate, object, datatype, lang)) {
			// Reached end of list
			if (--writer->list_depth == 0 && writer->list_subj.type) {
				reset_context(writer, true);
				writer->context.subject = writer->list_subj;
				writer->list_subj      = SERD_NODE_NULL;
			}
			return SERD_SUCCESS;
		}
	} else if (serd_node_equals(subject, &writer->context.subject)) {
		if (serd_node_equals(predicate, &writer->context.predicate)) {
			// Abbreviate S P
			if (!(flags & SERD_ANON_O_BEGIN)) {
				++writer->indent;
			}
			write_sep(writer, SEP_END_O);
			write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
			if (!(flags & SERD_ANON_O_BEGIN)) {
				--writer->indent;
			}
		} else {
			// Abbreviate S
			Sep sep = writer->context.predicate.type ? SEP_END_P : SEP_S_P;
			write_sep(writer, sep);
			write_pred(writer, flags, predicate);
			write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
		}
	} else {
		// No abbreviation
		if (writer->context.subject.type) {
			assert(writer->indent > 0);
			--writer->indent;
			if (serd_stack_is_empty(&writer->anon_stack)) {
				write_sep(writer, SEP_END_S);
			}
		} else if (!writer->empty) {
			write_sep(writer, SEP_S_P);
		}

		if (!(flags & SERD_ANON_CONT)) {
			write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags);
			++writer->indent;
			write_sep(writer, SEP_S_P);
		} else {
			++writer->indent;
		}

		reset_context(writer, true);
		copy_node(&writer->context.subject, subject);

		if (!(flags & SERD_LIST_S_BEGIN)) {
			write_pred(writer, flags, predicate);
		}

		write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
	}

	if (flags & (SERD_ANON_S_BEGIN|SERD_ANON_O_BEGIN)) {
		// Entering an anonymous node: save the current context on the stack.
		WriteContext* ctx = (WriteContext*)serd_stack_push(
			&writer->anon_stack, sizeof(WriteContext));
		*ctx = writer->context;
		WriteContext new_context = {
			serd_node_copy(graph), serd_node_copy(subject), SERD_NODE_NULL };
		if ((flags & SERD_ANON_S_BEGIN)) {
			new_context.predicate = serd_node_copy(predicate);
		}
		writer->context = new_context;
	} else {
		copy_node(&writer->context.graph, graph);
		copy_node(&writer->context.subject, subject);
		copy_node(&writer->context.predicate, predicate);
	}

	return SERD_SUCCESS;
}
int main () { // utility parameters int n, i; clean_array((int*)count_case_0, 2097152*5); clean_array((int*)count_case_1, 2097152*5); // read the train.csv data FILE *train; train = fopen ("train.csv", "r"); while (fgetc(train) != '\n') ; // skip the head line int id, delta, initial_grid[24][24], start_grid[24][24], stop_grid[24][24]; clean_array((int*)initial_grid, 24*24); clean_array((int*)start_grid, 24*24); clean_array((int*)stop_grid, 24*24); // read all the train set and make statistical table for them for (n = 0; n < 50000; ++n) { read_train(train, &id, &delta, (int*)start_grid); for (i = 0; i < delta; ++i) { conway_step(start_grid, stop_grid); vote_step(start_grid, stop_grid, count_case_0[i], count_case_1[i]); copy_grid(start_grid, stop_grid); } } // vote for statistical table for (i = 0; i < 5; ++i) { for (n = 0; n < 2097152; ++n) vote_case[i][n] = vote_pattern(n, count_case_0[i][n], count_case_1[i][n], i); } //------------------------------------------------------------------ // check prediction accuracy ///* train = fopen ("train.csv", "r"); while (fgetc(train) != '\n') ; // skip the head line FILE *pred; pred = fopen ("pred.csv", "w"); fprintf(pred, "id,"); for (n = 1; n < 401; ++n) fprintf(pred, "start.%d,", n); for (n = 1; n < 400; ++n) fprintf(pred, "diff.%d,", n); fprintf(pred, "diff.400\n"); double difference_all = 0; for (n = 0; n < 50000; ++n) { read_train (train, &id, &delta, (int*)initial_grid); copy_grid(start_grid, initial_grid); for (i = 0; i < delta; ++i) { conway_step(start_grid, stop_grid); copy_grid(start_grid, stop_grid); } for (i = delta-1; i >= 0; --i) { reverse_step (start_grid, stop_grid, vote_case[i]); copy_grid(stop_grid, start_grid); } write_pred(pred, id, initial_grid, start_grid); difference_all += difference_grid(initial_grid, start_grid); } printf("training set score: %f\n", difference_all/50000); fclose(pred); //*/ fclose(train); //------------------------------------------------------------------ // make submission file 
///* FILE *test; test = fopen ("test.csv", "r"); while (fgetc(train) != '\n') ; // skip the head line FILE *submission; submission = fopen ("submission.csv", "w"); fprintf(submission, "id,"); for (n = 1; n < 400; ++n) fprintf(submission, "start.%d,", n); fprintf(submission, "start.400\n"); for (n = 0; n < 50000; ++n) { read_test(test, &id, &delta, stop_grid); for (i = delta-1; i >= 0; --i) { reverse_step (start_grid, stop_grid, vote_case[i]); copy_grid(stop_grid, start_grid); } write_submission(submission, id, start_grid); } fclose(test); fclose(submission); //*/ return 0; // training set score: 0.12549 // real score: 0.12687 // I also tried set up vote_case table cutoff based on different delta number. // it may helps (the entire table parameter need a long time to be fixed), // but a naive setup of the final step (5->6) to 0.5 makes everything goes worse. }