Example #1
0
/*--------------------------------------------------------*/
void AzsSvrg::_train_test()
{
  /* Seed the RNG only when a positive seed was supplied. */
  if (rseed > 0) {
    srand(rseed);
  }

  /*---  initialization  ---*/
  const int feat_dim = m_trn_x->rowNum();
  reset_weights(feat_dim);

  /*---  iterate ... ---*/
  AzTimeLog::print("---  Training begins ... ", log_out);
  AzsSvrgData_fast prev_fast;
  AzsSvrgData_compact prev_compact;
  for (int epoch = 0; epoch < ite_num; ++epoch) {
    if (do_show_timing) AzTimeLog::print("---  iteration#", epoch+1, log_out);
    /* Recompute the stored average gradient at the configured interval
       (only once the SVRG phase has started). */
    if (doing_svrg(epoch) && (epoch-sgd_ite) % svrg_interval == 0) {
      if (do_show_timing) AzTimeLog::print("Computing gradient average ... ", log_out);
      if (do_compact) get_avg_gradient_compact(&prev_compact);
      else            get_avg_gradient_fast(&prev_fast);
    }

    if (do_show_timing) AzTimeLog::print("Updating weights ... ", log_out);
    AzIntArr ia_dxs;
    const int *dxs = gen_seq(dataSize(), ia_dxs);
    for (int pos = 0; pos < dataSize(); ++pos) {
      const int data_idx = dxs[pos];  /* data point index */
      AzDvect v_deriv(class_num);
      get_deriv(data_idx, &v_deriv); /* compute the derivatives */
      if (doing_svrg(epoch)) {
        /* SVRG phase: variance-reduced update using the stored snapshot. */
        if (do_compact) updateDelta_svrg_compact(data_idx, &v_deriv, prev_compact);
        else            updateDelta_svrg_fast(data_idx, &v_deriv, prev_fast);
      }
      else {
        /* Plain SGD warm-up phase. */
        updateDelta_sgd(data_idx, &v_deriv);
      }
      flushDelta();
    }
    show_perf(epoch);
  }

  if (do_show_timing) AzTimeLog::print("--- End of training ... ", log_out);

  /*---  write predictions to a file if requested  ---*/
  if (s_pred_fn.length() > 0) {
    AzTimeLog::print("Writing predictions to ", s_pred_fn.c_str(), log_out);
    write_pred(m_tst_x, s_pred_fn.c_str());
  }
}
/* Predict the board one step backwards: fill start_grid from stop_grid
 * using the voted lookup table, then greedily re-check the ambiguous
 * cells by flipping them and keeping only flips that improve the
 * forward-simulation match against stop_grid. */
void reverse_step (int start_grid[][24], int stop_grid[][24], int vote_case[], double probability_case[]) {
	int row, col;
	FLIP_STACK candidates = {.size = 0};

	/* First pass: predict every interior cell from the voted table. */
	for (row = 2; row < 22; ++row) {
		for (col = 2; col < 22; ++col) {
			int idx = get_pattern_idx(stop_grid, row, col);
			start_grid[row][col] = vote_case[idx];

			/* Cells whose probability is close to 0.5 are ambiguous;
			   collect them for the flip-and-check refinement below. */
			if (fabs(0.5 - probability_case[idx]) < 0.125)
				push_flip(&candidates, row, col, fabs(0.5 - probability_case[idx]));
		}
	}

	quicksort_flips(candidates.items, candidates.size); /* order by ambiguity score */

	/* Second pass: try flipping each ambiguous cell; revert the flip
	   unless it strictly improves the forward score. */
	int remaining;
	for (remaining = candidates.size; remaining > 0; --remaining) {
		double base_score = difference_forward(start_grid, stop_grid);
		FLIP_POINT cand = pop_flip(&candidates);
		flip_point(cand.i, cand.j, start_grid);
		double flip_score = difference_forward(start_grid, stop_grid);
		if (base_score <= flip_score)
			flip_point(cand.i, cand.j, start_grid); /* revert: no improvement */
	}
}

////////////////////////////////////////////////////////////////////////

// Per-delta-step (index 0..4) statistics over all 2^21 = 2097152 possible
// 21-bit neighborhood patterns (as produced by get_pattern_idx).
// count_case_0/1[d][p]: how often pattern p in the stop grid had a dead (0)
// or live (1) center cell in the preceding grid, at step d.
int count_case_0[5][2097152], count_case_1[5][2097152]; // 2097152 = pow(2, 21)
// Majority vote per pattern: 1 iff the center was live more often than dead.
int vote_case[5][2097152];
// Fraction of observations with a live center; 0 for never-seen patterns.
double probability_case[5][2097152];

/* Build per-step pattern statistics from train.csv, then evaluate the
 * reverse predictor on the training set and write pred.csv.  The block
 * producing submission.csv from test.csv is kept commented out, as in
 * the original workflow.  Returns 0 on success, 1 if an input/output
 * file cannot be opened. */
int main () {
	
	// utility parameters
	int n, i;
	
	// zero the per-step pattern counters before accumulating
	clean_array((int*)count_case_0, 2097152*5);
	clean_array((int*)count_case_1, 2097152*5);
	
	// read the train.csv data
	FILE *train = fopen ("train.csv", "r");
	if (train == NULL) {
		fprintf(stderr, "cannot open train.csv\n");
		return 1;
	}
	while (fgetc(train) != '\n') ;  // skip the head line

	int id, delta, initial_grid[24][24], start_grid[24][24], stop_grid[24][24];
	clean_array((int*)initial_grid, 24*24);
	clean_array((int*)start_grid, 24*24);
	clean_array((int*)stop_grid, 24*24);
	
	// read all the train set and make statistical table for them:
	// step each board forward and let vote_step accumulate, per step i,
	// counts for every pattern seen in the stop grid.
	for (n = 0; n < 50000; ++n) {
		read_train(train, &id, &delta, (int*)start_grid);
		for (i = 0; i < delta; ++i) {
			conway_step(start_grid, stop_grid);
			vote_step(start_grid, stop_grid, count_case_0[i], count_case_1[i]);
			copy_grid(start_grid, stop_grid);
		}
	}
	
	// vote for statistical table
	for (i = 0; i < 5; ++i) {
		for (n = 0; n < 2097152; ++n) {
			if (count_case_0[i][n] < count_case_1[i][n]) vote_case[i][n] = 1;
			else vote_case[i][n] = 0;
			
			if (count_case_0[i][n]+count_case_1[i][n] == 0) probability_case[i][n] = 0;
			// for the patterns happen really less frequently, we assume the original center is definitely 0.
			else probability_case[i][n] = (double)count_case_1[i][n] / (double)(count_case_0[i][n]+count_case_1[i][n]);
		}
	}
	
	//------------------------------------------------------------------
	
	// check prediction accuracy
	///*
	// rewind instead of reopening train.csv: the previous fopen() here
	// leaked the FILE handle that was already open on the same file.
	rewind(train);
	while (fgetc(train) != '\n') ;  // skip the head line
	
	FILE *pred = fopen ("pred.csv", "w");
	if (pred == NULL) {
		fprintf(stderr, "cannot open pred.csv\n");
		fclose(train);
		return 1;
	}
	fprintf(pred, "id,");
	for (n = 1; n < 401; ++n) fprintf(pred, "start.%d,", n);
	for (n = 1; n < 400; ++n) fprintf(pred, "diff.%d,", n);
	fprintf(pred, "diff.400\n");
	
	double difference_all = 0;
	for (n = 0; n < 50000; ++n) {
		read_train (train, &id, &delta, (int*)initial_grid);
		copy_grid(start_grid, initial_grid);

		// roll the known start grid forward delta steps to get a stop grid
		for (i = 0; i < delta; ++i) {
			conway_step(start_grid, stop_grid);
			copy_grid(start_grid, stop_grid);
		}
		
		// then predict backwards delta steps and compare against the truth
		for (i = delta-1; i >= 0; --i) {
			reverse_step (start_grid, stop_grid, vote_case[i], probability_case[i]);
			copy_grid(stop_grid, start_grid);
		}
		write_pred(pred, id, initial_grid, start_grid);
		difference_all += difference_grids(initial_grid, start_grid);
	}
	printf("training set score: %f\n", difference_all/50000);
	fclose(pred);
	//*/

	fclose(train);
	
	//------------------------------------------------------------------
	
	// make submission file
	/*
	FILE *test;
	test = fopen ("test.csv", "r");
	while (fgetc(test) != '\n') ;  // skip the head line (was fgetc(train): wrong handle)
	
	FILE *submission;
	submission = fopen ("submission.csv", "w");
	fprintf(submission, "id,");
	for (n = 1; n < 400; ++n) fprintf(submission, "start.%d,", n);
	fprintf(submission, "start.400\n");
	
	for (n = 0; n < 50000; ++n) {
		read_test(test, &id, &delta, stop_grid);
		
		for (i = delta-1; i >= 0; --i) {
			reverse_step (start_grid, stop_grid, vote_case[i], probability_case[i]);
			copy_grid(stop_grid, start_grid);
		}
		write_submission(submission, id, start_grid);
	}
	
	fclose(test);
	fclose(submission);
	*/
	
	return 0;
	
	// goes the reverse check (see everything after vote_case in reverse_step).
	// however, the improvement seems really tiny.
	// for the cutoff of points which need to be checked reversely,
	// I use all the predicted data (50000+50000) and got the following table.
	// threshold | score
	//     0     | 0.128226
	//     0.05  | 0.127253
	//     0.1   | 0.126986
	//     0.12  | 0.126851
	//     0.125 | 0.126803  <--
	//     0.127 | 0.126826
	//     0.13  | 0.126824
	//     0.135 | 0.126889
	//     0.14  | 0.126914
	//     0.15  | 0.127000
	
	// so I use 0.127 (should use 0.125) to predict the test set.
	// predicted set score: 0.12957 (-0.0003)
	// the improvement is even smaller than I thought, I think it should be around 0.0014.
	// :-(
	
	// probably the improvement of the training set, is because we use the training set
	// itself to calculate the scores, which gives bias.
	
	//---------------------
	// then use 0.125, and set the probably to be 0.5 if the pattern never appear.
	// predicted set score: 0.13034 (+0.0004)  -- even go worse~~
	
	//---------------------
	// if ruling out the unfrequent patterns, it doesn't seem help.
	//     0.125 | 0.127049 (rule out < 2)
	//     0.125 | 0.127095 (rule out < 4)
}
Example #3
0
/* Write one RDF statement (subject, predicate, object, with optional
 * graph, datatype and language tag) in the writer's configured syntax.
 * NTriples statements are emitted verbatim; otherwise the writer
 * abbreviates by comparing against the current context (shared subject
 * and/or predicate) and tracks anonymous-node and list state on its
 * stack.  Returns SERD_SUCCESS, SERD_ERR_BAD_ARG on invalid nodes, or
 * SERD_ERR_UNKNOWN if a node fails to write. */
SERD_API
SerdStatus
serd_writer_write_statement(SerdWriter*        writer,
                            SerdStatementFlags flags,
                            const SerdNode*    graph,
                            const SerdNode*    subject,
                            const SerdNode*    predicate,
                            const SerdNode*    object,
                            const SerdNode*    datatype,
                            const SerdNode*    lang)
{
	/* S, P, O must be present and non-empty; S and P must be resources. */
	if (!subject || !predicate || !object
	    || !subject->buf || !predicate->buf || !object->buf
	    || !is_resource(subject) || !is_resource(predicate)) {
		return SERD_ERR_BAD_ARG;
	}

/* Bail out of this function if a node fails to write. */
#define TRY(write_result) \
	if (!write_result) { \
		return SERD_ERR_UNKNOWN; \
	}

	switch (writer->syntax) {
	case SERD_NTRIPLES:
		/* NTriples: no abbreviation, one "S P O .\n" line per statement. */
		TRY(write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags));
		sink(" ", 1, writer);
		TRY(write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags));
		sink(" ", 1, writer);
		TRY(write_node(writer, object, datatype, lang, FIELD_OBJECT, flags));
		sink(" .\n", 3, writer);
		return SERD_SUCCESS;
	default:
		break;
	}

	if ((flags & SERD_LIST_CONT)) {
		/* Continuing an RDF collection (list). */
		if (write_list_obj(writer, flags, predicate, object, datatype, lang)) {
			// Reached end of list
			if (--writer->list_depth == 0 && writer->list_subj.type) {
				/* Restore the subject that was active before the list. */
				reset_context(writer, true);
				writer->context.subject = writer->list_subj;
				writer->list_subj       = SERD_NODE_NULL;
			}
			return SERD_SUCCESS;
		}
	} else if (serd_node_equals(subject, &writer->context.subject)) {
		if (serd_node_equals(predicate, &writer->context.predicate)) {
			// Abbreviate S P
			if (!(flags & SERD_ANON_O_BEGIN)) {
				++writer->indent;
			}
			write_sep(writer, SEP_END_O);
			write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
			if (!(flags & SERD_ANON_O_BEGIN)) {
				--writer->indent;
			}
		} else {
			// Abbreviate S
			Sep sep = writer->context.predicate.type ? SEP_END_P : SEP_S_P;
			write_sep(writer, sep);
			write_pred(writer, flags, predicate);
			write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
		}
	} else {
		// No abbreviation
		if (writer->context.subject.type) {
			/* Terminate the previous subject's description first. */
			assert(writer->indent > 0);
			--writer->indent;
			if (serd_stack_is_empty(&writer->anon_stack)) {
				write_sep(writer, SEP_END_S);
			}
		} else if (!writer->empty) {
			write_sep(writer, SEP_S_P);
		}

		if (!(flags & SERD_ANON_CONT)) {
			write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags);
			++writer->indent;
			write_sep(writer, SEP_S_P);
		} else {
			/* Inside an anonymous node: subject was already written. */
			++writer->indent;
		}

		reset_context(writer, true);
		copy_node(&writer->context.subject, subject);

		if (!(flags & SERD_LIST_S_BEGIN)) {
			write_pred(writer, flags, predicate);
		}

		write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
	}

	if (flags & (SERD_ANON_S_BEGIN|SERD_ANON_O_BEGIN)) {
		/* Entering an anonymous node: save the current context on the
		   stack and start a fresh one for the anonymous node. */
		WriteContext* ctx = (WriteContext*)serd_stack_push(
			&writer->anon_stack, sizeof(WriteContext));
		*ctx = writer->context;
		WriteContext new_context = {
			serd_node_copy(graph), serd_node_copy(subject), SERD_NODE_NULL };
		if ((flags & SERD_ANON_S_BEGIN)) {
			new_context.predicate = serd_node_copy(predicate);
		}
		writer->context = new_context;
	} else {
		/* Remember this statement's nodes for future abbreviation. */
		copy_node(&writer->context.graph, graph);
		copy_node(&writer->context.subject, subject);
		copy_node(&writer->context.predicate, predicate);
	}

	return SERD_SUCCESS;
}
/* Build per-step pattern statistics from train.csv, evaluate the reverse
 * predictor on the training set (pred.csv), then produce submission.csv
 * from test.csv.  Returns 0 on success, 1 if an input/output file cannot
 * be opened. */
int main () {
	
	// utility parameters
	int n, i;
	
	// zero the per-step pattern counters before accumulating
	clean_array((int*)count_case_0, 2097152*5);
	clean_array((int*)count_case_1, 2097152*5);
	
	// read the train.csv data
	FILE *train = fopen ("train.csv", "r");
	if (train == NULL) {
		fprintf(stderr, "cannot open train.csv\n");
		return 1;
	}
	while (fgetc(train) != '\n') ;  // skip the head line

	int id, delta, initial_grid[24][24], start_grid[24][24], stop_grid[24][24];
	clean_array((int*)initial_grid, 24*24);
	clean_array((int*)start_grid, 24*24);
	clean_array((int*)stop_grid, 24*24);
	
	// read all the train set and make statistical table for them
	for (n = 0; n < 50000; ++n) {
		read_train(train, &id, &delta, (int*)start_grid);
		for (i = 0; i < delta; ++i) {
			conway_step(start_grid, stop_grid);
			vote_step(start_grid, stop_grid, count_case_0[i], count_case_1[i]);
			copy_grid(start_grid, stop_grid);
		}
	}
	
	// vote for statistical table
	for (i = 0; i < 5; ++i) {
		for (n = 0; n < 2097152; ++n) 
			vote_case[i][n] = vote_pattern(n, count_case_0[i][n], count_case_1[i][n], i);
	}
	
	//------------------------------------------------------------------
	
	// check prediction accuracy
	///*
	// rewind instead of reopening train.csv: the previous fopen() here
	// leaked the FILE handle that was already open on the same file.
	rewind(train);
	while (fgetc(train) != '\n') ;  // skip the head line
	
	FILE *pred = fopen ("pred.csv", "w");
	if (pred == NULL) {
		fprintf(stderr, "cannot open pred.csv\n");
		fclose(train);
		return 1;
	}
	fprintf(pred, "id,");
	for (n = 1; n < 401; ++n) fprintf(pred, "start.%d,", n);
	for (n = 1; n < 400; ++n) fprintf(pred, "diff.%d,", n);
	fprintf(pred, "diff.400\n");
	
	double difference_all = 0;
	for (n = 0; n < 50000; ++n) {
		read_train (train, &id, &delta, (int*)initial_grid);
		copy_grid(start_grid, initial_grid);

		// roll the known start grid forward delta steps to get a stop grid
		for (i = 0; i < delta; ++i) {
			conway_step(start_grid, stop_grid);
			copy_grid(start_grid, stop_grid);
		}
		
		// then predict backwards delta steps and compare against the truth
		for (i = delta-1; i >= 0; --i) {
			reverse_step (start_grid, stop_grid, vote_case[i]);
			copy_grid(stop_grid, start_grid);
		}
		write_pred(pred, id, initial_grid, start_grid);
		difference_all += difference_grid(initial_grid, start_grid);
	}
	printf("training set score: %f\n", difference_all/50000);
	fclose(pred);
	//*/

	fclose(train);
	
	//------------------------------------------------------------------
	
	// make submission file
	///*
	FILE *test = fopen ("test.csv", "r");
	if (test == NULL) {
		fprintf(stderr, "cannot open test.csv\n");
		return 1;
	}
	// BUG FIX: the header skip used to call fgetc(train), but `train` was
	// already fclose()d above — reading from a closed stream is undefined
	// behavior, and the test.csv header line was never consumed.
	while (fgetc(test) != '\n') ;  // skip the head line
	
	FILE *submission = fopen ("submission.csv", "w");
	if (submission == NULL) {
		fprintf(stderr, "cannot open submission.csv\n");
		fclose(test);
		return 1;
	}
	fprintf(submission, "id,");
	for (n = 1; n < 400; ++n) fprintf(submission, "start.%d,", n);
	fprintf(submission, "start.400\n");
	
	for (n = 0; n < 50000; ++n) {
		read_test(test, &id, &delta, stop_grid);
		
		for (i = delta-1; i >= 0; --i) {
			reverse_step (start_grid, stop_grid, vote_case[i]);
			copy_grid(stop_grid, start_grid);
		}
		write_submission(submission, id, start_grid);
	}
	
	fclose(test);
	fclose(submission);
	//*/
	
	return 0;
	// training set score: 0.12549
	// real score: 0.12687 	

	// I also tried set up vote_case table cutoff based on different delta number.
	// it may helps (the entire table parameter need a long time to be fixed),
	// but a naive setup of the final step (5->6) to 0.5 makes everything goes worse.
}