static void initialize_data(char *namefilein) {/*{{{*/ int i; read_instance(namefilein); //dist = malloc(sizeof(int)*N*N); nn_list = malloc(sizeof(int)*N); pheromone = malloc(sizeof(float)*N); choice_info = malloc(sizeof(float)*N); ant = malloc(sizeof(single_ant_t)*M); for (i = 0; i < N; i++) { pheromone[i] = malloc(sizeof(int)*N); choice_info[i] = malloc(sizeof(int)*N); nn_list[i] = malloc(sizeof(int)*NN); } //compute_distances(); compute_nearest_neighbor_lists(); initialize_pheromone(); compute_choice_information(); initialize_ants(); initialize_statistics(); }/*}}}*/
// Train the POS-tagging model from train_opt.train_file.
//
// Pipeline: read instances -> build label configuration -> build feature
// space -> allocate the parameter vector -> run train_opt.max_iter passes
// of online learning ("pa" = passive-aggressive, "ap" = averaged
// perceptron; the "mira" branch is currently disabled/commented out).
// After each pass the model is truncated, evaluated, and saved to
// "<model_name>.<iter>.model".
//
// Side effects: allocates `model` and `decoder` with `new` (ownership is
// presumably released elsewhere in the class — TODO confirm); writes one
// model file per iteration.
void Postagger::train(void) {
    const char * train_file = train_opt.train_file.c_str();
    // read in training instance
    read_instance(train_file);
    TRACE_LOG("Read in [%d] instances.", train_dat.size());
    model = new Model;
    // build tag dictionary, map string tag to index
    TRACE_LOG("Start build configuration");
    build_configuration();
    TRACE_LOG("Build configuration is done.");
    TRACE_LOG("Number of labels: [%d]", model->labels.size());
    // build feature space from the training instance
    TRACE_LOG("Start building feature space.");
    build_feature_space();
    TRACE_LOG("Building feature space is done.");
    TRACE_LOG("Number of features: [%d]", model->space.num_features());
    // size the parameter vector to the feature-space dimension
    model->param.realloc(model->space.dim());
    TRACE_LOG("Allocate [%d] dimensition parameter.", model->space.dim());
    PostaggerWriter writer(cout);
    if (train_opt.algorithm == "mira") {
        // use mira algorithm
        // NOTE(review): MIRA training is not implemented — this branch is a
        // no-op and the k-best decoding sketch below is commented out.
        /*kbest_decoder = new KBestDecoder(L);
        for (int iter = 0; iter < train_opt.max_iter; ++ iter) {
            for (int i = 0; i < train_dat.size(); ++ i) {
                extract_features(train_dat[i]);
                calculate_scores(train_dat[i]);
                KBestDecoder::KBestDecodeResult result;
                kbest_decoder->decode(train_dat[i], result);
            }
        }*/
    } else {
        // use pa or average perceptron algorithm
        decoder = new Decoder(model->num_labels());
        TRACE_LOG("Allocated plain decoder");
        for (int iter = 0; iter < train_opt.max_iter; ++ iter) {
            TRACE_LOG("Training iteraition [%d]", (iter + 1));
            for (int i = 0; i < train_dat.size(); ++ i) {
                // extract_features(train_dat[i]);
                Instance * inst = train_dat[i];
                // score all tag candidates, then find the best tag sequence
                calculate_scores(inst, false);
                decoder->decode(inst);
                // gold features are collected only once per instance
                // (dim()==0 means "not yet collected")
                if (inst->features.dim() == 0) {
                    collect_features(inst, inst->tagsidx, inst->features);
                }
                // predicted features change every pass — recollect each time
                collect_features(inst, inst->predicted_tagsidx, inst->predicted_features);
                // writer.debug(inst, true);
                if (train_opt.algorithm == "pa") {
                    // passive-aggressive update: step = (loss - margin)/||u||^2
                    // along u = gold_features - predicted_features
                    SparseVec update_features;
                    update_features.zero();
                    update_features.add(train_dat[i]->features, 1.);
                    update_features.add(train_dat[i]->predicted_features, -1.);
                    double error = train_dat[i]->num_errors();
                    double score = model->param.dot(update_features, false);
                    double norm = update_features.L2();
                    double step = 0.;
                    if (norm < EPS) {
                        // gold == prediction (or degenerate update): no step
                        step = 0;
                    } else {
                        step = (error - score) / norm;
                    }
                    // iter*size+i+1 is the global update counter, presumably
                    // used for lazy parameter averaging — TODO confirm in Model
                    model->param.add(update_features,
                                     iter * train_dat.size() + i + 1,
                                     step);
                } else if (train_opt.algorithm == "ap") {
                    // averaged perceptron: unit step along the same direction
                    SparseVec update_features;
                    update_features.zero();
                    update_features.add(train_dat[i]->features, 1.);
                    update_features.add(train_dat[i]->predicted_features, -1.);
                    model->param.add(update_features,
                                     iter * train_dat.size() + i + 1,
                                     1.);
                }
                if ((i+1) % train_opt.display_interval == 0) {
                    TRACE_LOG("[%d] instances is trained.", i+1);
                }
            }
            TRACE_LOG("[%d] instances is trained.", train_dat.size());
            // finalize averaging up to the total number of updates this pass
            model->param.flush( train_dat.size() * (iter + 1) );
            // evaluate on the truncated model, then swap back so training
            // continues on the full model; the truncated copy is what gets
            // saved and deleted.
            Model * new_model = truncate();
            swap(model, new_model);
            evaluate();
            std::string saved_model_file = (train_opt.model_name + "." + strutils::to_str(iter) + ".model");
            std::ofstream ofs(saved_model_file.c_str(), std::ofstream::binary);
            swap(model, new_model);
            new_model->save(ofs);
            delete new_model;
            // model->save(ofs);
            TRACE_LOG("Model for iteration [%d] is saved to [%s]",
                      iter + 1,
                      saved_model_file.c_str());
        }
    }
}
int main(const int argc, const char * argv[]) { const char * program_short_name, * input_name, * output_name; unsigned int nb_mutations; instance data; solution sol; chrono timer; int read_status, write_status; program_short_name = get_program_short_name(argv[0]); if((argc != 3) && (argc != 4)) { printf("Syntaxe pour utiliser ce programme :\t"); printf("%s input_name output_name nb_mutations\n", program_short_name); puts("\tinput_name \tnom du ficher contenant l'instance (obligatoire)"); puts("\toutput_name \tnom du ficher dans lequel ecrire la solution (obligatoire)"); puts("\tnb_mutations\tnombre de mutations a chaque iteration de l'algorithme (optionnel ; valeur par defaut = nombre de jobs dans l'instance)"); return EXIT_FAILURE; } input_name = argv[1]; output_name = argv[2]; switch(argc) { case(3): nb_mutations = 0; break; case(4): nb_mutations = read_nb_mutations(argv[3]); if(nb_mutations == 0) { printf("Erreur : nombre de mutations incorrect (\"%s\")\n", argv[3]); return EXIT_FAILURE; } break; } read_status = read_instance(&data, input_name); switch(read_status) { case(INPUT_ERROR_OPEN): printf("Erreur : impossible d'ouvrir le fichier \"%s\"\n", input_name); return EXIT_FAILURE; case(INPUT_ERROR_SYNTAX): printf("Erreur : la syntaxe du fichier \"%s\" est incorrecte.\n", input_name); return EXIT_FAILURE; case(INPUT_SUCCESS): puts("Lecture de l'instance : OK"); timer = chrono_new(); srand(time(NULL)); puts("Algorithme : START"); chrono_start(timer); sol = find_solution(data, nb_mutations); chrono_stop(timer); puts("Algorithme : STOP"); solution_set_cpu_time(sol, chrono_get_time(timer)); write_status = write_solution(&sol, output_name); solution_delete(sol); instance_delete(data); chrono_delete(timer); switch(write_status) { case(OUTPUT_ERROR): printf("Erreur : impossible d'ouvrir ou de creer le fichier \"%s\"\n", output_name); return EXIT_FAILURE; case(OUTPUT_SUCCESS): puts("Ecriture de la solution : OK"); break; // return EXIT_SUCCESS; } } return EXIT_SUCCESS; }