/* rdr_load:
 *   Read from the given file a reader saved previously with rdr_save. The
 *   given reader must be empty, coming fresh from rdr_new. Be careful: this
 *   function performs almost no checks on the input data, so if you modify
 *   the saved reader by hand and make a mistake, it will probably result in
 *   a crash.
 *
 *   The header line is expected to be "#rdr#<npats>/<ntoks>/<autouni>". The
 *   older "#rdr#<npats>/<ntoks>" form is still accepted; in that case the
 *   autouni value already stored in the reader is kept. After the header,
 *   one string per pattern is read, followed by the label and observation
 *   quarks.
 */
void rdr_load(rdr_t *rdr, FILE *file) {
	const char *err = "broken file, invalid reader format";
	int autouni = rdr->autouni;
	fpos_t pos;
	// Remember where the header starts so that it can be parsed a second
	// time in the old format. Saving the position can fail (for example on
	// a non-seekable stream); this only matters if the fallback is needed.
	const int has_pos = (fgetpos(file, &pos) == 0);
	// The SCN* macros are the scanf counterparts of the PRI* ones; the
	// latter are only guaranteed to be correct for the printf family.
	if (fscanf(file, "#rdr#%"SCNu32"/%"SCNu32"/%d\n",
			&rdr->npats, &rdr->ntoks, &autouni) != 3) {
		// This is for compatibility with the previous file format.
		if (!has_pos || fsetpos(file, &pos) != 0)
			fatal("%s", err);
		if (fscanf(file, "#rdr#%"SCNu32"/%"SCNu32"\n",
				&rdr->npats, &rdr->ntoks) != 2)
			fatal("%s", err);
	}
	rdr->autouni = autouni;
	// Count the unigram and bigram patterns while loading them.
	rdr->nuni = 0;
	rdr->nbi = 0;
	if (rdr->npats != 0) {
		rdr->pats = xmalloc(sizeof(pat_t *) * rdr->npats);
		for (uint32_t p = 0; p < rdr->npats; p++) {
			// NOTE(review): pat is not released here; presumably
			// the compiled pattern keeps a reference to its source
			// string -- confirm against pat_comp.
			char *pat = ns_readstr(file);
			rdr->pats[p] = pat_comp(pat);
			// <ctype.h> functions require a value representable
			// as unsigned char, hence the cast.
			switch (tolower((unsigned char)pat[0])) {
				case 'u':
					rdr->nuni++;
					break;
				case 'b':
					rdr->nbi++;
					break;
				case '*':
					// '*' counts as both unigram and bigram.
					rdr->nuni++;
					rdr->nbi++;
					break;
				default:
					break;
			}
		}
	}
	qrk_load(rdr->lbl, file);
	qrk_load(rdr->obs, file);
}
/* dotrain:
 *   Run the training mode on the given model. In order, this:
 *     - prints the list of model types or training algorithms and exits if
 *       the user asked for "list";
 *     - resolves the requested model type and training algorithm by name,
 *       aborting if either one is unknown;
 *     - optionally loads a previous model, a pattern file and a feature
 *       file;
 *     - reads the training data (from stdin when no input file is given)
 *       and the optional development data;
 *     - synchronizes, trains, optionally compacts, and finally saves the
 *       model (to stdout when no output file is given).
 */
static void dotrain(mdl_t *mdl) {
	// Check if the user requested the type or trainer list. If this is not
	// the case, search them in the lists.
	if (!strcmp(mdl->opt->type, "list")) {
		info("Available types of models:\n");
		for (uint32_t i = 0; i < typ_cnt; i++)
			info("\t%s\n", typ_lst[i]);
		exit(EXIT_SUCCESS);
	}
	if (!strcmp(mdl->opt->algo, "list")) {
		info("Available training algorithms:\n");
		for (uint32_t i = 0; i < trn_cnt; i++)
			info("\t%s\n", trn_lst[i].name);
		exit(EXIT_SUCCESS);
	}
	uint32_t typ, trn;
	for (typ = 0; typ < typ_cnt; typ++)
		if (!strcmp(mdl->opt->type, typ_lst[typ]))
			break;
	if (typ == typ_cnt)
		fatal("unknown model type '%s'", mdl->opt->type);
	mdl->type = typ;
	for (trn = 0; trn < trn_cnt; trn++)
		if (!strcmp(mdl->opt->algo, trn_lst[trn].name))
			break;
	if (trn == trn_cnt)
		fatal("unknown algorithm '%s'", mdl->opt->algo);
	// Load a previous model to train again if specified by the user.
	if (mdl->opt->model != NULL) {
		info("* Load previous model\n");
		FILE *prev = fopen(mdl->opt->model, "r");
		if (prev == NULL)
			pfatal("cannot open input model file");
		mdl_load(mdl, prev);
		// The stream is not needed once the model is loaded; close it
		// like every other input of this function.
		fclose(prev);
	}
	// Load the pattern file. This will unlock the database if previously
	// locked by loading a model.
	if (mdl->opt->pattern != NULL) {
		info("* Load patterns\n");
		FILE *patfile = fopen(mdl->opt->pattern, "r");
		if (patfile == NULL)
			pfatal("cannot open pattern file");
		rdr_loadpat(mdl->reader, patfile);
		fclose(patfile);
		qrk_lock(mdl->reader->obs, false);
	}
	// If a feature file is specified, load its content into
	// mdl->reader->obs. The observation quark is locked once the features
	// are loaded.
	if (mdl->opt->feature_file != NULL) {
		info("* Load features\n");
		FILE *ftrfile = fopen(mdl->opt->feature_file, "r");
		if (ftrfile == NULL)
			pfatal("cannot open feature file");
		qrk_load(mdl->reader->obs, ftrfile);
		fclose(ftrfile);
		qrk_lock(mdl->reader->obs, true);
	}
	// Load the training data. When this is done we lock the quarks as we
	// don't want to put in the model information present only in the
	// development set.
	info("* Load training data\n");
	FILE *file = stdin;
	if (mdl->opt->input != NULL) {
		file = fopen(mdl->opt->input, "r");
		if (file == NULL)
			pfatal("cannot open input data file");
	}
	mdl->train = rdr_readdat(mdl->reader, file, true);
	if (mdl->opt->input != NULL)
		fclose(file);
	qrk_lock(mdl->reader->lbl, true);
	qrk_lock(mdl->reader->obs, true);
	if (mdl->train == NULL || mdl->train->nseq == 0)
		fatal("no train data loaded");
	// If present, load the development set in the model. If not specified,
	// the training dataset will be used instead.
	if (mdl->opt->devel != NULL) {
		info("* Load development data\n");
		FILE *devfile = fopen(mdl->opt->devel, "r");
		if (devfile == NULL)
			pfatal("cannot open development file");
		mdl->devel = rdr_readdat(mdl->reader, devfile, true);
		fclose(devfile);
	}
	// Initialize the model. If a previous model was loaded, this will be
	// just a resync, else the model structure will be created.
	if (mdl->theta == NULL)
		info("* Initialize the model\n");
	else
		info("* Resync the model\n");
	mdl_sync(mdl);
	// Display some statistics as we all love this.
	info("* Summary\n");
	info(" nb train: %"PRIu32"\n", mdl->train->nseq);
	if (mdl->devel != NULL)
		info(" nb devel: %"PRIu32"\n", mdl->devel->nseq);
	info(" nb labels: %"PRIu32"\n", mdl->nlbl);
	info(" nb blocks: %"PRIu64"\n", mdl->nobs);
	info(" nb features: %"PRIu64"\n", mdl->nftr);
	// And train the model...
	info("* Train the model with %s\n", mdl->opt->algo);
	uit_setup(mdl);
	trn_lst[trn].train(mdl);
	uit_cleanup(mdl);
	// If requested compact the model. The counts are saved beforehand so
	// the number of removed items can be reported.
	if (mdl->opt->compact) {
		const uint64_t O = mdl->nobs;
		const uint64_t F = mdl->nftr;
		info("* Compacting the model\n");
		mdl_compact(mdl);
		info(" %8"PRIu64" observations removed\n", O - mdl->nobs);
		info(" %8"PRIu64" features removed\n", F - mdl->nftr);
	}
	// And save the trained model
	info("* Save the model\n");
	file = stdout;
	if (mdl->opt->output != NULL) {
		file = fopen(mdl->opt->output, "w");
		if (file == NULL)
			pfatal("cannot open output model");
	}
	mdl_save(mdl, file);
	// fclose flushes buffered data, so a failure here means the model was
	// not completely written; do not report success in that case.
	if (mdl->opt->output != NULL && fclose(file) != 0)
		pfatal("cannot write output model");
	info("* Done\n");
}