예제 #1
0
파일: reader.c 프로젝트: FaizaGara/lima
/* rdr_load:
 *   Read from the given file a reader previously saved with rdr_save. The
 *   given reader must be empty, coming fresh from rdr_new. Be careful: this
 *   function performs almost no checks on the input data, so if you modify
 *   the saved reader by hand and make a mistake, it will probably result in
 *   a crash.
 *
 *   Two header layouts are accepted: "#rdr#<npats>/<ntoks>/<autouni>" and the
 *   older "#rdr#<npats>/<ntoks>". With the older one, the autouni value
 *   already stored in the reader is kept. A header matching neither layout is
 *   reported through fatal().
 */
void rdr_load(rdr_t *rdr, FILE *file) {
	const char *err = "broken file, invalid reader format";
	int autouni = rdr->autouni;
	fpos_t pos;
	// Remember where the header starts so that it can be parsed a second
	// time with the old layout. The position is only trusted if fgetpos
	// succeeded (it can fail on non-seekable streams).
	const int havepos = (fgetpos(file, &pos) == 0);
	if (fscanf(file, "#rdr#%"PRIu32"/%"PRIu32"/%d\n",
			&rdr->npats, &rdr->ntoks, &autouni) != 3) {
		// This is for compatibility with the previous file format. If
		// the stream cannot be rewound to the start of the header, the
		// second parse would begin at the wrong offset, so give up.
		if (!havepos || fsetpos(file, &pos) != 0)
			fatal(err);
		if (fscanf(file, "#rdr#%"PRIu32"/%"PRIu32"\n",
			   &rdr->npats, &rdr->ntoks) != 2)
			fatal(err);
	}
	rdr->autouni = autouni;
	rdr->nuni = rdr->nbi = 0;
	if (rdr->npats != 0) {
		rdr->pats = xmalloc(sizeof(pat_t *) * rdr->npats);
		for (uint32_t p = 0; p < rdr->npats; p++) {
			// NOTE(review): pat is not freed here; presumably
			// pat_comp keeps a reference to it -- confirm.
			char *pat = ns_readstr(file);
			rdr->pats[p] = pat_comp(pat);
			// The first character of the pattern tells which
			// counters it contributes to. The cast is needed as
			// tolower is undefined for negative values other
			// than EOF, which a plain char may hold.
			switch (tolower((unsigned char)pat[0])) {
				case 'u': rdr->nuni++; break;
				case 'b': rdr->nbi++;  break;
				case '*': rdr->nuni++;
				          rdr->nbi++;  break;
				default:               break;
			}
		}
	}
	// The label and observation databases follow the pattern list.
	qrk_load(rdr->lbl, file);
	qrk_load(rdr->obs, file);
}
예제 #2
0
파일: wapiti.c 프로젝트: yanqingmen/Wapiti
/* dotrain:
 *   Run the training mode on the given model. The steps are, in order:
 *   handle the "list" pseudo-values of the type and algorithm options,
 *   resolve the model type and the training algorithm by name, optionally
 *   load a previous model, a pattern file and a feature file, read the
 *   training data (from stdin if no input file is given) and the optional
 *   development data, synchronize the model, train it, optionally compact
 *   it, and finally save it (to stdout if no output file is given).
 *
 *   Every failure is reported through fatal() or pfatal(). Asking for one
 *   of the lists prints it and exits the program with EXIT_SUCCESS.
 */
static void dotrain(mdl_t *mdl) {
    // Check if the user requested the type or trainer list. If this is not
    // the case, search them in the lists.
    if (!strcmp(mdl->opt->type, "list")) {
        info("Available types of models:\n");
        for (uint32_t i = 0; i < typ_cnt; i++)
            info("\t%s\n", typ_lst[i]);
        exit(EXIT_SUCCESS);
    }
    if (!strcmp(mdl->opt->algo, "list")) {
        info("Available training algorithms:\n");
        for (uint32_t i = 0; i < trn_cnt; i++)
            info("\t%s\n", trn_lst[i].name);
        exit(EXIT_SUCCESS);
    }
    uint32_t typ, trn;
    for (typ = 0; typ < typ_cnt; typ++)
        if (!strcmp(mdl->opt->type, typ_lst[typ]))
            break;
    if (typ == typ_cnt)
        fatal("unknown model type '%s'", mdl->opt->type);
    mdl->type = typ;
    for (trn = 0; trn < trn_cnt; trn++)
        if (!strcmp(mdl->opt->algo, trn_lst[trn].name))
            break;
    if (trn == trn_cnt)
        fatal("unknown algorithm '%s'", mdl->opt->algo);
    // Load a previous model to train again if specified by the user.
    if (mdl->opt->model != NULL) {
        info("* Load previous model\n");
        FILE *file = fopen(mdl->opt->model, "r");
        if (file == NULL)
            pfatal("cannot open input model file");
        mdl_load(mdl, file);
        // As for every other stream opened in this function, the caller
        // owns the file and must close it once the loader has returned.
        fclose(file);
    }
    // Load the pattern file. This will unlock the database if previously
    // locked by loading a model.
    if (mdl->opt->pattern != NULL) {
        info("* Load patterns\n");
        FILE *file = fopen(mdl->opt->pattern, "r");
        if (file == NULL)
            pfatal("cannot open pattern file");
        rdr_loadpat(mdl->reader, file);
        fclose(file);
        qrk_lock(mdl->reader->obs, false);
    }

    // If a feature file is specified, load its content into
    // mdl->reader->obs. The database is locked once the features are
    // loaded.
    if (mdl->opt->feature_file != NULL) {
        info("* Load features\n");
        FILE *file = fopen(mdl->opt->feature_file, "r");
        if (file == NULL)
            pfatal("cannot open feature file");
        qrk_load(mdl->reader->obs, file);
        fclose(file);
        qrk_lock(mdl->reader->obs, true);
    }


    // Load the training data. When this is done we lock the quarks as we
    // don't want to put in the model information present only in the
    // development set.
    info("* Load training data\n");
    FILE *file = stdin;
    if (mdl->opt->input != NULL) {
        file = fopen(mdl->opt->input, "r");
        if (file == NULL)
            pfatal("cannot open input data file");
    }
    mdl->train = rdr_readdat(mdl->reader, file, true);
    // Only close what was opened here, stdin is left alone.
    if (mdl->opt->input != NULL)
        fclose(file);
    qrk_lock(mdl->reader->lbl, true);
    qrk_lock(mdl->reader->obs, true);
    if (mdl->train == NULL || mdl->train->nseq == 0)
        fatal("no train data loaded");
    // If present, load the development set in the model. If not specified,
    // the training dataset will be used instead.
    if (mdl->opt->devel != NULL) {
        info("* Load development data\n");
        FILE *file = fopen(mdl->opt->devel, "r");
        if (file == NULL)
            pfatal("cannot open development file");
        mdl->devel = rdr_readdat(mdl->reader, file, true);
        fclose(file);
    }
    // Initialize the model. If a previous model was loaded, this will be
    // just a resync, else the model structure will be created.
    if (mdl->theta == NULL)
        info("* Initialize the model\n");
    else
        info("* Resync the model\n");
    mdl_sync(mdl);
    // Display some statistics as we all love this.
    info("* Summary\n");
    info("    nb train:    %"PRIu32"\n", mdl->train->nseq);
    if (mdl->devel != NULL)
        info("    nb devel:    %"PRIu32"\n", mdl->devel->nseq);
    info("    nb labels:   %"PRIu32"\n", mdl->nlbl);
    info("    nb blocks:   %"PRIu64"\n", mdl->nobs);
    info("    nb features: %"PRIu64"\n", mdl->nftr);
    // And train the model...
    info("* Train the model with %s\n", mdl->opt->algo);
    uit_setup(mdl);
    trn_lst[trn].train(mdl);
    uit_cleanup(mdl);
    // If requested compact the model. The counts are saved beforehand so
    // the number of removed items can be reported.
    if (mdl->opt->compact) {
        const uint64_t O = mdl->nobs;
        const uint64_t F = mdl->nftr;
        info("* Compacting the model\n");
        mdl_compact(mdl);
        info("    %8"PRIu64" observations removed\n", O - mdl->nobs);
        info("    %8"PRIu64" features removed\n", F - mdl->nftr);
    }
    // And save the trained model
    info("* Save the model\n");
    file = stdout;
    if (mdl->opt->output != NULL) {
        file = fopen(mdl->opt->output, "w");
        if (file == NULL)
            pfatal("cannot open output model");
    }
    mdl_save(mdl, file);
    // Closing flushes the buffered data, so a failure here means the model
    // on disk may be truncated: report it instead of claiming success.
    if (mdl->opt->output != NULL) {
        if (fclose(file) != 0)
            pfatal("cannot write output model");
    }
    info("* Done\n");
}