Exemple #1
0
/*******************************************************************************
 * Labeling
 ******************************************************************************/
/* dolabel:
 *   Entry point of the labelling mode. Loads the model named by
 *   mdl->opt->model, then hands the input and output streams to tag_label().
 *   Input comes from mdl->opt->input (stdin when NULL) and output goes to
 *   mdl->opt->output (stdout when NULL). A missing model name or a file that
 *   cannot be opened is reported through fatal()/pfatal().
 */
void dolabel(mdl_t *mdl) {
	// First, load the model provided by the user. This is mandatory to
	// label new data.
	if (mdl->opt->model == NULL)
		fatal("you must specify a model");
	info("* Load model\n");
	FILE *file = fopen(mdl->opt->model, "r");
	if (file == NULL)
		pfatal("cannot open input model file");
	mdl_load(mdl, file);
	// The stream is only needed while loading; close it so the handle is
	// not leaked for the rest of the run.
	fclose(file);
	// Open input and output files, defaulting to the standard streams.
	FILE *fin = stdin, *fout = stdout;
	if (mdl->opt->input != NULL) {
		fin = fopen(mdl->opt->input, "r");
		if (fin == NULL)
			pfatal("cannot open input data file");
	}
	if (mdl->opt->output != NULL) {
		fout = fopen(mdl->opt->output, "w");
		if (fout == NULL)
			pfatal("cannot open output data file");
	}
	// Do the labelling
	info("* Label sequences\n");
	tag_label(mdl, fin, fout);
	info("* Done\n");
	// And close files. Only streams opened here are closed, stdin and
	// stdout are left untouched.
	if (mdl->opt->input != NULL)
		fclose(fin);
	if (mdl->opt->output != NULL)
		fclose(fout);
}
Exemple #2
0
/*******************************************************************************
 * Dumping
 ******************************************************************************/
/* dodump:
 *   Load a model from mdl->opt->input (stdin when NULL) and write its weights
 *   as text to mdl->opt->output (stdout when NULL).
 *
 *   For every observation o, the Y weights at theta + uoff[o] are written when
 *   bit 1 of kind[o] is set, as "<obs>\t#\t<label>\t<weight>". The Y * Y
 *   weights at theta + boff[o] are written when bit 2 is set, as
 *   "<obs>\t<label d / Y>\t<label d % Y>\t<weight>". Weights equal to zero are
 *   skipped unless mdl->opt->all is set. Weights are printed with
 *   mdl->opt->prec decimals, and an empty line follows each observation that
 *   produced at least one line.
 */
static void dodump(mdl_t *mdl) {
    // Read the model, from the named file or from standard input.
    info("* Load model\n");
    FILE *src = stdin;
    if (mdl->opt->input != NULL) {
        src = fopen(mdl->opt->input, "r");
        if (src == NULL)
            pfatal("cannot open input data file");
    }
    mdl_load(mdl, src);
    if (mdl->opt->input != NULL)
        fclose(src);

    // Select the destination of the dump.
    FILE *dst = stdout;
    if (mdl->opt->output != NULL) {
        dst = fopen(mdl->opt->output, "w");
        if (dst == NULL)
            pfatal("cannot open output data file");
    }

    info("* Dump model\n");
    const uint32_t Y = mdl->nlbl;
    const uint64_t O = mdl->nobs;
    const qrk_t *labels = mdl->reader->lbl;
    const qrk_t *observations = mdl->reader->obs;

    // Build the "%.<prec>f\n" format used to print each weight.
    char wfmt[16];
    sprintf(wfmt, "%%.%df\n", mdl->opt->prec);

    for (uint64_t o = 0; o < O; o++) {
        const char *oname = qrk_id2str(observations, o);
        bool wrote = false;

        // Weights indexed by a single label.
        if (mdl->kind[o] & 1) {
            const double *uw = mdl->theta + mdl->uoff[o];
            for (uint32_t y = 0; y < Y; y++) {
                if (mdl->opt->all || uw[y] != 0.0) {
                    const char *lname = qrk_id2str(labels, y);
                    fprintf(dst, "%s\t#\t%s\t", oname, lname);
                    fprintf(dst, wfmt, uw[y]);
                    wrote = true;
                }
            }
        }

        // Weights indexed by a pair of labels, stored as a flat Y * Y array.
        if (mdl->kind[o] & 2) {
            const double *bw = mdl->theta + mdl->boff[o];
            for (uint32_t d = 0; d < Y * Y; d++) {
                if (mdl->opt->all || bw[d] != 0.0) {
                    const char *second = qrk_id2str(labels, d % Y);
                    const char *first  = qrk_id2str(labels, d / Y);
                    fprintf(dst, "%s\t%s\t%s\t", oname, first, second);
                    fprintf(dst, wfmt, bw[d]);
                    wrote = true;
                }
            }
        }

        // Separate observations that produced output with an empty line.
        if (wrote)
            fputc('\n', dst);
    }

    if (mdl->opt->output != NULL)
        fclose(dst);
}
Exemple #3
0
/* dotrain:
 *   Entry point of the training mode. In order, it:
 *     - prints the list of trainers and exits if mdl->opt->algo is "list",
 *       otherwise looks the requested algorithm up in trn_lst;
 *     - optionally loads a previous model (mdl->opt->model) and a pattern
 *       file (mdl->opt->pattern);
 *     - reads the training data from mdl->opt->input (stdin when NULL) and,
 *       if given, the development data from mdl->opt->devel;
 *     - synchronises the model, runs the selected trainer and optionally
 *       compacts the result;
 *     - saves the model to mdl->opt->output (stdout when NULL).
 *   Errors are reported through fatal()/pfatal().
 */
void dotrain(mdl_t *mdl) {
	// Check if the user requested the trainer list. If this is not the
	// case, search the trainer.
	if (!strcmp(mdl->opt->algo, "list")) {
		info("Available training algorithms:\n");
		for (int i = 0; i < trn_cnt; i++)
			info("\t%s\n", trn_lst[i].name);
		exit(EXIT_SUCCESS);
	}
	int trn;
	for (trn = 0; trn < trn_cnt; trn++)
		if (!strcmp(mdl->opt->algo, trn_lst[trn].name))
			break;
	if (trn == trn_cnt)
		fatal("unknown algorithm '%s'", mdl->opt->algo);
	// Load a previous model to train again if specified by the user.
	if (mdl->opt->model != NULL) {
		info("* Load previous model\n");
		FILE *mfile = fopen(mdl->opt->model, "r");
		if (mfile == NULL)
			pfatal("cannot open input model file");
		mdl_load(mdl, mfile);
		// The stream is only needed while loading; close it so the
		// handle is not leaked during the whole training.
		fclose(mfile);
	}
	// Load the pattern file. This will unlock the database if previously
	// locked by loading a model.
	if (mdl->opt->pattern != NULL) {
		info("* Load patterns\n");
		FILE *pfile = fopen(mdl->opt->pattern, "r");
		if (pfile == NULL)
			pfatal("cannot open pattern file");
		rdr_loadpat(mdl->reader, pfile);
		fclose(pfile);
		qrk_lock(mdl->reader->obs, false);
	}
	// Load the training data. When this is done we lock the quarks as we
	// don't want to put in the model information present only in the
	// development set.
	info("* Load training data\n");
	FILE *file = stdin;
	if (mdl->opt->input != NULL) {
		file = fopen(mdl->opt->input, "r");
		if (file == NULL)
			pfatal("cannot open input data file");
	}
	mdl->train = rdr_readdat(mdl->reader, file, true);
	if (mdl->opt->input != NULL)
		fclose(file);
	qrk_lock(mdl->reader->lbl, true);
	qrk_lock(mdl->reader->obs, true);
	if (mdl->train == NULL || mdl->train->nseq == 0)
		fatal("no train data loaded");
	// If present, load the development set in the model. If not specified,
	// the training dataset will be used instead.
	if (mdl->opt->devel != NULL) {
		info("* Load development data\n");
		FILE *dfile = fopen(mdl->opt->devel, "r");
		if (dfile == NULL)
			pfatal("cannot open development file");
		mdl->devel = rdr_readdat(mdl->reader, dfile, true);
		fclose(dfile);
	}
	// Initialize the model. If a previous model was loaded, this will be
	// just a resync, else the model structure will be created.
	if (mdl->theta == NULL)
		info("* Initialize the model\n");
	else
		info("* Resync the model\n");
	mdl_sync(mdl);
	// Display some statistics as we all love this.
	info("* Summary\n");
	info("    nb train:    %d\n", mdl->train->nseq);
	if (mdl->devel != NULL)
		info("    nb devel:    %d\n", mdl->devel->nseq);
	info("    nb labels:   %zu\n", mdl->nlbl);
	info("    nb blocks:   %zu\n", mdl->nobs);
	info("    nb features: %zu\n", mdl->nftr);
	// And train the model...
	info("* Train the model with %s\n", mdl->opt->algo);
	uit_setup(mdl);
	trn_lst[trn].train(mdl);
	uit_cleanup(mdl);
	// If requested compact the model. The counts are saved first so the
	// number of removed observations and features can be reported.
	if (mdl->opt->compact) {
		const size_t O = mdl->nobs;
		const size_t F = mdl->nftr;
		info("* Compacting the model\n");
		mdl_compact(mdl);
		info("    %8zu observations removed\n", O - mdl->nobs);
		info("    %8zu features removed\n", F - mdl->nftr);
	}
	// And save the trained model
	info("* Save the model\n");
	file = stdout;
	if (mdl->opt->output != NULL) {
		file = fopen(mdl->opt->output, "w");
		if (file == NULL)
			pfatal("cannot open output model");
	}
	mdl_save(mdl, file);
	if (mdl->opt->output != NULL)
		fclose(file);
	info("* Done\n");
}
Exemple #4
0
/*******************************************************************************
 * Updating
 ******************************************************************************/
/* doupdt:
 *   Entry point of the update mode. Loads the model named by mdl->opt->model,
 *   applies a patch read from mdl->opt->input (stdin when NULL), optionally
 *   compacts the model, and saves it to mdl->opt->output (stdout when NULL).
 *
 *   Each non-empty patch line must hold four whitespace-separated tokens:
 *       <observation> <label or "#"> <label> <weight>
 *   When the second token is "#", the weight is stored at
 *   theta[uoff[obs] + y]; otherwise it is stored at
 *   theta[boff[obs] + yp * Y + y]. Unknown observations or labels, malformed
 *   lines and unparsable weights are reported through fatal().
 */
static void doupdt(mdl_t *mdl) {
    // Load input model file
    info("* Load model\n");
    if (mdl->opt->model == NULL)
        fatal("no model file provided");
    FILE *Min = fopen(mdl->opt->model, "r");
    if (Min == NULL)
        pfatal("cannot open model file %s", mdl->opt->model);
    mdl_load(mdl, Min);
    fclose(Min);
    // Open patch file
    info("* Update model\n");
    FILE *fin = stdin;
    if (mdl->opt->input != NULL) {
        fin = fopen(mdl->opt->input, "r");
        if (fin == NULL)
            pfatal("cannot open update file");
    }
    int nline = 0;
    while (!feof(fin)) {
        char *raw = rdr_readline(fin);
        if (raw == NULL)
            break;
        char *line = raw;
        nline++;
        // First we split the line in space separated tokens. We expect
        // four of them and skip empty lines. The tokens are cut in place
        // inside the line buffer. Characters are cast to unsigned char
        // before being given to isspace(), as passing a negative char
        // value to it is undefined behaviour.
        char *toks[4];
        int ntoks = 0;
        while (ntoks < 4) {
            while (isspace((unsigned char)*line))
                line++;
            if (*line == '\0')
                break;
            toks[ntoks++] = line;
            while (*line != '\0' && !isspace((unsigned char)*line))
                line++;
            if (*line == '\0')
                break;
            *line++ = '\0';
        }
        if (ntoks == 0) {
            free(raw);
            continue;
        } else if (ntoks != 4) {
            fatal("invalid line at %d", nline);
        }
        // Parse the tokens, the first three should be strings mapping to
        // observations and labels and the last should be the weight. A "#"
        // in second position leaves yp unset.
        uint64_t obs = none, yp = none, y = none;
        obs = qrk_str2id(mdl->reader->obs, toks[0]);
        if (obs == none)
            fatal("bad on observation on line %d", nline);
        if (strcmp(toks[1], "#")) {
            yp = qrk_str2id(mdl->reader->lbl, toks[1]);
            if (yp == none)
                fatal("bad label <%s> line %d", toks[1], nline);
        }
        y = qrk_str2id(mdl->reader->lbl, toks[2]);
        if (y == none)
            fatal("bad label <%s> line %d", toks[2], nline);
        double wgh = 0.0;
        if (sscanf(toks[3], "%lf", &wgh) != 1)
            fatal("bad weight on line %d", nline);

        // Store the weight in the model.
        // NOTE(review): mdl->kind[obs] is not checked here, so uoff[obs] or
        // boff[obs] is used whatever the kind of the observation -- confirm
        // the offset is always valid in that case.
        const uint32_t Y = mdl->nlbl;
        if (yp == none) {
            double *w = mdl->theta + mdl->uoff[obs];
            w[y] = wgh;
        } else {
            double *w = mdl->theta + mdl->boff[obs];
            w[yp * Y + y] = wgh;
        }
        free(raw);
    }
    if (mdl->opt->input != NULL)
        fclose(fin);
    // If requested compact the model. The counts are saved first so the
    // number of removed observations and features can be reported.
    if (mdl->opt->compact) {
        const uint64_t O = mdl->nobs;
        const uint64_t F = mdl->nftr;
        info("* Compacting the model\n");
        mdl_compact(mdl);
        info("    %8"PRIu64" observations removed\n", O - mdl->nobs);
        info("    %8"PRIu64" features removed\n", F - mdl->nftr);
    }
    // And save the updated model
    info("* Save the model\n");
    FILE *file = stdout;
    if (mdl->opt->output != NULL) {
        file = fopen(mdl->opt->output, "w");
        if (file == NULL)
            pfatal("cannot open output model");
    }
    mdl_save(mdl, file);
    if (mdl->opt->output != NULL)
        fclose(file);
    info("* Done\n");
}