/*
 * Build the initial alignment for every string pair in g_stringpairs.
 *
 * The alignment is greedy: input and output symbols are paired position by
 * position, and once one side is exhausted the remaining symbols of the other
 * side are paired with 0, e.g. <abcd, ax> => <abcd, ax00>.
 *
 * For each pair this allocates pair->inaligned and pair->outaligned, fills
 * them with the alignment terminated by -1, and passes the two aligned
 * sequences to add_counts().
 *
 * If either allocation fails, a message is written to stderr and the program
 * exits, because there is nowhere to store the alignment.
 */
void initial_align() {
    struct stringpair *pair;
    int inlen, outlen, i, j, k;

    for (pair = g_stringpairs; pair != NULL; pair = pair->next) {
        inlen = intseqlen(pair->in);
        outlen = intseqlen(pair->out);

        /*
         * Room for inlen + outlen symbols plus the -1 terminator. The greedy
         * pass below needs fewer slots than this; the larger size is kept from
         * the original code (presumably so later re-alignments can reuse the
         * same buffers -- confirm against the callers).
         */
        pair->inaligned = malloc(sizeof *pair->inaligned * (inlen + outlen + 1));
        pair->outaligned = malloc(sizeof *pair->outaligned * (inlen + outlen + 1));
        if (pair->inaligned == NULL || pair->outaligned == NULL) {
            fprintf(stderr, "initial_align: out of memory\n");
            exit(EXIT_FAILURE);
        }

        /* Walk both sequences until each has reached its -1 terminator. */
        for (i = 0, j = 0, k = 0; pair->in[i] != -1 || pair->out[j] != -1; k++) {
            if (pair->in[i] == -1) {
                /* Input exhausted: pair the remaining output symbol with 0. */
                pair->inaligned[k] = 0;
                pair->outaligned[k] = pair->out[j];
                j++;
            } else if (pair->out[j] == -1) {
                /* Output exhausted: pair the remaining input symbol with 0. */
                pair->inaligned[k] = pair->in[i];
                pair->outaligned[k] = 0;
                i++;
            } else {
                /* Both sides still have symbols: align them one-to-one. */
                pair->inaligned[k] = pair->in[i];
                pair->outaligned[k] = pair->out[j];
                i++;
                j++;
            }
        }

        /* Terminate both aligned sequences. */
        pair->inaligned[k] = -1;
        pair->outaligned[k] = -1;

        add_counts(pair->inaligned, pair->outaligned);
    }
}
void crp_train(int iterations, int burnin, int lag) { struct stringpair *sp; int i, j; for (i = 0; i < iterations; i++) { fprintf(stderr,"Alignment iteration: %i\n", i); print_counts(); for (sp = g_stringpairs; sp != NULL; sp = sp->next) { remove_counts(sp->inaligned, sp->outaligned); /* Remove counts before aligning */ fill_trellis(sp->in, sp->out, &cost_crp, MATRIX_MODE_GS); for (j = 0; g_in_result[j] != -1; j++) { sp->inaligned[j] = g_in_result[j]; sp->outaligned[j] = g_out_result[j]; } sp->inaligned[j] = -1; sp->outaligned[j] = -1; add_counts(sp->inaligned, sp->outaligned); /* Add counts back from new alignment */ } if (i > burnin && i % lag == 0) { add_global_counts(); } } }
/*----------------------------------------------------------------------*/ int main(int argc, char **argv) { /* MBIO status variables */ int status = MB_SUCCESS; int error = MB_ERROR_NO_ERROR; /* MBIO read control parameters */ int read_datalist = MB_NO; void *datalist; int look_processed = MB_DATALIST_LOOK_UNSET; double file_weight; mb_path ifile; /* MBIO read values */ int read_data; /* counting variables */ counts filerecs; counts totrecs; /* processing variables */ options opts; mbdefaults mbdflts; /* set default options */ default_options(&opts); /* mb_mem_debug_on(opts.verbose, &error); */ /* get mbsystem default values */ status = mb_defaults(opts.verbose, &(mbdflts.format), &(mbdflts.pings_get), &(mbdflts.lonflip), mbdflts.bounds, mbdflts.btime_i, mbdflts.etime_i, &(mbdflts.speedmin), &(mbdflts.timegap)); if (status == MB_SUCCESS) { parse_options(opts.verbose, argc, argv, &opts, &error); } if (opts.errflg) { fprintf(stderr, "usage: %s\n", usage_message); fprintf(stderr, "\nProgram <%s> Terminated\n", program_name); error = MB_ERROR_BAD_USAGE; exit(error); } /* print starting debug statements */ if (opts.verbose >= 1) { print_mbdefaults(opts.verbose, &opts, &mbdflts, &error); } /* if help desired then print it and exit */ if (opts.help) { fprintf(stderr, "\nProgram %s\n", program_name); fprintf(stderr, "Version %s\n", rcs_id); fprintf(stderr, "MB-system Version %s\n", MB_VERSION); fprintf(stderr, "\nusage: %s\n", usage_message); fprintf(stderr, "\n%s\n", help_message); exit(error); } /* get format if required */ if (opts.format == 0) { mb_get_format(opts.verbose, opts.read_file, NULL, &(opts.format), &error); } /* determine whether to read one file or a list of files */ if (opts.format < 0) { read_datalist = MB_YES; } /* open file list */ if (read_datalist == MB_YES) { if ((status = mb_datalist_open(opts.verbose, &datalist, opts.read_file, look_processed, &error)) != MB_SUCCESS) { char message[MAX_ERROR_STRING]; sprintf(message, "Unable to open data list 
file: %s\n", opts.read_file); error_exit(opts.verbose, MB_ERROR_OPEN_FAIL, "mb_datalist_open", message); } if ((status = mb_datalist_read(opts.verbose, datalist, ifile, &(opts.format), &file_weight, &error)) == MB_SUCCESS) { read_data = MB_YES; } else { read_data = MB_NO; } } /* else copy single filename to be read */ else { strcpy(ifile, opts.read_file); read_data = MB_YES; } /* reset total record counter */ zero_counts(opts.verbose, &totrecs, &error); /* loop over files to be read */ while (read_data == MB_YES) { /* reset file record counter */ zero_counts(opts.verbose, &filerecs, &error); /* process the output files */ if (status == MB_SUCCESS) { status = process_output(opts.verbose, &mbdflts, &opts, ifile, &filerecs, &error); } /* output counts */ filerecs.files_read++; if (opts.verbose >= 1) { fprintf(stdout, "\nData records read from: %s\n", ifile); print_counts(opts.verbose, &filerecs, &error); } /* add this file's counts to total */ add_counts(opts.verbose, &totrecs, &filerecs, &error); /* figure out whether and what to read next */ if (read_datalist == MB_YES) { if ((status = mb_datalist_read(opts.verbose, datalist, ifile, &(opts.format), &file_weight, &error)) == MB_SUCCESS) { read_data = MB_YES; } else { read_data = MB_NO; } } else { read_data = MB_NO; } } /* end loop over files in list */ /* output counts */ if (opts.verbose >= 1) { fprintf(stdout, "\nTotal data records read:\n"); print_counts(opts.verbose, &totrecs, &error); } if (read_datalist == MB_YES) { mb_datalist_close(opts.verbose, &datalist, &error); } /* check memory */ status = mb_memory_list(opts.verbose, &error); /* mb_mem_debug_off(opts.verbose, &error); */ return (status); } /* main */