示例#1
0
文件: idnconv.c 项目: 274914765/C
/*
 * Decode the text read from `fp' line by line and write the result to stdout.
 *
 * Every input line goes through up to three convert_line() passes:
 *   1. IDN_LOCALCONV using conf1.  This pass is replaced by a plain copy
 *      when the local converter of conf1 is ASCII compatible and
 *      FLAG_SELECTIVE is set ("local ACE hack").
 *   2. `actions1' using conf1 with FLAG_REVERSE (plus FLAG_SELECTIVE when
 *      the IDN converter of conf1 is ASCII compatible and FLAG_SELECTIVE
 *      is set, "IDN ACE hack").  The result must be a valid UTF-8 string.
 *   3. `actions2': conf2 with FLAG_SELECTIVE in the local-ACE case,
 *      otherwise conf1 with FLAG_REVERSE.
 *
 * `flags' is a bit set of FLAG_* values selecting which IDN_* actions are
 * requested in passes 2 and 3.
 *
 * line_number and flush_every_line are declared outside this function;
 * line_number is reset to 1 here and incremented per processed line.
 *
 * Returns 0 when every line has been processed, or 1 as soon as one line
 * fails (a message is reported through errormsg()).  Both work buffers are
 * reset on either path.
 */
static int decode_file (idn_resconf_t conf1, idn_resconf_t conf2, FILE * fp, int flags)
{
    idn_result_t r;

    idnconv_strbuf_t buf1, buf2;

    idn_action_t actions1, actions2;

    int nl_trimmed;

    int local_ace_hack, idn_ace_hack;

    idn_converter_t conv;

    /*
     * See if the input codeset is an ACE.
     */
    conv = idn_resconf_getidnconverter (conf1);
    if (conv != NULL && idn_converter_isasciicompatible (conv) && (flags & FLAG_SELECTIVE))
        idn_ace_hack = 1;
    else
        idn_ace_hack = 0;
    if (conv != NULL)
        idn_converter_destroy (conv);

    /*
     * Same question for the local codeset.
     */
    conv = idn_resconf_getlocalconverter (conf1);
    if (conv != NULL && idn_converter_isasciicompatible (conv) && (flags & FLAG_SELECTIVE))
        local_ace_hack = 1;
    else
        local_ace_hack = 0;
    if (conv != NULL)
        idn_converter_destroy (conv);

    actions1 = IDN_IDNCONV;

    /*
     * Actions for the final (output) pass.  With the local ACE hack the
     * output pass is itself an IDN conversion and carries the requested
     * mapping/checking actions; otherwise it is a plain local conversion.
     */
    if (local_ace_hack)
    {
        actions2 = IDN_IDNCONV;
        if (flags & FLAG_MAP)
            actions2 |= IDN_MAP;
        if (flags & FLAG_NORMALIZE)
            actions2 |= IDN_NORMALIZE;
        if (flags & FLAG_PROHIBITCHECK)
            actions2 |= IDN_PROHCHECK;
        if (flags & FLAG_UNASSIGNCHECK)
            actions2 |= IDN_UNASCHECK;
        if (flags & FLAG_BIDICHECK)
            actions2 |= IDN_BIDICHECK;
        if (flags & FLAG_ASCIICHECK)
            actions2 |= IDN_ASCCHECK;
        if (flags & FLAG_LENGTHCHECK)
            actions2 |= IDN_LENCHECK;
    }
    else
    {
        actions2 = IDN_LOCALCONV;
    }

    /*
     * Actions for the middle (decoding) pass.
     */
    if (flags & FLAG_DELIMMAP)
        actions1 |= IDN_DELIMMAP;
    if (flags & FLAG_MAP)
        actions1 |= IDN_MAP;
    if (flags & FLAG_NORMALIZE)
        actions1 |= IDN_NORMALIZE;
    if (flags & FLAG_PROHIBITCHECK)
        actions1 |= IDN_PROHCHECK;
    if (flags & FLAG_UNASSIGNCHECK)
        actions1 |= IDN_UNASCHECK;
    if (flags & FLAG_BIDICHECK)
        actions1 |= IDN_BIDICHECK;
    if (flags & FLAG_ASCIICHECK)
        actions1 |= IDN_ASCCHECK;
    if (flags & FLAG_ROUNDTRIPCHECK)
        actions1 |= IDN_RTCHECK;

    strbuf_init (&buf1);
    strbuf_init (&buf2);
    line_number = 1;
    while (strbuf_getline (&buf1, fp) != NULL)
    {
        /*
         * Trim newline at the end.  This is needed for
         * those ascii-compatible encodings such as UTF-5 or RACE
         * not to try converting newlines, which will result
         * in `invalid encoding' error.
         */
        nl_trimmed = trim_newline (&buf1);

        /*
         * Treat input line as the string encoded in local
         * encoding and convert it to UTF-8 encoded string.
         * (buf1 -> buf2)
         */
        if (local_ace_hack)
        {
            if (strbuf_copy (&buf2, strbuf_get (&buf1)) == NULL)
                r = idn_nomemory;
            else
                r = idn_success;
        }
        else
        {
            r = convert_line (&buf1, &buf2, conf1, IDN_LOCALCONV, 0);
        }
        if (r != idn_success)
        {
            errormsg ("conversion failed at line %d: %s\n", line_number, idn_result_tostring (r));
            goto error;
        }

        /*
         * Convert internationalized domain names in the line.
         * (buf2 -> buf1)
         */
        if (idn_ace_hack)
        {
            r = convert_line (&buf2, &buf1, conf1, actions1, FLAG_REVERSE | FLAG_SELECTIVE);
        }
        else
        {
            r = convert_line (&buf2, &buf1, conf1, actions1, FLAG_REVERSE);
        }
        if (r != idn_success)
        {
            errormsg ("conversion failed at line %d: %s\n", line_number, idn_result_tostring (r));
            goto error;
        }
        if (!idn_utf8_isvalidstring (strbuf_get (&buf1)))
        {
            errormsg ("conversion to utf-8 failed at line %d\n", line_number);
            goto error;
        }

        /*
         * Perform round trip check and convert to the output
         * codeset.  (buf1 -> buf2)
         */
        if (local_ace_hack)
        {
            r = convert_line (&buf1, &buf2, conf2, actions2, FLAG_SELECTIVE);
        }
        else
        {
            r = convert_line (&buf1, &buf2, conf1, actions2, FLAG_REVERSE);
        }

        if (r != idn_success)
        {
            errormsg ("error in nameprep or output conversion "
                      "at line %d: %s\n", line_number, idn_result_tostring (r));
            goto error;
        }

        /*
         * Emit the converted line, restoring the newline only if one was
         * trimmed from the input.
         */
        fputs (strbuf_get (&buf2), stdout);
        if (nl_trimmed)
            putc ('\n', stdout);

        if (flush_every_line)
            fflush (stdout);

        line_number++;
    }
    strbuf_reset (&buf1);
    strbuf_reset (&buf2);
    return (0);

  error:
    strbuf_reset (&buf1);
    strbuf_reset (&buf2);
    return (1);
}
示例#2
0
文件: top.c 项目: NavamiK/C5.0
static void c50(char **namesv,
                char **datav,
                char **costv,
                int *subset,
                int *rules,
                int *utility,
                int *trials,
                int *winnow,
                double *sample,
                int *seed,
                int *noGlobalPruning,
                double *CF,
                int *minCases,
                int *fuzzyThreshold,
                int *earlyStopping,
                int *prunem,
                char **treev,
                char **rulesv,
                char **outputv)
{
    int val;  /* Used by setjmp/longjmp for implementing rbm_exit */

    // Announce ourselves for testing
    // Rprintf("c50 called\n");

    // Initialize the globals to the values that the c50
    // program would have at the start of execution
    initglobals();

    // Set globals based on the arguments.  This is analogous
    // to parsing the command line in the c50 program.
    setglobals(*subset, *rules, *utility, *trials, *prunem, *winnow, *sample,
               *seed, *noGlobalPruning, *CF, *minCases, *fuzzyThreshold,
               *earlyStopping, *costv);

    // Handles the strbufv data structure
    rbm_removeall();

    // Deallocates memory allocated by NewCase.
    // Not necessary since it's also called at the end of this function,
    // but it doesn't hurt, and I'm feeling paranoid.
    FreeCases();

    // XXX Should this be controlled via an option?
    // Rprintf("Calling setOf\n");
    setOf();

    // Create a strbuf using *namesv as the buffer.
    // Note that this is a readonly strbuf since we can't
    // extend *namesv.
    STRBUF *sb_names = strbuf_create_full(*namesv, strlen(*namesv));

    // Register this strbuf using the name "undefined.names"
	if (rbm_register(sb_names, "undefined.names", 0) < 0) {
		error("undefined.names already exists");
	}

    // Create a strbuf using *datav and register it as "undefined.data"
    STRBUF *sb_datav = strbuf_create_full(*datav, strlen(*datav));
    // XXX why is sb_datav copied? was that part of my debugging?
    // XXX or is this the cause of the leak?
	if (rbm_register(strbuf_copy(sb_datav), "undefined.data", 0) < 0) {
		error("undefined data already exists");
	}

    // Create a strbuf using *costv and register it as "undefined.costs"
    if (strlen(*costv) > 0) {
        // Rprintf("registering cost matrix: %s", *costv);
        STRBUF *sb_costv = strbuf_create_full(*costv, strlen(*costv));
        // XXX should sb_costv be copied?
	    if (rbm_register(sb_costv, "undefined.costs", 0) < 0) {
		    error("undefined.cost already exists");
	    }
    } else {
        // Rprintf("no cost matrix to register\n");
    }

    /*
     * We need to initialize rbm_buf before calling any code that
     * might call exit/rbm_exit.
     */
    if ((val = setjmp(rbm_buf)) == 0) {

        // Real work is done here
        // Rprintf("Calling c50main\n");
        c50main();

        // Rprintf("c50main finished\n");

        if (*rules == 0) {
            // Get the contents of the the tree file
            STRBUF *treebuf = rbm_lookup("undefined.tree");
            if (treebuf != NULL) {
                char *treeString = strbuf_getall(treebuf);
                char *treeObj = R_alloc(strlen(treeString) + 1, 1);
                strcpy(treeObj, treeString);

                // I think the previous value of *treev will be garbage collected
                *treev = treeObj;
            } else {
                // XXX Should *treev be assigned something in this case?
                // XXX Throw an error?
            }
        } else {
            // Get the contents of the the rules file
            STRBUF *rulesbuf = rbm_lookup("undefined.rules");
            if (rulesbuf != NULL) {
                char *rulesString = strbuf_getall(rulesbuf);
                char *rulesObj = R_alloc(strlen(rulesString) + 1, 1);
                strcpy(rulesObj, rulesString);

                // I think the previous value of *rulesv will be garbage collected
                *rulesv = rulesObj;
            } else {
                // XXX Should *rulesv be assigned something in this case?
                // XXX Throw an error?
            }
        }
    } else {
        Rprintf("c50 code called exit with value %d\n", val - JMP_OFFSET);
    }

    // Close file object "Of", and return its contents via argument outputv
    char *outputString = closeOf();
    char *output = R_alloc(strlen(outputString) + 1, 1);
    strcpy(output, outputString);
    *outputv = output;

    // Deallocates memory allocated by NewCase
    FreeCases();

    // We reinitialize the globals on exit out of general paranoia
    initglobals();
}
示例#3
0
/*
 * Error-correct every file named in a list file.
 *
 * list_fastq->buff is the path of a text file holding one input file name
 * per line; empty lines are skipped.  For each entry the output path is
 *     outdir + basename(entry + suffix)
 * (no separator is inserted, so outdir must already end with one if one is
 * wanted) and error_correct_file_against_graph() is invoked with the
 * remaining arguments forwarded unchanged.
 *
 * Two statistics arrays of max_read_len+2 entries are allocated, zeroed,
 * handed to every error_correct_file_against_graph() call and freed at the
 * end; this function does not report them itself.
 *
 * Calls die() if the statistics arrays cannot be allocated.  If the list
 * file cannot be opened a message is printed and the function returns
 * without processing anything.
 */
void error_correct_list_of_files(StrBuf* list_fastq,char quality_cutoff, char ascii_qual_offset,
                                 dBGraphEc *db_graph, HandleLowQualUncorrectable policy,
                                 int max_read_len, int min_read_len, StrBuf* suffix, char* outdir,
                                 boolean add_greedy_bases_for_better_bwt_compression,
                                 int num_greedy_bases, boolean rev_comp_read_if_on_reverse_strand)

{
    printf("error correct list of files\n");
    fflush(stdout);
    int len = max_read_len+2;
    uint64_t* distrib_num_bases_corrected      = malloc(sizeof(*distrib_num_bases_corrected)*len);
    uint64_t* distrib_position_bases_corrected = malloc(sizeof(*distrib_position_bases_corrected)*len);
    if ( (distrib_num_bases_corrected==NULL)|| (distrib_position_bases_corrected==NULL))
    {
        die("Unable to alloc arrays for keeping stats. Your machine must have hardly any spare memory\n");
    }
    set_uint64_t_array(distrib_num_bases_corrected,      len, (uint64_t) 0);
    set_uint64_t_array(distrib_position_bases_corrected, len, (uint64_t) 0);

    FILE* list_fastq_fp = fopen(list_fastq->buff, "r");
    if (list_fastq_fp==NULL)
    {
        printf("Cannot open file %s\n", list_fastq->buff);
        /* Nothing can be read: reading from or closing a NULL stream is
           undefined behaviour, so release what we own and stop here. */
        free(distrib_num_bases_corrected);
        free(distrib_position_bases_corrected);
        return;
    }
    StrBuf *next_fastq     = strbuf_new();
    StrBuf* corrected_file = strbuf_new();
    StrBuf* corrected_file_newpath = strbuf_new();

    while(strbuf_reset_readline(next_fastq, list_fastq_fp))
    {
        strbuf_chomp(next_fastq);
        if(strbuf_len(next_fastq) > 0)
        {
            /* corrected_file = <input name><suffix> */
            strbuf_reset(corrected_file);
            strbuf_reset(corrected_file_newpath);
            strbuf_copy(corrected_file, 0,//dest
                        next_fastq,0,strbuf_len(next_fastq));
            strbuf_append_str(corrected_file, suffix->buff);

            /* corrected_file_newpath = <outdir><basename of corrected_file> */
            char* corrected_file_basename = basename(corrected_file->buff);
            strbuf_append_str(corrected_file_newpath, outdir);
            strbuf_append_str(corrected_file_newpath, corrected_file_basename);

            error_correct_file_against_graph(next_fastq->buff, quality_cutoff, ascii_qual_offset,
                                             db_graph, corrected_file_newpath->buff,
                                             distrib_num_bases_corrected,
                                             distrib_position_bases_corrected,
                                             len,
                                             min_read_len,
                                             policy,
                                             add_greedy_bases_for_better_bwt_compression,
                                             num_greedy_bases, rev_comp_read_if_on_reverse_strand);


        }
    }
    fclose(list_fastq_fp);
    strbuf_free(next_fastq);
    strbuf_free(corrected_file);
    strbuf_free(corrected_file_newpath);
    free(distrib_num_bases_corrected);
    free(distrib_position_bases_corrected);
}