/*
 * Decode the internationalized domain names found in every line read
 * from `fp' and write the converted lines to stdout.
 *
 * conf1 - configuration used for the input-side conversions, and for the
 *         output conversion when the local encoding is not an ACE.
 * conf2 - configuration used for the final conversion only when the local
 *         encoding is an ACE and FLAG_SELECTIVE is set.
 * fp    - stream the lines are read from.
 * flags - FLAG_* bits selecting which mapping/check actions are applied.
 *
 * The file-scope `line_number' counter is reset to 1 and advanced once per
 * successfully processed line; it is also used in the error messages.
 *
 * Returns 0 once strbuf_getline() yields NULL, or 1 as soon as any step
 * fails for a line (after reporting the failure through errormsg()).
 */
static int
decode_file(idn_resconf_t conf1, idn_resconf_t conf2, FILE *fp, int flags)
{
	idnconv_strbuf_t inbuf, outbuf;
	idn_converter_t cv;
	idn_action_t nameprep_actions, decode_actions, output_actions;
	idn_result_t res;
	int idn_is_ace, local_is_ace;
	int had_newline;
	int status = 0;

	/*
	 * Find out whether the IDN encoding is an ASCII-compatible
	 * encoding (only relevant in selective mode).
	 */
	cv = idn_resconf_getidnconverter(conf1);
	idn_is_ace = (cv != NULL && idn_converter_isasciicompatible(cv) &&
		      (flags & FLAG_SELECTIVE)) ? 1 : 0;
	if (cv != NULL)
		idn_converter_destroy(cv);

	/* Same question for the local encoding. */
	cv = idn_resconf_getlocalconverter(conf1);
	local_is_ace = (cv != NULL && idn_converter_isasciicompatible(cv) &&
			(flags & FLAG_SELECTIVE)) ? 1 : 0;
	if (cv != NULL)
		idn_converter_destroy(cv);

	/*
	 * Action bits that both the decoding pass and (when the local
	 * encoding is an ACE) the output pass have in common.
	 */
	nameprep_actions = 0;
	if (flags & FLAG_MAP)
		nameprep_actions |= IDN_MAP;
	if (flags & FLAG_NORMALIZE)
		nameprep_actions |= IDN_NORMALIZE;
	if (flags & FLAG_PROHIBITCHECK)
		nameprep_actions |= IDN_PROHCHECK;
	if (flags & FLAG_UNASSIGNCHECK)
		nameprep_actions |= IDN_UNASCHECK;
	if (flags & FLAG_BIDICHECK)
		nameprep_actions |= IDN_BIDICHECK;
	if (flags & FLAG_ASCIICHECK)
		nameprep_actions |= IDN_ASCCHECK;

	/* Actions for the decoding pass. */
	decode_actions = IDN_IDNCONV | nameprep_actions;
	if (flags & FLAG_DELIMMAP)
		decode_actions |= IDN_DELIMMAP;
	if (flags & FLAG_ROUNDTRIPCHECK)
		decode_actions |= IDN_RTCHECK;

	/* Actions for the output pass. */
	if (local_is_ace) {
		output_actions = IDN_IDNCONV | nameprep_actions;
		if (flags & FLAG_LENGTHCHECK)
			output_actions |= IDN_LENCHECK;
	} else {
		output_actions = IDN_LOCALCONV;
	}

	strbuf_init(&inbuf);
	strbuf_init(&outbuf);
	line_number = 1;

	while (strbuf_getline(&inbuf, fp) != NULL) {
		/*
		 * Strip the trailing newline so that ASCII-compatible
		 * encodings such as UTF-5 or RACE are not asked to
		 * convert it, which would give an `invalid encoding'
		 * error.  Remember whether one was removed so it can be
		 * restored on output.
		 */
		had_newline = trim_newline(&inbuf);

		/*
		 * Step 1: interpret the line as local-encoding text and
		 * convert it to UTF-8.  When the local encoding is an
		 * ACE the line is only copied.
		 */
		if (!local_is_ace)
			res = convert_line(&inbuf, &outbuf, conf1,
					   IDN_LOCALCONV, 0);
		else if (strbuf_copy(&outbuf, strbuf_get(&inbuf)) != NULL)
			res = idn_success;
		else
			res = idn_nomemory;
		if (res != idn_success) {
			errormsg("conversion failed at line %d: %s\n",
				 line_number, idn_result_tostring(res));
			status = 1;
			break;
		}

		/*
		 * Step 2: convert the internationalized domain names in
		 * the line.
		 */
		res = convert_line(&outbuf, &inbuf, conf1, decode_actions,
				   idn_is_ace ? (FLAG_REVERSE | FLAG_SELECTIVE)
					      : FLAG_REVERSE);
		if (res != idn_success) {
			errormsg("conversion failed at line %d: %s\n",
				 line_number, idn_result_tostring(res));
			status = 1;
			break;
		}
		if (!idn_utf8_isvalidstring(strbuf_get(&inbuf))) {
			errormsg("conversion to utf-8 failed at line %d\n",
				 line_number);
			status = 1;
			break;
		}

		/*
		 * Step 3: perform the round trip check and convert to
		 * the output codeset.
		 */
		if (local_is_ace)
			res = convert_line(&inbuf, &outbuf, conf2,
					   output_actions, FLAG_SELECTIVE);
		else
			res = convert_line(&inbuf, &outbuf, conf1,
					   output_actions, FLAG_REVERSE);
		if (res != idn_success) {
			errormsg("error in nameprep or output conversion "
				 "at line %d: %s\n",
				 line_number, idn_result_tostring(res));
			status = 1;
			break;
		}

		/* Emit the result, restoring the newline trimmed above. */
		fputs(strbuf_get(&outbuf), stdout);
		if (had_newline)
			putc('\n', stdout);
		if (flush_every_line)
			fflush(stdout);

		line_number++;
	}

	/* Both the success and the failure path release the buffers. */
	strbuf_reset(&inbuf);
	strbuf_reset(&outbuf);
	return (status);
}
static void c50(char **namesv, char **datav, char **costv, int *subset, int *rules, int *utility, int *trials, int *winnow, double *sample, int *seed, int *noGlobalPruning, double *CF, int *minCases, int *fuzzyThreshold, int *earlyStopping, int *prunem, char **treev, char **rulesv, char **outputv) { int val; /* Used by setjmp/longjmp for implementing rbm_exit */ // Announce ourselves for testing // Rprintf("c50 called\n"); // Initialize the globals to the values that the c50 // program would have at the start of execution initglobals(); // Set globals based on the arguments. This is analogous // to parsing the command line in the c50 program. setglobals(*subset, *rules, *utility, *trials, *prunem, *winnow, *sample, *seed, *noGlobalPruning, *CF, *minCases, *fuzzyThreshold, *earlyStopping, *costv); // Handles the strbufv data structure rbm_removeall(); // Deallocates memory allocated by NewCase. // Not necessary since it's also called at the end of this function, // but it doesn't hurt, and I'm feeling paranoid. FreeCases(); // XXX Should this be controlled via an option? // Rprintf("Calling setOf\n"); setOf(); // Create a strbuf using *namesv as the buffer. // Note that this is a readonly strbuf since we can't // extend *namesv. STRBUF *sb_names = strbuf_create_full(*namesv, strlen(*namesv)); // Register this strbuf using the name "undefined.names" if (rbm_register(sb_names, "undefined.names", 0) < 0) { error("undefined.names already exists"); } // Create a strbuf using *datav and register it as "undefined.data" STRBUF *sb_datav = strbuf_create_full(*datav, strlen(*datav)); // XXX why is sb_datav copied? was that part of my debugging? // XXX or is this the cause of the leak? 
if (rbm_register(strbuf_copy(sb_datav), "undefined.data", 0) < 0) { error("undefined data already exists"); } // Create a strbuf using *costv and register it as "undefined.costs" if (strlen(*costv) > 0) { // Rprintf("registering cost matrix: %s", *costv); STRBUF *sb_costv = strbuf_create_full(*costv, strlen(*costv)); // XXX should sb_costv be copied? if (rbm_register(sb_costv, "undefined.costs", 0) < 0) { error("undefined.cost already exists"); } } else { // Rprintf("no cost matrix to register\n"); } /* * We need to initialize rbm_buf before calling any code that * might call exit/rbm_exit. */ if ((val = setjmp(rbm_buf)) == 0) { // Real work is done here // Rprintf("Calling c50main\n"); c50main(); // Rprintf("c50main finished\n"); if (*rules == 0) { // Get the contents of the the tree file STRBUF *treebuf = rbm_lookup("undefined.tree"); if (treebuf != NULL) { char *treeString = strbuf_getall(treebuf); char *treeObj = R_alloc(strlen(treeString) + 1, 1); strcpy(treeObj, treeString); // I think the previous value of *treev will be garbage collected *treev = treeObj; } else { // XXX Should *treev be assigned something in this case? // XXX Throw an error? } } else { // Get the contents of the the rules file STRBUF *rulesbuf = rbm_lookup("undefined.rules"); if (rulesbuf != NULL) { char *rulesString = strbuf_getall(rulesbuf); char *rulesObj = R_alloc(strlen(rulesString) + 1, 1); strcpy(rulesObj, rulesString); // I think the previous value of *rulesv will be garbage collected *rulesv = rulesObj; } else { // XXX Should *rulesv be assigned something in this case? // XXX Throw an error? 
} } } else { Rprintf("c50 code called exit with value %d\n", val - JMP_OFFSET); } // Close file object "Of", and return its contents via argument outputv char *outputString = closeOf(); char *output = R_alloc(strlen(outputString) + 1, 1); strcpy(output, outputString); *outputv = output; // Deallocates memory allocated by NewCase FreeCases(); // We reinitialize the globals on exit out of general paranoia initglobals(); }
void error_correct_list_of_files(StrBuf* list_fastq,char quality_cutoff, char ascii_qual_offset, dBGraphEc *db_graph, HandleLowQualUncorrectable policy, int max_read_len, int min_read_len, StrBuf* suffix, char* outdir, boolean add_greedy_bases_for_better_bwt_compression, int num_greedy_bases, boolean rev_comp_read_if_on_reverse_strand) { printf("error correct list of files\n"); fflush(stdout); int len = max_read_len+2; uint64_t* distrib_num_bases_corrected =(uint64_t*) malloc(sizeof(uint64_t)*len); uint64_t* distrib_position_bases_corrected=(uint64_t*) malloc(sizeof(uint64_t)*len); if ( (distrib_num_bases_corrected==NULL)|| (distrib_position_bases_corrected==NULL)) { die("Unable to alloc arrays for keeping stats. Your machine must have hardly any spare memory\n"); } set_uint64_t_array(distrib_num_bases_corrected, len, (uint64_t) 0); set_uint64_t_array(distrib_position_bases_corrected, len, (uint64_t) 0); FILE* list_fastq_fp = fopen(list_fastq->buff, "r"); if (list_fastq_fp==NULL) { printf("Cannot open file %s\n", list_fastq->buff); } StrBuf *next_fastq = strbuf_new(); StrBuf* corrected_file = strbuf_new(); StrBuf* corrected_file_newpath = strbuf_new(); while(strbuf_reset_readline(next_fastq, list_fastq_fp)) { strbuf_chomp(next_fastq); if(strbuf_len(next_fastq) > 0) { strbuf_reset(corrected_file); strbuf_reset(corrected_file_newpath); strbuf_copy(corrected_file, 0,//dest next_fastq,0,strbuf_len(next_fastq)); strbuf_append_str(corrected_file, suffix->buff); char* corrected_file_basename = basename(corrected_file->buff); strbuf_append_str(corrected_file_newpath, outdir); strbuf_append_str(corrected_file_newpath, corrected_file_basename); error_correct_file_against_graph(next_fastq->buff, quality_cutoff, ascii_qual_offset, db_graph, corrected_file_newpath->buff, distrib_num_bases_corrected, distrib_position_bases_corrected, len, min_read_len, policy, add_greedy_bases_for_better_bwt_compression, num_greedy_bases, rev_comp_read_if_on_reverse_strand); } } 
fclose(list_fastq_fp); strbuf_free(next_fastq); strbuf_free(corrected_file); strbuf_free(corrected_file_newpath); free(distrib_num_bases_corrected); free(distrib_position_bases_corrected); }