/**
 * Entry point of the SFF "sort" sub-command.
 *
 * Expects exactly one positional argument, the input SFF file name.  The
 * input is opened with mode "rbi", an output handle is created on stdout
 * with mode "wbi" using the input's header and index, and sff_sort() is run
 * from the input handle to the output handle.
 *
 * @param argc  number of command-line arguments
 * @param argv  command-line arguments
 * @return the value of usage() on any option or a wrong argument count,
 *         0 otherwise
 */
int sff_sort_main(int argc, char *argv[])
{
  sff_file_t *src, *dst;

  /* "-h" is the only declared option and every option result, recognised
   * or not, ends in usage(); so the first non-negative getopt() result is
   * enough to decide. */
  if(getopt(argc, argv, "h") >= 0) {
      return usage();
  }

  /* exactly one positional argument must remain */
  if(optind + 1 != argc) {
      return usage();
  }

  src = sff_fopen(argv[optind], "rbi", NULL, NULL);
  dst = sff_fdopen(fileno(stdout), "wbi", src->header, src->index);

  sff_sort(src, dst);

  sff_fclose(src);
  sff_fclose(dst);

  return 0;
}
int sff_view_main(int argc, char *argv[]) { int i, c; sff_file_t *sff_file_in=NULL, *sff_file_out=NULL; sff_iter_t *sff_iter = NULL; sff_t *sff = NULL; char *fn_names = NULL; char **names = NULL; int32_t names_num = 0, names_mem = 0; int32_t out_mode, min_row, max_row, min_col, max_col; out_mode = 0; min_row = max_row = min_col = max_col = -1; while((c = getopt(argc, argv, "r:c:R:bqh")) >= 0) { switch(c) { case 'r': if(ion_parse_range(optarg, &min_row, &max_row) < 0) { ion_error(__func__, "-r : format not recognized", Exit, OutOfRange); } break; case 'c': if(ion_parse_range(optarg, &min_col, &max_col) < 0) { ion_error(__func__, "-c : format not recognized", Exit, OutOfRange); } break; case 'R': free(fn_names); fn_names = strdup(optarg); break; case 'q': out_mode |= 1; break; case 'b': out_mode |= 2; break; case 'h': default: return usage(); } } if(argc != 1+optind) { return usage(); } else { sff_header_t *header = NULL; if(3 == out_mode) { ion_error(__func__, "options -b and -q cannot be used together", Exit, CommandLineArgument); } // open the input SFF if(-1 != min_row || -1 != max_row || -1 != min_col || -1 != max_col) { sff_file_in = sff_fopen(argv[optind], "rbi", NULL, NULL); } else { sff_file_in = sff_fopen(argv[optind], "rb", NULL, NULL); } header = sff_header_clone(sff_file_in->header); /* copy header, but update n_reads if using index or names */ // read in the names if(NULL != fn_names) { FILE *fp = NULL; char name[1024]="\0"; // lets hope we don't exceed this length names_num = names_mem = 0; names = NULL; if(!(fp = fopen(fn_names, "rb"))) { fprintf(stderr, "** Could not open %s for reading. 
**\n", fn_names); ion_error(__func__, fn_names, Exit, OpenFileError); } while(EOF != fscanf(fp, "%s", name)) { while(names_num == names_mem) { if(0 == names_mem) names_mem = 4; else names_mem *= 2; names = ion_realloc(names, sizeof(char*) * names_mem, __func__, "names"); } names[names_num] = strdup(name); if(NULL == names[names_num]) { ion_error(__func__, name, Exit, MallocMemory); } names_num++; } names = ion_realloc(names, sizeof(char*) * names_num, __func__, "names"); fclose(fp); header->n_reads = names_num; } else { // if using index, then iterate once through the index to count the entries // so we can set the count correctly in the header if (-1 != min_row || -1 != max_row || -1 != min_col || -1 != max_col) { int entries = 0; sff_iter = sff_iter_query(sff_file_in, min_row, max_row, min_col, max_col); while (NULL != (sff = sff_iter_read(sff_file_in, sff_iter))) entries++; header->n_reads = entries; /* reset sff_iter */ sff_iter_destroy(sff_iter); sff_iter = sff_iter_query(sff_file_in, min_row, max_row, min_col, max_col); } } // print the header switch(out_mode) { case 0: sff_header_print(stdout, header); break; case 1: // do nothing: FASTQ break; case 2: sff_file_out = sff_fdopen(fileno(stdout), "wb", header, NULL); break; } while(1) { int32_t to_print = 1; if(-1 != min_row || -1 != max_row || -1 != min_col || -1 != max_col) { if(NULL == (sff = sff_iter_read(sff_file_in, sff_iter))) { break; } } else { if(NULL == (sff = sff_read(sff_file_in))) { break; } } if(0 < names_mem) { to_print = 0; for(i=0;i<names_num;i++) { if(0 == strcmp(names[i], sff_name(sff))) { to_print = 1; break; } } // shift down if(1 == to_print) { // i < names_num free(names[i]); names[i] = NULL; for(;i<names_num-1;i++) { names[i] = names[i+1]; names[i+1] = NULL; } names_num--; } } if(1 == to_print) { switch(out_mode) { case 0: sff_print(stdout, sff); break; case 1: if(fprintf(stdout, "@%s\n%s\n+\n", sff->rheader->name->s, sff->read->bases->s + sff->gheader->key_length) < 0) { 
ion_error(__func__, "stdout", Exit, WriteFileError); } for(i=sff->gheader->key_length;i<sff->read->quality->l;i++) { if(fputc(QUAL2CHAR(sff->read->quality->s[i]), stdout) < 0) { ion_error(__func__, "stdout", Exit, WriteFileError); } } if(fputc('\n', stdout) < 0) { ion_error(__func__, "stdout", Exit, WriteFileError); } break; case 2: sff_write(sff_file_out, sff); break; } } sff_destroy(sff); } sff_fclose(sff_file_in); if(2 == out_mode) { sff_fclose(sff_file_out); } if(-1 != min_row || -1 != max_row || -1 != min_col || -1 != max_col) { sff_iter_destroy(sff_iter); } if(0 != names_num) { fprintf(stderr, "** Did not find all the reads with (-R). **\n"); ion_error(__func__, fn_names, Exit, OutOfRange); } sff_header_destroy(header); } if(NULL != names && 0 < names_num) { free(names); } free(fn_names); return 0; }
int main(int argc, char *argv[]) { char *fastqFileName = NULL; char *sffFileName = NULL; bool forceClip = false; bool keyPass = false; bool allReads = false; int readCol = -1; int readRow = -1; bool findRead = false; int row, col; int numKeypassedReads = 0; int qual_offset = DEFAULT_QUAL_OFFSET; bool legacyFASTQName = false; // enable if you want r10|c100 format name in fastq file bool debug = false; bool legacyReadName = false; bool adapterTrim = true; bool ignoreLeftQualTrim = false; // process command-line args int argcc = 1; while (argcc < argc) { if (argv[argcc][0] == '-') { switch (argv[argcc][1]) { case 'a': // output all reads allReads = true; break; case 'R': // report read at row & column argcc++; readRow = atoi(argv[argcc]); break; case 'C': // report read at row & column argcc++; readCol = atoi(argv[argcc]); break; case 'q': // convert to fastq argcc++; fastqFileName = argv[argcc]; break; case 'c': // force qual clip left to 5 forceClip = true; break; case 's': // Offset to apply to quality scores argcc++; qual_offset = atoi(argv[argcc]); if(qual_offset==0) { fprintf (stderr, "-s option should specify a nonzero quality offset\n"); exit (1); } break; case 'k': // force keypass keyPass = true; argcc++; hackkey = argv[argcc]; hackkeylen = strlen(hackkey); break; case 'L': // don't record name of read in comment legacyFASTQName = true; break; case 'd': // enable debug print outs debug = true; break; case 'h': // help info printHelp (); exit (0); break; case 'u': // prevent read clipping adapterTrim = false; break; case 'b': // ignore barcodes (ok really its ignoring the left qual trim) ignoreLeftQualTrim = true; break; case 'v': // version info fprintf (stdout, "%s", IonVersion::GetFullVersion("SFFRead").c_str()); exit (0); break; default: //sffFileName = argv[argcc]; break; } } else { sffFileName = argv[argcc]; } argcc++; } if (!sffFileName) { printHelp(); exit(0); } if (readCol > -1 && readRow > -1) { findRead = true; allReads = true;// makes it search 
all reads } sff_file_t* sff_file_in = NULL; sff_file_in = sff_fopen(sffFileName, "rb", NULL, NULL); if (sff_file_in) { if (!findRead && !fastqFileName) { printf("Reading file: %s\n", sffFileName); sff_header_print(stdout, sff_file_in->header); } // -- read the reads int numReads; if (allReads) { numReads = sff_file_in->header->n_reads; } else { numReads = (sff_file_in->header->n_reads < 10 ? sff_file_in->header->n_reads:10); } FILE *fpq = NULL; if (fastqFileName) { numReads = sff_file_in->header->n_reads; fpq = fopen(fastqFileName, "w"); if (!fpq){ perror (fastqFileName); exit (1); } } for(int i=0;i<numReads;i++) { sff_read_header_t* rh = sff_read_header_read(sff_file_in->fp); sff_read_t* rr = sff_read_read(sff_file_in->fp, sff_file_in->header, rh); // optional - ignore the left & right adapter clipping by simply setting these values to 0 if (!adapterTrim) { rh->clip_adapter_left = 0; rh->clip_adapter_right = 0; } if (!fpq && !findRead) { printf("Read header length: %d\n", rh->rheader_length); printf("Read name length: %d\n", rh->name_length); } // Extract the row and column popsition info for this read if (1 != ion_readname_to_rowcol(rh->name->s, &row, &col)) { fprintf (stderr, "Error parsing read name: '%s'\n", rh->name->s); continue; } if(1 == ion_readname_legacy(rh->name->s)) { legacyReadName = true; } else { legacyReadName = false; } if (!fpq && !findRead) { printf("Read: %s (r%05d|c%05d) has %d bases\n", (rh->name_length > 0 ? rh->name->s : "NONAME"), row, col, rh->n_bases); printf("Clip left: %d qual: %d right: %d qual: %d\n", rh->clip_adapter_left, rh->clip_qual_left, rh->clip_adapter_right, rh->clip_qual_right); printf("Flowgram values:\n"); } if (findRead) { if (row == readRow && col == readCol) { //printf("Ionogram: "); int i; for(i=0;i<sff_file_in->header->flow_length;i++) { printf("%.2lf ", (double)(rr->flowgram[i])/100.0); } printf("\n"); //// now print the bases - all the bases, not clipped! //// these bases correspond to the raw flowgram data. 
in essence //for (int b=0;b<r.number_of_bases;b++) // fprintf(stdout, "%c", bases[b]); //fprintf(stdout, "\n"); } } else if (fpq) { bool ok = true; if (keyPass) { // if (r.number_of_bases > h.key_length) { if ((int)rh->n_bases > hackkeylen) { int b; // for(b=0;b<h.key_length;b++) { for(b=0;b<hackkeylen;b++) { // if (key_sequence[b] != bases[b]) { if (hackkey[b] != rr->bases->s[b]) { ok = false; break; } } } else ok = false; // not long enough } int clip_left_index = 0; int clip_right_index = 0; if (ok) { //numKeypassedReads++; // If force-clip option is set, we want to ensure the key gets trimmed if (forceClip && rh->clip_adapter_left < 4) rh->clip_adapter_left = hackkeylen+1; if (ignoreLeftQualTrim) clip_left_index = max (1, rh->clip_adapter_left); else clip_left_index = max (1, max (rh->clip_qual_left, rh->clip_adapter_left)); clip_right_index = min ((rh->clip_qual_right == 0 ? rh->n_bases:rh->clip_qual_right), (rh->clip_adapter_right == 0 ? rh->n_bases:rh->clip_adapter_right)); if (debug) fprintf (stdout, "debug clip: left = %d right = %d\n", clip_left_index, clip_right_index); numKeypassedReads++; if (clip_left_index > clip_right_index) // Suppress output of zero-mer reads (left > right) ok = false; } if (ok) { //print id string if (legacyFASTQName) { fprintf (fpq, "@r%d|c%d\n", row, col); } else { if (legacyReadName){ //Override legacy name char runId[6] = {'\0'}; strncpy (runId, &rh->name->s[7], 5); fprintf (fpq, "@%s:%d:%d\n", runId, row, col); } else { //Copy name verbatim fprintf (fpq, "@%s\n", rh->name->s); } } //print bases for (int b=clip_left_index-1;b<clip_right_index;b++) fprintf(fpq, "%c", rr->bases->s[b]); fprintf(fpq, "\n"); //print '+' fprintf(fpq, "+\n"); //print quality scores for (int b=clip_left_index-1;b<clip_right_index;b++) fprintf(fpq, "%c", QualToFastQ((int)(rr->quality->s[b]),qual_offset)); fprintf(fpq, "\n"); } } else { int f; for(f=0;f<sff_file_in->header->flow_length;f++) printf("%d ", rr->flowgram[f]); printf("\nFlow index per 
base:\n"); unsigned int b; for(b=0;b<rh->n_bases;b++) printf("%d ", rr->flow_index[b]); printf("\nBases called:\n"); for(b=0;b<rh->n_bases;b++) printf("%c", rr->bases->s[b]); printf("\nQuality scores:\n"); for(b=0;b<rh->n_bases;b++) printf("%d ", rr->quality->s[b]); printf("\nDone with this read\n\n"); } sff_read_header_destroy(rh); sff_read_destroy(rr); } // debug print - keypass reads written to the fastq file if (fpq) { static char *printkey = "All"; if (keyPass) printkey = hackkey; fprintf (stdout, "Keypass Reads(%s) = %d\n", printkey, numKeypassedReads); fprintf (stdout, "Total Reads = %d\n", numReads); fprintf (stdout, "Percentage = %.2f%%\n", ((float) numKeypassedReads/ (float) numReads) * 100.0); } sff_fclose(sff_file_in); if (fpq) fclose(fpq); } else { perror (sffFileName); exit (1); } return 0; }
int sff_index_create_main(int argc, char *argv[]) { int c; sff_file_t *fp_in, *fp_out; int32_t num_rows, num_cols, type; sff_header_t *fp_out_header; sff_index_t* index; sff_t *sff; num_rows = num_cols = -1; type = SFF_INDEX_ALL; while((c = getopt(argc, argv, "r:c:C:Rh")) >= 0) { switch(c) { case 'r': num_rows = atoi(optarg); break; case 'c': num_cols = atoi(optarg); break; case 'C': switch(atoi(optarg)) { case 0: num_rows = 1152; num_cols = 1280; break; case 1: num_rows = 2640; num_cols = 2736; break; case 2: num_rows = 3792; num_cols = 3392; break; default: break; } case 'R': type = SFF_INDEX_ROW_ONLY; break; case 'h': default: return usage(); } } if(argc != 1+optind) { return usage(); } else { // check cmd line args if(num_rows < 0) { ion_error(__func__, "-r must be specified and greater than zero", Exit, CommandLineArgument); } if(num_cols < 0) { ion_error(__func__, "-c must be specified and greater than zero", Exit, CommandLineArgument); } switch(type) { case SFF_INDEX_ROW_ONLY: case SFF_INDEX_ALL: break; default: ion_error(__func__, "bug encountered", Exit, OutOfRange); break; } fp_in = sff_fopen(argv[optind], "rb", NULL, NULL); fp_out_header = sff_header_clone(fp_in->header); index = sff_index_create(fp_in, fp_out_header, num_rows, num_cols, type); fp_out = sff_fdopen(fileno(stdout), "wbi", fp_out_header, index); // seek the input file to the beginning of the the entries, which is the same // location as where the index begins in the output file. if(0 != fseek(fp_in->fp, fp_out_header->index_offset, SEEK_SET)) { ion_error(__func__, "fseek", Exit, ReadFileError); } // write the sff entries while(NULL != (sff = sff_read(fp_in))) { sff_write(fp_out, sff); sff_destroy(sff); } // destroy the header. Don't destroy index, sff_fclose does that sff_header_destroy(fp_out_header); // sff_index_destroy(index); sff_fclose(fp_in); sff_fclose(fp_out); } return 0; }