void DatFile::GetExperimentHeader(struct experiment_header &hdr) { char* f = this->GetFileData(sizeof(struct dat_file_header), sizeof(struct experiment_header)); memcpy ( &hdr, f, sizeof ( hdr ) ); ByteSwap4(hdr.first_frame_time); ByteSwap2( hdr.rows ); ByteSwap2 ( hdr.cols ); ByteSwap2( hdr.x_region_size ); ByteSwap2( hdr.y_region_size ); ByteSwap2( hdr.frames_in_file ); ByteSwap2( hdr.uncomp_frames_in_file ); ByteSwap2( hdr.interlaceType ); if(hdr.uncomp_frames_in_file < hdr.frames_in_file || hdr.uncomp_frames_in_file >= hdr.frames_in_file * 4) { std::cout << "Unknown compression" << std::endl; exit(55); } }
int main(int argc, char *argv[]) { FILE *inSFF = NULL; FILE *outSFF = NULL; FILE *listFP = NULL; int n = 0; //number elements read char *inFileName = NULL; char *outFileName = NULL; char *listFileName = NULL; char *errFileName = {"./SFFFilter_err.txt"}; int numReads; int matchCnt = 0; int got = 0; bool debugflag = false; bool qualflag = false; int qual_offset = 33; bool listMatch = false; char name[256]; uint16_t *flowgram_values; // [NUMBER_OF_FLOWS_PER_READ]; uint8_t *flow_index_per_base; // * number_of_bases; char *bases; // * number_of_bases; uint8_t *quality_scores; // * number_of_bases; char *flow_chars; char *key_sequence; // Parse command line arguments int argcc = 1; while (argcc < argc) { if (argv[argcc][0] == '-') { switch (argv[argcc][1]) { case 'd': // print debug info debugflag = true; break; case 'q': // print debug info qualflag = true; break; case 'f': // list of locations to filter argcc++; listFileName = strdup (argv[argcc]); break; case 's': // Offset to apply to quality scores argcc++; qual_offset = atoi(argv[argcc]); if(qual_offset==0) { fprintf (stderr, "-s option should specify a nonzero quality offset\n"); exit (1); } break; case 'o': // output file name argcc++; outFileName = strdup(argv[argcc]); break; default: fprintf (stderr, "Unknown option %s\n", argv[argcc]); exit (1); break; } } else { inFileName = argv[argcc]; } argcc++; } if (!inFileName) { fprintf (stdout, "No input sff file specified\n"); fprintf (stdout, "Usage: %s [-f filename] [-d] sff-filename\n", argv[0]); fprintf (stdout, "\t-f Specify input file list.\n"); fprintf (stdout, "\t-o Specify output sff file name.\n"); fprintf (stdout, "\t-d Prints debug information.\n"); fprintf (stdout, "\t-q Take qualities from 4th field of file specified by -f.\n"); fprintf (stdout, "\t-s To use in conjunction with -q option, specifies an offset to be applied to quality scores.\n"); exit (1); } if (!listFileName) { fprintf (stdout, "No input list file specified\n"); fprintf (stdout, "Usage: 
%s [-f filename] [-d] sff-filename\n", argv[0]); fprintf (stdout, "\t-f Specify input file list.\n"); fprintf (stdout, "\t-o Specify output sff file name.\n"); fprintf (stdout, "\t-d Prints debug information.\n"); fprintf (stdout, "\t-q Take qualities from 4th field of file specified by -f.\n"); fprintf (stdout, "\t-s To use in conjunction with -q option, specifies an offset to be applied to quality scores.\n"); exit (1); } //Create output filename from input filename if it wasn't specified if(outFileName==NULL) { outFileName = (char *) malloc (sizeof(char) * (strlen(dirname(inFileName)) + strlen(inFileName) + 50)); sprintf (outFileName, "%s/filtered_%s", dirname(inFileName), inFileName); } //Open the SFF file inSFF = fopen(inFileName, "rb"); if (!inSFF) { perror (inFileName); exit (1); } //Open the outputSFF file outSFF = fopen(outFileName, "wb"); if (!outSFF) { perror (outFileName); exit (1); } //Open the list file listFP = fopen(listFileName, "rb"); if (!listFP) { perror (listFileName); exit (1); } //Read the list of locations into buffer got = GetNumLines(listFileName); if (got <= 0) { fprintf (stderr, "Did not read any pixel coordinates; does the file exist? 
Is it formatted correctly?\n"); exit (1); } else { fprintf (stdout, "Reading up to %d lines\n", got); } //Dynamic array allocation int *rows = (int *) malloc (sizeof(int) * got); int *cols = (int *) malloc (sizeof(int) * got); int *lengths = (int *) malloc (sizeof(int) * got); char **quals = (char **) malloc (sizeof(char*) * got); bool *fnds = (bool *) malloc (sizeof(bool) * got); //tracks reads that were found in SFF file for (int i=0;i<got;i++) { fnds[i] = false; quals[i] = (char *) malloc (sizeof(char) * MAX_BASES); } int lineCnt = 0; while (!feof(listFP)) { if(qualflag) { if(4 != fscanf (listFP, "%d %d %d %s\n", &rows[lineCnt], &cols[lineCnt], &lengths[lineCnt], quals[lineCnt])) { fprintf(stderr,"%s: bad format in line %d of %s - expected 3 ints and a char string.\n",argv[0],1+lineCnt,inFileName); exit(EXIT_FAILURE); } else if(strlen(quals[lineCnt]) < (unsigned int) lengths[lineCnt]) { fprintf(stderr,"%s: warning: line %d of %s - quality string is shorter than requested length.\n",argv[0],1+lineCnt,inFileName); } lineCnt++; } else { if(3 != fscanf (listFP, "%d %d %d\n", &rows[lineCnt], &cols[lineCnt], &lengths[lineCnt])) { fprintf(stderr,"%s: bad format in line %d of %s - expected 3 ints.\n",argv[0],1+lineCnt,inFileName); exit(EXIT_FAILURE); } else { lineCnt++; } } } fclose (listFP); // Read the input file header CommonHeader h; n = fread(&h, 31, 1, inSFF); assert(n==1); //Copy the header to write the output file CommonHeader ch_out; ch_out.magic_number = h.magic_number; ch_out.version[0] = 0; ch_out.version[1] = 0; ch_out.version[2] = 0; ch_out.version[3] = 1; ch_out.index_offset = h.index_offset; ch_out.index_length = h.index_length; ch_out.number_of_reads = h.number_of_reads; ch_out.header_length = h.header_length; ch_out.key_length = h.key_length; ch_out.number_of_flows_per_read = h.number_of_flows_per_read; ch_out.flowgram_format_code = h.flowgram_format_code; ByteSwap8(h.index_offset); ByteSwap4(h.index_length); ByteSwap4(h.number_of_reads); 
ByteSwap2(h.header_length); ByteSwap2(h.key_length); ByteSwap2(h.number_of_flows_per_read); flow_chars = (char *)malloc(h.number_of_flows_per_read); key_sequence = (char *)malloc(h.key_length); n = fread(flow_chars, h.number_of_flows_per_read, 1, inSFF); assert(n==1); n = fread(key_sequence, h.key_length, 1, inSFF); assert(n==1); int padBytes = (8-((31 + h.number_of_flows_per_read + h.key_length) & 7)); char padData[8]; if (padBytes > 0) { n = fread(padData, padBytes, 1, inSFF); assert(n==1); } if (debugflag) { //DEBUG printf("Magic: %u %s\n", h.magic_number, (h.magic_number == MAGIC ? "Yes" : "No")); printf("Header length: %hu\n", h.header_length); printf("Version: %d%d%d%d\n", h.version[0], h.version[1], h.version[2], h.version[3]); printf("Index offset: %lu length: %u\n", h.index_offset, h.index_length); printf("Number of reads: %u\n", h.number_of_reads); printf("Key length: %u\n", h.key_length); printf("Flows per read: %hu\n", h.number_of_flows_per_read); printf("Flowgram format: %hhu\n", h.flowgram_format_code); printf ("End of Header\n\n"); } // Write the header of the output SFF char pad[8]; memset(pad, 0, sizeof(pad)); int bytes = 31; fwrite (&ch_out, bytes, 1, outSFF); for(int i=0;i<h.number_of_flows_per_read;i++) { fwrite(&flow_chars[i%4], 1, 1, outSFF); bytes++; } fwrite(key_sequence, 1, 4, outSFF); bytes += 4; padBytes = (8 - (bytes & 0x7)) & 0x7; if (padBytes > 0) fwrite(pad, padBytes, 1, outSFF); // Prepare to process all the reads numReads = h.number_of_reads; flowgram_values = (uint16_t *)malloc(sizeof(uint16_t) * h.number_of_flows_per_read); int maxBases = h.number_of_flows_per_read * 100; // problems if ever a 10-mer hits every flow! 
flow_index_per_base = (uint8_t *)malloc(sizeof(uint8_t) * maxBases); bases = (char *)malloc(maxBases); quality_scores = (uint8_t *)malloc(sizeof(uint8_t) * maxBases); //Loop thru the reads for (int i=0;i<numReads;i++) { // Read read header ReadHeader r; n = fread(&r, 16, 1, inSFF); assert(n==1); ByteSwap2(r.read_header_length); ByteSwap4(r.number_of_bases); ByteSwap2(r.name_length); ByteSwap2(r.clip_qual_left); ByteSwap2(r.clip_qual_right); ByteSwap2(r.clip_adapter_left); ByteSwap2(r.clip_adapter_right); if (r.name_length > 0) { n = fread(name, r.name_length, 1, inSFF); assert(n==1); name[r.name_length] = '\0'; } int readPadLength = ((8 - ((16 + r.name_length) & 7)))%8; if (readPadLength > 0) { n = fread(padData, readPadLength, 1, inSFF); assert(n==1); } n = fread(flowgram_values, h.number_of_flows_per_read, sizeof(uint16_t), inSFF); assert(n==sizeof(uint16_t)); n = fread(flow_index_per_base, r.number_of_bases, sizeof(uint8_t), inSFF); assert(n==sizeof(uint8_t)); n = fread(bases, r.number_of_bases, 1, inSFF); assert(n==1); bases[r.number_of_bases] = '\0'; n = fread(quality_scores, r.number_of_bases, sizeof(uint8_t), inSFF); assert(n==sizeof(uint8_t)); int bytesRead = h.number_of_flows_per_read * sizeof(uint16_t) + 3 * r.number_of_bases; readPadLength = (8 - (bytesRead & 7))%8; if (readPadLength > 0) { n = fread(padData, readPadLength, 1, inSFF); assert(n==1); } int f; if (debugflag) { // DEBUG printf("Read: %s has %d bases\n", (r.name_length > 0 ? 
name : "NONAME"), r.number_of_bases); //printf("Read header length: %d\n", r.read_header_length); printf("Clip left: %d qual: %d right: %d qual: %d\n", r.clip_adapter_left, r.clip_qual_left, r.clip_adapter_right, r.clip_qual_right); printf("Flowgram bases:\n"); for(f=0;f<h.number_of_flows_per_read;f++) printf("%d ", (int) floor (ByteSwap2(flowgram_values[f])/100.0 + 0.5)); printf("\n"); //printf("\nFlow index per base:\n"); unsigned int b; //for(b=0;b<r.number_of_bases;b++) // printf("%d ", flow_index_per_base[b]); printf("Bases called:\n"); for(b=0;b<r.number_of_bases;b++) printf("%c", bases[b]); //printf("\nQuality scores:\n"); //for(b=0;b<r.number_of_bases;b++) // printf("%d ", quality_scores[b]); } else { for(f=0;f<h.number_of_flows_per_read;f++) ByteSwap2(flowgram_values[f]); } //Get the row column for this read int row; int col; if(1 != ion_readname_to_rowcol(name, &row, &col)) { fprintf (stderr, "Error parsing read name: '%s'\n", name); continue; } //Look for matching row column in the list listMatch = false; //fprintf (stdout, "Looking for %d %d\n", row, col); int readMatch=0; for (;readMatch<got;readMatch++) { if (row == rows[readMatch] && col == cols[readMatch]) { //fprintf (stdout, "\there it is %d %d\n", rows[readMatch],cols[readMatch]); listMatch = true; fnds[readMatch] = true; matchCnt++; break; } } if (listMatch) { // // Update the output file // int nameLen = r.name_length; int numBasesCalled = r.number_of_bases; if(r.clip_qual_right == 0 || r.clip_qual_right > lengths[readMatch]) r.clip_qual_right = lengths[readMatch]; // write the header ByteSwap2(r.read_header_length); ByteSwap4(r.number_of_bases); ByteSwap2(r.name_length); ByteSwap2(r.clip_qual_left); ByteSwap2(r.clip_qual_right); ByteSwap2(r.clip_adapter_left); ByteSwap2(r.clip_adapter_right); fwrite (&r, 16, 1, outSFF); fwrite(name, nameLen, 1, outSFF); int writePadLength = (8 - (nameLen & 7)) & 7; if (writePadLength) fwrite(padData, writePadLength, 1, outSFF); if(qualflag) { for(int iBase=0; 
iBase < lengths[readMatch]; iBase++) { quality_scores[iBase] = (uint8_t) quals[readMatch][iBase] + qual_offset; } } for(int iBase=lengths[readMatch]; iBase < numBasesCalled; iBase++) { flow_index_per_base[iBase] = 0; bases[iBase] = 'N'; quality_scores[iBase] = 0; } for (f=0;f<h.number_of_flows_per_read;f++) ByteSwap2(flowgram_values[f]); fwrite(flowgram_values, h.number_of_flows_per_read, sizeof(uint16_t), outSFF); fwrite(flow_index_per_base, numBasesCalled, sizeof(uint8_t), outSFF); fwrite(bases, numBasesCalled, 1, outSFF); fwrite(quality_scores, numBasesCalled, sizeof(uint8_t), outSFF); int bytesWritten = h.number_of_flows_per_read * sizeof(uint16_t) + 3 * numBasesCalled; writePadLength = (8 - (bytesWritten & 7)) & 7; if (writePadLength) fwrite(padData, writePadLength, 1, outSFF); } else { //Skip this read } } //Update Read Count in output SFF file ch_out.number_of_reads = BYTE_SWAP_4(matchCnt); fseek (outSFF, 0, SEEK_SET); bytes=31; fwrite(&ch_out, bytes, 1, outSFF); //User message fprintf (stdout, "Created file: %s\n", outFileName); // //Write out report on unfound reads // bool printErrorLog = false; for (int i=0;i<got;i++) { if (fnds[i] == false) { printErrorLog = true; break; } } if (printErrorLog) { fprintf (stdout, "There are reads that were not found. See %s\n", errFileName); FILE *fpErr = fopen (errFileName, "wb"); if (fpErr) { fprintf (fpErr, "# SFF file: %s\n", inFileName); fprintf (fpErr, "# Read positions source: %s\n", listFileName); fprintf (fpErr, "# Reads not found in SFF:\n"); fprintf (fpErr, "# Row Column\n"); for (int i=0;i<got;i++) { if (fnds[i] == false) { fprintf (fpErr, "%d %d\n", rows[i], cols[i]); } } fclose (fpErr); } } //Cleanup fclose (inSFF); fclose (outSFF); free (rows); free (cols); free (fnds); free (flow_chars); free (key_sequence); free (flowgram_values); free (flow_index_per_base); free (bases); free (quality_scores); free (listFileName); free (outFileName); return 0; }
int main(int argc, char *argv[]) { FILE *inSFF = NULL; FILE *outSFF = NULL; int n =0; // number elements read char *inFileName = NULL; char outFileName[512] = {"\0"}; int numReads; //int keyFlows = 8; //int adapterFlows = 0; bool debugflag = false; char *primer = NULL; double fom = 0.85; unsigned int sPos = 4; // start searching after 4 base key unsigned int endPos = 7; // end searching st 3rd base after key char name[256]; uint16_t *flowgram_values; // [NUMBER_OF_FLOWS_PER_READ]; uint8_t *flow_index_per_base; // * number_of_bases; char *bases; // * number_of_bases; uint8_t *quality_scores; // * number_of_bases; char *flow_chars; char *key_sequence; // Parse command line arguments int argcc = 1; while (argcc < argc) { if (argv[argcc][0] == '-') { switch (argv[argcc][1]) { case 's': // define barcode string argcc++; primer = strdup (argv[argcc]); break; case 'd': // print debug info debugflag = true; break; case 'f': // acceptance threshold argcc++; fom = atof (argv[argcc]); break; } } else { inFileName = argv[argcc]; } argcc++; } if (!inFileName) { fprintf (stdout, "No input file specified\n"); fprintf (stdout, "Usage: %s [-b barcode][-f #][-k #][-d]\n", argv[0]); fprintf (stdout, "\t-s Specify barcode string (CTTCCTTC).\n"); fprintf (stdout, "\t-f Specify acceptance threshold (0.85).\n"); fprintf (stdout, "\t-d Prints debug information.\n"); exit (1); } // No barcode passed in from command line so set up a default if (primer == NULL) { primer = strdup ("CTTCCTTC"); } //Create output filename from input filename snprintf (outFileName, 512, "%s/AT_%s", dirname(inFileName), inFileName); //Open the SFF file inSFF = fopen(inFileName, "rb"); //Open the outputSFF file outSFF = fopen(outFileName, "wb"); // Read the input file header CommonHeader h; n = fread(&h, 31, 1, inSFF); assert(n == 1); //Copy the header to write the output file CommonHeader ch_out; ch_out.magic_number = h.magic_number; ch_out.version[0] = 0; ch_out.version[1] = 0; ch_out.version[2] = 0; 
ch_out.version[3] = 1; ch_out.index_offset = h.index_offset; ch_out.index_length = h.index_length; ch_out.number_of_reads = h.number_of_reads; ch_out.header_length = h.header_length; ch_out.key_length = h.key_length; ch_out.number_of_flows_per_read = h.number_of_flows_per_read; ByteSwap8(h.index_offset); ByteSwap4(h.index_length); ByteSwap4(h.number_of_reads); ByteSwap2(h.header_length); ByteSwap2(h.key_length); ByteSwap2(h.number_of_flows_per_read); flow_chars = (char *)malloc(h.number_of_flows_per_read); key_sequence = (char *)malloc(h.key_length); n = fread(flow_chars, h.number_of_flows_per_read, 1, inSFF); assert(n == 1); n = fread(key_sequence, h.key_length, 1, inSFF); assert(n == 1); int padBytes = (8-((31 + h.number_of_flows_per_read + h.key_length) & 7)); char padData[8]; n = fread(padData, padBytes, 1, inSFF); assert(n == 1); if (0) { //DEBUG printf("Magic: %u %s\n", h.magic_number, (h.magic_number == MAGIC ? "Yes" : "No")); printf("Header length: %hu\n", h.header_length); printf("Version: %d%d%d%d\n", h.version[0], h.version[1], h.version[2], h.version[3]); printf("Index offset: %lu length: %u\n", h.index_offset, h.index_length); printf("Number of reads: %u\n", h.number_of_reads); printf("Key length: %u\n", h.key_length); printf("Flows per read: %hu\n", h.number_of_flows_per_read); printf("Flowgram format: %hhu\n", h.flowgram_format_code); printf ("End of Header\n\n"); } // Write the header of the output SFF char pad[8]; memset(pad, 0, sizeof(pad)); int bytes = 31; fwrite (&ch_out, bytes, 1, outSFF); for(int i=0;i<h.number_of_flows_per_read;i++) { fwrite(&flow_chars[i%4], 1, 1, outSFF); bytes++; } fwrite(key_sequence, 1, 4, outSFF); bytes += 4; padBytes = (8 - (bytes & 0x7)) & 0x7; if (padBytes > 0) fwrite(pad, padBytes, 1, outSFF); // Prepare to process all the reads numReads = h.number_of_reads; // Statistics variables int numFoundSeq = 0; // number of reads with the search sequence int numPassFOM = 0; // number of reads above acceptance threshold int 
*corr_histo = (int *) malloc (sizeof(int) * (strlen(primer)+1)); for (unsigned int j=0;j<strlen(primer)+1;j++) corr_histo[j] = 0; flowgram_values = (uint16_t *)malloc(sizeof(uint16_t) * h.number_of_flows_per_read); int maxBases = h.number_of_flows_per_read * 100; // problems if ever a 10-mer hits every flow! flow_index_per_base = (uint8_t *)malloc(sizeof(uint8_t) * maxBases); bases = (char *)malloc(maxBases); quality_scores = (uint8_t *)malloc(sizeof(uint8_t) * maxBases); //Loop thru the reads for (int nr=0;nr<numReads;nr++) { // Read read header ReadHeader r; n = fread(&r, 16, 1, inSFF); assert(n == 1); ByteSwap2(r.read_header_length); ByteSwap4(r.number_of_bases); ByteSwap2(r.name_length); ByteSwap2(r.clip_qual_left); ByteSwap2(r.clip_qual_right); ByteSwap2(r.clip_adapter_left); ByteSwap2(r.clip_adapter_right); if (r.name_length > 0) { n = fread(name, r.name_length, 1, inSFF); assert(n == 1); name[r.name_length] = '\0'; } int readPadLength = ((8 - ((16 + r.name_length) & 7)))%8; if (readPadLength > 0) { n = fread(padData, readPadLength, 1, inSFF); assert(n == 1); } n = fread(flowgram_values, h.number_of_flows_per_read, sizeof(uint16_t), inSFF); assert(n == sizeof(uint16_t)); n = fread(flow_index_per_base, r.number_of_bases, sizeof(uint8_t), inSFF); assert(n == sizeof(uint8_t)); n = fread(bases, r.number_of_bases, 1, inSFF); assert(n == 1); bases[r.number_of_bases] = '\0'; n = fread(quality_scores, r.number_of_bases, sizeof(uint8_t), inSFF); assert(n == sizeof(uint8_t)); int bytesRead = h.number_of_flows_per_read * sizeof(uint16_t) + 3 * r.number_of_bases; readPadLength = (8 - (bytesRead & 7))%8; if (readPadLength > 0) { n = fread(padData, readPadLength, 1, inSFF); assert(n == 1); } int f; if (0) { // DEBUG printf("Read: %s has %d bases\n", (r.name_length > 0 ? 
name : "NONAME"), r.number_of_bases); //printf("Read header length: %d\n", r.read_header_length); printf("Clip left: %d qual: %d right: %d qual: %d\n", r.clip_adapter_left, r.clip_qual_left, r.clip_adapter_right, r.clip_qual_right); printf("Flowgram bases:\n"); for(f=0;f<h.number_of_flows_per_read;f++) printf("%d ", (int) floor (ByteSwap2(flowgram_values[f])/100.0 + 0.5)); printf("\n"); //printf("\nFlow index per base:\n"); unsigned int b; //for(b=0;b<r.number_of_bases;b++) // printf("%d ", flow_index_per_base[b]); printf("Bases called:\n"); for(b=0;b<r.number_of_bases;b++) printf("%c", bases[b]); //printf("\nQuality scores:\n"); //for(b=0;b<r.number_of_bases;b++) // printf("%d ", quality_scores[b]); } else { for(f=0;f<h.number_of_flows_per_read;f++) ByteSwap2(flowgram_values[f]); } /* This is trimming based on number of flows Assumes a perfect barcode * // Trim the key and the adapter int numFlowsToTrim = keyFlows + adapterFlows; // numBasesToTrim = basesInKey + basesInAdapter; int numBasesToTrim = 0; for (int j=0;j<numFlowsToTrim;j++) numBasesToTrim += (int) floor (flowgram_values[j]/100.0 + 0.5); //relies on debug print to byteswap! // TODO: make this generic for the final base of the key int k = 4; while (bases[k++] == 'G') numBasesToTrim -= 1; if (debugflag) printf("\nNumBases = %d\n", numBasesToTrim); r.clip_qual_left = numBasesToTrim + 1; //The essence of this tool. 
// End trimming based on number of flows */ /* Trimming based on finding the actual barcode 'near' the key */ // Loop thru starting positions starting at left going right int primerLen = strlen(primer); //sPos = 4; // start searching after 4 base key //endPos = 7; // end searching st 3rd base after key if (r.number_of_bases < endPos) continue; int correct = 0; double *correctness = (double *) malloc (sizeof(double) * (endPos - sPos)); unsigned int i; for (i=sPos;i<endPos;i++) { // Loop thru the flows for (int flow=0;flow<primerLen;flow++) { if (primer[flow] == bases[flow+i]) { correct++; } } correctness[i-sPos] = (double) correct;/// (double) primerLen; correct = 0; } double max; unsigned int matchingIndex = 0; for (i=sPos;i<endPos;i++) { if (i == sPos || correctness[i-sPos] > max) { max = correctness[i-sPos]; matchingIndex = i; } } corr_histo[(int)max]++; numFoundSeq++; if ((double)(max/primerLen) >= fom) { numPassFOM++; if (debugflag) { fprintf (stdout, "%s\n", name); fprintf (stdout, "Matching Index = %d (%0.2lf)\n", matchingIndex, (double)(max/primerLen)); for (i=0;i<matchingIndex;i++) fprintf (stdout, " "); fprintf (stdout, "%s\n%s\n", primer, bases); } matchingIndex = matchingIndex + primerLen; if (debugflag) fprintf (stdout, "Trim point is %d\n", matchingIndex+1); } else { matchingIndex = 4; } r.clip_qual_left = matchingIndex+1; /* End Trimming barcode near the key */ // // Update the output file // int nameLen = r.name_length; int numBasesCalled = r.number_of_bases; // write the header ByteSwap2(r.read_header_length); ByteSwap4(r.number_of_bases); ByteSwap2(r.name_length); ByteSwap2(r.clip_qual_left); ByteSwap2(r.clip_qual_right); ByteSwap2(r.clip_adapter_left); ByteSwap2(r.clip_adapter_right); fwrite (&r, 16, 1, outSFF); fwrite(name, nameLen, 1, outSFF); int writePadLength = (8 - (nameLen & 7)) & 7; if (writePadLength) fwrite(padData, writePadLength, 1, outSFF); for (f=0;f<h.number_of_flows_per_read;f++) ByteSwap2(flowgram_values[f]); 
fwrite(flowgram_values, h.number_of_flows_per_read, sizeof(uint16_t), outSFF); fwrite(flow_index_per_base, numBasesCalled, sizeof(uint8_t), outSFF); fwrite(bases, numBasesCalled, 1, outSFF); fwrite(quality_scores, numBasesCalled, sizeof(uint8_t), outSFF); int bytesWritten = h.number_of_flows_per_read * sizeof(uint16_t) + 3 * numBasesCalled; writePadLength = (8 - (bytesWritten & 7)) & 7; if (writePadLength) fwrite(padData, writePadLength, 1, outSFF); free (correctness); } // Print statistics to stdout fprintf (stdout, "\n=====================================================\n"); fprintf (stdout, "Barcode String: %s\n", primer); fprintf (stdout, "%15s: %10d\n", "Total Reads", numReads); fprintf (stdout, "%15s: %10d\n", "with Adapter", numFoundSeq); fprintf (stdout, "%15s: %10d\n", "passing FOM", numPassFOM); fprintf (stdout, "Acceptance threshold: %0.2lf%%\n", fom); fprintf (stdout, "Search indices: %d thru %d\n", sPos, endPos-1); for (int i = 0; i <= (int)strlen(primer);i++) { fprintf (stdout, "[%2d/%2d %6.1lf%%] %5d/%d %6.2lf%%\n", i, (int) strlen(primer), ((double)i/(double)strlen(primer))*100.0,corr_histo[i], numFoundSeq, ((double)corr_histo[i]/(double)numFoundSeq)*100.0); } //Cleanup fclose (inSFF); fclose (outSFF); free (flow_index_per_base); free (flowgram_values); free (bases); free (quality_scores); free (flow_chars); free (key_sequence); free (primer); free (corr_histo); return 0; }
int main(int argc, char *argv[]) { char *fastqFileName = NULL; char *sffFileName = NULL; bool forceClip = false; bool keyPass = false; bool allReads = false; int minReadLen = 8; // min read length after key-pass that we will write out to fastq file int readCol = -1; int readRow = -1; bool findRead = false; int row, col; // process command-line args int argcc = 1; while (argcc < argc) { if (argv[argcc][0] == '-') { switch (argv[argcc][1]) { case 'a': // output all reads allReads = true; break; case 'R': // report read at row & column argcc++; readRow = atoi(argv[argcc]); break; case 'C': // report read at row & column argcc++; readCol = atoi(argv[argcc]); break; case 'q': // convert to fastq argcc++; fastqFileName = argv[argcc]; break; case 'c': // force qual clip left to 5 forceClip = true; break; case 'k': // force keypass keyPass = true; argcc++; hackkey = argv[argcc]; hackkeylen = strlen(hackkey); break; case 'l': // set min readlength for fastq file output filter argcc++; minReadLen = atoi(argv[argcc]); break; default: //sffFileName = argv[argcc]; break; } } else { sffFileName = argv[argcc]; } argcc++; } if (!sffFileName) { printf("Usage: SFFRead [args] sffFile.sff\n"); exit(0); } if (readCol > -1 && readRow > -1) { findRead = true; allReads = true;// makes it search all reads } FILE *fp; fp = fopen(sffFileName, "r+"); if (fp) { if (!findRead && !fastqFileName) printf("Reading file: %s\n", sffFileName); CommonHeader h; // Fix the flow_format_code problem: make sure it is set to 1 fpos_t p, start; fgetpos (fp, &p); fgetpos (fp, &start); start = p; int elements_read = fread(&h, 31, 1, fp); assert(elements_read == 1); h.flowgram_format_code = 1; fsetpos (fp, &p); fwrite (&h, 31, 1, fp); fsetpos (fp, &p); elements_read = fread(&h, 31, 1, fp); assert(elements_read == 1); ByteSwap8(h.index_offset); ByteSwap4(h.index_length); ByteSwap4(h.number_of_reads); ByteSwap2(h.header_length); ByteSwap2(h.key_length); ByteSwap2(h.number_of_flows_per_read); if (!findRead && 
!fastqFileName) { printf("Magic: %u %s\n", h.magic_number, (h.magic_number == MAGIC ? "Yes" : "No")); printf("Version: %d%d%d%d\n", h.version[0], h.version[1], h.version[2], h.version[3]); printf("Index offset: %lu length: %u\n", h.index_offset, h.index_length); printf("Number of reads: %u\n", h.number_of_reads); printf("Header length: %hu\n", h.header_length); printf("Key length: %u\n", h.key_length); printf("Flows per read: %hu\n", h.number_of_flows_per_read); printf("Flowgram format: %hhu\n", h.flowgram_format_code); } flow_chars = (char *)malloc(h.number_of_flows_per_read); key_sequence = (char *)malloc(h.key_length); elements_read = fread(flow_chars, h.number_of_flows_per_read, 1, fp); assert(elements_read == 1); elements_read = fread(key_sequence, h.key_length, 1, fp); assert(elements_read == 1); int i; if (!findRead && !fastqFileName) { printf("Key sequence: "); for(i=0;i<h.key_length;i++) printf("%c", key_sequence[i]); printf("\nFlow chars:\n"); for(i=0;i<h.number_of_flows_per_read;i++) printf("%c", flow_chars[i]); printf("\n"); } int padBytes = (8-((31 + h.number_of_flows_per_read + h.key_length) & 7)); char padData[8]; // fprintf (stdout, "Pad Bytes = %d\n", padBytes); elements_read = fread(padData, padBytes, 1, fp); assert(elements_read == 1); fgetpos(fp, &p); // fprintf (stdout, "We are at %ld\n", (p.__pos - start.__pos)); // -- read the reads int numReads = h.number_of_reads; // pre-allocate space so we be fast flowgram_values = (uint16_t *)malloc(sizeof(uint16_t) * h.number_of_flows_per_read); int maxBases = h.number_of_flows_per_read * 10; // problems if ever a 10-mer hits every flow! 
flow_index_per_base = (uint8_t *)malloc(sizeof(uint8_t) * maxBases); bases = (char *)malloc(maxBases); quality_scores = (uint8_t *)malloc(sizeof(uint8_t) * maxBases); for(i=0;i<numReads;i++) { ReadHeader r; #define FIXIT #ifdef FIXIT fpos_t pos; // Get position ready-to-read header fgetpos (fp, &pos); // Read header elements_read = fread(&r, 16, 1, fp); assert(elements_read == 1); // byte swap ByteSwap2(r.read_header_length); ByteSwap2(r.name_length); // fprintf (stdout, "Old read header length = %d\n", r.read_header_length); // Fix the read_header_length // read_header_length is "16 + name_length" rounded up to nearest divisible by 8 r.read_header_length = 16 + r.name_length; r.read_header_length += (8 - (r.read_header_length & 0x7)) & 0x7; // fprintf (stdout, "New read header length = %d\n", r.read_header_length); // Byte swap ByteSwap2(r.read_header_length); ByteSwap2(r.name_length); // Rewind file pointer fsetpos (fp, &pos); // Write header out again fwrite (&r, 16, 1, fp); // Rewind again fsetpos (fp, &pos); #endif // Read it in and continue fpos_t readStart; fgetpos(fp, &readStart); elements_read = fread(&r, 16, 1, fp); assert(elements_read == 1); //ByteSwap2(r.read_header_length); //ByteSwap2(r.name_length); ByteSwap2(r.clip_qual_left); ByteSwap2(r.clip_qual_right); ByteSwap2(r.clip_adapter_left); ByteSwap2(r.clip_adapter_right); // Fix clipping values r.clip_qual_left = 5; r.clip_adapter_left = 0; r.clip_qual_right = 0; r.clip_adapter_right = 0; ByteSwap2(r.clip_qual_left); ByteSwap2(r.clip_qual_right); ByteSwap2(r.clip_adapter_left); ByteSwap2(r.clip_adapter_right); // Rewind file pointer to beginning of read header fsetpos (fp, &pos); // Write read header fwrite (&r, 16, 1, fp); // Rewind file pointer to beginning of read header fsetpos (fp, &pos); // Read corrected header elements_read = fread(&r, 16, 1, fp); assert(elements_read == 1); ByteSwap2(r.read_header_length); ByteSwap4(r.number_of_bases); ByteSwap2(r.name_length); ByteSwap2(r.clip_qual_left); 
ByteSwap2(r.clip_qual_right); ByteSwap2(r.clip_adapter_left); ByteSwap2(r.clip_adapter_right); //printf("Read header length: %d\n", r.read_header_length); //printf("Read name length: %d\n", r.name_length); /* flow_index_per_base = (uint8_t *)malloc(sizeof(uint8_t) * r.number_of_bases); bases = (char *)malloc(r.number_of_bases); quality_scores = (uint8_t *)malloc(sizeof(uint8_t) * r.number_of_bases); */ if (r.name_length > 0) { elements_read = fread(name, r.name_length, 1, fp); assert(elements_read == 1); name[r.name_length] = 0; // so we can easily print it } if(1 != ion_readname_to_rowcol(name, &row, &col)) { fprintf (stderr, "Error parsing read name: '%s'\n", name); continue; } int readPadLength = ((8 - ((16 + r.name_length) & 7)))%8; elements_read = fread(padData, readPadLength, 1, fp); assert(elements_read == 1); /* printf("Read: %s (r%d|c%d) has %d bases\n", (r.name_length > 0 ? name : "NONAME"), row, col, r.number_of_bases); printf("Clip left: %d qual: %d right: %d qual: %d\n", r.clip_adapter_left, r.clip_qual_left, r.clip_adapter_right, r.clip_qual_right); printf("Flowgram values:\n"); */ elements_read = fread(flowgram_values, h.number_of_flows_per_read, sizeof(uint16_t), fp); assert(elements_read == sizeof(uint16_t)); elements_read = fread(flow_index_per_base, r.number_of_bases, sizeof(uint8_t), fp); assert(elements_read == sizeof(uint8_t)); elements_read = fread(bases, r.number_of_bases, 1, fp); assert(elements_read == 1); elements_read = fread(quality_scores, r.number_of_bases, sizeof(uint8_t), fp); int bytesRead = h.number_of_flows_per_read * sizeof(uint16_t) + 3 * r.number_of_bases; readPadLength = (8 - (bytesRead & 7))%8; elements_read = fread(padData, readPadLength, 1, fp); assert(elements_read == 1); fpos_t readEnd; fgetpos(fp, &readEnd); // fprintf (stdout, "At end of read. 
Size: %ld\n", readEnd.__pos-readStart.__pos); //if ((readEnd.__pos-readStart.__pos) != r.read_header_length) { // fprintf (stdout, "mismatch in read_header_length\n"); // exit (1); //} // parse the name to get the row & col, if matched, print out read if(1 != ion_readname_to_rowcol(name, &row, &col)) { fprintf (stderr, "Error parsing read name: '%s'\n", name); continue; } if (row == readRow && col == readCol) { //printf("Ionogram: "); int i; for(i=0;i<h.number_of_flows_per_read;i++) { printf("%.2lf ", (double)(ByteSwap2(flowgram_values[i]))/100.0); } printf("\n"); } /* int f; for(f=0;f<h.number_of_flows_per_read;f++) printf("%d ", ByteSwap2(flowgram_values[f])); printf("\nFlow index per base:\n"); unsigned int b; for(b=0;b<r.number_of_bases;b++) printf("%d ", flow_index_per_base[b]); printf("\nBases called:\n"); for(b=0;b<r.number_of_bases;b++) printf("%c", bases[b]); printf("\nQuality scores:\n"); for(b=0;b<r.number_of_bases;b++) printf("%d ", quality_scores[b]); printf("\nDone with this read\n\n"); */ /* if (name) free(name); free(flowgram_values); free(flow_index_per_base); free(bases); free(quality_scores); */ } free(flowgram_values); free(flow_index_per_base); free(bases); free(quality_scores); fclose(fp); } return 0; }
/*
 *	Compose the portions of the reply packet specific to the
 *	EAP-TNC protocol, in the EAP reply typedata.
 *
 *	For reply->code < 3 the typedata written to eap_ds->request is:
 *	  - flags_ver                (TNC_FLAGS_VERSION_LENGTH byte)
 *	  - data_length, byte-swapped (TNC_DATA_LENGTH_LENGTH bytes, only
 *	    when reply->data_length != 0)
 *	  - reply->data              (whatever remains of reply->length
 *	    after the fixed TNC header, only when reply->data != NULL)
 *	For any other code no typedata is produced (type.length = 0).
 *	In both cases eap_ds->request->code is set to reply->code.
 *
 *	Returns 1 on success, 0 on failure (out of memory, or
 *	reply->length too small to hold the fixed header while data
 *	is attached).
 */
int eaptnc_compose(EAP_DS *eap_ds, TNC_PACKET *reply)
{
	uint8_t *ptr;

	if (reply->code < 3) {
		uint16_t thisDataLength = 0;
		size_t typeDataLength;
		int hasDataLength = (reply->data_length != 0);

		//fill: EAP-Type (0x888e)
		eap_ds->request->type.type = PW_EAP_TNC;
		DEBUG2("TYPE: EAP-TNC set\n");
		rad_assert(reply->length > 0);

		/*
		 *	Work out the payload size BEFORE allocating.  The
		 *	payload is what is left of reply->length once the
		 *	fixed header is removed; if reply->length is shorter
		 *	than that header the subtraction would wrap around
		 *	in the unsigned 16-bit result and the memcpy() below
		 *	would run far past the buffer.
		 */
		if (reply->data != NULL) {
			int fixedLength = hasDataLength ? TNC_PACKET_LENGTH
							: TNC_PACKET_LENGTH_WITHOUT_DATA_LENGTH;

			if (reply->length < fixedLength) {
				radlog(L_ERR, "rlm_eap_tnc: reply length too small for TNC header");
				return 0;
			}
			thisDataLength = reply->length - fixedLength;
		}

		//the length of the TNC-packet (behind Type)
		typeDataLength = TNC_FLAGS_VERSION_LENGTH + thisDataLength;	//1: flags_ver
		if (hasDataLength) {
			typeDataLength += TNC_DATA_LENGTH_LENGTH;		//4: data_length
		}

		/*
		 *	Allocate exactly the bytes that are written below.
		 *	(The element size is one byte, not the size of a
		 *	pointer.)
		 */
		eap_ds->request->type.data = calloc(typeDataLength, sizeof(uint8_t));
		DEBUG2("Malloc %d bytes for packet\n", (int) typeDataLength);
		if (eap_ds->request->type.data == NULL) {
			radlog(L_ERR, "rlm_eap_tnc: out of memory");
			return 0;
		}

		//put pointer at position where data starts (behind Type)
		ptr = eap_ds->request->type.data;
		*ptr = reply->flags_ver;
		DEBUG2("Set Flags/Version: %d\n", *ptr);
		ptr += TNC_FLAGS_VERSION_LENGTH;	//move behind flags_ver

		if (hasDataLength) {
			/*
			 *	Exactly 32 bits wide, so the bytes copied are
			 *	the whole swapped value on every platform (an
			 *	"unsigned long" is 8 bytes on LP64 targets and
			 *	copying its first 4 bytes is endian-dependent).
			 *	NOTE(review): assumes ByteSwap2() yields the
			 *	byte-swapped 32-bit value - confirm.
			 */
			uint32_t swappedDataLength;

			DEBUG2("Set data-length: %d\n", reply->data_length);
			DEBUG2("Set data-length: %x\n", reply->data_length);
			DEBUG2("Set data-length (swapped): %x\n", ByteSwap2(reply->data_length));
			swappedDataLength = ByteSwap2(reply->data_length);
			memcpy(ptr, &swappedDataLength, sizeof(swappedDataLength));
			ptr += TNC_DATA_LENGTH_LENGTH;	//move to start-position of "data"
		}

		if (reply->data != NULL) {
			DEBUG2("Adding TNCCS-Data ");
			if (hasDataLength) {
				//data_length-Field present
				DEBUG2("with Fragmentation\n");
			} else {
				//data_length-Field not present
				DEBUG2("without Fragmentation\n");
			}
			DEBUG2("TNCCS-Datalength: %d\n", thisDataLength);
			memcpy(ptr, reply->data, thisDataLength);
		} else {
			DEBUG2("No TNCCS-Data present");
		}

		eap_ds->request->type.length = typeDataLength;
		DEBUG2("Packet built\n");

	} else {
		eap_ds->request->type.length = 0;
	}
	eap_ds->request->code = reply->code;

	return 1;
}
/*
 *	We expect only RESPONSE for which REQUEST, SUCCESS or FAILURE is sent back
 *
 *	Builds a newly allocated TNC_PACKET from the EAP response held in
 *	eap_ds: code, id and length are copied from the response,
 *	flags_ver is taken from the first typedata byte, data_length is
 *	read (and byte-swapped) only when the flags say it is included,
 *	and the remaining payload is copied into packet->data.
 *
 *	Returns the packet, or NULL when the response is missing, is not
 *	an EAP-TNC response, is too short for the header its flags
 *	announce, or when memory cannot be allocated.  No packet is
 *	left allocated on failure.
 */
TNC_PACKET *eaptnc_extract(EAP_DS *eap_ds)
{
	tnc_packet_t	*data;
	TNC_PACKET	*packet;
	unsigned char	*ptr;
	int		thisDataLength;
	int		dataStart;

	/*
	 *	We need a response, of type EAP-TNC
	 */
	if (!eap_ds					 ||
	    !eap_ds->response				 ||
	    (eap_ds->response->code != PW_TNC_RESPONSE)	 ||
	    eap_ds->response->type.type != PW_EAP_TNC	 ||
	    !eap_ds->response->type.data		 ||
	    (eap_ds->response->length <= TNC_HEADER_LEN) ||
	    (eap_ds->response->type.data[0] <= 0)) {
		radlog(L_ERR, "rlm_eap_tnc: corrupted data");
		return NULL;
	}

	packet = eaptnc_alloc();
	if (!packet) return NULL;

	packet->code = eap_ds->response->code;
	packet->id = eap_ds->response->id;
	packet->length = eap_ds->response->length;

	data = (tnc_packet_t *)eap_ds->response->type.data;

	/*
	 *	The checks above only guarantee that the first typedata
	 *	byte (flags_ver) is present.
	 */
	packet->flags_ver = data->flags_ver;
	ptr = (unsigned char*)data;

	DEBUG2("Flags/Ver: %x\n", packet->flags_ver);

	if (TNC_LENGTH_INCLUDED(packet->flags_ver)) {
		DEBUG2("data_length included\n");

		/*
		 *	The peer controls both the flags and the length.
		 *	Make sure the packet is really long enough to hold
		 *	the data_length field before reading it; otherwise
		 *	we would read past the received data and compute a
		 *	negative payload size.
		 */
		if (packet->length < TNC_PACKET_LENGTH) {
			radlog(L_ERR, "rlm_eap_tnc: corrupted data");
			eaptnc_free(&packet);
			return NULL;
		}

		memcpy(&packet->data_length, &ptr[1], TNC_DATA_LENGTH_LENGTH);
		DEBUG2("data_length: %x\n", packet->data_length);
		DEBUG2("data_length: %d\n", packet->data_length);
		DEBUG2("data_length: %x\n", ByteSwap2(packet->data_length));
		DEBUG2("data_length: %d\n", ByteSwap2(packet->data_length));
		packet->data_length = ByteSwap2(packet->data_length);

		thisDataLength = packet->length - TNC_PACKET_LENGTH;
		//1: we need space for flags_ver
		dataStart = TNC_DATA_LENGTH_LENGTH + TNC_FLAGS_VERSION_LENGTH;
	} else {
		DEBUG2("no data_length included\n");

		/*
		 *	Same sanity check for the short header form.
		 */
		if (packet->length < TNC_PACKET_LENGTH_WITHOUT_DATA_LENGTH) {
			radlog(L_ERR, "rlm_eap_tnc: corrupted data");
			eaptnc_free(&packet);
			return NULL;
		}

		thisDataLength = packet->length - TNC_PACKET_LENGTH_WITHOUT_DATA_LENGTH;
		packet->data_length = 0;
		dataStart = TNC_FLAGS_VERSION_LENGTH;
	}

	/*
	 *	Allocate room for the data, and copy over the data.
	 *
	 *	malloc(0) is allowed to return NULL, which would be
	 *	misreported as "out of memory" for a packet that simply
	 *	carries no payload, so always ask for at least one byte.
	 */
	packet->data = malloc(thisDataLength > 0 ? thisDataLength : 1);
	if (packet->data == NULL) {
		radlog(L_ERR, "rlm_eap_tnc: out of memory");
		eaptnc_free(&packet);
		return NULL;
	}

	memcpy(packet->data, &(eap_ds->response->type.data[dataStart]), thisDataLength);

	return packet;
}