Exemplo n.º 1
0
Arquivo: sff.c Projeto: golharam/TS
/*
 * sff_view_main - entry point of the SFF "view" command.
 *
 * Reads the SFF file given as the single positional argument and writes it to
 * stdout.  Recognized options:
 *   -r RANGE  restrict output to a row range (parsed by ion_parse_range)
 *   -c RANGE  restrict output to a column range (parsed by ion_parse_range)
 *   -R FILE   only output reads whose names are listed (whitespace separated)
 *             in FILE; every listed name must be found
 *   -q        write FASTQ instead of the text dump
 *   -b        write binary SFF instead of the text dump
 *   -h        print usage
 * -q and -b are mutually exclusive.
 *
 * Returns 0 on success, or the value of usage() when the command line is
 * malformed or -h is given.  All other failures are reported via ion_error().
 */
int
sff_view_main(int argc, char *argv[])
{
  int i, c;
  sff_file_t *sff_file_in=NULL, *sff_file_out=NULL;
  sff_iter_t *sff_iter = NULL;
  sff_t *sff = NULL;
  sff_header_t *header = NULL;
  char *fn_names = NULL;
  char **names = NULL;
  int32_t names_num = 0, names_mem = 0, names_missing = 0;
  int32_t out_mode, min_row, max_row, min_col, max_col;
  int32_t use_index;

  out_mode = 0;
  min_row = max_row = min_col = max_col = -1;

  while((c = getopt(argc, argv, "r:c:R:bqh")) >= 0) {
      switch(c) {
        case 'r':
          if(ion_parse_range(optarg, &min_row, &max_row) < 0) {
              ion_error(__func__, "-r : format not recognized", Exit, OutOfRange);
          }
          break;
        case 'c':
          if(ion_parse_range(optarg, &min_col, &max_col) < 0) {
              ion_error(__func__, "-c : format not recognized", Exit, OutOfRange);
          }
          break;
        case 'R':
          // a repeated -R replaces the previous file name
          free(fn_names);
          fn_names = strdup(optarg);
          if(NULL == fn_names) {
              ion_error(__func__, "fn_names", Exit, MallocMemory);
          }
          break;
        case 'q':
          out_mode |= 1;
          break;
        case 'b':
          out_mode |= 2;
          break;
        case 'h':
        default:
          free(fn_names); // do not leak the -R argument on the usage path
          return usage();
      }
  }
  if(argc != 1+optind) {
      free(fn_names);
      return usage();
  }
  if(3 == out_mode) {
      ion_error(__func__, "options -b and -q cannot be used together", Exit, CommandLineArgument);
  }

  // any range bound that was set means reads are fetched through the iterator
  use_index = (-1 != min_row || -1 != max_row || -1 != min_col || -1 != max_col);

  // open the input SFF ("rbi" when a row/column range was requested)
  sff_file_in = sff_fopen(argv[optind], use_index ? "rbi" : "rb", NULL, NULL);
  if(NULL == sff_file_in) {
      // guard the header dereference below in case sff_fopen reports failure with NULL
      ion_error(__func__, argv[optind], Exit, OpenFileError);
  }

  header = sff_header_clone(sff_file_in->header); /* copy header, but update n_reads if using index or names */

  // read in the names
  if(NULL != fn_names) {
      FILE *fp = NULL;
      char name[1024]="\0";
      if(!(fp = fopen(fn_names, "rb"))) {
          fprintf(stderr, "** Could not open %s for reading. **\n", fn_names);
          ion_error(__func__, fn_names, Exit, OpenFileError);
      }
      // the field width keeps an over-long token from overflowing name[]
      while(1 == fscanf(fp, "%1023s", name)) {
          if(names_num == names_mem) {
              names_mem = (0 == names_mem) ? 4 : names_mem * 2;
              names = ion_realloc(names, sizeof(char*) * names_mem, __func__, "names");
          }
          names[names_num] = strdup(name);
          if(NULL == names[names_num]) {
              ion_error(__func__, name, Exit, MallocMemory);
          }
          names_num++;
      }
      if(0 < names_num) { // avoid a zero-sized realloc when the file held no names
          names = ion_realloc(names, sizeof(char*) * names_num, __func__, "names");
      }
      fclose(fp);
      header->n_reads = names_num;
  }

  if(use_index) {
      // the iterator is needed whenever a range was given, with or without -R
      sff_iter = sff_iter_query(sff_file_in, min_row, max_row, min_col, max_col);
      if(NULL == fn_names) {
          // iterate once through the index to count the entries
          // so we can set the count correctly in the header
          int32_t entries = 0;
          while(NULL != (sff = sff_iter_read(sff_file_in, sff_iter))) {
              sff_destroy(sff); // only the count is needed; release each read
              entries++;
          }
          header->n_reads = entries;
          /* reset sff_iter */
          sff_iter_destroy(sff_iter);
          sff_iter = sff_iter_query(sff_file_in, min_row, max_row, min_col, max_col);
      }
  }

  // print the header
  switch(out_mode) {
    case 0:
      sff_header_print(stdout, header);
      break;
    case 1:
      // do nothing: FASTQ
      break;
    case 2:
      sff_file_out = sff_fdopen(fileno(stdout), "wb", header, NULL);
      break;
  }

  while(1) {
      int32_t to_print = 1;

      if(use_index) {
          sff = sff_iter_read(sff_file_in, sff_iter);
      }
      else {
          sff = sff_read(sff_file_in);
      }
      if(NULL == sff) {
          break;
      }

      if(NULL != fn_names) {
          // filter on the name list; an empty list therefore selects nothing,
          // which matches the n_reads written to the header
          to_print = 0;
          for(i=0;i<names_num;i++) {
              if(0 == strcmp(names[i], sff_name(sff))) {
                  to_print = 1;
                  break;
              }
          }
          if(1 == to_print) { // i < names_num
              // drop the matched name and shift the remaining ones down
              free(names[i]);
              for(;i<names_num-1;i++) {
                  names[i] = names[i+1];
              }
              names[names_num-1] = NULL;
              names_num--;
          }
      }

      if(1 == to_print) {
          switch(out_mode) {
            case 0:
              sff_print(stdout, sff);
              break;
            case 1:
              // bases and qualities are emitted starting after the key sequence
              if(fprintf(stdout, "@%s\n%s\n+\n",
                         sff->rheader->name->s,
                         sff->read->bases->s + sff->gheader->key_length) < 0) {
                  ion_error(__func__, "stdout", Exit, WriteFileError);
              }
              for(i=sff->gheader->key_length;i<sff->read->quality->l;i++) {
                  if(fputc(QUAL2CHAR(sff->read->quality->s[i]), stdout) < 0) {
                      ion_error(__func__, "stdout", Exit, WriteFileError);
                  }
              }
              if(fputc('\n', stdout) < 0) {
                  ion_error(__func__, "stdout", Exit, WriteFileError);
              }
              break;
            case 2:
              sff_write(sff_file_out, sff);
              break;
          }
      }
      sff_destroy(sff);
  }

  sff_fclose(sff_file_in);
  if(2 == out_mode) {
      sff_fclose(sff_file_out);
  }
  if(use_index) {
      sff_iter_destroy(sff_iter);
  }

  // release the name list unconditionally, including names that were never matched
  names_missing = names_num;
  for(i=0;i<names_num;i++) {
      free(names[i]);
  }
  free(names);
  names = NULL;
  names_num = 0;

  if(0 != names_missing) {
      fprintf(stderr, "** Did not find all the reads with (-R). **\n");
      ion_error(__func__, fn_names, Exit, OutOfRange);
  }

  sff_header_destroy(header);
  free(fn_names);
  return 0;
}
Exemplo n.º 2
0
// Returns the value that follows the option at argv[index] and advances index
// onto it.  Prints a message and exits with status 1 when the option is the
// last argument, so callers never read the terminating NULL of argv.
static char *sffReadOptionValue(int argc, char *argv[], int &index)
{
	if (index + 1 >= argc) {
		fprintf (stderr, "Option %s requires an argument\n", argv[index]);
		exit (1);
	}
	return argv[++index];
}

// Command-line tool that inspects an SFF file.
//
// The last non-option argument is the SFF file name.  Depending on the
// options it either prints the header plus a text dump of the first 10 reads
// (all reads with -a), prints the flowgram of the read at a given row/column
// (-R and -C together), or converts the reads to a FASTQ file (-q).
//
// Options:
//   -a        output all reads
//   -R <row>  row of the read to report (needs -C as well)
//   -C <col>  column of the read to report (needs -R as well)
//   -q <file> convert to FASTQ, written to <file>
//   -c        force the left clip so that the key gets trimmed
//   -s <n>    nonzero offset applied to quality scores
//   -k <key>  only keep reads that start with <key>
//   -L        write r<row>|c<col> style names to the FASTQ file
//   -d        debug print-outs
//   -h        help
//   -u        do not apply adapter clipping
//   -b        ignore the left quality trim
//   -v        version
//
// Returns 0 on success; exits with status 1 on a fatal error.
int main(int argc, char *argv[])
{
	char	*fastqFileName = NULL;
	char	*sffFileName = NULL;
	bool	forceClip = false;
	bool	keyPass = false;
	bool	allReads = false;
	int	readCol = -1;
	int	readRow = -1;
	bool	findRead = false;
	int row, col;
	int numKeypassedReads = 0;
	int qual_offset = DEFAULT_QUAL_OFFSET;
	bool legacyFASTQName = false;	// enable if you want r10|c100 format name in fastq file
	bool debug = false;
	bool legacyReadName = false;
	bool adapterTrim = true;
	bool ignoreLeftQualTrim = false;
	
	// process command-line args
	int argcc = 1;
	while (argcc < argc) {
		if (argv[argcc][0] == '-') {
			switch (argv[argcc][1]) {
				case 'a': // output all reads
					allReads = true;
				break;

				case 'R': // report read at row & column
					readRow = atoi(sffReadOptionValue(argc, argv, argcc));
				break;

				case 'C': // report read at row & column
					readCol = atoi(sffReadOptionValue(argc, argv, argcc));
				break;

				case 'q': // convert to fastq
					fastqFileName = sffReadOptionValue(argc, argv, argcc);
				break;

				case 'c': // force qual clip left to 5
					forceClip = true;
				break;

				case 's':	// Offset to apply to quality scores
					qual_offset = atoi(sffReadOptionValue(argc, argv, argcc));
					if(qual_offset==0) {
						fprintf (stderr, "-s option should specify a nonzero quality offset\n");
						exit (1);
					}
				break;
				
				case 'k': // force keypass
					keyPass = true;
					hackkey = sffReadOptionValue(argc, argv, argcc);
					hackkeylen = strlen(hackkey);
				break;
			
				case 'L':	// don't record name of read in comment
					legacyFASTQName = true;
				break;
			
				case 'd':	// enable debug print outs
					debug = true;
				break;
			
				case 'h':	// help info
					printHelp ();
					exit (0);
				break;
			
				case 'u':	// prevent read clipping
					adapterTrim = false;
				break;
			
				case 'b':	// ignore barcodes (ok really its ignoring the left qual trim)
					ignoreLeftQualTrim = true;
				break;
			
				case 'v':	// version info
					fprintf (stdout, "%s", IonVersion::GetFullVersion("SFFRead").c_str());
					exit (0);
				break;

				default:
					// unknown options are silently ignored
					break;
			}
		}
		else {
			sffFileName = argv[argcc];
		}
		argcc++;
	}

	if (!sffFileName) {
		printHelp();
		exit(0);
	}

	if (readCol > -1 && readRow > -1) {
		findRead = true;
		allReads = true;// makes it search all reads
	}

	sff_file_t* sff_file_in = NULL;
	sff_file_in = sff_fopen(sffFileName, "rb", NULL, NULL);

	if (sff_file_in) {
		if (!findRead && !fastqFileName) {
			printf("Reading file: %s\n", sffFileName);
			sff_header_print(stdout, sff_file_in->header);
		}

		// -- read the reads
		int numReads;
		if (allReads) {
			numReads = sff_file_in->header->n_reads;
		}
		else {
			// default: only show the first 10 reads
			numReads = (sff_file_in->header->n_reads < 10 ? sff_file_in->header->n_reads:10);
		}
		FILE *fpq = NULL;
		if (fastqFileName) {
			// FASTQ conversion always processes every read
			numReads = sff_file_in->header->n_reads;
			fpq = fopen(fastqFileName, "w");
			if (!fpq){
				perror (fastqFileName);
				exit (1);
			}
		}

		for(int i=0;i<numReads;i++) {
			// Stop instead of dereferencing NULL if the file holds fewer reads
			// than the header announces (assumes the readers signal this with NULL).
			sff_read_header_t* rh = sff_read_header_read(sff_file_in->fp);
			if (!rh) {
				fprintf (stderr, "Could not read header of read %d of %d\n", i+1, numReads);
				break;
			}
			sff_read_t* rr = sff_read_read(sff_file_in->fp, sff_file_in->header, rh);
			if (!rr) {
				fprintf (stderr, "Could not read data of read %d of %d\n", i+1, numReads);
				sff_read_header_destroy(rh);
				break;
			}

			// optional - ignore the left & right adapter clipping by simply setting these values to 0
			if (!adapterTrim) {
				rh->clip_adapter_left = 0;
				rh->clip_adapter_right = 0;
			}

			if (!fpq && !findRead) {
				printf("Read header length: %d\n", rh->rheader_length);
				printf("Read name length: %d\n", rh->name_length);
			}

			
			// Extract the row and column position info for this read
			if (1 != ion_readname_to_rowcol(rh->name->s, &row, &col)) {
				fprintf (stderr, "Error parsing read name: '%s'\n", rh->name->s);
				// release this read before skipping it
				sff_read_header_destroy(rh);
				sff_read_destroy(rr);
				continue;
			}
			legacyReadName = (1 == ion_readname_legacy(rh->name->s));
			

			if (!fpq && !findRead) {
				printf("Read: %s (r%05d|c%05d) has %d bases\n",
						(rh->name_length > 0 ? rh->name->s : "NONAME"),
						row, col,
						rh->n_bases);
				printf("Clip left: %d qual: %d right: %d qual: %d\n",
					rh->clip_adapter_left, rh->clip_qual_left,
					rh->clip_adapter_right, rh->clip_qual_right);
				printf("Flowgram values:\n");
			}

			if (findRead) {
				if (row == readRow && col == readCol) {
					// flowgram values are stored as integers scaled by 100
					for(int f=0;f<sff_file_in->header->flow_length;f++) {
						printf("%.2lf ", (double)(rr->flowgram[f])/100.0);
					}
					printf("\n");
				}
			}
			else if (fpq) {
				bool ok = true;
				if (keyPass) {
					// the read must be longer than the key and start with it
					if ((int)rh->n_bases > hackkeylen) {
						int b;
						for(b=0;b<hackkeylen;b++) {
							if (hackkey[b] != rr->bases->s[b]) {
								ok = false;
								break;
							}
						}
					} else
						ok = false; // not long enough
				}

				int clip_left_index = 0;
				int clip_right_index = 0;
				if (ok) {
					// If force-clip option is set, we want to ensure the key gets trimmed
					if (forceClip && rh->clip_adapter_left < 4)
						rh->clip_adapter_left = hackkeylen+1;

					// clip positions are 1-based; a right clip of 0 means "no clip"
					if (ignoreLeftQualTrim)
						clip_left_index = max (1, rh->clip_adapter_left);
					else
						clip_left_index = max (1, max (rh->clip_qual_left, rh->clip_adapter_left));
					clip_right_index = min ((rh->clip_qual_right == 0 ? rh->n_bases:rh->clip_qual_right),
											(rh->clip_adapter_right == 0 ? rh->n_bases:rh->clip_adapter_right));
					if (debug)
						fprintf (stdout, "debug clip: left = %d right = %d\n", clip_left_index, clip_right_index);
					numKeypassedReads++;
					if (clip_left_index > clip_right_index)
						// Suppress output of zero-mer reads (left > right)
						ok = false;
				}
				if (ok) {
					//print id string
					if (legacyFASTQName) {
						fprintf (fpq, "@r%d|c%d\n", row, col);
					}
					else {
						if (legacyReadName){
							//Override legacy name: 5 characters starting at offset 7 form the run id
							char runId[6] = {'\0'};
							strncpy (runId, &rh->name->s[7], 5);
							fprintf (fpq, "@%s:%d:%d\n", runId, row, col);
						}
						else {
							//Copy name verbatim
							fprintf (fpq, "@%s\n", rh->name->s);
						}
					}
						
					//print bases
					for (int b=clip_left_index-1;b<clip_right_index;b++)
						fprintf(fpq, "%c", rr->bases->s[b]);
					fprintf(fpq, "\n");
					//print '+'
					fprintf(fpq, "+\n");
					//print quality scores
					for (int b=clip_left_index-1;b<clip_right_index;b++)
						fprintf(fpq, "%c", QualToFastQ((int)(rr->quality->s[b]),qual_offset));
					fprintf(fpq, "\n");
				}
			}
			else {
				int f;
				for(f=0;f<sff_file_in->header->flow_length;f++)
					printf("%d ", rr->flowgram[f]);
				printf("\nFlow index per base:\n");
				unsigned int b;
				for(b=0;b<rh->n_bases;b++)
					printf("%d ", rr->flow_index[b]);
				printf("\nBases called:\n");
				for(b=0;b<rh->n_bases;b++)
					printf("%c", rr->bases->s[b]);
				printf("\nQuality scores:\n");
				for(b=0;b<rh->n_bases;b++)
					printf("%d ", rr->quality->s[b]);
				printf("\nDone with this read\n\n");
			}

			sff_read_header_destroy(rh);
			sff_read_destroy(rr);
		}

		//	debug print - keypass reads written to the fastq file
		if (fpq) {
			const char *printkey = keyPass ? hackkey : "All";
			// avoid a 0/0 division when the file contains no reads
			double percentage = (numReads > 0)
				? ((float) numKeypassedReads/ (float) numReads) * 100.0
				: 0.0;
			fprintf (stdout, "Keypass Reads(%s) = %d\n", printkey, numKeypassedReads);
			fprintf (stdout, "Total Reads = %d\n", numReads);
			fprintf (stdout, "Percentage = %.2f%%\n", percentage);
		}
		sff_fclose(sff_file_in);
		if (fpq) {
			// fclose flushes buffered output; a failure here means the FASTQ file is incomplete
			if (fclose(fpq) != 0) {
				perror (fastqFileName);
				exit (1);
			}
		}
	}
	else {
		perror (sffFileName);
		exit (1);
	}

	return 0;
}