// A freshly default-constructed Fastq record is expected to expose an empty
// name, an empty sequence and an empty quality string.
TEST(Fastq, DefaultConstructor)
{
    const string emptyText;
    Fastq record;

    EXPECT_EQ(emptyText, record.name());
    EXPECT_EQ(emptyText, record.seq());
    EXPECT_EQ(emptyText, record.qual());
}
// Reads every record from the fixture file and checks that each record's name
// is its 1-based position in the file ("1", "2", ...) and that each sequence
// is "ACGT".
TEST(Fastq, ReadFromFile)
{
    const char* const fixturePath = "test_files/fastq_unittest.fastq";
    ifstream inStream(fixturePath);

    // Fail only this test when the fixture is missing; calling exit() here
    // would terminate the whole test binary and hide every other result.
    ASSERT_TRUE(inStream.is_open()) << "Error opening test file " << fixturePath;

    Fastq fq;
    unsigned int counter = 0;
    while (fq.fillFromFile(inStream))
    {
        counter++;

        // Format the counter through a named stream: taking the address of
        // the result of inserting into a temporary stream is not portable.
        ostringstream expectedName;
        expectedName << counter;

        EXPECT_EQ(expectedName.str(), fq.name());
        EXPECT_EQ(string("ACGT"), fq.seq());
    }

    // Without this check an empty or unparsable fixture would pass vacuously.
    EXPECT_GT(counter, 0u) << "No records were read from " << fixturePath;
}
// Values stored through the one-argument name/seq/qual overloads must be
// returned unchanged by the corresponding zero-argument accessors.
TEST(Fastq, setValues)
{
    const string expectedName("name");
    const string expectedSeq("ACGT");
    const string expectedQual("IIII");

    Fastq record;
    record.name("name");
    record.seq("ACGT");
    record.qual("IIII");

    EXPECT_EQ(expectedName, record.name());
    EXPECT_EQ(expectedSeq, record.seq());
    EXPECT_EQ(expectedQual, record.qual());
}
示例#4
0
/**
 * Test helper: checks the four accessors of a Fastq record against expected
 * values using BOOST_CHECK_EQUAL, one check per field, in the order
 * name, comment, sequence, quals.
 *
 * @param record    record under test
 * @param name      expected result of record.name()
 * @param comment   expected result of record.comment()
 * @param sequence  expected result of record.sequence()
 * @param quals     expected result of record.quals()
 */
void check_fastq_fields(const Fastq& record,
                        const string& name,
                        const string& comment,
                        const string& sequence,
                        const string& quals)
{
    BOOST_CHECK_EQUAL(record.name(), name);
    BOOST_CHECK_EQUAL(record.comment(), comment);
    BOOST_CHECK_EQUAL(record.sequence(), sequence);
    BOOST_CHECK_EQUAL(record.quals(), quals);
}
示例#5
0
int main( int argc, char *argv[])
{
	// Options
	string   readfile;
	string   prefix            = "result";
	int      winsize           = 6;
	int      minlen            = 30;
	int      threshold         = 30;
	int      offset            = PHREDOFFSET;
	bool     showHelp          = false;

	// Show help when has no options
	if(argc <= 1)
	{
		Help();
		return 0;
	}

	// Parsing options
	for(int i = 1; i < argc; i++)
	{
		int parameterLength = (int)strlen(argv[i]);
		if((PARAMETER_CHECK("-h", 2, parameterLength)) || (PARAMETER_CHECK("--help", 5, parameterLength))) 
			showHelp=true;
		else if((PARAMETER_CHECK("-i", 2, parameterLength)) || (PARAMETER_CHECK("--input", 7, parameterLength)))
		{
			if ((++i) < argc) 
				readfile = argv[i];
		}
		else if((PARAMETER_CHECK("-w", 2, parameterLength)) || (PARAMETER_CHECK("--winsize", 9, parameterLength)))
		{
			if ((++i) < argc)
				winsize = StringUtils::toValue<int>(argv[i]);
		}
		else if ((PARAMETER_CHECK("-m", 2, parameterLength)) || (PARAMETER_CHECK("--minlen", 8, parameterLength)))
		{
			if ((++i) < argc)
				minlen = StringUtils::toValue<int>(argv[i]);
		}
		else if ((PARAMETER_CHECK("-p", 2, parameterLength)) || (PARAMETER_CHECK("--prefix", 8, parameterLength)))
		{
			if ((++i) < argc)
				prefix = argv[i];
		}
		else if ((PARAMETER_CHECK("-o", 2, parameterLength)) || (PARAMETER_CHECK("--offset", 8, parameterLength)))
		{
			if ((++i) < argc)
				offset = StringUtils::toValue<int>(argv[i]);
		}
		else if ((PARAMETER_CHECK("-t", 2, parameterLength)) || (PARAMETER_CHECK("--threshold", 11, parameterLength)))
		{
			if ((++i) < argc)
			{
				threshold = StringUtils::toValue<int>(argv[i]);
				if (threshold <0 || threshold >50)
				{
					cerr << endl << "*****ERROR: Threshold should be within 0~50, reset to default: 30. *****" << endl << endl;
					threshold = 30;
				}
			}
		}
		else
		{
			cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
			showHelp = true;
		}
	}
	
	// Show help if no proper auguments.
	if (showHelp)
	{
		Help();
		return 0;
	}

    // Variables
	int counter=0,lowcounter=0,i;
	int qsum=0,sumThreshold;
	char   buf[MAXSEQREADLEN]; // Store the sequence for trimming
	int start,seqlen;
	Fastq  seq;
	SeqReader fq(readfile);
	Writer trimmed(prefix+"_trimmed.fq");
	Writer lowqual(prefix+"_low_qual.fq");
	
	// open files
	fq.open();
	trimmed.open();
	lowqual.open();
	
	// Read the fastq file.
	sumThreshold=(threshold+offset)*winsize;
	while (fq.getNext(seq))	
	{
		if (++counter%100000==0)
			cerr << "\rFinished  " << counter << " reads.";
		strcpy(buf,seq.phred.c_str());
		seqlen=seq.length();
		qsum=0;
		start=0;

		// Trim 5' ends
		for(i=0;i<winsize;i++)
			qsum+=buf[i];
		while(qsum<sumThreshold)
		{
			start++;
			if (seqlen-start< minlen)
				break;
			qsum+=buf[start+winsize-1]-buf[start-1];
		}
		// Trim 3' ends
		if (seqlen-start >= minlen)
		{
			qsum=0;
			for( i=seqlen-1;i>=seqlen-winsize;i--)
				qsum+=buf[i];
			while (qsum<sumThreshold)
			{
				seqlen--; // trim the last nucleotide.
				if (seqlen-start < minlen)
					break;
				qsum+=buf[seqlen-winsize-1]-buf[seqlen-1];
			}
		}
		if (seqlen-start>=minlen)
		{
			seq.seq=seq.seq.substr(start,seqlen-start);
			seq.phred=seq.phred.substr(start,seqlen-start);
			(*(trimmed.Printer())) << seq;
		}
		else
		{
			(*(lowqual.Printer())) << seq;
			lowcounter++;
		}
	}

	// close files
	fq.close();
	trimmed.close();
	lowqual.close();

	// Print summary
	cerr << endl;
	cerr << "Parameters: \n\tthreshold = " << threshold << "\n\twindow size = " << winsize << "\n\tminimum length = " << minlen << endl; 
	cerr << "Summary:" << endl;
	cerr << "\tTotal reads  : " << counter << endl;
	cerr << "\tTrimmed reads: " << counter-lowcounter << endl;
	cerr << "\tShort reads  : " << lowcounter <<endl;

	return 0;
}