void Cnflearn::extfeature() { FILE *fp = NULL; if ((fp = fopen(this->corpus.c_str(),"r")) == NULL) { fprintf (stderr,"Couln't open %s\n", this->corpus.c_str()); exit (1); } Sequence sq; sq.setColSize(this->sqcolsize); sq.setAllocSize(this->sqallocsize); sq.setArraySize(this->sqarraysize); sq.init(); this->instance = 0; while(feof(fp) == 0) { MyUtil::sqread(fp,&sq,CNF_BUFSIZE); if (sq.getRowSize() == 0) { continue; } this->extlabel(&sq); this->extract(&sq); this->instance++; sq.clear(); } fclose(fp); }
/**
 * Reads a text picture from `is` and records the position of every '*'.
 *
 * `s` is cleared first; each '*' found at column x of line y (both counted
 * from 0) is appended to it as point(x, y).
 *
 * @return a pair whose first member is point(xmax + 1, y_count), where xmax
 *         is the largest column holding a '*' (0 when there is none) and
 *         y_count is the number of lines read; the second member is a rhomb
 *         holding the maxima of x+y, x-y, -x+y and -x-y over all points
 *         (each left at INT_MIN when no '*' was seen).
 */
std::pair<point,rhomb> read_points(std::istream& is,Sequence& s)
{
  rhomb rh(INT_MIN,INT_MIN,INT_MIN,INT_MIN);
  int xmax=0;
  s.clear();

  int y=0;
  for(std::string str; std::getline(is,str); ++y)
  {
    int line_xmax=0;
    // Search with the string's own size_type so that npos is compared
    // against an unsigned position, not against a narrowed int.
    for(std::string::size_type pos=str.find('*');
        pos!=std::string::npos;
        pos=str.find('*',pos+1))
    {
      const int x=static_cast<int>(pos);
      s.push_back(point(x,y));
      line_xmax=x;  // hits arrive in increasing column order
      if( x+y>rh.pp)rh.pp= x+y;
      if( x-y>rh.pm)rh.pm= x-y;
      if(-x+y>rh.mp)rh.mp=-x+y;
      if(-x-y>rh.mm)rh.mm=-x-y;
    }
    if(line_xmax>xmax)xmax=line_xmax;
  }
  return std::make_pair(point(xmax+1,y),rh);
}
// Small demo driver for Sequence: fills it, inserts and removes at a cursor
// position, clears it, and prints the contents after every step.
int main()
{
    Sequence s;

    // Step 1: add the values 0..5 and show them.
    size_t value = 0;
    while (value < 6)
    {
        s.add(value);
        ++value;
    }
    s.display();
    cout << "==========" << endl;

    // Step 2: rewind, advance three times, then add 42.
    s.reset();
    for (size_t step = 0; step != 3; ++step)
    {
        s.next();
    }
    s.add(42);
    s.display();
    cout << "==========" << endl;

    // Step 3: rewind, advance twice, then remove.
    s.reset();
    for (size_t step = 0; step != 2; ++step)
    {
        s.next();
    }
    s.remove();
    s.display();
    cout << "==========" << endl;

    // Step 4: empty the sequence and show the result.
    s.clear();
    s.display();
    cout << "==========" << endl;
}
int main(void) { // Test the fastareader string filename = "/home/laozzzzz/barcode_project/reads/e_coli_10000snp.fa"; bool done = false; bool success = false; Sequence read; std::unique_ptr<SequenceReader> reader(FastaReader::CreateSequenceReader(filename)); while(!success) { reader->NextRead(read, success, done); cout<< read.fowardSeq() <<endl; } read.clear(); filename = "/home/laozzzzz/barcode_project/reads/1000.fq"; done = false; success = false; reader.reset(FastqReader::CreateSequenceReader(filename)); while(!success) { reader->NextRead(read, success, done); cout<< read.fowardSeq() <<endl; } return 0; }
void Cnflearn::learn(unsigned int iter, unsigned int reg) { if (!this->valid) { fprintf (stderr, "It's not initialized\n"); exit(1); } AllocMemdiscard cache(this->cachesize); Sequence sq; sq.setColSize(this->sqcolsize); sq.setAllocSize(this->sqallocsize); sq.setArraySize(this->sqarraysize); sq.init(); int t = 0; for (unsigned int i = 0; i < iter; i++) { FILE *fp = NULL; if ((fp = fopen(this->corpus.c_str(), "r")) == NULL) { fprintf (stderr, "Couldn't open %s\n", this->corpus.c_str()); exit (1); } while (feof(fp) == 0) { MyUtil::sqread(fp, &sq, CNF_BUFSIZE); if (sq.getRowSize() == 0) { continue; } this->decay(t++); this->update(&sq, &cache, reg); sq.clear(); cache.reset(); } fclose(fp); this->lreport(i); } }
void FastaReader::NextRead(Sequence &r, bool &success, bool &done) { if (!r.empty()) r.clear(); int c; success = false; done = false; // Pick off the first at if(this->first_) { c = fHandler_->get(); if(c != '>') { c = fHandler_->getPastNewline(); if(c < 0) { r.clear();success = false; done = true; } } if(c != '>') { std::cerr << "Error: reads file does not look like a FASTA file" << std::endl; throw 1; } assert_eq('>', c); first_ = false; } // Read to the end of the id line, sticking everything after the '>' // into *name string& id = r.id(); while(true) { c = fHandler_->get(); if(c < 0) { r.clear(); success = false; done = true; std::cerr << "Error: reads file does not look like a FASTA file" << std::endl; throw 1; } if(c == '\n' || c == '\r') { // Break at end of line, after consuming all \r's, \n's while(c == '\n' || c == '\r') { c = fHandler_->get(); if(c < 0) { r.clear(); success = false; done = true; std::cerr << "Error: reads file does not look like a FASTA file" << std::endl; throw 1; } } break; } if(c == '>') continue; id += c; //name.append(c); } // fb_ now points just past the first character of a // sequence line, and c holds the first character BTDnaString& sbuf = r.fowardSeq(); while(true){ c = toupper(c); if(c < 0){ r.clear(); success = false; done = true; std::cerr << "Error: reads file does not look like a FASTA file" << std::endl; throw 1; } else{ if(c == '\n' || c == '\r') { break; } sbuf.append(asc2dna[c]); c = fHandler_->get(); } } success = true; done = fHandler_->eof(); is_done_ = done; }
/* Checks unique(), splice(), reverse() and merge() on a sequence that
 * contains repeated values, then replays the splice/merge cases from the
 * boost-users bug report referenced below.
 */
static void test_list_ops_non_unique_seq(
  BOOST_EXPLICIT_TEMPLATE_TYPE(Sequence))
{
  typedef typename Sequence::iterator iterator;

  Sequence ss;
  int i=0;
  while(i<10){
    ss.push_back(i);
    ss.push_back(i);
    ss.push_front(i);
    ss.push_front(i);
    ++i;
  }
  /* ss now holds 9988776655443322110000112233445566778899 */

  ss.unique();
  CHECK_EQUAL(
    ss,
    {9 _ 8 _ 7 _ 6 _ 5 _ 4 _ 3 _ 2 _ 1 _ 0 _
     1 _ 2 _ 3 _ 4 _ 5 _ 6 _ 7 _ 8 _ 9});

  /* step nine positions forward: it ends up on the single 0 */
  iterator it=ss.begin();
  for(int steps=9;steps>0;--steps)++it;

  /* move the descending prefix out, turn it around and merge it back */
  Sequence ss2;
  ss2.splice(ss2.end(),ss,ss.begin(),it);
  ss2.reverse();
  ss.merge(ss2);
  CHECK_EQUAL(
    ss,
    {0 _ 1 _ 1 _ 2 _ 2 _ 3 _ 3 _ 4 _ 4 _ 5 _ 5 _
     6 _ 6 _ 7 _ 7 _ 8 _ 8 _ 9 _ 9});

  ss.unique(same_integral_div<3>());
  CHECK_EQUAL(ss,{0 _ 3 _ 6 _ 9});

  ss.unique(same_integral_div<1>());
  CHECK_EQUAL(ss,{0 _ 3 _ 6 _ 9});

  /* testcases for bugs reported at
   * http://lists.boost.org/boost-users/2006/09/22604.php
   */
  {
    typedef typename Sequence::value_type value_type;

    Sequence dst,src;

    /* splice of a single element */
    dst.push_back(0);
    src.push_back(0);
    dst.splice(dst.end(),src,src.begin());
    CHECK_EQUAL(dst,{0 _ 0});
    BOOST_CHECK(src.empty());

    /* splice of a whole range */
    dst.clear();
    src.clear();
    dst.push_back(0);
    src.push_back(0);
    dst.splice(dst.end(),src,src.begin(),src.end());
    CHECK_EQUAL(dst,{0 _ 0});
    BOOST_CHECK(src.empty());

    /* merge with the default ordering */
    dst.clear();
    src.clear();
    dst.push_back(0);
    src.push_back(0);
    dst.merge(src);
    CHECK_EQUAL(dst,{0 _ 0});
    BOOST_CHECK(src.empty());

    /* merge with an explicit comparison object */
    dst.clear();
    src.clear();
    dst.push_back(0);
    src.push_back(0);
    dst.merge(src,std::less<value_type>());
    CHECK_EQUAL(dst,{0 _ 0});
    BOOST_CHECK(src.empty());
  }
}
void fastqPattern::parseImp(Sequence& read, bool& success, bool& done){ char c; success = false; done = false; // Pick off the first at if(this->first_) { c = this->fb_->get(); if(c != '@') { c = this->fb_->getPastNewline(); if(c < 0) { success = false; done = true; } line_num_ += 1; } if(c != '@') { std::cerr << "Error: reads file does not look like a FASTQ file" << std::endl; throw 1; } assert_eq('@', c); first_ = false; } // Read to the end of the id line, sticking everything after the '@' // into id std::string& id = read.id(); while(true) { c = this->fb_->get(); if(c == '@') continue; if(c < 0) { read.clear(); success = false; done = true; std::cerr << "Error: reads file does not look like a FASTQ file" << std::endl; throw 1; } if(c == '\n' || c == '\r') { // Break at end of line, after consuming all \r's, \n's while(c == '\n' || c == '\r') { c = this->fb_->get(); if(c < 0) { read.clear(); success = false; done = true; std::cerr << "Error: reads file does not look like a FASTQ file" << std::endl; throw 1; } } line_num_ += 1; break; } id += c; } // fb_ now points just past the first character of a // sequence line, and c holds the first character //BTDnaString& sbuf = r->fowardSeq(); std::string& sequence = read.fowardSeq(); while(true){ c = toupper(c); if(c < 0){ read.clear(); success = false; done = true; std::cerr << "Error: reads file does not look like a FASTQ file" << std::endl; throw 1; } else{ if(c == '\n' || c == '\r') { // Break at end of line, after consuming all \r's, \n's while(c == '\n' || c == '\r') { c = this->fb_->get(); if(c < 0) { read.clear(); success = false; done = true; std::cerr << "Error: reads file does not look like a FASTQ file" << std::endl; throw 1; } } line_num_ += 1; break; } // convert N to A if(toupper(c) == 'N') c = 'A'; sequence += c; c = this->fb_->get(); } } assert_eq('+', c); // skip the option sequence at '+' line c = this->fb_->getPastNewline(); line_num_ += 1; if(c <0){ read.clear(); success = false; done = true; 
std::cerr << "Error: reads file does not look like a FASTQ file" << std::endl; throw 1; } string& qual = read.quality(); while(true){ qual += c; c = this->fb_->get(); if(c == '\n' || c == '\r') break; } line_num_ += 1; success = true; assert(qual.length() == sequence.length()); done = fb_->eof(); }
static void test_list_ops_non_unique_seq() { typedef typename Sequence::iterator iterator; Sequence ss; for(int i=0;i<10;++i){ ss.push_back(i); ss.push_back(i); ss.push_front(i); ss.push_front(i); } /* 9988776655443322110000112233445566778899 */ ss.unique(); CHECK_EQUAL( ss, (9)(8)(7)(6)(5)(4)(3)(2)(1)(0) (1)(2)(3)(4)(5)(6)(7)(8)(9)); iterator it=ss.begin(); for(int j=0;j<9;++j,++it){} /* it points to o */ Sequence ss2; ss2.splice(ss2.end(),ss,ss.begin(),it); ss2.reverse(); ss.merge(ss2); CHECK_EQUAL( ss, (0)(1)(1)(2)(2)(3)(3)(4)(4)(5)(5) (6)(6)(7)(7)(8)(8)(9)(9)); ss.unique(same_integral_div<3>()); CHECK_EQUAL(ss,(0)(3)(6)(9)); ss.unique(same_integral_div<1>()); CHECK_EQUAL(ss,(0)(3)(6)(9)); /* testcases for bugs reported at * http://lists.boost.org/boost-users/2006/09/22604.php */ { Sequence ss,ss2; ss.push_back(0); ss2.push_back(0); ss.splice(ss.end(),ss2,ss2.begin()); CHECK_EQUAL(ss,(0)(0)); BOOST_TEST(ss2.empty()); ss.clear(); ss2.clear(); ss.push_back(0); ss2.push_back(0); ss.splice(ss.end(),ss2,ss2.begin(),ss2.end()); CHECK_EQUAL(ss,(0)(0)); BOOST_TEST(ss2.empty()); ss.clear(); ss2.clear(); ss.push_back(0); ss2.push_back(0); ss.merge(ss2); CHECK_EQUAL(ss,(0)(0)); BOOST_TEST(ss2.empty()); typedef typename Sequence::value_type value_type; ss.clear(); ss2.clear(); ss.push_back(0); ss2.push_back(0); ss.merge(ss2,std::less<value_type>()); CHECK_EQUAL(ss,(0)(0)); BOOST_TEST(ss2.empty()); } }
int main (int argc, char **argv) { if (argc < 5) { fprintf (stderr, "template inputfile.gz colsize bound\n"); exit (1); } FILE *fp = NULL; if ((fp = fopen(*(argv+1),"r")) == NULL) { fprintf (stderr, "Couldn't open %s\n", *(argv+1)); exit (1); } PoolAlloc ac(256, 1000000); std::list<tmpl*> tmpls; char buf[BUFSIZE]; while (fgets(buf, BUFSIZE, fp) != NULL) { MyUtil::chomp(buf); if (MyUtil::IsCommentOut(buf)) { continue; } tmpl *t = new tmpl(buf, &ac); tmpls.push_back(t); } int fd = open(*(argv+2), O_RDONLY); if (fd < 0) { fprintf (stderr, "Couldn't open %s\n",*(argv+2)); exit (1); } gzFile input = gzdopen(fd, GZ_MODE); if (input == NULL) { fprintf (stderr, "failed to gzdopen\n"); exit (1); } int colsize = 0; sscanf (*(argv+3),"%d",&colsize); int bound = 0; sscanf (*(argv+4),"%d",&bound); Sequence sq; sq.setColSize(colsize); sq.init(); Dic features(&ac, CountUp); while (gzgets(input, buf, BUFSIZE) != NULL) { MyUtil::chomp(buf); if (MyUtil::IsEOS(buf)) { int size = (int)sq.getRowSize(); std::list<tmpl*>::iterator it = tmpls.begin(); for (; it != tmpls.end(); it++) { for (int i = 0; i < size; i++) { char *feature = (*it)->expand(&sq,i); //fprintf(stdout, "%s\n",feature); features.insert(feature); ac.release(feature); } } sq.clear(); continue; } sq.push(buf); } nodeptr nil = features.getnil(); for (int i = HASHSIZE-1; i >= 0; i--) { nodeptr *p = features.table+i; if (*p != nil) { recalldump(nil, (*p)->left, bound); recalldump(nil, (*p)->right, bound); } } if (gzclose(input) != Z_OK) { fprintf (stderr,"gzclose failed\n"); exit (1); } return 0; }