Exemple #1
0
/**
 * Scan the training corpus once, collecting labels and features.
 *
 * Opens this->corpus, reads it one sequence at a time through
 * MyUtil::sqread(), and passes every sequence with at least one row to
 * extlabel() and extract(). this->instance is reset to 0 and incremented
 * once per processed sequence.
 *
 * Terminates the process with exit(1) when the corpus cannot be opened or
 * when the stream reports a read error.
 */
void Cnflearn::extfeature()
{
   FILE *fp = fopen(this->corpus.c_str(), "r");
   if (fp == NULL)
   {
      fprintf (stderr,"Couldn't open %s\n", this->corpus.c_str());
      exit (1);
   }
   Sequence sq;
   sq.setColSize(this->sqcolsize);
   sq.setAllocSize(this->sqallocsize);
   sq.setArraySize(this->sqarraysize);
   sq.init();
   this->instance = 0;
   // Stop on ferror() as well: a failed read never raises the EOF flag, so
   // testing feof() alone would loop forever on an unreadable stream
   // (e.g. a directory that fopen() accepted).
   while (feof(fp) == 0 && ferror(fp) == 0)
   {
      MyUtil::sqread(fp,&sq,CNF_BUFSIZE);
      if (sq.getRowSize() == 0)
      {
         // Nothing was read into the sequence; try the next chunk.
         continue;
      }
      this->extlabel(&sq);
      this->extract(&sq);
      this->instance++;
      sq.clear();
   }
   const bool readFailed = (ferror(fp) != 0);
   fclose(fp);
   if (readFailed)
   {
      fprintf (stderr,"Couldn't read %s\n", this->corpus.c_str());
      exit (1);
   }
}
/**
 * Read a text picture from `is` and store the position of every '*' in `s`.
 *
 * `s` is cleared first. Each line read is one row (y counts lines from 0)
 * and the character offset within the line is x. While scanning, the
 * maxima of x+y, x-y, -x+y and -x-y over all stars are kept in the rhomb's
 * pp, pm, mp and mm members, each starting at INT_MIN.
 *
 * @param is  stream to read lines from until getline() fails
 * @param s   receives one point(x, y) per '*' found
 * @return    pair of point(largest x seen + 1, number of lines read) and
 *            the rhomb of the four maxima
 */
std::pair<point,rhomb> read_points(std::istream& is,Sequence& s)
{
    rhomb rh(INT_MIN,INT_MIN,INT_MIN,INT_MIN);
    int   xmax=0;
    s.clear();
    int y=0;
    for(std::string str; std::getline(is,str); ++y) {
        int line_xmax=0;
        // Search with the string's own unsigned index type so that the
        // comparison against npos is exact; convert to int only once a
        // real position has been found.
        for(std::string::size_type pos=str.find('*');
            pos!=std::string::npos;
            pos=str.find('*',pos+1)) {
            const int x=static_cast<int>(pos);
            s.push_back(point(x,y));
            line_xmax=x; // positions are visited left to right
            if( x+y>rh.pp)rh.pp= x+y;
            if( x-y>rh.pm)rh.pm= x-y;
            if(-x+y>rh.mp)rh.mp=-x+y;
            if(-x-y>rh.mm)rh.mm=-x-y;
        }
        if(line_xmax>xmax)xmax=line_xmax;
    }
    return std::make_pair(point(xmax+1,y),rh);
}
// Manual demo of Sequence: fill it, add and remove after stepping with
// reset()/next(), then clear it, displaying the contents after each stage.
// NOTE(review): presumably reset() rewinds an internal position and next()
// advances it - confirm against the Sequence class.
int main() {
	const char* const divider = "==========";
	Sequence s;

	// Stage 1: add the values 0..5.
	size_t value = 0;
	while (value < 6) {
		s.add(value);
		++value;
	}
	s.display();
	cout << divider << endl;

	// Stage 2: reset, step three times, add 42.
	s.reset();
	for (size_t step = 3; step > 0; --step) {
		s.next();
	}
	s.add(42);
	s.display();
	cout << divider << endl;

	// Stage 3: reset, step twice, remove.
	s.reset();
	for (size_t step = 2; step > 0; --step) {
		s.next();
	}
	s.remove();
	s.display();
	cout << divider << endl;

	// Stage 4: empty the sequence.
	s.clear();
	s.display();
	cout << divider << endl;
}
// Manual check of the FASTA and FASTQ readers against two hard-coded files:
// for each file, call NextRead() until it reports success and print the
// forward sequence after every call.
// NOTE(review): `done` is filled in by NextRead() but never consulted, so
// each loop stops after the first successful read - confirm whether reading
// the whole file was intended.
int main(void) {
    bool done = false;
    bool success = false;
    Sequence read;

    // --- FASTA input ---
    string filename = "/home/laozzzzz/barcode_project/reads/e_coli_10000snp.fa";
    std::unique_ptr<SequenceReader> reader(FastaReader::CreateSequenceReader(filename));
    do {
        reader->NextRead(read, success, done);
        cout<< read.fowardSeq() <<endl;
    } while(!success);

    // --- FASTQ input, reusing the same Sequence and reader handle ---
    read.clear();
    filename = "/home/laozzzzz/barcode_project/reads/1000.fq";
    done = false;
    success = false;
    reader.reset(FastqReader::CreateSequenceReader(filename));
    do {
        reader->NextRead(read, success, done);
        cout<< read.fowardSeq() <<endl;
    } while(!success);

    return 0;
}
Exemple #5
0
/**
 * Train on the corpus for `iter` passes.
 *
 * Each pass reopens this->corpus and reads it one sequence at a time via
 * MyUtil::sqread(). For every sequence with at least one row it calls
 * decay() with a counter that keeps increasing across passes, then
 * update() with the sequence, the scratch cache and `reg`; the sequence
 * and the cache are reset afterwards. lreport(i) is called at the end of
 * pass i.
 *
 * Terminates the process with exit(1) when the object is not valid, when
 * the corpus cannot be opened, or when the stream reports a read error.
 *
 * @param iter  number of passes over the corpus
 * @param reg   value forwarded unchanged to update()
 */
void Cnflearn::learn(unsigned int iter, unsigned int reg)
{
   if (!this->valid)
   {
      fprintf (stderr, "It's not initialized\n");
      exit(1);
   }
   AllocMemdiscard cache(this->cachesize);
   Sequence sq;
   sq.setColSize(this->sqcolsize);
   sq.setAllocSize(this->sqallocsize);
   sq.setArraySize(this->sqarraysize);
   sq.init();
   int t = 0; // number of update steps taken so far, over all passes
   for (unsigned int i = 0; i < iter; i++)
   {
      FILE *fp = fopen(this->corpus.c_str(), "r");
      if (fp == NULL)
      {
         fprintf (stderr, "Couldn't open %s\n", this->corpus.c_str());
         exit (1);
      }
      // Stop on ferror() as well: a failed read never raises the EOF flag,
      // so testing feof() alone would loop forever on an unreadable stream
      // (e.g. a directory that fopen() accepted).
      while (feof(fp) == 0 && ferror(fp) == 0)
      {
         MyUtil::sqread(fp, &sq, CNF_BUFSIZE);
         if (sq.getRowSize() == 0)
         {
            // Nothing was read into the sequence; try the next chunk.
            continue;
         }
         this->decay(t++);
         this->update(&sq, &cache, reg);
         sq.clear();
         cache.reset();
      }
      const bool readFailed = (ferror(fp) != 0);
      fclose(fp);
      if (readFailed)
      {
         fprintf (stderr, "Couldn't read %s\n", this->corpus.c_str());
         exit (1);
      }
      this->lreport(i);
   }
}
/**
 * Read the next FASTA record from fHandler_ into `r`.
 *
 * `r` is cleared if it is not empty. The id line (with every '>' dropped)
 * is appended to r.id(), and the characters of the single line that
 * follows are upper-cased, mapped through asc2dna and appended to
 * r.fowardSeq(). The sequence ends at the first '\n' or '\r', or at end of
 * input when the last line has no trailing newline.
 *
 * NOTE(review): only one sequence line is consumed per record; a record
 * wrapped over several lines would have its continuation lines read as the
 * next id - confirm that inputs are single-line.
 *
 * @param r        receives the id and sequence
 * @param success  set to true once a record has been read
 * @param done     set to true when the input is exhausted
 * @throws int     (value 1) when the input does not look like FASTA:
 *                 no leading '>' or end of input inside the id line
 */
void FastaReader::NextRead(Sequence &r, bool &success, bool &done) {

    if (!r.empty())
        r.clear();
    int c;

    success = false;
    done = false;
    // First call only: make sure the stream starts with a '>' header.
    if(this->first_) {
        c = fHandler_->get();
        if(c != '>') {
            // Give the input one more chance after skipping the current
            // line (presumably getPastNewline() returns the first
            // character of the following line - confirm).
            c = fHandler_->getPastNewline();
            if(c < 0) {
               r.clear();success = false; done = true;
            }
        }
        if(c != '>') {
            std::cerr << "Error: reads file does not look like a FASTA file" << std::endl;
            throw 1;
        }
        assert_eq('>', c);
        first_ = false;
    }
    // Read to the end of the id line, sticking everything except '>'
    // into the id.
    string& id = r.id();
    while(true) {
        c = fHandler_->get();
        if(c < 0) {
            r.clear(); success = false; done = true;
            std::cerr << "Error: reads file does not look like a FASTA file" << std::endl;
            throw 1;
        }
        if(c == '\n' || c == '\r') {
            // Break at end of line, after consuming all \r's, \n's
            while(c == '\n' || c == '\r') {
                c = fHandler_->get();
                if(c < 0) {
                    // A header with no sequence after it is malformed.
                    r.clear(); success = false; done = true;
                    std::cerr << "Error: reads file does not look like a FASTA file" << std::endl;
                    throw 1;
                }
            }
            break;
        }
        if(c == '>')
            continue;
        id += c;
    }
    // c now holds the first character of the sequence line; the loop above
    // guarantees it is neither a line break nor EOF, so at least one base
    // is always appended below.
    BTDnaString& sbuf = r.fowardSeq();
    bool hitEof = false;
    while(true){
        if(c < 0){
            // End of input right after the bases: the final line simply
            // lacks a trailing newline. The record is complete, so finish
            // it instead of rejecting the file.
            hitEof = true;
            break;
        }
        c = toupper(c);
        if(c == '\n' || c == '\r') {
            break;
        }
        sbuf.append(asc2dna[c]);
        c = fHandler_->get();
    }
    success = true;
    done = fHandler_->eof();
    if(hitEof)
        done = true;
    is_done_ = done;
}
/* Exercises the list-style operations unique, splice, reverse and merge
 * on a sequence that accepts repeated values, checking the contents after
 * each step.
 * NOTE(review): `typename Sequence::...` implies an enclosing
 * template<typename Sequence> header that is not visible here.
 */
static void test_list_ops_non_unique_seq(
  BOOST_EXPLICIT_TEMPLATE_TYPE(Sequence))
{
  Sequence seq;
  for(int n=0;n!=10;++n){
    /* two copies of n at the back, then two at the front */
    seq.push_back(n);
    seq.push_back(n);
    seq.push_front(n);
    seq.push_front(n);
  } /* 9988776655443322110000112233445566778899 */

  /* drop adjacent repeats */
  seq.unique();
  CHECK_EQUAL(
    seq,
    {9 _ 8 _ 7 _ 6 _ 5 _ 4 _ 3 _ 2 _ 1 _ 0 _
     1 _ 2 _ 3 _ 4 _ 5 _ 6 _ 7 _ 8 _ 9});

  /* step nine positions in, stopping on the 0 */
  typename Sequence::iterator middle=seq.begin();
  for(int steps=9;steps>0;--steps)++middle;

  /* move the descending prefix 9..1 out, flip it, and merge it back */
  Sequence head;
  head.splice(head.end(),seq,seq.begin(),middle);
  head.reverse();
  seq.merge(head);
  CHECK_EQUAL(
    seq,
    {0 _ 1 _ 1 _ 2 _ 2 _ 3 _ 3 _ 4 _ 4 _ 5 _ 5 _
     6 _ 6 _ 7 _ 7 _ 8 _ 8 _ 9 _ 9});

  /* unique with a predicate */
  seq.unique(same_integral_div<3>());
  CHECK_EQUAL(seq,{0 _ 3 _ 6 _ 9});

  /* a second predicate that leaves the contents unchanged */
  seq.unique(same_integral_div<1>());
  CHECK_EQUAL(seq,{0 _ 3 _ 6 _ 9});

  /* testcases for bugs reported at
   * http://lists.boost.org/boost-users/2006/09/22604.php
   */
  {
    Sequence lhs,rhs;

    /* splice of a single element */
    lhs.push_back(0);
    rhs.push_back(0);
    lhs.splice(lhs.end(),rhs,rhs.begin());
    CHECK_EQUAL(lhs,{0 _ 0});
    BOOST_CHECK(rhs.empty());

    /* splice of a whole range */
    lhs.clear();
    lhs.push_back(0);
    rhs.clear();
    rhs.push_back(0);
    lhs.splice(lhs.end(),rhs,rhs.begin(),rhs.end());
    CHECK_EQUAL(lhs,{0 _ 0});
    BOOST_CHECK(rhs.empty());

    /* merge with the default ordering */
    lhs.clear();
    lhs.push_back(0);
    rhs.clear();
    rhs.push_back(0);
    lhs.merge(rhs);
    CHECK_EQUAL(lhs,{0 _ 0});
    BOOST_CHECK(rhs.empty());

    /* merge with an explicit comparison */
    lhs.clear();
    lhs.push_back(0);
    rhs.clear();
    rhs.push_back(0);
    lhs.merge(rhs,std::less<typename Sequence::value_type>());
    CHECK_EQUAL(lhs,{0 _ 0});
    BOOST_CHECK(rhs.empty());
  }
}
/**
 * Parse one four-line FASTQ record from fb_ into `read`.
 *
 * The id line (with every '@' dropped) is appended to read.id(); the
 * sequence line is upper-cased, has 'N' replaced by 'A', and is appended
 * to read.fowardSeq(); the '+' line is skipped; the quality line is
 * appended to read.quality(). line_num_ is advanced as lines are consumed.
 *
 * @param read     receives id, sequence and quality
 * @param success  set to true once a full record has been read
 * @param done     set from fb_->eof() after the record
 * @throws int     (value 1) when the input does not look like FASTQ:
 *                 no leading '@', or end of input before the quality line
 */
void fastqPattern::parseImp(Sequence& read,
                            bool& success,
                            bool& done){

    // Must be int, not char: the reader signals end of input with a
    // negative value. Stored in a char, that sentinel is lost where char
    // is unsigned and collides with bytes >= 0x80 where char is signed.
    int c;

    success = false;
    done = false;
    // First call only: make sure the stream starts with an '@' header.
    if(this->first_) {
        c = this->fb_->get();
        if(c != '@') {
            // Give the input one more chance after skipping the current
            // line (presumably getPastNewline() returns the first
            // character of the following line - confirm).
            c = this->fb_->getPastNewline();
            if(c < 0) {
                success = false; done = true;
            }
            line_num_ += 1;
        }
        if(c != '@') {
            std::cerr << "Error: reads file does not look like a FASTQ file" << std::endl;
            throw 1;
        }
        assert_eq('@', c);
        first_ = false;
    }
    // Read to the end of the id line, sticking everything except '@'
    // into id

    std::string& id = read.id();

    while(true) {

        c = this->fb_->get();
        if(c == '@')
            continue;
        if(c < 0) {
            read.clear(); success = false; done = true;
            std::cerr << "Error: reads file does not look like a FASTQ file" << std::endl;
            throw 1;
        }
        if(c == '\n' || c == '\r') {
            // Break at end of line, after consuming all \r's, \n's
            while(c == '\n' || c == '\r') {
                c = this->fb_->get();
                if(c < 0) {
                    read.clear(); success = false; done = true;
                    std::cerr << "Error: reads file does not look like a FASTQ file" << std::endl;
                    throw 1;
                }
            }
            line_num_ += 1;
            break;
        }
        id += static_cast<char>(c);
    }
    // c now holds the first character of the sequence line
    std::string& sequence = read.fowardSeq();
    while(true){
        if(c < 0){
            // The '+' and quality lines are still missing.
            read.clear(); success = false; done = true;
            std::cerr << "Error: reads file does not look like a FASTQ file" << std::endl;
            throw 1;
        }
        c = toupper(c);
        if(c == '\n' || c == '\r') {
            // Break at end of line, after consuming all \r's, \n's
            while(c == '\n' || c == '\r') {
                c = this->fb_->get();
                if(c < 0) {
                    read.clear(); success = false; done = true;
                    std::cerr << "Error: reads file does not look like a FASTQ file" << std::endl;
                    throw 1;
                }
            }
            line_num_ += 1;
            break;
        }
        // convert N to A (c is already upper case here)
        if(c == 'N')
            c = 'A';
        sequence += static_cast<char>(c);
        c = this->fb_->get();
    }
    assert_eq('+', c);
    // skip the optional text on the '+' line
    c = this->fb_->getPastNewline();
    line_num_ += 1;
    if(c <0){
        read.clear(); success = false; done = true;
        std::cerr << "Error: reads file does not look like a FASTQ file" << std::endl;
        throw 1;
    }
    string& qual = read.quality();
    while(true){
        qual += static_cast<char>(c);
        c = this->fb_->get();
        // Also stop at end of input: without this check a file whose last
        // quality line has no trailing newline would append forever.
        if(c == '\n' || c == '\r' || c < 0)
            break;
    }
    line_num_ += 1;
    success = true;
    assert(qual.length() == sequence.length());
    done = fb_->eof();
}
Exemple #9
0
static void test_list_ops_non_unique_seq()
{
  typedef typename Sequence::iterator iterator;

  Sequence ss;
  for(int i=0;i<10;++i){
    ss.push_back(i);
    ss.push_back(i);
    ss.push_front(i);
    ss.push_front(i);
  } /* 9988776655443322110000112233445566778899 */

  ss.unique();
  CHECK_EQUAL(
    ss,
    (9)(8)(7)(6)(5)(4)(3)(2)(1)(0)
    (1)(2)(3)(4)(5)(6)(7)(8)(9));

  iterator it=ss.begin();
  for(int j=0;j<9;++j,++it){} /* it points to o */

  Sequence ss2;
  ss2.splice(ss2.end(),ss,ss.begin(),it);
  ss2.reverse();
  ss.merge(ss2);
  CHECK_EQUAL(
    ss,
    (0)(1)(1)(2)(2)(3)(3)(4)(4)(5)(5)
    (6)(6)(7)(7)(8)(8)(9)(9));

  ss.unique(same_integral_div<3>());
  CHECK_EQUAL(ss,(0)(3)(6)(9));

  ss.unique(same_integral_div<1>());
  CHECK_EQUAL(ss,(0)(3)(6)(9));

  /* testcases for bugs reported at
   * http://lists.boost.org/boost-users/2006/09/22604.php
   */
  {
    Sequence ss,ss2;
    ss.push_back(0);
    ss2.push_back(0);
    ss.splice(ss.end(),ss2,ss2.begin());
    CHECK_EQUAL(ss,(0)(0));
    BOOST_TEST(ss2.empty());

    ss.clear();
    ss2.clear();
    ss.push_back(0);
    ss2.push_back(0);
    ss.splice(ss.end(),ss2,ss2.begin(),ss2.end());
    CHECK_EQUAL(ss,(0)(0));
    BOOST_TEST(ss2.empty());

    ss.clear();
    ss2.clear();
    ss.push_back(0);
    ss2.push_back(0);
    ss.merge(ss2);
    CHECK_EQUAL(ss,(0)(0));
    BOOST_TEST(ss2.empty());

    typedef typename Sequence::value_type value_type;
    ss.clear();
    ss2.clear();
    ss.push_back(0);
    ss2.push_back(0);
    ss.merge(ss2,std::less<value_type>());
    CHECK_EQUAL(ss,(0)(0));
    BOOST_TEST(ss2.empty());
  }
}
Exemple #10
0
/**
 * Count template-expanded features over a gzip-compressed corpus.
 *
 * Usage: <program> template inputfile.gz colsize bound
 *
 *  1. Reads the template file line by line, skipping lines that
 *     MyUtil::IsCommentOut() accepts, and builds one tmpl per line.
 *  2. Streams the gzip input; lines are pushed into a Sequence until
 *     MyUtil::IsEOS() matches, at which point every template is expanded
 *     at every row and the resulting string is inserted into the
 *     dictionary.
 *  3. Walks the dictionary's hash table from the last bucket to the first
 *     and calls recalldump() with `bound` on the left and right children
 *     of every non-nil bucket root.
 *
 * Exits with status 1 on bad arguments or any open/close failure.
 * NOTE(review): rows pushed after the last EOS line are never expanded -
 * confirm the input always ends with an EOS marker.
 */
int main (int argc, char **argv)
{
   if (argc < 5)
   {
      fprintf (stderr, "template inputfile.gz colsize bound\n");
      exit (1);
   }
   FILE *fp = fopen(argv[1], "r");
   if (fp == NULL)
   {
      fprintf (stderr, "Couldn't open %s\n", argv[1]);
      exit (1);
   }
   PoolAlloc ac(256, 1000000);
   // The tmpl objects are never deleted here; they live until process exit.
   std::list<tmpl*> tmpls;
   char buf[BUFSIZE];
   while (fgets(buf, BUFSIZE, fp) != NULL)
   {
      MyUtil::chomp(buf);
      if (MyUtil::IsCommentOut(buf))
      {
         continue;
      }
      tmpl *t = new tmpl(buf, &ac);
      tmpls.push_back(t);
   }
   // The template file is fully consumed; release the handle.
   fclose(fp);

   int fd = open(argv[2], O_RDONLY);
   if (fd < 0)
   {
      fprintf (stderr, "Couldn't open %s\n", argv[2]);
      exit (1);
   }
   gzFile input = gzdopen(fd, GZ_MODE);
   if (input == NULL)
   {
      fprintf (stderr, "failed to gzdopen\n");
      exit (1);
   }

   // Reject non-numeric arguments instead of silently running with 0.
   int colsize = 0;
   int bound = 0;
   if (sscanf (argv[3],"%d",&colsize) != 1 ||
       sscanf (argv[4],"%d",&bound) != 1)
   {
      fprintf (stderr, "colsize and bound must be integers\n");
      exit (1);
   }

   Sequence sq;
   sq.setColSize(colsize);
   sq.init();

   Dic features(&ac, CountUp);

   while (gzgets(input, buf, BUFSIZE) != NULL)
   {
      MyUtil::chomp(buf);
      if (MyUtil::IsEOS(buf))
      {
         // End of one sequence: expand every template at every row.
         int size = (int)sq.getRowSize();
         std::list<tmpl*>::iterator it = tmpls.begin();
         for (; it != tmpls.end(); ++it)
         {
            for (int i = 0; i < size; i++)
            {
               char *feature = (*it)->expand(&sq,i);
               //fprintf(stdout, "%s\n",feature);
               features.insert(feature);
               // Hand the expanded string back to the pool after insertion.
               ac.release(feature);
            }
         }
         sq.clear();
         continue;
      }
      sq.push(buf);
   }

   nodeptr nil = features.getnil();
   for (int i = HASHSIZE-1; i >= 0; i--)
   {
      nodeptr *p = features.table+i;
      if (*p != nil)
      {
         recalldump(nil, (*p)->left, bound);
         recalldump(nil, (*p)->right, bound);
      }
   }

   if (gzclose(input) != Z_OK)
   {
      fprintf (stderr,"gzclose failed\n");
      exit (1);
   }
   return 0;
}