Exemple #1
0
/*
 * Make one pass over the training corpus (this->corpus). Every non-empty
 * sequence returned by MyUtil::sqread() is handed to extlabel() and
 * extract(), and this->instance ends up holding the number of sequences
 * processed.
 *
 * Exits the process with status 1 when the corpus cannot be opened or when
 * the stream reports a read error.
 */
void Cnflearn::extfeature()
{
   FILE *fp = fopen(this->corpus.c_str(), "r");
   if (fp == NULL)
   {
      fprintf (stderr, "Couldn't open %s\n", this->corpus.c_str());
      exit (1);
   }

   Sequence sq;
   sq.setColSize(this->sqcolsize);
   sq.setAllocSize(this->sqallocsize);
   sq.setArraySize(this->sqarraysize);
   sq.init();

   this->instance = 0;
   // feof() stays 0 after a read error, so testing it alone could spin
   // forever on the empty-sequence branch below if the stream goes bad.
   // Checking ferror() as well guarantees the loop terminates.
   while (feof(fp) == 0 && ferror(fp) == 0)
   {
      MyUtil::sqread(fp, &sq, CNF_BUFSIZE);
      if (sq.getRowSize() == 0)
      {
         // Nothing was read into the sequence; try the next one.
         continue;
      }
      this->extlabel(&sq);
      this->extract(&sq);
      this->instance++;
      sq.clear();
   }

   // Capture the error state before fclose() invalidates the stream.
   const bool readFailed = (ferror(fp) != 0);
   fclose(fp);
   if (readFailed)
   {
      fprintf (stderr, "Failed to read %s\n", this->corpus.c_str());
      exit (1);
   }
}
Exemple #2
0
/*
 * Run the training loop over the corpus.
 *
 * iter  number of full passes over this->corpus; the file is reopened for
 *       each pass and lreport() is called after each one.
 * reg   forwarded unchanged to update() (its meaning is defined there).
 *
 * For every non-empty sequence the step counter is passed to decay() and
 * then incremented, update() is called, and the scratch cache is reset.
 *
 * Exits the process with status 1 when the object is not marked valid,
 * when the corpus cannot be opened, or when the stream reports a read
 * error.
 */
void Cnflearn::learn(unsigned int iter, unsigned int reg)
{
   if (!this->valid)
   {
      fprintf (stderr, "It's not initialized\n");
      exit(1);
   }

   AllocMemdiscard cache(this->cachesize);
   Sequence sq;
   sq.setColSize(this->sqcolsize);
   sq.setAllocSize(this->sqallocsize);
   sq.setArraySize(this->sqarraysize);
   sq.init();

   // Step counter shared across all passes; decay() receives its value
   // before each update.
   int t = 0;
   for (unsigned int i = 0; i < iter; i++)
   {
      FILE *fp = fopen(this->corpus.c_str(), "r");
      if (fp == NULL)
      {
         fprintf (stderr, "Couldn't open %s\n", this->corpus.c_str());
         exit (1);
      }

      // feof() stays 0 after a read error, so testing it alone could spin
      // forever on the empty-sequence branch below if the stream goes bad.
      // Checking ferror() as well guarantees the loop terminates.
      while (feof(fp) == 0 && ferror(fp) == 0)
      {
         MyUtil::sqread(fp, &sq, CNF_BUFSIZE);
         if (sq.getRowSize() == 0)
         {
            // Nothing was read into the sequence; try the next one.
            continue;
         }
         this->decay(t++);
         this->update(&sq, &cache, reg);
         sq.clear();
         cache.reset();
      }

      // Capture the error state before fclose() invalidates the stream.
      const bool readFailed = (ferror(fp) != 0);
      fclose(fp);
      if (readFailed)
      {
         fprintf (stderr, "Failed to read %s\n", this->corpus.c_str());
         exit (1);
      }
      this->lreport(i);
   }
}
Exemple #3
0
int main (int argc, char **argv)
{
   if (argc < 5)
   {
      fprintf (stderr, "template inputfile.gz colsize bound\n");
      exit (1);
   }
   FILE *fp = NULL;
   if ((fp = fopen(*(argv+1),"r")) == NULL)
   {
      fprintf (stderr, "Couldn't open %s\n", *(argv+1));
      exit (1);
   }
   PoolAlloc ac(256, 1000000);
   std::list<tmpl*> tmpls;
   char buf[BUFSIZE];
   while (fgets(buf, BUFSIZE, fp) != NULL)
   {
      MyUtil::chomp(buf);
      if (MyUtil::IsCommentOut(buf))
      {
         continue;
      }
      tmpl *t = new tmpl(buf, &ac);
      tmpls.push_back(t);
   }

   int fd = open(*(argv+2), O_RDONLY);
   if (fd < 0)
   {
      fprintf (stderr, "Couldn't open %s\n",*(argv+2));
      exit (1);
   }
   gzFile input = gzdopen(fd, GZ_MODE);
   if (input == NULL)
   {
      fprintf (stderr, "failed to gzdopen\n");
      exit (1);
   }

   int colsize = 0;
   sscanf (*(argv+3),"%d",&colsize);
   int bound = 0;
   sscanf (*(argv+4),"%d",&bound);

   Sequence sq;
   sq.setColSize(colsize);
   sq.init();

   Dic features(&ac, CountUp);

   while (gzgets(input, buf, BUFSIZE) != NULL)
   {
      MyUtil::chomp(buf);
      if (MyUtil::IsEOS(buf))
      {
         int size = (int)sq.getRowSize();
         std::list<tmpl*>::iterator it = tmpls.begin();
         for (; it != tmpls.end(); it++)
         {
            for (int i = 0; i < size; i++)
            {
               char *feature = (*it)->expand(&sq,i);
               //fprintf(stdout, "%s\n",feature);
               features.insert(feature);
               ac.release(feature);
            }
         }
         sq.clear();
         continue;
      }
      sq.push(buf);
   }

   nodeptr nil = features.getnil();
   for (int i = HASHSIZE-1; i >= 0; i--)
   {
      nodeptr *p = features.table+i;
      if (*p != nil)
      {
         recalldump(nil, (*p)->left, bound);
         recalldump(nil, (*p)->right, bound);
      }
   }

   if (gzclose(input) != Z_OK)
   {
      fprintf (stderr,"gzclose failed\n");
      exit (1);
   }
   return 0;
}