void Cnflearn::extfeature() { FILE *fp = NULL; if ((fp = fopen(this->corpus.c_str(),"r")) == NULL) { fprintf (stderr,"Couln't open %s\n", this->corpus.c_str()); exit (1); } Sequence sq; sq.setColSize(this->sqcolsize); sq.setAllocSize(this->sqallocsize); sq.setArraySize(this->sqarraysize); sq.init(); this->instance = 0; while(feof(fp) == 0) { MyUtil::sqread(fp,&sq,CNF_BUFSIZE); if (sq.getRowSize() == 0) { continue; } this->extlabel(&sq); this->extract(&sq); this->instance++; sq.clear(); } fclose(fp); }
void Cnflearn::learn(unsigned int iter, unsigned int reg) { if (!this->valid) { fprintf (stderr, "It's not initialized\n"); exit(1); } AllocMemdiscard cache(this->cachesize); Sequence sq; sq.setColSize(this->sqcolsize); sq.setAllocSize(this->sqallocsize); sq.setArraySize(this->sqarraysize); sq.init(); int t = 0; for (unsigned int i = 0; i < iter; i++) { FILE *fp = NULL; if ((fp = fopen(this->corpus.c_str(), "r")) == NULL) { fprintf (stderr, "Couldn't open %s\n", this->corpus.c_str()); exit (1); } while (feof(fp) == 0) { MyUtil::sqread(fp, &sq, CNF_BUFSIZE); if (sq.getRowSize() == 0) { continue; } this->decay(t++); this->update(&sq, &cache, reg); sq.clear(); cache.reset(); } fclose(fp); this->lreport(i); } }
int main (int argc, char **argv) { if (argc < 5) { fprintf (stderr, "template inputfile.gz colsize bound\n"); exit (1); } FILE *fp = NULL; if ((fp = fopen(*(argv+1),"r")) == NULL) { fprintf (stderr, "Couldn't open %s\n", *(argv+1)); exit (1); } PoolAlloc ac(256, 1000000); std::list<tmpl*> tmpls; char buf[BUFSIZE]; while (fgets(buf, BUFSIZE, fp) != NULL) { MyUtil::chomp(buf); if (MyUtil::IsCommentOut(buf)) { continue; } tmpl *t = new tmpl(buf, &ac); tmpls.push_back(t); } int fd = open(*(argv+2), O_RDONLY); if (fd < 0) { fprintf (stderr, "Couldn't open %s\n",*(argv+2)); exit (1); } gzFile input = gzdopen(fd, GZ_MODE); if (input == NULL) { fprintf (stderr, "failed to gzdopen\n"); exit (1); } int colsize = 0; sscanf (*(argv+3),"%d",&colsize); int bound = 0; sscanf (*(argv+4),"%d",&bound); Sequence sq; sq.setColSize(colsize); sq.init(); Dic features(&ac, CountUp); while (gzgets(input, buf, BUFSIZE) != NULL) { MyUtil::chomp(buf); if (MyUtil::IsEOS(buf)) { int size = (int)sq.getRowSize(); std::list<tmpl*>::iterator it = tmpls.begin(); for (; it != tmpls.end(); it++) { for (int i = 0; i < size; i++) { char *feature = (*it)->expand(&sq,i); //fprintf(stdout, "%s\n",feature); features.insert(feature); ac.release(feature); } } sq.clear(); continue; } sq.push(buf); } nodeptr nil = features.getnil(); for (int i = HASHSIZE-1; i >= 0; i--) { nodeptr *p = features.table+i; if (*p != nil) { recalldump(nil, (*p)->left, bound); recalldump(nil, (*p)->right, bound); } } if (gzclose(input) != Z_OK) { fprintf (stderr,"gzclose failed\n"); exit (1); } return 0; }