Ejemplo n.º 1
0
// Loads a decoder model from the in-memory image [ptr, ptr + size).
//
// Fields are consumed in this order: version, type, cost factor, maxid,
// xsize, double-array byte size, the NUL-separated label table, the
// NUL-separated template table, the double-array image and finally maxid_
// float weights.  alpha_float_ is pointed directly into the image and da_ is
// handed a pointer into it, so the buffer presumably has to outlive this
// object -- TODO confirm against da_.set_array().
//
// Returns true on success.  Every CHECK_FALSE below makes the function fail
// when its condition is false (version mismatch, truncated or malformed
// image, unknown template prefix).
bool DecoderFeatureIndex::openFromArray(const char *ptr, size_t size) {
  const char *const end = ptr + size;

  // The fixed-size fields read unconditionally below (five unsigned ints, one
  // int, one double) must all be present before anything is dereferenced.
  const size_t min_header_size =
      sizeof(unsigned int) * 5 + sizeof(int) + sizeof(double);
  CHECK_FALSE(size >= min_header_size) << "model file is broken.";

  unsigned int version_ = 0;
  read_static<unsigned int>(&ptr, &version_);

  // Only the major version (hundreds and above) has to match.
  CHECK_FALSE(version_ / 100 == version / 100)
      << "model version is different: " << version_
      << " vs " << version;
  int type = 0;
  read_static<int>(&ptr, &type);
  read_static<double>(&ptr, &cost_factor_);
  read_static<unsigned int>(&ptr, &maxid_);
  read_static<unsigned int>(&ptr, &xsize_);

  unsigned int dsize = 0;
  read_static<unsigned int>(&ptr, &dsize);

  // Label table: y_str_size bytes holding NUL-terminated names back to back.
  unsigned int y_str_size = 0;
  read_static<unsigned int>(&ptr, &y_str_size);
  CHECK_FALSE(ptr <= end && y_str_size <= static_cast<size_t>(end - ptr))
      << "model file is broken.";
  const char *y_str = read_ptr(&ptr, y_str_size);
  // A terminated table guarantees the NUL scans below stay inside it.
  CHECK_FALSE(y_str_size == 0 || y_str[y_str_size - 1] == '\0')
      << "model file is broken.";
  size_t pos = 0;
  while (pos < y_str_size) {
    y_.push_back(y_str + pos);
    while (y_str[pos++] != '\0') {}
  }

  // Template table: same encoding; each entry starts with 'U' or 'B'.
  CHECK_FALSE(ptr <= end &&
              sizeof(unsigned int) <= static_cast<size_t>(end - ptr))
      << "model file is broken.";
  unsigned int tmpl_str_size = 0;
  read_static<unsigned int>(&ptr, &tmpl_str_size);
  CHECK_FALSE(tmpl_str_size <= static_cast<size_t>(end - ptr))
      << "model file is broken.";
  const char *tmpl_str = read_ptr(&ptr, tmpl_str_size);
  CHECK_FALSE(tmpl_str_size == 0 || tmpl_str[tmpl_str_size - 1] == '\0')
      << "model file is broken.";
  pos = 0;
  while (pos < tmpl_str_size) {
    const char *v = tmpl_str + pos;
    if (v[0] == '\0') {
      // Empty entry: consume only its terminator.  Falling through to the
      // scan below would swallow the following entry as well (or run past
      // the table when the empty entry is the last byte).
      ++pos;
      continue;
    }
    if (v[0] == 'U') {
      unigram_templs_.push_back(v);
    } else if (v[0] == 'B') {
      bigram_templs_.push_back(v);
    } else {
      // CHECK_FALSE fails when its condition is false, so the condition has
      // to be `false` for this error to be reported at all.
      CHECK_FALSE(false) << "unknown type: " << v;
    }
    while (tmpl_str[pos++] != '\0') {}
  }

  make_templs(unigram_templs_, bigram_templs_, &templs_);

  // Double-array image: dsize bytes, used in place.
  da_.set_array(const_cast<char *>(ptr));
  ptr += dsize;

  // Weight vector: maxid_ floats, used in place.
  alpha_float_ = reinterpret_cast<const float *>(ptr);
  ptr += sizeof(alpha_float_[0]) * maxid_;

  // Every byte of the image must have been accounted for.
  CHECK_FALSE(ptr == end) << "model file is broken.";

  return true;
}
Ejemplo n.º 2
0
 // Moves the bytes reported by length(), starting at read_ptr(), to the
 // beginning of the underlying block and resets the cursors so that the read
 // offset is 0 and the write offset equals the number of bytes moved.
 void shared_blk::compact( void ) {
   const int pending = length();
   uint8_t* const base = static_cast< uint8_t* >(_blk->ptr());
   // memmove, not memcpy: source and destination lie in the same block and
   // may overlap.
   memmove( base , read_ptr() , pending );
   _write = pending;
   _read = 0;
 }
Ejemplo n.º 3
0
// Maps the character-property file `filename` and indexes its contents.
//
// Expected layout: an unsigned int category count `csize`, followed by
// `csize` fixed-width 32-byte category names, followed by 0xffff entries of
// sizeof(unsigned int) bytes each, which are exposed through map_.
// clist_ and map_ point into the mapping rather than copying it.
//
// Returns true on success; each CHECK_FALSE makes the function fail when its
// condition is false (open failure or unexpected file size).
bool CharProperty::open(const char *filename) {
  // NOTE(review): `error` is not referenced in this function; it is kept in
  // case the CHECK_FALSE macro expects it -- confirm and remove if unused.
  std::ostringstream error;
  CHECK_FALSE(cmmap_->open(filename, "r"));

  const size_t kNameSize = 32;
  const size_t fixed_size =
      sizeof(unsigned int) + sizeof(unsigned int) * 0xffff;
  const size_t total_size = cmmap_->size();

  // The count field must exist before it is read; requiring the whole fixed
  // part up front also makes the subtraction below safe.
  CHECK_FALSE(total_size >= fixed_size)
      << "invalid file size: " << filename;

  const char *ptr = cmmap_->begin();
  unsigned int csize = 0;
  read_static<unsigned int>(&ptr, csize);

  // Same condition as "header + 32 * csize + table == file size", but
  // written with a division so a huge csize cannot wrap the multiplication
  // and slip past the check.
  const size_t names_size = total_size - fixed_size;
  CHECK_FALSE(names_size % kNameSize == 0 &&
              names_size / kNameSize == csize)
      << "invalid file size: " << filename;

  clist_.clear();
  for (unsigned int i = 0; i < csize; ++i) {
    const char *s = read_ptr(&ptr, kNameSize);
    clist_.push_back(s);
  }

  // Whatever follows the name table is the per-character info table.
  map_ = reinterpret_cast<const CharInfo *>(ptr);

  return true;
}
Ejemplo n.º 4
0
 // Copies up to `sz` bytes from the current read position into `p` and
 // advances the read position by the number of bytes copied.
 //
 // p  - destination buffer; must have room for at least `sz` bytes.
 // sz - maximum number of bytes to copy.
 //
 // Returns the number of bytes copied: min(sz, length()), or 0 when that
 // value is not positive.
 int shared_blk::read( void* p , int sz ) {
   const int read_sz = std::min( sz , static_cast<int>(length()));
   // A negative count would be converted to a huge size_t by memcpy and
   // would move the read cursor backwards, so treat it as "nothing to read".
   if ( read_sz <= 0 ) {
     return 0;
   }
   memcpy( p , read_ptr() , read_sz );
   read_skip( read_sz );
   return read_sz;
 }
Ejemplo n.º 5
0
	// Returns read_ptr() with constness removed when m_writable is set,
	// and nullptr otherwise.
	unsigned char *write_ptr()
	{
		if (!m_writable)
			return nullptr;
		return const_cast<unsigned char *>(read_ptr());
	}
Ejemplo n.º 6
0
/*
 * Read one nickname record from database file `f', written in data file
 * format version `ver', and return it as a newly allocated NickInfo.
 *
 * Fields are read strictly in on-disk order, so the sequence of read_*()
 * calls below must not be rearranged.  Every read is wrapped in SAFE(),
 * whose failure handling is defined elsewhere in this file.
 *
 * On return ni->link holds the _name_ of the link target (not a pointer to
 * it) and the link/channel/history counts are zeroed; the caller is expected
 * to resolve and recompute them once all nicks are loaded.
 */
NickInfo *load_nick(dbFILE *f, int ver)
{
    NickInfo *ni;
    int32 tmp32;
    int i;

    ni = scalloc(sizeof(NickInfo), 1);
    SAFE(read_buffer(ni->nick, f));
    SAFE(read_buffer(ni->pass, f));
    SAFE(read_string(&ni->url, f));
    SAFE(read_string(&ni->email, f));
    SAFE(read_string(&ni->last_usermask, f));
    /* Never leave the usermask / realname NULL; substitute placeholders */
    if (!ni->last_usermask)
	ni->last_usermask = sstrdup("@");
    SAFE(read_string(&ni->last_realname, f));
    if (!ni->last_realname)
	ni->last_realname = sstrdup("");
    SAFE(read_string(&ni->last_quit, f));
    /* Timestamps are stored as 32-bit values; widen via a temporary */
    SAFE(read_int32(&tmp32, f));
    ni->time_registered = tmp32;
    SAFE(read_int32(&tmp32, f));
    ni->last_seen = tmp32;
    SAFE(read_int16(&ni->status, f));
    /* Temporary status bits must not survive a reload */
    ni->status &= ~NS_TEMPORARY;
#ifdef USE_ENCRYPTION
    if (!(ni->status & (NS_ENCRYPTEDPW | NS_VERBOTEN))) {
	if (debug)
	    log("debug: %s: encrypting password for `%s' on load",
		s_NickServ, ni->nick);
	if (encrypt_in_place(ni->pass, PASSMAX) < 0)
	    fatal("%s: Can't encrypt `%s' nickname password!",
		  s_NickServ, ni->nick);
	ni->status |= NS_ENCRYPTEDPW;
    }
#else
    if (ni->status & NS_ENCRYPTEDPW) {
	/* Bail: it makes no sense to continue with encrypted
	 * passwords, since we won't be able to verify them */
	fatal("%s: load database: password for %s encrypted "
	      "but encryption disabled, aborting",
	      s_NickServ, ni->nick);
    }
#endif
    /* Store the _name_ of the link target in ni->link for now;
     * we'll resolve it after we've loaded all the nicks */
    SAFE(read_string((char **)&ni->link, f));
    /* We actually recalculate link and channel counts later, but leave
     * them in for now to avoid changing the data file format */
    SAFE(read_int16(&ni->linkcount, f));
    if (ni->link) {
	SAFE(read_int16(&ni->channelcount, f));
	/* No other information saved for linked nicks, since
	 * they get it all from their link target */
	ni->channelmax = CSMaxReg;
	ni->language = DEF_LANGUAGE;
    } else {
	SAFE(read_int32(&ni->flags, f));
	if (!NSAllowKillImmed)
	    ni->flags &= ~NI_KILL_IMMED;
	if (ver >= 9) {
	    /* Only the NULL/non-NULL state of the stored pointer matters;
	     * it tells us whether a suspension record follows.  This read
	     * is checked like every other one so that a truncated file is
	     * not silently accepted. */
	    SAFE(read_ptr((void **)&ni->suspendinfo, f));
	} else if (ver == 8 && (ni->flags & 0x10000000)) {
	    /* In version 8, 0x10000000 was NI_SUSPENDED */
	    ni->suspendinfo = (SuspendInfo *)1;
	}
	if (ni->suspendinfo) {
	    /* Replace the placeholder with the real record */
	    SuspendInfo *si = smalloc(sizeof(*si));
	    SAFE(read_buffer(si->who, f));
	    SAFE(read_string(&si->reason, f));
	    SAFE(read_int32(&tmp32, f));
	    si->suspended = tmp32;
	    SAFE(read_int32(&tmp32, f));
	    si->expires = tmp32;
	    ni->suspendinfo = si;
	}
	SAFE(read_int16(&ni->accesscount, f));
	/* NOTE(review): a negative accesscount from a corrupt file is not
	 * rejected here -- confirm whether that needs handling */
	if (ni->accesscount) {
	    char **access;
	    access = smalloc(sizeof(char *) * ni->accesscount);
	    ni->access = access;
	    for (i = 0; i < ni->accesscount; i++, access++)
		SAFE(read_string(access, f));
	}
	SAFE(read_int16(&ni->channelcount, f));
	SAFE(read_int16(&ni->channelmax, f));
	if (ver <= 8) {
	    /* Fields not initialized or updated properly */
	    /* These will be updated by load_cs_dbase() */
	    ni->channelcount = 0;
	    if (ver == 5)
		ni->channelmax = CSMaxReg;
	}
	SAFE(read_int16(&ni->language, f));
	/* Fall back to the default language when the stored index is negative
	 * or has no text table.
	 * NOTE(review): the upper bound of langtexts[] is not visible here;
	 * add a check against the table size if one is available. */
	if (ni->language < 0 || !langtexts[ni->language])
	    ni->language = DEF_LANGUAGE;
    }
    /* Link and channel counts are recalculated later */
    ni->linkcount = 0;
    ni->channelcount = 0;
    ni->historycount = 0;
    return ni;
}
Ejemplo n.º 7
0
// Loads a CRF model file into memory owned by this object.
//
// filename         - path of the binary model file.
// this_path_factor - scaling applied (together with cost_factor_) to the
//                    ysize_ * ysize_ weights that get_b_templs_start()
//                    points at.
//
// Returns 0 on success and -1 on any failure (file cannot be opened or
// mapped, allocation failure, malformed string tables, too many templates,
// bigram templates other than the single template "B", or trailing/missing
// bytes at the end of the file).
//
// NOTE(review): buffers allocated before a failing return are not released
// here; presumably the owner frees them on destruction -- confirm.
// NOTE(review): the size fields read from the file are not validated against
// the size of the mapping before they are used.
int CrfModel::load_model(const char *filename, float this_path_factor) {
    //CHECK_FALSE(mmap_.open(filename1)) << mmap_.what();
    // Read the model file through mmap.
    // The double array and the alpha array used to be used straight from the
    // mapping; they are now copied into memory owned by the model.
    Mmap <char> mmap_;
    // Probe with fopen first so that the failure log can include errno (%m).
    FILE* fp = fopen(filename, "r");
    if(fp == NULL) {
        ul_writelog(UL_LOG_FATAL, "[%s]: open model file[%s] failed! Error[%m]", __func__, filename);
        return -1;
    }
    fclose(fp);

    if (!mmap_.open(filename)) {
        ul_writelog(UL_LOG_FATAL, "[%s]: open filename[%s] failed", __func__, filename);
        return -1;
    }

    char *ptr = mmap_.begin();
    // Read the version number.
    read_static<unsigned int>(&ptr, &version);

    ul_writelog(UL_LOG_TRACE, "[%s]: VERSION = %d", __func__,version);
    // The version compatibility check is commented out:
    //CHECK_FALSE(version_ / 100 == version / 100)
    //	<< "model version is different: " << version_
    //	<< " vs " << version << " : " << filename1;

    int type = 0;
    read_static<int>(&ptr, &type);
    read_static<double>(&ptr, &cost_factor_);
    read_static<unsigned int>(&ptr, &maxid_);
    read_static<unsigned int>(&ptr, &xsize_);

    unsigned int dsize = 0;
    read_static<unsigned int>(&ptr, &dsize);

    // Label names (y_) are kept in an array of C strings instead of a
    // vector<string>.
    unsigned int y_str_size;
    read_static<unsigned int>(&ptr, &y_str_size);
    char *y_str = read_ptr(&ptr, y_str_size);
    // The NUL scans below rely on the table ending with a terminator;
    // without one they would run into the bytes that follow the table.
    if(y_str_size > 0 && y_str[y_str_size - 1] != '\0') {
        ul_writelog(UL_LOG_FATAL, "[%s]: model file is broken: %s", __func__, filename);
        return -1;
    }
    size_t pos = 0;

    // Earlier variant that copied into fixed-size slots to reduce the memory
    // used by y_:
    /*
    ysize_ = 0;
    while (pos < y_str_size) {
    	//y_.push_back(y_str + pos);
    	strncpy(y_[ysize_], y_str + pos, CRF_MAX_WORD_LEN);
    	ysize_++;
    	while (y_str[pos++] != '\0') {}
    }
    */

    // First pass: count the labels and find the longest label name.
    ysize_ = 0;
    max_yname_len = 0;
    while(pos < y_str_size) {
        ysize_++;
        unsigned int yname_len = 0;
        while(y_str[pos++] != '\0') {
            yname_len++;
        }
        if(yname_len > max_yname_len) {
            max_yname_len = yname_len;
        }
    }
    // Allocate y_ to hold the label names; every slot is max_yname_len + 1
    // bytes so that the longest name still has room for its terminator.
    y_ = (char **)calloc(ysize_, sizeof(char *));
    if(NULL == y_) {
        ul_writelog(UL_LOG_FATAL, "[%s]: fail to malloc for yname", __func__);
        return -1;
    }
    for(unsigned int i = 0; i < ysize_; i++) {
        y_[i] = (char*)calloc(max_yname_len + 1, sizeof(char));
        if(NULL == y_[i]) {
            ul_writelog(UL_LOG_FATAL, "[%s]: fail to malloc for yname[%d]", __func__, i);
            return -1;
        }
    }
    // Second pass: rescan the table and copy each label name.
    pos = 0;
    unsigned int ycount = 0;
    while(pos < y_str_size) {
        bzero(y_[ycount], max_yname_len + 1);
        strncpy(y_[ycount], y_str + pos, max_yname_len);
        while(y_str[pos++] != '\0') {
        }
        ycount++;
    }

    //debug
    /*
    for(unsigned int i = 0; i < ysize_; i++){
    fprintf(stderr, "y[%d]=[%s]\n", i, y_[i]);
    	}
    */

    //	fprintf(stderr,"ysize = %d\n",ysize_);
    // load unigram templs and bigram templs
    // Templates are stored in arrays of C strings instead of vector<string>;
    // to cut call overhead, CrfTag fetches the template pointers while
    // tagging.
    unigram_templs_num = 0;
    unigram_templs_ = (char **)calloc(MAX_TEMPLS_NUM, sizeof(char *));
    if(NULL == unigram_templs_) {
        ul_writelog(UL_LOG_FATAL, "[%s]: fail to malloc for unigram templs", __func__);
        return -1;
    }
    bigram_templs_num = 0;
    bigram_templs_ = (char **)calloc(MAX_TEMPLS_NUM, sizeof(char *));
    if(NULL == bigram_templs_) {
        ul_writelog(UL_LOG_FATAL, "[%s]: fail to malloc for bigram templs", __func__);
        return -1;
    }

    unsigned int tmpl_str_size;
    read_static<unsigned int>(&ptr, &tmpl_str_size);
    char *tmpl_str = read_ptr(&ptr, tmpl_str_size);
    // Same termination requirement as for the label table.
    if(tmpl_str_size > 0 && tmpl_str[tmpl_str_size - 1] != '\0') {
        ul_writelog(UL_LOG_FATAL, "[%s]: model file is broken: %s", __func__, filename);
        return -1;
    }
    pos = 0;
    while (pos < tmpl_str_size) {
        char *v = tmpl_str + pos;
        if (v[0] == '\0') {
            // Empty entry: consume only its terminator.  Falling through to
            // the scan at the bottom of the loop would swallow the following
            // entry as well.
            ++pos;
            continue;
        }
        if (v[0] == 'U') {
            // `>=`: the pointer array has exactly MAX_TEMPLS_NUM slots, so
            // index MAX_TEMPLS_NUM is already out of bounds.
            if(unigram_templs_num >= MAX_TEMPLS_NUM) {
                ul_writelog(UL_LOG_FATAL, "[%s]: too many unigram templs, unigram templs num=%d", __func__, unigram_templs_num);
                return -1;
            }
            unigram_templs_[unigram_templs_num] = (char *)calloc(MAX_TEMPLS_LEN, sizeof(char));
            if(NULL == unigram_templs_[unigram_templs_num]) {
                ul_writelog(UL_LOG_FATAL, "[%s]: fail to malloc for unigram templs", __func__);
                return -1;
            }
            // Copy at most MAX_TEMPLS_LEN - 1 bytes so the last byte of the
            // zero-filled buffer always remains as the terminator.
            strncpy(unigram_templs_[unigram_templs_num], v, MAX_TEMPLS_LEN - 1);
            ul_writelog(UL_LOG_TRACE, "[%s]: unigram_templs[%d](%s)", __func__, unigram_templs_num, unigram_templs_[unigram_templs_num]);
            unigram_templs_num++;
        } else if (v[0] == 'B') {
            if(bigram_templs_num >= MAX_TEMPLS_NUM) {
                ul_writelog(UL_LOG_FATAL, "[%s]: too many bigram templs, bigram templs num=%d", __func__, bigram_templs_num);
                return -1;
            }
            bigram_templs_[bigram_templs_num] = (char *)calloc(MAX_TEMPLS_LEN, sizeof(char));
            if(NULL == bigram_templs_[bigram_templs_num]) {
                ul_writelog(UL_LOG_FATAL, "[%s]: fail to malloc for bigram templs", __func__);
                return -1;
            }
            strncpy(bigram_templs_[bigram_templs_num], v, MAX_TEMPLS_LEN - 1);
            ul_writelog(UL_LOG_TRACE, "[%s]: bigram_templs[%d](%s)", __func__, bigram_templs_num, bigram_templs_[bigram_templs_num]);
            bigram_templs_num++;
        } else {
            // Entries with any other prefix are skipped without an error:
            //CHECK_FALSE(true) << "unknown type: " << v;
        }
        // Advance past this entry and its terminator.
        while (tmpl_str[pos++] != '\0') {}
    }

    // Only models with exactly one bigram template, the bare "B", are
    // accepted.
    if( bigram_templs_num != 1 || strcmp(bigram_templs_[0],"B") !=0 )
    {
        ul_writelog(UL_LOG_WARNING,"this is not a single bigram templetes.\n");
        return -1;
    }
    // Copy the double array (da) out of the mapping.
    da_mem_ = (void*)malloc(dsize);
    if(da_mem_ == NULL) {
        ul_writelog(UL_LOG_WARNING, "[%s]: malloc for da memory fail.", __func__);
        return -1;
    }
    memcpy(da_mem_, ptr, dsize);
    ul_writelog(UL_LOG_DEBUG, "[%s]: copy double array to memory, dsize=%d", __func__, dsize);

    da_.set_array(da_mem_);
    ptr += dsize;
    // Copy the alpha array, converting each float weight to an int scaled by
    // CONV_INT_FACTOR.  With a large factor there is essentially no loss of
    // precision.
    float *alpha_float_ = reinterpret_cast<float *>(ptr);
    ptr += sizeof(alpha_float_[0]) * maxid_;

    alpha_int_ = (int*)calloc(maxid_ + 1, sizeof(int));
    if(alpha_int_ == NULL) {
        ul_writelog(UL_LOG_WARNING, "[%s]: malloc for alpha int array fail.maxid=%d", __func__, maxid_);
        return -1;
    }
    int conv_alpha_int;
    for(unsigned int i = 0; i < maxid_; i++) {
        conv_alpha_int = (int)(alpha_float_[i] * CONV_INT_FACTOR);
        alpha_int_[i] = conv_alpha_int;
        //		fprintf(stderr,"<<>>%d\n",alpha_int_[i]);
    }
    ul_writelog(UL_LOG_DEBUG, "[%s]: conv alpha from float to int, maxid=%d", __func__, maxid_);
    if( version == 100 )
    {
        // Version 100 files end right after the alpha array.
        if (ptr != mmap_.end())
        {
            ul_writelog(UL_LOG_FATAL, "[%s]: model file is broken: %s", __func__, filename);
            return -1;
        }
    }
    else
    {
        // Other versions append two auxiliary int arrays.
        read_static<int>(&ptr, &dan_size); // size of the auxiliary structure for single templates
        read_static<int>(&ptr, &zu_size);   // size of the auxiliary structure for combined templates
        dan_array = (int*)calloc(dan_size,sizeof(int));
        zu_array = (int*)calloc(zu_size,sizeof(int));
        if( dan_array == NULL || zu_array == NULL)
        {
            ul_writelog(UL_LOG_FATAL, "dan_array or zu_array is NULL");
            return -1;
        }
        ul_writelog(UL_LOG_DEBUG, "%d %d\n",dan_size,zu_size);
        memcpy(dan_array,ptr,dan_size*sizeof(int));
        ptr+=dan_size*sizeof(int);
        memcpy(zu_array,ptr,zu_size*sizeof(int));
        ptr+=zu_size*sizeof(int);
        if (ptr != mmap_.end())
        {
            ul_writelog(UL_LOG_FATAL, "[%s]: model file is broken: %s", __func__, filename);
            return -1;
        }
    }
    mmap_.close(); // release the memory held by the mapping
    b_templs_alpha_p = get_b_templs_start(); // pointer to the weights of the "B" template
    if( NULL == b_templs_alpha_p )
    {
        ul_writelog(UL_LOG_WARNING,"b_templs_alpha_p is NULL.\n");
        return -1;
    }

    // Pre-scale the ysize_ x ysize_ weights once at load time.
    path_factor = this_path_factor;
    for(unsigned int i = 0; i < ysize_ * ysize_; i++) {
        *(b_templs_alpha_p + i) = int(*(b_templs_alpha_p + i) * path_factor * cost_factor_);
        /*
        fprintf(stderr, "%d ", *(b_templs_alpha_p + i));
        if((i+1) % ysize_ == 0){
        	fprintf(stderr, "\n");
        }
        */
    }
    ul_writelog(UL_LOG_DEBUG, "[%s]: set path factor to %f", __func__, path_factor);
    model_init_stat = MODEL_INIT_FINISH;
    return 0;
}
Ejemplo n.º 8
0
// Copies one object of type T from the byte stream into *value.
//
// The source address comes from read_ptr(ptr, sizeof(T)), which presumably
// returns the current position and advances *ptr by that many bytes --
// confirm against read_ptr.  memcpy is used instead of a cast-and-dereference,
// so the source does not have to be aligned for T.
template <class T> static inline void read_static(char **ptr,
        T *value) {
    memcpy(value, read_ptr(ptr, sizeof(T)), sizeof(T));
}