Esempio n. 1
0
// Scans a whitespace-delimited file and rebuilds the tag set from it.
//
// Lines that are empty or begin with a space or tab are skipped.  Every other
// line is split on tabs/spaces and must have the same number of columns as
// the first such line.  The last column of each line is collected as a tag;
// afterwards y_ holds the distinct tags in std::set iteration order and
// xsize_ holds the column count minus one.
//
// Returns true on success.  An unreadable file or an inconsistent column
// count is handled by CHECK_FALSE (presumably by returning false -- see the
// macro definition).
bool EncoderFeatureIndex::openTagSet(const char *filename) {
  std::ifstream ifs(WPATH(filename));
  CHECK_FALSE(ifs) << "no such file or directory: " << filename;

  std::set<std::string> candset;
  size_t max_size = 0;  // column count of the first data line; 0 = none yet
  scoped_fixed_array<char *, 1024> column;
  scoped_fixed_array<char, 8192> line;

  while (ifs.getline(line.get(), line.size())) {
    // Blank lines and lines with leading whitespace carry no data.
    switch (line[0]) {
      case '\0':
      case ' ':
      case '\t':
        continue;
      default:
        break;
    }

    const size_t size = tokenize2(line.get(), "\t ",
                                  column.get(), column.size());
    if (max_size == 0) {
      max_size = size;
    }
    CHECK_FALSE(max_size == size)
        << "inconsistent column size: "
        << max_size << " " << size << " " << filename;

    // The last column is the tag; everything before it is input.
    candset.insert(column[max_size - 1]);
    xsize_ = size - 1;
  }

  ifs.close();

  y_.clear();
  std::set<std::string>::iterator tag = candset.begin();
  while (tag != candset.end()) {
    y_.push_back(*tag);
    ++tag;
  }

  return true;
}
Esempio n. 2
0
 // Splits one tab/space-separated line into columns and hands them to add2().
 //
 // The line is first duplicated through feature_index_->strdup() so that the
 // tokenizer works on a private, writable copy.  Returns false when add2()
 // reports failure, true otherwise.
 bool TaggerImpl::add(const char* line) {
   const char* column[8192];
   char *p = feature_index_->strdup(line);
   // The last argument is a capacity in elements (other call sites pass the
   // array length); sizeof(column) alone would be a byte count and would
   // overstate the capacity by a factor of sizeof(char*).
   const size_t size = tokenize2(p, "\t ", column,
                                 sizeof(column) / sizeof(column[0]));
   if (!add2(size, column, false)) return false;
   return true;
 }
Esempio n. 3
0
// Reads up to 10 records from "processes_q5.txt" and pushes each one onto
// the process queue.
//
// Every line is split with tokenize2(line, ", "): token 0 is copied into the
// process name, token 1 is parsed as the priority and token 2 as the runtime;
// the pid is always set to 0.  Reading stops early at end-of-file or on a
// read error.  If the file cannot be opened an error is reported and nothing
// is pushed.
void readFile(){
	// push() takes the record by value, so a stack object is sufficient and
	// there is nothing to free (or to NULL-check) on any exit path.
	proc process;
	char line[256] = {0};
	FILE *fp = fopen("processes_q5.txt","r");

	// error checking
	if(fp==NULL){
		// perror() appends ": <reason>" and a newline itself
		perror("Error opening file.");
		return;
	}

	for(int i=0; i<10; i++){
		// stop on EOF/error instead of re-parsing the previous line's buffer
		if(fgets(line,256,fp)==NULL){
			break;
		}
		for(int j=0;j<256;j++){process.name[j] = 0;} // zero char array
		// NOTE(review): ownership of the returned token array is not visible
		// from here -- confirm whether the caller is expected to free it.
		char **tokenized = tokenize2(line,", ");
		strcpy(process.name, tokenized[0]);
		// assign integer values from file
		process.priority = atoi(tokenized[1]);
		process.pid = 0;
		process.runtime = atoi(tokenized[2]);

		// push process onto queue
		push(process);

		// if queue contains no processes
		if(first==NULL){
			first = last;
		}
	}

	fclose(fp);
}
// Loads POS-id rewrite rules from `filename`.
//
// Each line must split into exactly two space/tab-separated fields: a
// pattern and an id consisting of decimal digits only.  When `iconv` is
// non-null, every line is passed through iconv->convert() before parsing.
// If the file cannot be opened, a single catch-all rule ("*" -> "1") is
// installed instead.  Always returns true; malformed lines are reported
// through CHECK_DIE.
bool POSIDGenerator::open(const char *filename,
                          Iconv *iconv) {
  std::ifstream ifs(filename);
  const bool opened = !ifs.fail();

  if (opened) {
    char *col[2];
    std::string line;
    while (std::getline(ifs, line)) {
      if (iconv) {
        iconv->convert(&line);
      }

      // tokenize2() takes a mutable char*, hence the const_cast.
      char *text = const_cast<char *>(line.c_str());
      const size_t n = tokenize2(text, " \t", col, 2);
      CHECK_DIE(n == 2) << "format error: " << line;

      // The id column may contain decimal digits only.
      char *p = col[1];
      while (*p) {
        CHECK_DIE(*p >= '0' && *p <= '9') << "not a number: " << col[1];
        ++p;
      }

      rewrite_.resize(rewrite_.size() + 1);
      rewrite_.back().set_pattern(col[0], col[1]);
    }
    return true;
  }

  // No rule file: fall back to one rule that maps everything to id 1.
  std::cerr << filename
            << " is not found. minimum setting is used" << std::endl;
  rewrite_.resize(1);
  rewrite_.back().set_pattern("*", "1");
  return true;
}
Esempio n. 5
0
// Loads POS-id rewrite rules, preferring an in-memory dictionary entry.
//
// If jma::JMA_Dictionary has an entry for `filename`, its text buffer is read
// through an istrstream; otherwise the file is opened from disk.  Each line
// must split into exactly two space/tab-separated fields: a pattern and an id
// consisting of decimal digits only.  When `iconv` is non-null, every line is
// passed through iconv->convert() first.  If the stream is not usable, a
// single catch-all rule ("*" -> "1") is installed instead.  Always returns
// true; malformed lines are reported through CHECK_DIE.
bool POSIDGenerator::open(const char *filename,
                          Iconv *iconv) {
  scoped_ptr<std::istream> p_ist;

  const jma::DictUnit* dict = jma::JMA_Dictionary::instance()->getDict(filename);
  if (!dict) {
    p_ist.reset(new std::ifstream(filename));
  } else {
    p_ist.reset(new std::istrstream(dict->text_, dict->length_));
  }

  std::istream &input = *p_ist;
  if (input.fail()) {
    // No usable source: fall back to one rule that maps everything to id 1.
    std::cerr << filename
              << " is not found. minimum setting is used" << std::endl;
    rewrite_.resize(1);
    rewrite_.back().set_pattern("*", "1");
    return true;
  }

  char *col[2];
  std::string line;
  while (std::getline(input, line)) {
    if (iconv) {
      iconv->convert(&line);
    }

    // tokenize2() takes a mutable char*, hence the const_cast.
    char *text = const_cast<char *>(line.c_str());
    const size_t n = tokenize2(text, " \t", col, 2);
    CHECK_DIE(n == 2) << "format error: " << line;

    // The id column may contain decimal digits only.
    char *p = col[1];
    while (*p) {
      CHECK_DIE(*p >= '0' && *p <= '9') << "not a number: " << col[1];
      ++p;
    }

    rewrite_.resize(rewrite_.size() + 1);
    rewrite_.back().set_pattern(col[0], col[1]);
  }
  return true;
}
Esempio n. 6
0
// Reads up to FILE_LENGTH records from "processes_q5.txt" and pushes the
// ones selected by priority_filter onto the queue *p1.
//
//   priority_filter == -1 -> load all processes
//   priority_filter ==  0 -> load only processes with priority == 0
//   priority_filter ==  1 -> load only processes with priority != 0
//
// Every line is split with tokenize2(buffer, ", "): token 0 is the name,
// token 1 the priority, token 2 the memory and token 3 the runtime; pid and
// address are always set to 0.  Reading stops early at end-of-file or on a
// read error.  If the file cannot be opened an error is reported and nothing
// is pushed.
void readFile(queue** p1, int priority_filter){
  char buffer[CHAR_LENGTH] = {0};
  FILE *f1 = fopen("processes_q5.txt","r");
  if (f1 == NULL){
    // perror() appends ": <reason>" and a newline itself
    perror("Error opening file");
    return;
  }

  for (int i = 0; i < FILE_LENGTH; i++){
    // stop on EOF/error instead of re-parsing the previous line's buffer
    if (fgets(buffer, CHAR_LENGTH, f1) == NULL){
      break;
    }

    // NOTE(review): ownership of the returned token array is not visible
    // from here -- confirm whether the caller is expected to free it.
    char **tokenized = tokenize2(buffer,", ");

    // priority is in tokenized[1]; parse it once and reuse it
    const int priority = atoi(tokenized[1]);
    const int selected = (priority_filter == -1) ||                // all
                         (priority_filter == 0 && priority == 0) || // only priority == 0
                         (priority_filter == 1 && priority != 0);   // only priority != 0
    if (!selected){
      continue;
    }

    // push() takes the record by value, so a stack object is sufficient and
    // no per-line allocation is left behind.
    proc temp_proc;
    strcpy(temp_proc.name, tokenized[0]);    //name
    temp_proc.priority = priority;           //priority
    temp_proc.pid = 0;                       //pid
    temp_proc.address = 0;                   //address
    temp_proc.memory = atoi(tokenized[2]);   //memory
    temp_proc.runtime = atoi(tokenized[3]);  //runtime

    //push process onto queue
    push(p1, temp_proc);
  }
  fclose(f1);
}
Esempio n. 7
0
/*! @decl array(array(string)|string) tokenize(string code)
 *!
 *!   Tokenize a string of Pike tokens.
 *!
 *! @returns
 *!   Returns an array with Pike-level tokens and the remainder (a
 *!   partial token), if any.
 */
/* Implementation notes:
 *  - The single argument is fetched with get_all_args() using the "%W"
 *    format and dispatched on its size_shift (0, 1 or 2) to the matching
 *    tokenizeN() helper.
 *  - Each helper returns an offset `left` into the input; everything from
 *    that offset to the end of the string becomes the remainder string.
 *  - The result left on the interpreter stack is a 2-element aggregate:
 *    the token array followed by the remainder string.
 */
static void f_tokenize( INT32 args )
{
  struct array *res;
  struct pike_string *left_s = NULL; /* Make gcc happy. */
  struct pike_string *data;
  int left;
  ONERROR tmp;

  get_all_args("tokenize", args, "%W", &data);

  /* Empty input: answer ({ ({}), "" }) without allocating a token array. */
  if(!data->len)
  {
    pop_n_elems(args);
    push_empty_array();
    push_empty_string();
    f_aggregate(2);
    return;
  }

  res = allocate_array_no_init( 0, 128 );
  /* Registers do_free_arrayptr on &res for the duration of tokenizing
   * (presumably so the array is released if an error is thrown -- confirm
   * against the ONERROR documentation). */
  SET_ONERROR(tmp, do_free_arrayptr, &res);
  
  /* &res is passed by address, so the helpers may replace the array. */
  switch(data->size_shift)
  {
    case 0:
      left = tokenize0(&res, STR0(data), data->len);
      left_s = make_shared_binary_string0(STR0(data)+left, data->len-left);
      break;
    case 1:
      left = tokenize1(&res, STR1(data), data->len);
      left_s = make_shared_binary_string1(STR1(data)+left, data->len-left);
      break;
    case 2:
      left = tokenize2(&res,STR2(data), data->len);
      left_s = make_shared_binary_string2(STR2(data)+left, data->len-left);
      break;
#ifdef PIKE_DEBUG
    default:
      Pike_error("Unknown shift size %d.\n", data->size_shift);
#endif
  }

  /* Tokenizing is done; drop the error handler before handing res over. */
  UNSET_ONERROR(tmp);
  pop_n_elems(args);
  /* A token array of size zero is freed and replaced by the empty array. */
  if (!res->size) {
    free_array(res);
    push_empty_array();
  }
  else
    push_array(res);
  push_string( left_s );
  f_aggregate( 2 );
}
Esempio n. 8
0
 // Parses a tab/space-separated list of integers into *level.
 //
 // level_str is copied into a local buffer of BUF_SIZE bytes (longer input is
 // truncated), split on tabs and spaces, and each token is converted with
 // std::atoi().  *level is cleared first.  Always returns true.
 static bool parseLevel(const char *level_str,
                        std::vector<int> *level) {
   char buf[BUF_SIZE];
   char *col[512];
   // strncpy() leaves the destination unterminated when the source fills it,
   // so copy one byte less and terminate explicitly.
   std::strncpy(buf, level_str, sizeof(buf) - 1);
   buf[sizeof(buf) - 1] = '\0';
   level->clear();
   // The capacity is a count of elements; sizeof(col) alone is a byte count
   // and would let the tokenizer write past the end of col.
   const size_t n = tokenize2(buf, "\t ", col, sizeof(col) / sizeof(col[0]));
   for (size_t i = 0; i < n; ++i) {
     level->push_back(std::atoi(col[i]));
   }
   return true;
 }
Esempio n. 9
0
 // Parses a tab/space-separated list of integers into *level.
 //
 // level_str is copied into a scratch buffer of BUF_SIZE bytes (longer input
 // is truncated), split on tabs and spaces, and each token is converted with
 // std::atoi().  *level is cleared first.  Always returns true.
 static bool parseLevel(const char *level_str,
                        std::vector<int> *level) {
   scoped_fixed_array<char, BUF_SIZE> buf;
   scoped_fixed_array<char *, 512> col;
   // strncpy() leaves the destination unterminated when the source fills it,
   // so copy one byte less and terminate explicitly.
   std::strncpy(buf.get(), level_str, buf.size() - 1);
   buf[buf.size() - 1] = '\0';
   level->clear();
   const size_t n = tokenize2(buf.get(), "\t ", col.get(), col.size());
   for (size_t i = 0; i < n; ++i) {
     level->push_back(std::atoi(col[i]));
   }
   return true;
 }
// Loads the unigram/bigram feature templates and then the rewrite rules.
//
// With the "identity-template" parameter set, the two template lists are
// reset to the fixed entries "U:%u" and "B:%r/%l" and nothing is read from
// disk.  Otherwise FEATURE_FILE under the "dicdir" parameter is read line by
// line: empty lines and lines starting with '#' or a space are skipped, and
// every other line must be "UNIGRAM <template>" or "BIGRAM <template>".
// Finally REWRITE_FILE under the same directory is opened through rewrite_.
//
// Returns true on success; an unreadable or malformed feature file is handled
// by CHECK_FALSE (presumably by returning false -- see the macro definition).
bool FeatureIndex::openTemplate(const Param &param) {
  const bool use_identity = param.get<bool>("identity-template");
  if (use_identity) {
    unigram_templs_.clear();
    unigram_templs_.push_back("U:%u");
    bigram_templs_.clear();
    bigram_templs_.push_back("B:%r/%l");
    return true;
  }

  // first, read the feature templates
  const std::string filename =
      create_filename(param.get<std::string>("dicdir"), FEATURE_FILE);
  std::ifstream ifs(filename.c_str());
  CHECK_FALSE(ifs) << "no such file or directory: " << filename;

  char *column[4];
  char buf[BUF_SIZE];

  bigram_templs_.clear();
  unigram_templs_.clear();

  while (ifs.getline(buf, sizeof(buf))) {
    // Empty lines, '#' comments and lines with a leading space are ignored.
    switch (buf[0]) {
      case '\0':
      case '#':
      case ' ':
        continue;
      default:
        break;
    }

    CHECK_FALSE(tokenize2(buf, "\t ", column, 2) == 2)
        << "format error: " <<filename;

    const std::string kind(column[0]);
    if (kind == "UNIGRAM") {
      unigram_templs_.push_back(this->strdup(column[1]));
    } else if (kind == "BIGRAM") {
      bigram_templs_.push_back(this->strdup(column[1]));
    } else {
      CHECK_FALSE(false) << "format error: " <<  filename;
    }
  }

  // second, open rewrite rules
  const std::string rewrite_file =
      create_filename(param.get<std::string>("dicdir"), REWRITE_FILE);
  rewrite_.open(rewrite_file.c_str());

  return true;
}
// Converts a text model ("<weight>\t<feature>" per line) into a binary file.
//
// The output consists of, in order: the number of distinct features as an
// unsigned int, one double weight per feature in std::map key order, and the
// raw Darts double array built over those keys.  When a feature occurs more
// than once, the weight of its first occurrence is kept.
//
// Always returns true; an unreadable input, a malformed line, an unwritable
// output or a double-array build failure is reported through CHECK_DIE.
bool FeatureIndex::convert(const char* txtfile, const char *binfile) {
  typedef std::map<std::string, double> WeightMap;

  std::ifstream ifs(txtfile);

  CHECK_DIE(ifs) << "no such file or directory: " << txtfile;

  WeightMap dic;
  char *column[4];
  char buf[BUF_SIZE];

  while (ifs.getline(buf, sizeof(buf))) {
    CHECK_DIE(tokenize2(buf, "\t", column, 2) == 2)
        << "format error: " << buf;

    // Column 0 is the weight, column 1 the feature string.  insert() leaves
    // an already present feature untouched.
    const double weight = atof(column[0]);
    dic.insert(WeightMap::value_type(column[1], weight));
  }

  std::ofstream ofs(binfile, std::ios::out | std::ios::binary);
  CHECK_DIE(ofs) << "permission denied: " << binfile;

  const unsigned int size = static_cast<unsigned int>(dic.size());
  ofs.write(reinterpret_cast<const char*>(&size), sizeof(unsigned int));

  // Emit the weights and gather the key pointers in one pass; the pointers
  // stay valid because dic is not modified afterwards.
  std::vector<char *> key;
  WeightMap::const_iterator it = dic.begin();
  while (it != dic.end()) {
    ofs.write(reinterpret_cast<const char*>(&it->second), sizeof(double));
    key.push_back(const_cast<char*>(it->first.c_str()));
    ++it;
  }

  Darts::DoubleArray da;
  CHECK_DIE(da.build(key.size(), &key[0], 0, 0, 0) == 0)
      << "unkown error in building double array: " << binfile;

  ofs.write(reinterpret_cast<const char*>(da.array()),
            da.unit_size() * da.size());

  return true;
}
Esempio n. 12
0
// Splits `line` into fields on `delim` via tokenize2().
//
// A single trailing control character (e.g. the '\r' left over from a
// Microsoft end-of-line) is removed from a copy of the line first.  An empty
// line is passed to tokenize2() unchanged.
std::vector<wxString> get_fields2(const std::string & line, const char delim){
	std::string tmp = line;
	// back() on an empty string is undefined, and iscntrl() requires a value
	// representable as unsigned char (a negative char is undefined behaviour).
	if (!tmp.empty() && iscntrl(static_cast<unsigned char>(tmp.back()))) {
		tmp.pop_back(); // remove Microsoft end-of-line
	}
	return tokenize2(tmp, delim);
}
Esempio n. 13
0
// Compiles the character-property definitions into a binary table.
//
//   cfile  character category definition file; when it cannot be opened the
//          built-in CHAR_PROPERTY_DEF_DEFAULT text is parsed instead.
//   ufile  unknown-word definition file; when it cannot be opened the
//          built-in UNK_DEF_DEFAULT text is parsed instead.
//   ofile  output path for the binary table.
//
// Output layout, in write order: the number of categories (unsigned int),
// one 32-byte zero-padded name per category in definition order, then the
// raw CharInfo table with 0xffff entries.
//
// Always returns true; every validation failure goes through CHECK_DIE
// (presumably fatal -- see the macro definition).
bool CharProperty::compile(const char *cfile,
                           const char *ufile,
                           const char *ofile) {
  scoped_fixed_array<char, BUF_SIZE> line;
  scoped_fixed_array<char *, 512> col;
  size_t id = 0;  // next default_type value, assigned in definition order
  std::vector<Range> range;
  std::map<std::string, CharInfo> category;
  std::vector<std::string> category_ary;  // category names in definition order
  std::ifstream ifs(WPATH(cfile));
  std::istringstream iss(CHAR_PROPERTY_DEF_DEFAULT);
  std::istream *is = &ifs;

  // Fall back to the built-in definition when cfile is not readable.
  if (!ifs) {
    std::cerr << cfile
              << " is not found. minimum setting is used" << std::endl;
    is = &iss;
  }

  // Pass 1: parse category definitions and code point ranges.
  while (is->getline(line.get(), line.size())) {
    // Skip empty lines and '#' comment lines.
    if (std::strlen(line.get()) == 0 || line[0] == '#') {
      continue;
    }
    const size_t size = tokenize2(line.get(), "\t ", col.get(), col.size());
    CHECK_DIE(size >= 2) << "format error: " << line.get();

    // Range line, e.g.:
    // 0xFFFF..0xFFFF hoge hoge hgoe #
    if (std::strncmp(col[0], "0x", 2) == 0) {
      std::string low = col[0];
      std::string high;
      size_t pos = low.find("..");

      // "low..high" gives both bounds; a single value is a one-element range.
      if (pos != std::string::npos) {
        high = low.substr(pos + 2, low.size() - pos - 2);
        low  = low.substr(0, pos);
      } else {
        high = low;
      }

      Range r;
      r.low = atohex(low.c_str());
      r.high = atohex(high.c_str());

      // Both bounds must be below 0xffff, which is also the table size used
      // further down.
      CHECK_DIE(r.low >= 0 && r.low < 0xffff &&
                r.high >= 0 && r.high < 0xffff &&
                r.low <= r.high)
          << "range error: low=" << r.low << " high=" << r.high;

      // Remaining columns are category names; each must already be defined.
      for (size_t i = 1; i < size; ++i) {
        if (col[i][0] == '#') {
          break;  // skip comments
        }
        CHECK_DIE(category.find(std::string(col[i])) != category.end())
            << "category [" << col[i] << "] is undefined";
        r.c.push_back(col[i]);
      }
      range.push_back(r);
    } else {
      // Category definition line: <name> <invoke> <group> <length>
      CHECK_DIE(size >= 4) << "format error: " << line.get();

      std::string key = col[0];
      CHECK_DIE(category.find(key) == category.end())
          << "category " << key << " is already defined";

      CharInfo c;
      std::memset(&c, 0, sizeof(c));
      c.invoke  = std::atoi(col[1]);
      c.group   = std::atoi(col[2]);
      c.length  = std::atoi(col[3]);
      c.default_type = id++;

      category.insert(std::pair<std::string, CharInfo>(key, c));
      category_ary.push_back(key);
    }
  }

  CHECK_DIE(category.size() < 18) << "too many categories(>= 18)";

  // DEFAULT and SPACE are mandatory categories.
  CHECK_DIE(category.find("DEFAULT") != category.end())
      << "category [DEFAULT] is undefined";

  CHECK_DIE(category.find("SPACE") != category.end())
      << "category [SPACE] is undefined";

  std::istringstream iss2(UNK_DEF_DEFAULT);
  std::ifstream ifs2(WPATH(ufile));
  std::istream *is2 = &ifs2;

  // Fall back to the built-in definition when ufile is not readable.
  if (!ifs2) {
    std::cerr << ufile
              << " is not found. minimum setting is used." << std::endl;
    is2 = &iss2;
  }

  // Pass 2: collect the categories named in the first CSV column of ufile;
  // each of them must have been defined in pass 1.
  std::set<std::string> unk;
  while (is2->getline(line.get(), line.size())) {
    const size_t n = tokenizeCSV(line.get(), col.get(), 2);
    CHECK_DIE(n >= 1) << "format error: " << line.get();
    const std::string key = col[0];
    CHECK_DIE(category.find(key) != category.end())
        << "category [" << key << "] is undefined in " << cfile;
    unk.insert(key);
  }

  // Conversely, every defined category must appear in ufile.
  for (std::map<std::string, CharInfo>::const_iterator it = category.begin();
       it != category.end();
       ++it) {
    CHECK_DIE(unk.find(it->first) != unk.end())
        << "category [" << it->first << "] is undefined in " << ufile;
  }

  // Start with every entry set to the encoding of DEFAULT alone.
  std::vector<CharInfo> table(0xffff);
  {
    std::vector<std::string> tmp;
    tmp.push_back("DEFAULT");
    const CharInfo c = encode(tmp, &category);
    std::fill(table.begin(), table.end(), c);
  }

  // Apply the ranges in file order, so a later range overwrites an earlier
  // one where they overlap.
  for (std::vector<Range>::const_iterator it = range.begin();
       it != range.end();
       ++it) {
    const CharInfo c = encode(it->c, &category);
    for (int i = it->low; i <= it->high; ++i) {
      table[i] = c;
    }
  }

  // output binary table
  {
    std::ofstream ofs(WPATH(ofile), std::ios::binary|std::ios::out);
    CHECK_DIE(ofs) << "permission denied: " << ofile;

    unsigned int size = static_cast<unsigned int>(category.size());
    ofs.write(reinterpret_cast<const char*>(&size), sizeof(size));
    // Each name is written as a fixed 32-byte field; names longer than 31
    // characters are truncated and the field is always zero-terminated.
    for (std::vector<std::string>::const_iterator it = category_ary.begin();
         it != category_ary.end();
         ++it) {
      char buf[32];
      std::fill(buf, buf + sizeof(buf), '\0');
      std::strncpy(buf, it->c_str(), sizeof(buf) - 1);
      ofs.write(reinterpret_cast<const char*>(buf), sizeof(buf));
    }
    ofs.write(reinterpret_cast<const char*>(&table[0]),
              sizeof(CharInfo) * table.size());
    ofs.close();
  }

  return true;
}