// Loads the tag set from a training corpus file |filename|.
//
// Each non-empty data line is split on tabs/spaces; all lines must have the
// same column count. The last column of every line is collected as a
// candidate answer tag into |y_|, and |xsize_| is set to the number of
// feature columns (total columns minus the tag column).
//
// Returns false (via the CHECK_FALSE macro, which also streams the error
// message) when the file is missing or the column counts are inconsistent;
// true on success.
bool EncoderFeatureIndex::openTagSet(const char *filename) {
  std::ifstream ifs(WPATH(filename));
  CHECK_FALSE(ifs) << "no such file or directory: " << filename;
  scoped_fixed_array<char, 8192> line;
  scoped_fixed_array<char *, 1024> column;
  size_t max_size = 0;                 // expected column count, fixed by the first data line
  std::set<std::string> candset;       // unique tags, kept in sorted order
  while (ifs.getline(line.get(), line.size())) {
    // Skip empty lines and lines starting with whitespace
    // (e.g. sentence separators in the corpus).
    if (line[0] == '\0' || line[0] == ' ' || line[0] == '\t') {
      continue;
    }
    const size_t size = tokenize2(line.get(), "\t ", column.get(), column.size());
    if (max_size == 0) {
      max_size = size;
    }
    // Every data line must match the first line's column count.
    CHECK_FALSE(max_size == size) << "inconsistent column size: "
                                  << max_size << " " << size << " " << filename;
    xsize_ = size - 1;                 // all columns except the last are features
    candset.insert(column[max_size-1]);  // last column is the answer tag
  }
  // Materialize the sorted unique tag set into y_.
  y_.clear();
  for (std::set<std::string>::iterator it = candset.begin();
       it != candset.end(); ++it) {
    y_.push_back(*it);
  }
  ifs.close();
  return true;
}
// Adds one training/tagging line: duplicates |line| into the feature
// index's arena, tokenizes it on tabs/spaces, and forwards the columns
// to add2(). Returns false when add2() rejects the row.
bool TaggerImpl::add(const char* line) {
  const char* column[8192];
  char *p = feature_index_->strdup(line);
  // BUG FIX: the limit argument of tokenize2 is a maximum ELEMENT count,
  // not a byte count. sizeof(column) is 8192 * sizeof(char*), which would
  // let tokenize2 write far past the end of |column| on a pathological
  // input line. Pass the element count instead.
  const size_t size = tokenize2(p, "\t ", column,
                                sizeof(column) / sizeof(column[0]));
  if (!add2(size, column, false)) return false;
  return true;
}
void readFile(){ proc *process = (proc *) malloc(sizeof(proc)); char line[256] = {0}; FILE *fp = fopen("processes_q5.txt","r"); // error checking if(fp==NULL){ perror("Error opening file.\n"); return; } for(int i=0; i<10; i++){ fgets(line,256,fp); for(int j=0;j<256;j++){process->name[j] = 0;} // zero char array char **tokenized = tokenize2(line,", "); strcpy(process->name, tokenized[0]); // assign integer values from file process->priority = atoi(tokenized[1]); process->pid = 0; process->runtime = atoi(tokenized[2]); // push process onto queue push(*process); // if queue contains no processes if(first==NULL){ first = last; } } fclose(fp); }
bool POSIDGenerator::open(const char *filename, Iconv *iconv) { std::ifstream ifs(filename); if (!ifs) { std::cerr << filename << " is not found. minimum setting is used" << std::endl; rewrite_.resize(1); rewrite_.back().set_pattern("*", "1"); return true; } std::string line; char *col[2]; while (std::getline(ifs, line)) { if (iconv) iconv->convert(&line); const size_t n = tokenize2(const_cast<char *>(line.c_str()), " \t", col, 2); CHECK_DIE(n == 2) << "format error: " << line; for (char *p = col[1]; *p; ++p) { CHECK_DIE(*p >= '0' && *p <= '9') << "not a number: " << col[1]; } rewrite_.resize(rewrite_.size() + 1); rewrite_.back().set_pattern(col[0], col[1]); } return true; }
// Loads POS-id rewrite rules (pattern -> numeric id, one pair per line).
// This JMA variant first checks for an in-memory dictionary image
// registered under |filename|; only if none exists does it read from disk.
// Falls back to a single catch-all rule ("*" -> "1") when neither source
// is available, and still reports success in that case.
bool POSIDGenerator::open(const char *filename, Iconv *iconv) {
  scoped_ptr<std::istream> p_ist;
  // Prefer the in-memory dictionary text when it has been registered.
  const jma::DictUnit* dict = jma::JMA_Dictionary::instance()->getDict(filename);
  if(dict)
    // NOTE(review): std::istrstream is deprecated; it is used here to wrap
    // the raw dictionary buffer without copying it.
    p_ist.reset(new std::istrstream(dict->text_, dict->length_));
  else
    p_ist.reset(new std::ifstream(filename));
  if (!*p_ist) {
    // Missing source is not fatal: install the minimum setting.
    std::cerr << filename << " is not found. minimum setting is used" << std::endl;
    rewrite_.resize(1);
    rewrite_.back().set_pattern("*", "1");
    return true;
  }
  std::string line;
  char *col[2];
  while (std::getline(*p_ist, line)) {
    if (iconv) iconv->convert(&line);  // in-place encoding conversion
    // tokenize2 splits the line's buffer in place:
    // col[0] = pattern, col[1] = numeric id.
    const size_t n = tokenize2(const_cast<char *>(line.c_str()), " \t", col, 2);
    CHECK_DIE(n == 2) << "format error: " << line;
    // The id column must consist only of decimal digits.
    for (char *p = col[1]; *p; ++p) {
      CHECK_DIE(*p >= '0' && *p <= '9') << "not a number: " << col[1];
    }
    rewrite_.resize(rewrite_.size() + 1);
    rewrite_.back().set_pattern(col[0], col[1]);
  }
  return true;
}
/* Reads FILE_LENGTH process records from "processes_q5.txt" (fields
 * separated by ", ": name, priority, memory, runtime) and pushes the ones
 * matching priority_filter onto *p1.
 *
 * Fixes over the previous version:
 *  - temp_proc was malloc'd on EVERY iteration and never freed, leaking a
 *    struct per line (push() copies the struct by value);
 *  - the fgets() return value was unchecked, so a short file re-parsed
 *    stale buffer contents. */
void readFile(queue** p1, int priority_filter){
    //if priority_filter = -1 -> load all processes
    //if priority_filter = 0 -> load processes with priority = 0
    //if priority_filter = 1 -> load all processes with priority != 0
    char buffer[CHAR_LENGTH] = {0};
    FILE *f1 = fopen("processes_q5.txt","r");
    if (f1 == NULL){
        perror("Error opening file\n");
        return;
    }
    for (int i = 0; i < FILE_LENGTH; i++){
        /* fix: stop at EOF/short file instead of re-parsing stale buffer */
        if (fgets(buffer, CHAR_LENGTH, f1) == NULL) {
            break;
        }
        char **tokenized = tokenize2(buffer, ", ");
        int priority = atoi(tokenized[1]);  /* priority is in tokenized[1] */
        if ((priority == 0 && priority_filter == 0) || //only priority == 0
            (priority != 0 && priority_filter == 1) || //only priority != 0
            (priority_filter == -1))                   //all
        {
            /* fix: allocate only when the record is kept, and free after
             * push(), which copies the struct by value */
            proc *temp_proc = (proc *) malloc(sizeof(proc));
            if (temp_proc == NULL) {
                break;
            }
            strcpy(temp_proc->name, tokenized[0]);     //name
            temp_proc->priority = priority;            //priority
            temp_proc->pid = 0;                        //pid
            temp_proc->address = 0;                    //address
            temp_proc->memory = atoi(tokenized[2]);    //memory
            temp_proc->runtime = atoi(tokenized[3]);   //runtime
            //push process onto queue
            push(p1, *temp_proc);
            free(temp_proc);
        }
    }
    fclose(f1);
}
/*! @decl array(array(string)|string) tokenize(string code)
 *!
 *!   Tokenize a string of Pike tokens.
 *!
 *! @returns
 *!   Returns an array with Pike-level tokens and the remainder (a
 *!   partial token), if any.
 */
static void f_tokenize( INT32 args )
{
  struct array *res;
  struct pike_string *left_s = NULL; /* Make gcc happy. */
  struct pike_string *data;
  int left;
  ONERROR tmp;

  /* Expect exactly one (wide-capable, "%W") string argument. */
  get_all_args("tokenize", args, "%W", &data);

  if(!data->len)
  {
    /* Empty input: return ({ ({}), "" }) immediately. */
    pop_n_elems(args);
    push_empty_array();
    push_empty_string();
    f_aggregate(2);
    return;
  }

  res = allocate_array_no_init( 0, 128 );
  /* Ensure the result array is freed if tokenizing throws. */
  SET_ONERROR(tmp, do_free_arrayptr, &res);

  /* Dispatch on the string's character width (0 = 8-bit, 1 = 16-bit,
   * 2 = 32-bit). Each tokenizer fills |res| with complete tokens and
   * returns the offset of the first untokenized character; the tail
   * becomes the "remainder" string. */
  switch(data->size_shift)
  {
    case 0:
      left = tokenize0(&res, STR0(data), data->len);
      left_s = make_shared_binary_string0(STR0(data)+left, data->len-left);
      break;
    case 1:
      left = tokenize1(&res, STR1(data), data->len);
      left_s = make_shared_binary_string1(STR1(data)+left, data->len-left);
      break;
    case 2:
      left = tokenize2(&res,STR2(data), data->len);
      left_s = make_shared_binary_string2(STR2(data)+left, data->len-left);
      break;
#ifdef PIKE_DEBUG
    default:
      Pike_error("Unknown shift size %d.\n", data->size_shift);
#endif
  }
  UNSET_ONERROR(tmp);

  pop_n_elems(args);
  if (!res->size) {
    /* No complete tokens: push the canonical empty array instead. */
    free_array(res);
    push_empty_array();
  }
  else
    push_array(res);
  push_string( left_s );
  /* Package ({ tokens, remainder }) as the return value. */
  f_aggregate( 2 );
}
// Parses a whitespace-separated list of integers in |level_str| into
// |*level| (previous contents are cleared). Always returns true.
static bool parseLevel(const char *level_str, std::vector<int> *level) {
  char buf[BUF_SIZE];
  char *col[512];
  std::strncpy(buf, level_str, sizeof(buf));
  // BUG FIX: strncpy does not null-terminate when the source fills the
  // buffer; terminate explicitly so tokenize2 cannot run off the end.
  buf[sizeof(buf) - 1] = '\0';
  level->clear();
  // BUG FIX: the limit argument of tokenize2 is a maximum ELEMENT count,
  // not a byte count — sizeof(col) is 512 * sizeof(char*), which would let
  // tokenize2 write far past the end of |col|.
  const size_t n = tokenize2(buf, "\t ", col, sizeof(col) / sizeof(col[0]));
  for (size_t i = 0; i < n; ++i) {
    level->push_back(std::atoi(col[i]));
  }
  return true;
}
// Parses a whitespace-separated list of integers in |level_str| into
// |*level| (previous contents are cleared). Always returns true.
static bool parseLevel(const char *level_str, std::vector<int> *level) {
  scoped_fixed_array<char, BUF_SIZE> buf;
  scoped_fixed_array<char *, 512> col;
  std::strncpy(buf.get(), level_str, buf.size());
  // BUG FIX: strncpy does not null-terminate when the source fills the
  // buffer; terminate explicitly so tokenize2 cannot run off the end.
  buf[buf.size() - 1] = '\0';
  level->clear();
  size_t n = tokenize2(buf.get(), "\t ", col.get(), col.size());
  for (size_t i = 0; i < n; ++i) {
    level->push_back(std::atoi(col[i]));
  }
  return true;
}
bool FeatureIndex::openTemplate(const Param ¶m) { if (param.get<bool>("identity-template")) { unigram_templs_.clear(); bigram_templs_.clear(); unigram_templs_.push_back("U:%u"); bigram_templs_.push_back("B:%r/%l"); return true; } std::string filename = create_filename(param.get<std::string>("dicdir"), FEATURE_FILE); std::ifstream ifs(filename.c_str()); CHECK_FALSE(ifs) << "no such file or directory: " << filename; char buf[BUF_SIZE]; char *column[4]; unigram_templs_.clear(); bigram_templs_.clear(); while (ifs.getline(buf, sizeof(buf))) { if (buf[0] == '\0' || buf[0] == '#' || buf[0] == ' ') continue; CHECK_FALSE(tokenize2(buf, "\t ", column, 2) == 2) << "format error: " <<filename; if (std::strcmp(column[0], "UNIGRAM") == 0) unigram_templs_.push_back(this->strdup(column[1])); else if (std::strcmp(column[0], "BIGRAM") == 0 ) bigram_templs_.push_back(this->strdup(column[1])); else CHECK_FALSE(false) << "format error: " << filename; } // second, open rewrite rules filename = create_filename(param.get<std::string>("dicdir"), REWRITE_FILE); rewrite_.open(filename.c_str()); return true; }
bool FeatureIndex::convert(const char* txtfile, const char *binfile) { std::ifstream ifs(txtfile); CHECK_DIE(ifs) << "no such file or directory: " << txtfile; char buf[BUF_SIZE]; char *column[4]; std::map<std::string, double> dic; while (ifs.getline(buf, sizeof(buf))) { CHECK_DIE(tokenize2(buf, "\t", column, 2) == 2) << "format error: " << buf; dic.insert(std::make_pair<std::string, double> (std::string(column[1]), atof(column[0]) )); } std::ofstream ofs(binfile, std::ios::out | std::ios::binary); CHECK_DIE(ofs) << "permission denied: " << binfile; std::vector<char *> key; unsigned int size = static_cast<unsigned int>(dic.size()); ofs.write(reinterpret_cast<const char*>(&size), sizeof(unsigned int)); for (std::map<std::string, double>::const_iterator it = dic.begin(); it != dic.end(); ++it) { key.push_back(const_cast<char*>(it->first.c_str())); ofs.write(reinterpret_cast<const char*>(&it->second), sizeof(double)); } Darts::DoubleArray da; CHECK_DIE(da.build(key.size(), &key[0], 0, 0, 0) == 0) << "unkown error in building double array: " << binfile; ofs.write(reinterpret_cast<const char*>(da.array()), da.unit_size() * da.size()); return true; }
std::vector<wxString> get_fields2(const std::string & line, const char delim){ std::string tmp = line; if (iscntrl(tmp.back())) tmp.pop_back(); // remove Microsoft end-of-line return tokenize2(tmp, delim); }
// Compiles the character-property definition |cfile| (char.def) and the
// unknown-word definition |ufile| (unk.def) into the binary table |ofile|.
//
// cfile contains two kinds of lines:
//   - category definitions: "NAME invoke group length" — registered in
//     |category| with a sequential default_type id;
//   - code-point ranges: "0xLOW[..0xHIGH] CAT [CAT ...]" — each assigns
//     one or more previously defined categories to a UCS-2 range.
// Every category must also appear in ufile (and vice versa), and DEFAULT
// and SPACE must exist. The output is: [#categories][32-byte category
// names][one CharInfo per code point 0..0xfffe].
//
// Aborts via CHECK_DIE on any format error; returns true on success.
bool CharProperty::compile(const char *cfile,
                           const char *ufile,
                           const char *ofile) {
  scoped_fixed_array<char, BUF_SIZE> line;
  scoped_fixed_array<char *, 512> col;
  size_t id = 0;                              // next category default_type id
  std::vector<Range> range;                   // code-point ranges, in file order
  std::map<std::string, CharInfo> category;   // name -> CharInfo
  std::vector<std::string> category_ary;      // names in definition order
  std::ifstream ifs(WPATH(cfile));
  std::istringstream iss(CHAR_PROPERTY_DEF_DEFAULT);
  std::istream *is = &ifs;
  if (!ifs) {
    // Fall back to the built-in default definition.
    std::cerr << cfile << " is not found. minimum setting is used" << std::endl;
    is = &iss;
  }
  while (is->getline(line.get(), line.size())) {
    // Skip blank lines and comment lines.
    if (std::strlen(line.get()) == 0 || line[0] == '#') {
      continue;
    }
    const size_t size = tokenize2(line.get(), "\t ", col.get(), col.size());
    CHECK_DIE(size >= 2) << "format error: " << line.get();
    // Range line, e.g. "0x0041..0x005A ALPHA  # comment".
    if (std::strncmp(col[0], "0x", 2) == 0) {
      std::string low = col[0];
      std::string high;
      size_t pos = low.find("..");
      if (pos != std::string::npos) {
        high = low.substr(pos + 2, low.size() - pos - 2);
        low = low.substr(0, pos);
      } else {
        high = low;   // single code point: LOW == HIGH
      }
      Range r;
      r.low = atohex(low.c_str());
      r.high = atohex(high.c_str());
      CHECK_DIE(r.low >= 0 && r.low < 0xffff &&
                r.high >= 0 && r.high < 0xffff &&
                r.low <= r.high)
          << "range error: low=" << r.low << " high=" << r.high;
      // Remaining columns are category names, up to an inline comment.
      for (size_t i = 1; i < size; ++i) {
        if (col[i][0] == '#') {
          break;  // skip comments
        }
        CHECK_DIE(category.find(std::string(col[i])) != category.end())
            << "category [" << col[i] << "] is undefined";
        r.c.push_back(col[i]);
      }
      range.push_back(r);
    } else {
      // Category definition line: "NAME invoke group length".
      CHECK_DIE(size >= 4) << "format error: " << line.get();
      std::string key = col[0];
      CHECK_DIE(category.find(key) == category.end())
          << "category " << key << " is already defined";
      CharInfo c;
      std::memset(&c, 0, sizeof(c));
      c.invoke = std::atoi(col[1]);
      c.group = std::atoi(col[2]);
      c.length = std::atoi(col[3]);
      c.default_type = id++;   // sequential id in definition order
      category.insert(std::pair<std::string, CharInfo>(key, c));
      category_ary.push_back(key);
    }
  }
  // The CharInfo bit layout can hold at most 18 categories.
  CHECK_DIE(category.size() < 18) << "too many categories(>= 18)";
  CHECK_DIE(category.find("DEFAULT") != category.end())
      << "category [DEFAULT] is undefined";
  CHECK_DIE(category.find("SPACE") != category.end())
      << "category [SPACE] is undefined";
  // Cross-check against the unknown-word definition file.
  std::istringstream iss2(UNK_DEF_DEFAULT);
  std::ifstream ifs2(WPATH(ufile));
  std::istream *is2 = &ifs2;
  if (!ifs2) {
    std::cerr << ufile << " is not found. minimum setting is used."
              << std::endl;
    is2 = &iss2;
  }
  std::set<std::string> unk;
  while (is2->getline(line.get(), line.size())) {
    // The first CSV column of each unk.def entry is the category name.
    const size_t n = tokenizeCSV(line.get(), col.get(), 2);
    CHECK_DIE(n >= 1) << "format error: " << line.get();
    const std::string key = col[0];
    CHECK_DIE(category.find(key) != category.end())
        << "category [" << key << "] is undefined in " << cfile;
    unk.insert(key);
  }
  // Every category must have at least one unknown-word entry.
  for (std::map<std::string, CharInfo>::const_iterator it = category.begin();
       it != category.end(); ++it) {
    CHECK_DIE(unk.find(it->first) != unk.end())
        << "category [" << it->first << "] is undefined in " << ufile;
  }
  // Build the per-code-point table: initialize everything to DEFAULT,
  // then overlay each range's encoded category set in file order.
  std::vector<CharInfo> table(0xffff);
  {
    std::vector<std::string> tmp;
    tmp.push_back("DEFAULT");
    const CharInfo c = encode(tmp, &category);
    std::fill(table.begin(), table.end(), c);
  }
  for (std::vector<Range>::const_iterator it = range.begin();
       it != range.end(); ++it) {
    const CharInfo c = encode(it->c, &category);
    for (int i = it->low; i <= it->high; ++i) {
      table[i] = c;
    }
  }
  // output binary table
  {
    std::ofstream ofs(WPATH(ofile), std::ios::binary|std::ios::out);
    CHECK_DIE(ofs) << "permission denied: " << ofile;
    unsigned int size = static_cast<unsigned int>(category.size());
    ofs.write(reinterpret_cast<const char*>(&size), sizeof(size));
    // Category names are written as fixed 32-byte, NUL-padded records.
    for (std::vector<std::string>::const_iterator it = category_ary.begin();
         it != category_ary.end(); ++it) {
      char buf[32];
      std::fill(buf, buf + sizeof(buf), '\0');
      std::strncpy(buf, it->c_str(), sizeof(buf) - 1);
      ofs.write(reinterpret_cast<const char*>(buf), sizeof(buf));
    }
    ofs.write(reinterpret_cast<const char*>(&table[0]),
              sizeof(CharInfo) * table.size());
    ofs.close();
  }
  return true;
}