static void ora_avemariam(const size_t numerus, const size_t dies) { size_t n; const char **rosario_hodiernus = rosario[dies]; for(n = 0; n < 4; n++) dic(avemaria[n]); dic(rosario_hodiernus[numerus]); for(n = 4; n < 8; n++) dic(avemaria[n]); }
/**
 * Rebuild a dictionary from a term file and a postings file.
 *
 * Term file layout, as read here: one length byte, that many term
 * characters, then an int holding the number of postings. The postings file
 * supplies that many int document ids for each term, in the same order.
 *
 * @param termFile      path of the binary term file
 * @param postingsFile  path of the binary postings file
 * @return the populated dictionary, marked as sorted. If either file ends in
 *         the middle of a record, reading stops, a message is written to
 *         cerr and the entries read so far are returned.
 */
shared_ptr<Dic> IndexPersister::readIndexFromFile(string termFile, string postingsFile)
{
    cout << "Reading index from disk.." << endl;

    ifstream termStream(termFile, ios::binary);
    ifstream postingsStream(postingsFile, ios::binary);
    shared_ptr<Dic> dic(new Dic());

    char lengthByte;
    while (termStream.read(&lengthByte, sizeof(lengthByte))) {
        // The length is a raw byte (0..255). Reading it through a possibly
        // signed char would turn lengths above 127 into negative counts.
        const unsigned char charCount = static_cast<unsigned char>(lengthByte);

        // 255 characters plus the terminating NUL.
        char term_cstr[256];
        if (!termStream.read(term_cstr, charCount)) {
            cerr << "Term file is truncated inside a term; stopping." << endl;
            break;
        }
        term_cstr[charCount] = 0;
        string term(term_cstr);

        int docNum = 0;
        if (!termStream.read(reinterpret_cast<char *>(&docNum), sizeof(docNum))) {
            cerr << "Term file is truncated after term '" << term << "'; stopping." << endl;
            break;
        }

        shared_ptr<List> list(new List(term));
        bool complete = true;
        for (int i = 0; i < docNum; i++) {
            int docId = 0;
            if (!postingsStream.read(reinterpret_cast<char *>(&docId), sizeof(docId))) {
                complete = false;
                break;
            }
            list->addPosting(docId);
        }
        if (!complete) {
            // Do not publish a posting list that is known to be incomplete.
            cerr << "Postings file is truncated at term '" << term << "'; stopping." << endl;
            break;
        }

        dic->addList(term, *list);
    }

    dic->setSorted(true);
    return dic;
}
/**
 * Smoke test for Hunspell: writes a two-word dictionary and a minimal affix
 * file, then checks one correct word, one misspelled word and prints the
 * suggestions for "ell".
 *
 * @return 0 when both spelling checks behave as expected, 1 otherwise.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    std::ofstream dic("hunspell-test.dic");
    dic << "2\nHello\nWorld";
    dic.close();

    std::ofstream aff("hunspell-test.aff");
    aff << "SET UTF-8\nTRY loredWH\nMAXDIFF 1";
    aff.close();

    Hunspell h("hunspell-test.aff", "hunspell-test.dic");
    int status = 0;

    if (h.spell("Hello") == 0) {
        std::cerr << "Error: hunspell marked correct word as wrong" << std::endl;
        status = 1;
    }
    if (h.spell("wrld") != 0) {
        std::cerr << "Error: hunspell marked wrong word as correct" << std::endl;
        status = 1;
    }

    char **result = NULL;
    const int n = h.suggest(&result, "ell");
    for (int i = 0; i < n; i++)
        std::cout << result[i];

    // The suggestion list is allocated by the library and must be handed
    // back to it; it was previously leaked.
    h.free_list(&result, n);

    return status;
}
BOOL DirectInputInitialize() { HRESULT hr; UINT error_mode; dic_proc dic; if (hDLL != NULL) return TRUE; hDLL = NULL; /* Turn off error dialog for this call */ error_mode = SetErrorMode(0); hDLL = LoadLibrary(TEXT("dinput.dll")); SetErrorMode(error_mode); if (hDLL == NULL) return FALSE; #ifdef UNICODE dic = (dic_proc)GetProcAddress((HINSTANCE)hDLL, "DirectInputCreateW"); #else dic = (dic_proc)GetProcAddress((HINSTANCE)hDLL, "DirectInputCreateA"); #endif if (dic == NULL) return FALSE; hr = dic(GetModuleHandle(NULL), DIRECTINPUT_VERSION, &di, NULL); if (FAILED(hr)) { hr = dic(GetModuleHandle(NULL), 0x0300, &di, NULL); if (FAILED(hr)) { ErrorMsg("DirectInputCreate failed! error=%x\n", (unsigned int)hr); di = NULL; return FALSE; } } return TRUE; }
bool DirectInputInitialize(void) { HRESULT hr; dic_proc dic; /* Turn off error dialog for this call */ UINT error_mode = SetErrorMode(0); HANDLE hDLL = LoadLibrary(TEXT("dinput.dll")); SetErrorMode(error_mode); if (hDLL == NULL) return false; dic = (dic_proc)GetProcAddress((HINSTANCE)hDLL, "DirectInputCreateW"); if (dic == NULL) return false; hr = dic(GetModuleHandle(NULL), 0x0700, &dinp, NULL); if (FAILED(hr)) { hr = dic(GetModuleHandle(NULL), 0x0500, &dinp, NULL); if (FAILED(hr)) { hr = dic(GetModuleHandle(NULL), 0x0300, &dinp, NULL); if (FAILED(hr)) { ErrorMessageBox("DirectInputCreate failed! error=%x\n", (unsigned int)hr); dinp = NULL; return false; } } } return true; }
/**
 * Convert a Roman numeral to its integer value.
 *
 * Every symbol is added to the total; when a symbol is larger than the one
 * before it, the previous symbol is subtracted twice (once to undo its
 * addition, once for the subtractive notation). Characters that are not
 * Roman symbols count as 0.
 *
 * @param s  numeral made of I, V, X, L, C, D, M
 * @return the accumulated value (0 for an empty string)
 */
int romanToInt(std::string s) {
    const auto valueOf = [](char symbol) -> int {
        switch (symbol) {
            case 'I': return 1;
            case 'V': return 5;
            case 'X': return 10;
            case 'L': return 50;
            case 'C': return 100;
            case 'D': return 500;
            case 'M': return 1000;
            default:  return 0;
        }
    };

    int total = 0;
    // Larger than any symbol, so the first character never triggers the
    // subtractive correction.
    int previous = 10000;

    for (std::string::size_type pos = 0; pos < s.size(); ++pos) {
        const int current = valueOf(s[pos]);
        total += current;
        if (current > previous) {
            total -= 2 * previous;
        }
        previous = current;
    }
    return total;
}
/**
 * Largest product of the lengths of two words that share no letter.
 *
 * The words are sorted in place with `compare`, and each word is reduced to
 * a bit mask with one bit per letter (offset from 'a'). For every word the
 * first later word with a disjoint mask is taken as its partner.
 *
 * NOTE(review): taking only the first disjoint partner is optimal only if
 * `compare` orders words from longest to shortest - confirm against its
 * definition.
 *
 * @param words  candidate words; reordered by this call
 * @return the best product found, or 0 when no two words are disjoint
 */
int maxProduct(vector<string>& words) {
    const int count = words.size();
    sort(words.begin(), words.end(), compare);

    // One letter mask per word, in sorted order.
    vector<int> masks(count);
    for (int idx = 0; idx < count; ++idx) {
        int mask = 0;
        for (auto letter : words[idx]) {
            mask |= 1 << (letter - 'a');
        }
        masks[idx] = mask;
    }

    int best = 0;
    for (int first = 0; first < count - 1; ++first) {
        // Skip words whose squared length is already below the best product.
        if (words[first].length() * words[first].length() < best) {
            continue;
        }

        // Advance to the first later word without a common letter.
        int second = first + 1;
        while (second < count && (masks[first] & masks[second]) != 0) {
            ++second;
        }
        if (second < count) {
            best = max(best, int(words[first].size() * words[second].size()));
        }
    }
    return best;
}
/*
 * Recite a prayer: pass the first `numerus` lines of `comprecatio` to dic(),
 * in order.
 */
static void ora(const char **comprecatio, const size_t numerus)
{
	const char **versus = comprecatio;
	size_t reliqui = numerus;

	while (reliqui-- > 0)
		dic(*versus++);
}
/**
 * Create an FST based on an RNN.
 *
 * @param rnnlm  RNN language model; queried for its vocabulary size, hidden
 *               layer and word strings.
 * @param fst    FST that receives the states and arcs. Before returning it is
 *               replaced by the copy (new_fst) produced by removeStates().
 *
 * Outline, as far as it can be read from the statements below:
 *  - histories are taken from a FIFO queue (q) until the queue is empty;
 *  - for each history, computeEntropyAndConditionals() fills `entropy` and
 *    `all_prob`, and getBackoff() yields its backoff history (min_backoff is
 *    used when the backoff equals the history itself);
 *  - word w is kept when the history is in set_min_backoff or when
 *    delta > pruning_threshold*entropy, with p_joint = exp(-all_prob[w]) and
 *    delta = -p_joint*log2(p_joint); otherwise the state is flagged for
 *    backoff;
 *  - a flagged state gets an epsilon arc of weight LogWeight::Zero() to its
 *    backoff state; a kept word 0 gets an arc to FINAL_STATE and any other
 *    kept word gets an arc to the state of the extended history;
 *  - finally compactBackoffNodes(), computeAllBackoff() and removeStates()
 *    post-process the FST, and a SymbolTable ("*" = 0, word i = i+1) is set
 *    as both input and output symbols.
 *
 * NOTE(review): the original line breaks of this function appear to have
 * been lost, so the extent of every `//` comment in the body is ambiguous.
 * Restore the formatting from version control before editing the body or
 * relying on this outline.
 */
void FlatBOFstBuilder::convertRNN(CRnnLM & rnnlm, VectorFst<LogArc> &fst) { queue<NeuronFstHistory> q; VectorFst<LogArc> new_fst; NeuronFstHistory fsth(rnnlm.getHiddenLayerSize(),getNumBins()); FstIndex id = 0; NeuronFstHistory new_fsth(rnnlm.getHiddenLayerSize(),getNumBins()); FstIndex new_id; NeuronFstHistory min_backoff(rnnlm.getHiddenLayerSize(),getNumBins()); set<NeuronFstHistory>set_min_backoff; NeuronFstHistory bo_fsth(rnnlm.getHiddenLayerSize(),getNumBins()); bool backoff = false; vector<FstIndex> deleted; real p = 0.00; real p_joint = 0.00; real entropy = 0.0; real delta = 0.0; vector<real> all_prob(rnnlm.getVocabSize()); vector<real> posterior(10); map< FstIndex,set<FstIndex> > pred; vector<bool> non_bo_pred(rnnlm.getVocabSize()); vector<int> to_be_added; vector<int> to_be_removed; for (int i = 0; i < rnnlm.getVocabSize(); i++) { to_be_removed.push_back(i); } vector<real> to_be_added_prob; FstIndex n_added = 0; FstIndex n_processed = 0; FstIndex next_n_added = 0; FstIndex next_n_processed = 0; FstIndex n_backoff = 0; FstIndex n_only_backoff = 0; int v = rnnlm.getVocabSize(); int w = 0; // Initialize rnnlm.copyHiddenLayerToInput(); // printNeurons(rnnlm.getInputLayer(),0,10); // Initial state ( 0 | hidden layer after </s>) printNeurons(rnnlm.getHiddenLayer(),0,10); fsth.setFstHistory(rnnlm, *dzer); fsth.setLastWord(0); q.push(fsth); addFstState(id, new NeuronFstHistory(fsth), fst); fst.SetStart(INIT_STATE); // Final state (don't care about the associated discrete representation) fst.AddState(); fst.SetFinal(FINAL_STATE, LogWeight::One()); /*posterior.at(INIT_STATE) = MY_LOG_ONE;*/ min_backoff.setLastWord(-1); computeEntropyAndConditionals(entropy, all_prob, rnnlm, min_backoff); min_backoff = getBackoff(rnnlm, min_backoff, set_min_backoff, all_prob, to_be_removed); cout << "MIN BACKOFF " << min_backoff.toString() << endl; set_min_backoff.insert(min_backoff); // addFstState(id, min_backoff, fst); // q.push(min_backoff); 
// Estimate number of backoff loop to bound the backoff path length // float ratioa = 0.0; // float ratiob = 0.0; float ratio = 0.0; // for (int i=0; i < min_backoff.getNumDims(); i++) { // if (min_backoff.getDim(i) == 1) { // ratioa++; // } // if (fsth.getDim(i) == 1) { // ratiob++; // } // } // ratioa /= min_backoff.getNumDims(); // ratiob /= min_backoff.getNumDims(); // ratio = (ratioa*(1.0-ratiob))+(ratiob*(1.0-ratioa)); ratio=1.0; // printf("ratio=%f\t%i BO loops\n", ratio, n_bo_loops); //foreach state in the queue while (!q.empty()) { fsth = q.front(); q.pop(); id = h2state[&fsth]; state2h.push_back(new NeuronFstHistory(fsth)); if (id == FINAL_STATE) { continue; } dprintf(1,"-- STUDY STATE %li = %s\n", id, fsth.toString().c_str()); /* try { posterior.at(id) = MY_LOG_ONE; } catch (exception e) { posterior.resize((int) (posterior.size()*1.5)+1); posterior.at(id) = MY_LOG_ONE; }*/ computeEntropyAndConditionals(entropy, all_prob, rnnlm, fsth); //compute BO in advance and check if it is a min BO node bo_fsth = getBackoff(rnnlm, fsth, set_min_backoff, all_prob, to_be_removed); if (bo_fsth == fsth) { bo_fsth = min_backoff; } //foreach w (ie, foreach word of each class c) //test if the edge has to kept or removed backoff = false; //no backoff yet since no edge has been removed for (w=0; w < rnnlm.getVocabSize(); w++) { p = all_prob[w]; /*p_joint = exp(-posterior[id]-p);*/ p_joint = exp(-p); delta = -1.0*p_joint*log2(p_joint); //accept edge if this leads to a minimum //relative gain of the entropy dprintf(2,"P = %e \tP_joint = %e \tH = %e \tDelta =%e \tDelta H = %.6f %%\n",exp(-p), p_joint, entropy, delta, 100.0*delta/entropy); if (set_min_backoff.find(fsth) != set_min_backoff.end() || (delta > pruning_threshold*entropy)) { // if ((fsth == min_backoff) || (delta > pruning_threshold*entropy)) { next_n_added++; to_be_added.push_back(w); to_be_added_prob.push_back(p); dprintf(2,"\tACCEPT [%li] -- %i (%s) / %f --> ...\t(%e > %e)\n", id, w, rnnlm.getWordString(w), p, 
delta, pruning_threshold*entropy); // to_be_removed.push_back(w); } //backoff else { // to_be_removed.push_back(w); backoff = true; dprintf(2,"\tPRUNE [%li] -- %i / %f --> ...\n", id, w, p); } //print if (next_n_processed % 100000 == 0) { fprintf(stderr, "\rH=%.5f / N proc'd=%li / N added=%li (%.5f %%) / N bo=%li (%.5f %%) / %li/%li Nodes (%2.1f %%) / N min BO=%i", entropy, n_processed, n_added, ((float) n_added/ (float)n_processed)*100.0, n_backoff, ((float) n_backoff/ (float)n_added)*100.0, id, id+q.size(), 100.0 - (float) (100.0*id/(id+q.size())), (int) set_min_backoff.size()); } next_n_processed++; // } } //Set a part of the new FST history new_fsth.setFstHistory(rnnlm, *dzer); //if at least one word is backing off if (backoff) { n_backoff++; if (to_be_added.size() == 0) { n_only_backoff++; } if (addFstState(new_id, new NeuronFstHistory(bo_fsth), fst)) { q.push(bo_fsth); try { non_bo_pred.at(new_id) = false; } catch (exception e) { non_bo_pred.resize(new_id+(int) (non_bo_pred.size()*0.5)+1); non_bo_pred.at(new_id) = false; } } dprintf(1,"BACKOFF\t[%li]\t(%s)\n-------\t[%li]\t(%s)\n", id, fsth.toString().c_str(), new_id, bo_fsth.toString().c_str()); fst.AddArc(id, LogArc(EPSILON, EPSILON, LogWeight::Zero(), new_id)); addPred(pred, new_id, id); } vector<real>::iterator it_p = to_be_added_prob.begin(); for (vector<int>::iterator it = to_be_added.begin(); it != to_be_added.end(); ++it) { w = *it; p = *it_p; if (w == 0) { fst.AddArc(id, LogArc(FstWord(w),FstWord(w),p,FINAL_STATE)); dprintf(1,"EDGE [%li] (%s)\n---- %i (%s) / %f -->\n---- [%li] FINAL STATE)\n\n", id, fsth.toString().c_str(), FstWord(w), rnnlm.getWordString(w), p, FINAL_STATE); } //accept edge else { new_fsth.setLastWord(w); //if sw not in the memory //then add a new state for sw in the FST and push sw in the queue if (addFstState(new_id, new NeuronFstHistory(new_fsth), fst)) { q.push(new_fsth); try { non_bo_pred.at(new_id) = true; } catch (exception e) { non_bo_pred.resize(new_id+(int) 
(non_bo_pred.size()*0.5)+1); non_bo_pred.at(new_id) = true; } } else { /* already exists */ } //add the edge in the FST non_bo_pred.at(new_id) = true; fst.AddArc(id, LogArc(FstWord(w),FstWord(w),p,new_id)); dprintf(1,"EDGE [%li] (%s)\n---- %i (%s) / %f -->\n---- [%li] (%s)\n\n", id, fsth.toString().c_str(), FstWord(w), rnnlm.getWordString(w), p, new_id, new_fsth.toString().c_str()); // posterior.at(new_id) += posterior[id]*p; } /*if (posterior[id]+p < LogWeight::Zero().Value()) { p_joint = exp(-posterior[id]-p); entropy -= p_joint*log2(p_joint); }*/ ++it_p; } n_added = next_n_added; n_processed = next_n_processed; //reset queues to_be_added.clear(); to_be_added_prob.clear(); // to_be_removed.clear(); } cout << endl; //compute backoff weights deleted = compactBackoffNodes(fst, pred, non_bo_pred); computeAllBackoff(fst, pred); //remove useless nodes removeStates(fst, new_fst, deleted); fst.DeleteStates(); fst = new_fst; //Fill the table of symbols SymbolTable dic("dictionnary"); dic.AddSymbol("*", 0); for (int i=0; i<rnnlm.getVocabSize(); i++) { dic.AddSymbol(string(rnnlm.getWordString(i)), i+1); } fst.SetInputSymbols(&dic); fst.SetOutputSymbols(&dic); //printf("H=%.5f / N proc'd=%li / N added=%li (%.5f %%) %li/%li Nodes (%2.1f %%)\n", entropy, n_processed, n_added, ((float) n_added/ (float)n_processed)*100.0, id, id+q.size(), 100.0 - (float) (100.0*id/(id+q.size()))); cout << "END" << endl; }
/*-------------------------------------------------------------------------*/ void AzPrepText2::gen_regions_parsup(int argc, const char *argv[]) const { const char *eyec = "AzPrepText2::gen_regions_parsup"; AzPrepText2_gen_regions_parsup_Param p(argc, argv, out); check_batch_id(p.s_batch_id); AzMats_file<AzSmat> mfile; int feat_data_num = mfile.reset_for_read(p.s_feat_fn.c_str()); AzStrPool sp_typ(10,10); sp_typ.put(kw_bow, kw_seq); AzXi::check_input(p.s_xtyp.c_str(), &sp_typ, eyec, kw_xtyp); bool do_xseq = p.s_xtyp.equals(kw_seq); bool do_skip_stopunk = (do_xseq) ? false : true; AzDic dic(p.s_xdic_fn.c_str()); AzX::throw_if((dic.size() <= 0), AzInputError, eyec, "No vocabulary"); /*--- scan files to determine buffer size and #data ---*/ AzOut noout; AzStrPool sp_list; AzIntArr ia_data_num; int buff_size = AzTools_text::scan_files_in_list(p.s_inp_fn.c_str(), p.s_txt_ext.c_str(), noout, &sp_list, &ia_data_num); int data_num = ia_data_num.sum(); AzX::throw_if ((data_num != feat_data_num), eyec, "#data mismatch"); /*--- read data and generate features ---*/ AzDataArr<AzSmat> am_x(data_num), am_y(data_num); buff_size += 256; AzBytArr s_buff; AzByte *buff = s_buff.reset(buff_size, 0); int no_data = 0, data_no = 0, cnum = 0, cnum_before_reduce = 0; feat_info fi[2]; for (int fx = 0; fx < sp_list.size(); ++fx) { /* for each file */ AzBytArr s_fn(sp_list.c_str(fx), p.s_txt_ext.c_str()); const char *fn = s_fn.c_str(); AzTimeLog::print(fn, log_out); AzFile file(fn); file.open("rb"); int num_in_file = ia_data_num.get(fx); int inc = num_in_file / 50, milestone = inc; int dx = 0; for ( ; ; ++dx) { /* for each doc */ AzTools::check_milestone(milestone, dx, inc); int len = file.gets(buff, buff_size); if (len <= 0) break; /*--- X ---*/ AzBytArr s_data(buff, len); int my_len = s_data.length(); AzIntArr ia_tokno; int nn = 1; AzTools_text::tokenize(s_data.point_u(), my_len, &dic, nn, p.do_lower, p.do_utf8dashes, &ia_tokno); AzIntArr ia_pos; bool do_allow_zero = false; if (do_xseq) 
gen_X_seq(ia_tokno, dic.size(), p.pch_sz, p.pch_step, p.padding, do_allow_zero, do_skip_stopunk, am_x.point_u(data_no), &ia_pos); else gen_X_bow(ia_tokno, dic.size(), p.pch_sz, p.pch_step, p.padding, do_skip_stopunk, am_x.point_u(data_no), &ia_pos); AzSmat m_feat; mfile.read(&m_feat); if (am_x.point(data_no)->colNum() <= 0) { ++no_data; continue; } if (p.top_num_each > 0 || p.top_num_total > 0 || p.scale_y > 0) { double min_ifeat = m_feat.min(); AzX::no_support((min_ifeat < 0), eyec, "Negative values for internal-feature components."); } /*--- Y (ifeat: internal features generated by a supervised model) ---*/ gen_Y_ifeat(p.top_num_each, p.top_num_total, &m_feat, &ia_tokno, &ia_pos, p.pch_sz, -p.dist, p.dist, p.do_nolr, p.f_pch_sz, p.f_pch_step, p.f_padding, am_y.point_u(data_no), fi); if (p.min_yval > 0) { am_y.point_u(data_no)->cut(p.min_yval); } cnum_before_reduce += am_x.point(data_no)->colNum(); reduce_xy(p.min_x, p.min_y, am_x.point_u(data_no), am_y.point_u(data_no)); if (am_x.point(data_no)->colNum() <= 0) { ++no_data; continue; } cnum += am_x.point(data_no)->colNum(); ++data_no; } /* for each doc */ AzTools::finish_milestone(milestone); AzBytArr s(" #data="); s << data_no << " no_data=" << no_data << " #col=" << cnum; AzPrint::writeln(out, s); } /* for each file */ mfile.done(); AzBytArr s("#data="); s << data_no << " no_data=" << no_data << " #col=" << cnum << " #col_all=" << cnum_before_reduce; AzPrint::writeln(out, s); s.reset("all:"); fi[0].show(s); AzPrint::writeln(out, s); s.reset("top:"); fi[1].show(s); AzPrint::writeln(out, s); if (p.do_binarize) { AzTimeLog::print("Binarizing Y ... ", log_out); for (int dx = 0; dx < data_no; ++dx) am_y(dx)->binarize(); /* (x>0) ? 1 : (x<0) ? 
-1 : 0 */ } else if (p.scale_y > 0) { double max_top = fi[1].max_val; double scale = 1; if (max_top < p.scale_y) for ( ; ; scale *= 2) if (max_top*scale >= p.scale_y) break; if (max_top > p.scale_y*2) for ( ; ; scale /= 2) if (max_top*scale <= p.scale_y*2) break; s.reset("Multiplying Y with "); s << scale; AzPrint::writeln(out, s); for (int dx = 0; dx < data_no; ++dx) am_y(dx)->multiply(scale); } const char *outnm = p.s_rnm.c_str(); AzTimeLog::print("Generating X ... ", out); write_XY(am_x, data_no, p.s_batch_id, outnm, p.s_x_ext.c_str(), &dic, xtext_ext); AzTimeLog::print("Generating Y ... ", out); write_XY(am_y, data_no, p.s_batch_id, outnm, p.s_y_ext.c_str()); }
int main() { //string url("http://www.amazon.com/"); std::string a("http://www.amazon.com/Turtle-Beach-Call-Duty-Playstation-3/"); std::string b("dp/B005EEMYCO/ref=sr_1_7?ie=UTF8&qid=1329798456&sr=8-7"); //std::string a("http://www.amazon.com/dp/B002GYWHSQ/?tag=googhydr-"); //std::string b("20&hvadid=16413069595&ref=pd_sl_1gtlqzgaj7_b"); std::string c = a.append(b); //std::string c("http://eng.utah.edu/~ccurtis/page1.html"); std::string url(c); std::string filename ("words.txt"); Dictionary dic(filename); RateLimiter limiter(8); /* DEBUG */ printf("Calling Map!\n"); Page *page = Map(url, limiter, dic); //Page page = Map(url, dic); //printf("Page URL: %s\n", page.Url().c_str()); printf("<<Map has Finished Running>>\n"); std::vector<std::string> links = page->GetLinks(); //std::vector<string>::iterator it; //cout << "Links: \n \n" << endl; //for (it = links.begin(); it < links.end(); ++it) //cout << "link ~ " << *it << "\n\n" << endl; /* Set up Page's Histogram */ Word_Count *histogram = page->GetHistogram(); // Print the Histogram printf("Page has %i total English words!\n", histogram->NumberOfWords()); std::vector<std::string> words = histogram->GetWords(); printf("There are %i misspellings!\n", page->GetMisspellings()->NumberOfWords()); std::vector<AmazonCustomer> customers = page->GetReviewers(); std::vector<AmazonCustomer>::iterator it; std::vector<std::pair <double, double> >::iterator it2; printf("\nPage has %i images\n\n", page->GetImageCount()); printf("\nReviewers: \n\n"); double x; std::vector <std::pair <double, double> > pairs; for (it = customers.begin(); it < customers.end(); ++it) { x = ((double)(*it).length_of_reviews/(*it).number_of_reviews); printf(" ~ %s\n", (*it).name.c_str()); printf("Total Words = %i\n", (*it).length_of_reviews); printf("Has %i total reviews.\n", (*it).number_of_reviews); printf("With an average review length of %f words.\n", x); printf("And %i misspellings.\n\n", (*it).misspellings_count); printf("Max Review Length of %i\n", 
(*it).max_review_length); pairs = (*it).stars_cost; int j = 0; for (it2 = pairs.begin(); it2 < pairs.end(); ++it2) { printf("(Stars, Cost) = (%f, %f)\n", pairs.at(j).first, pairs.at(j).second); j += 1; } } return 0; }