void BytesTrieTest::TestHasUniqueValue() { LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); if(trie.isNull()) { return; // buildTrie() reported an error } int32_t uniqueValue; if(trie->hasUniqueValue(uniqueValue)) { errln("unique value at root"); } trie->next('j'); trie->next('a'); trie->next('n'); // hasUniqueValue() directly after next() if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) { errln("not unique value 1 after \"jan\""); } trie->first('j'); trie->next('u'); if(trie->hasUniqueValue(uniqueValue)) { errln("unique value after \"ju\""); } if(trie->next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) { errln("not normal value 6 after \"jun\""); } // hasUniqueValue() after getValue() if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) { errln("not unique value 6 after \"jun\""); } // hasUniqueValue() from within a linear-match node trie->first('a'); trie->next('u'); if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) { errln("not unique value 8 after \"au\""); } }
void BytesTrieTest::TestIteratorFromLinearMatch() { LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); if(trie.isNull()) { return; // buildTrie() reported an error } // Go into a linear-match node. trie->next('j'); trie->next('a'); trie->next('n'); trie->next('u'); trie->next('a'); IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()"); BytesTrie::Iterator iter(*trie, 0, errorCode); if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { return; } // Expected data: Same as in buildMonthsTrie(), except only the suffixes // following "janua". static const StringAndValue data[]={ { "r", 1 }, { "ry", 1 } }; checkIterator(iter, data, UPRV_LENGTHOF(data)); // Reset, and we should get the same result. logln("after iter.reset()"); checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); }
void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() { static const StringAndValue data[]={ { "abcdef", 10 }, { "abcdepq", 200 }, { "abcdeyz", 3000 } }; LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); if(trie.isNull()) { return; // buildTrie() reported an error } // Go into a linear-match node. trie->next('a'); trie->next('b'); trie->next('c'); IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()"); // Truncate after the linear-match node. BytesTrie::Iterator iter(*trie, 3, errorCode); if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { return; } static const StringAndValue expected[]={ { "def", 10 }, { "dep", -1 }, { "dey", -1 } }; checkIterator(iter, expected, UPRV_LENGTHOF(expected)); // Reset, and we should get the same result. logln("after iter.reset()"); checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected)); }
/*
 * Entry point: strfind <strpool> <strcheck> <result>
 *
 * Opens the two input files, creates "<result>_trie.dat", runs trie() over
 * them and prints the elapsed CPU time in seconds.
 *
 * Returns 0 in every case, as the original did, so existing callers see the
 * same exit status.
 */
int main(int argc, char **argv) {
    clock_t start=clock();

    /* Check the argument count BEFORE touching argv[1..3]; reading them with
     * too few arguments dereferences NULL or runs past the argv array. */
    if(argc != 4) {
        printf("USAGE: strfind strpool strcheck result!\n");
        return 0;
    }

    /* Build the output name in a private buffer. Appending to argv[3] in
     * place (strcat) writes past the end of the argument string. */
    char resultPath[4096];
    int written = snprintf(resultPath, sizeof(resultPath), "%s_trie.dat", argv[3]);
    if(written < 0 || (size_t)written >= sizeof(resultPath)) {
        printf("Result file name too long!\n");
        return 0;
    }

    FILE *fpStrpool = fopen(argv[1],"r");
    FILE *fpCheckedstr = fopen(argv[2],"r");
    //FILE *fpResult = fopen(strcat(argv[3],"_bloom.dat"),"w");
    FILE *fpResult2 = fopen(resultPath,"w");

    /*
    FILE *fpStrpool = fopen("strpool.dat","r");
    FILE *fpCheckedstr = fopen("checkedemail.dat","r");
    FILE *fpResult2 = fopen("result_trie.dat","w");
    FILE *fpResult = fopen("result_bloom.dat","w");*/

    if(fpStrpool == NULL || fpCheckedstr == NULL || fpResult2 == NULL) {
        if(fpStrpool == NULL || fpCheckedstr == NULL) {
            printf("Input file not found!\n");
        } else {
            printf("Cannot open result file!\n");
        }
        /* Release whichever handles were opened successfully. */
        if(fpStrpool != NULL) fclose(fpStrpool);
        if(fpCheckedstr != NULL) fclose(fpCheckedstr);
        if(fpResult2 != NULL) fclose(fpResult2);
        return 0;
    }

    /*
    bloom(fpStrpool,fpCheckedstr,fpResult);
    rewind(fpStrpool);
    rewind(fpCheckedstr);
    */
    trie(fpStrpool,fpCheckedstr,fpResult2);

    fclose(fpStrpool);
    fclose(fpCheckedstr);
    fclose(fpResult2);

    printf("%f\n",(double)(clock()-start)/CLOCKS_PER_SEC);
    return 0;
}
/**
 * Looks up an alias in the BytesTrie located at bytesTries+bytesTrieOffset.
 *
 * @param bytesTrieOffset offset of the trie within bytesTries
 * @param alias name to look up
 * @return the trie value when containsName() succeeds, otherwise
 *         UCHAR_INVALID_CODE
 */
int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
    BytesTrie trie(bytesTries+bytesTrieOffset);
    if(!containsName(trie, alias)) {
        return UCHAR_INVALID_CODE;
    }
    // containsName() left the trie positioned on the matched name.
    return trie.getValue();
}
/**
 * Looks up the NUL-terminated string s in the serialized trie.
 *
 * @param s key to look up (length -1 is passed to next())
 * @param nameTrieBytes serialized BytesTrie data
 * @return the value stored for s, or -1 when next() reports no value
 */
static int32_t bytesTrieLookup(const char *s, const char *nameTrieBytes) {
    BytesTrie trie(nameTrieBytes);
    if(!USTRINGTRIE_HAS_VALUE(trie.next(s, -1))) {
        return -1;
    }
    return trie.getValue();
}
void insert( const T &collection ) { trie *node = this; for( auto &c : collection ) { auto found = node->children.find( c ); if( found == node->children.end() ) node->children[c] = trie( node->value + c ); node = &(node->children[c]); } node->flag = true; }
void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) { LocalPointer<BytesTrie> trie(buildTrie(data, dataLength, buildOption)); if(trie.isNull()) { return; // buildTrie() reported an error } checkFirst(*trie, data, dataLength); checkNext(*trie, data, dataLength); checkNextWithState(*trie, data, dataLength); checkNextString(*trie, data, dataLength); checkIterator(*trie, data, dataLength); }
/// \brief Inserts a word (given by iterators \p begin and \p end) /// \returns true if the element was inserted, false if already there template <typename Iterator> bool insert(Iterator && begin, Iterator && end) { if (begin == end) return false; size_t i = *begin++; if (i >= branches.size()) branches.resize(i + 1); auto & b = branches[i]; if (b) return b->insert(begin, end); b = trie(); b->insert(begin, end); return true; }
/// Inserts every element of \p collection as a path below this node,
/// creating missing children on the way. A new child is constructed from a
/// copy of the parent's `branch` with the element appended. Sets `flag` on
/// the final node and returns a reference to its `leaf`.
V& insert( const K &collection )
{
    trie *cursor = this;
    for( auto &element : collection )
    {
        if( cursor->children.find( element ) == cursor->children.end() )
        {
            auto prefix = cursor->branch;
            prefix.push_back( element );
            cursor->children[element] = trie( prefix );
        }
        cursor = &cursor->children[element];
    }
    cursor->flag = true;
    return cursor->leaf;
}
int main() { trie_t trie("actrie.bin"); // fold through the key-matching nodes const char *ret = 0; trie.fold_full("01234567", ret, fun); std::cout << "lookup result: " << (ret ? ret : "not found") << std::endl; // traverse all the nodes trie.foreach<ct::up, std::string>(enumerate); trie.foreach<ct::down, std::string>(enumerate); return 0; }
void BytesTrieTest::TestGetNextBytes() { LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); if(trie.isNull()) { return; // buildTrie() reported an error } char buffer[40]; CheckedArrayByteSink sink(buffer, UPRV_LENGTHOF(buffer)); int32_t count=trie->getNextBytes(sink); if(count!=2 || sink.NumberOfBytesAppended()!=2 || buffer[0]!='a' || buffer[1]!='j') { errln("months getNextBytes()!=[aj] at root"); } trie->next('j'); trie->next('a'); trie->next('n'); // getNextBytes() directly after next() count=trie->getNextBytes(sink.Reset()); buffer[count]=0; if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) { errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\""); } // getNextBytes() after getValue() trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE. memset(buffer, 0, sizeof(buffer)); count=trie->getNextBytes(sink.Reset()); if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) { errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()"); } // getNextBytes() from a linear-match node trie->next('u'); memset(buffer, 0, sizeof(buffer)); count=trie->getNextBytes(sink.Reset()); if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='a') { errln("months getNextBytes()!=[a] after \"janu\""); } trie->next('a'); memset(buffer, 0, sizeof(buffer)); count=trie->getNextBytes(sink.Reset()); if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='r') { errln("months getNextBytes()!=[r] after \"janua\""); } trie->next('r'); trie->next('y'); // getNextBytes() after a final match count=trie->getNextBytes(sink.Reset()); if(count!=0 || sink.NumberOfBytesAppended()!=0) { errln("months getNextBytes()!=[] after \"january\""); } }
void BytesTrieTest::TestIteratorFromBranch() { LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); if(trie.isNull()) { return; // buildTrie() reported an error } // Go to a branch node. trie->next('j'); trie->next('a'); trie->next('n'); IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()"); BytesTrie::Iterator iter(*trie, 0, errorCode); if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { return; } // Expected data: Same as in buildMonthsTrie(), except only the suffixes // following "jan". static const StringAndValue data[]={ { "", 1 }, { ".", 1 }, { "a", 1 }, { "bb", 1 }, { "c", 1 }, { "ddd", 1 }, { "ee", 1 }, { "ef", 1 }, { "f", 1 }, { "gg", 1 }, { "h", 1 }, { "iiii", 1 }, { "j", 1 }, { "kk", 1 }, { "kl", 1 }, { "kmm", 1 }, { "l", 1 }, { "m", 1 }, { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, { "o", 1 }, { "pp", 1 }, { "qqq", 1 }, { "r", 1 }, { "uar", 1 }, { "uary", 1 } }; checkIterator(iter, data, UPRV_LENGTHOF(data)); // Reset, and we should get the same result. logln("after iter.reset()"); checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); }
/**
 * Builds a BytesTrie from data using the shared builder_.
 *
 * Items are added in a deterministic but non-sequential order chosen from
 * dataLength. The serialized form is requested before and after build() so
 * the two arrays can be compared, and an add() after build() is expected to
 * fail with U_NO_WRITE_PERMISSION.
 *
 * @return a newly allocated trie owned by the caller (from build() for odd
 *         dataLength, otherwise constructed from the second serialized
 *         array), or NULL when errorCode indicates failure
 */
BytesTrie *BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
                                    UStringTrieBuildOption buildOption) {
    IcuTestErrorCode errorCode(*this, "buildTrie()");

    // Add the items to the trie builder in an interesting (not trivial, not random) order.
    int32_t pos, stride;
    if((dataLength&1)!=0) {
        // Odd number of items.
        pos=dataLength/2;
        stride=2;
    } else if((dataLength%3)!=0) {
        // Not a multiple of 3.
        pos=dataLength/5;
        stride=3;
    } else {
        pos=dataLength-1;
        stride=-1;
    }

    builder_->clear();
    for(int32_t added=0; added<dataLength; ++added) {
        const StringAndValue &item=data[pos];
        builder_->add(item.s, item.value, errorCode);
        pos=(pos+stride)%dataLength;
    }

    // Serialize first, then build: the order matters for the array
    // comparison below.
    StringPiece firstBytes=builder_->buildStringPiece(buildOption, errorCode);
    LocalPointer<BytesTrie> built(builder_->build(buildOption, errorCode));
    if(!errorCode.logIfFailureAndReset("add()/build()")) {
        builder_->add("zzz", 999, errorCode);
        if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
            errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
        }
    }
    logln("serialized trie size: %ld bytes\n", (long)firstBytes.length());

    StringPiece secondBytes=builder_->buildStringPiece(buildOption, errorCode);
    if(firstBytes.data()==secondBytes.data()) {
        errln("builder.buildStringPiece() before & after build() returned same array");
    }
    if(errorCode.isFailure()) {
        return NULL;
    }

    // Tries from either build() method should be identical but
    // BytesTrie does not implement equals().
    // We just return either one.
    if((dataLength&1)!=0) {
        return built.orphan();
    }
    return new BytesTrie(secondBytes.data());
}
/// Tokenizes a stream with a trie loaded from a file.
///
/// Usage: <program> <trie-file> [<input-file> | -]
///
/// The input is read from <input-file> in binary mode when given and not
/// "-", otherwise from standard input. Tokens are written to standard
/// output through apertium_printer.
///
/// \returns 0 on success, 1 when the trie argument is missing or the input
/// file cannot be opened
int main(int argc, char *argv[])
{
    std::vector<std::string> args(argv, argv + argc);

    std::cin.sync_with_stdio(false);
    std::cout.sync_with_stdio(false);

    // args[1] is required; indexing it without this check is undefined
    // behaviour when the program is started with no arguments.
    if (args.size() < 2) {
        const std::string program = args.empty() ? std::string("program") : args[0];
        std::cerr << "Usage: " << program << " <trie-file> [<input-file> | -]" << std::endl;
        return 1;
    }

    trie_t trie(args[1].c_str());
    tdc::trie_tokenizer<> tokenizer(trie);

    if (args.size() > 2 && args[2] != "-") {
        std::ifstream in(args[2].c_str(), std::ios::binary);
        if (!in) {
            // Report the failure instead of silently tokenizing nothing.
            std::cerr << "Cannot open input file '" << args[2] << "'" << std::endl;
            return 1;
        }
        tokenizer.tokenize(in, apertium_printer(std::cout));
    } else {
        tokenizer.tokenize(std::cin, apertium_printer(std::cout));
    }
    return 0;
}
void BytesTrieTest::TestTruncatingIteratorFromRoot() { LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); if(trie.isNull()) { return; // buildTrie() reported an error } IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()"); BytesTrie::Iterator iter(*trie, 4, errorCode); if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { return; } // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters // of each string, and no string duplicates from the truncation. static const StringAndValue data[]={ { "augu", -1 }, { "jan", 1 }, { "jan.", 1 }, { "jana", 1 }, { "janb", -1 }, { "janc", 1 }, { "jand", -1 }, { "jane", -1 }, { "janf", 1 }, { "jang", -1 }, { "janh", 1 }, { "jani", -1 }, { "janj", 1 }, { "jank", -1 }, { "janl", 1 }, { "janm", 1 }, { "jann", -1 }, { "jano", 1 }, { "janp", -1 }, { "janq", -1 }, { "janr", 1 }, { "janu", -1 }, { "july", 7 }, { "jun", 6 }, { "jun.", 6 }, { "june", 6 } }; checkIterator(iter, data, UPRV_LENGTHOF(data)); // Reset, and we should get the same result. logln("after iter.reset()"); checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); }
void UCharsTrieTest::TestNextForCodePoint() { static const StringAndValue data[]={ { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 }, { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 }, { "\\u4dff\\U000103ff", 99999 } }; LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); if(trie.isNull()) { return; // buildTrie() reported an error } UStringTrieResult result; if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || trie->getValue()!=2000000000 ) { errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s); } if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || trie->getValue()!=44444 ) { errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s); } if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current() // no match for trail surrogate ) { 
errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222"); } if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || trie->getValue()!=99999 ) { errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s); } }
int main(int argc, char** argv) { if (!(argc == 4 || (argc == 5 && strcmp(argv[1], "-t") == 0))) { printf("Usage: %s [-t] word_list_file dawg_file unicharset_file", argv[0]); return 1; } tesseract::Classify classify; int argv_index = 0; if (argc == 5) ++argv_index; const char* wordlist_filename = argv[++argv_index]; const char* dawg_filename = argv[++argv_index]; const char* unicharset_file = argv[++argv_index]; if (!classify.getDict().getUnicharset().load_from_file(unicharset_file)) { tprintf("Failed to load unicharset from '%s'\n", unicharset_file); return 1; } const UNICHARSET &unicharset = classify.getDict().getUnicharset(); if (argc == 4) { tesseract::Trie trie( // the first 3 arguments are not used in this case tesseract::DAWG_TYPE_WORD, "", SYSTEM_DAWG_PERM, kMaxNumEdges, unicharset.size()); printf("Reading word list from '%s'\n", wordlist_filename); if (!trie.read_word_list(wordlist_filename, unicharset)) { printf("Failed to read word list from '%s'\n", wordlist_filename); exit(1); } printf("Reducing Trie to SquishedDawg\n"); tesseract::SquishedDawg *dawg = trie.trie_to_dawg(); printf("Writing squished DAWG to '%s'\n", dawg_filename); dawg->write_squished_dawg(dawg_filename); delete dawg; } else { printf("Loading dawg DAWG from '%s'\n", dawg_filename); tesseract::SquishedDawg words( dawg_filename, // these 3 arguments are not used in this case tesseract::DAWG_TYPE_WORD, "", SYSTEM_DAWG_PERM); printf("Checking word list from '%s'\n", wordlist_filename); words.check_for_words(wordlist_filename, unicharset, true); } return 0; }
/** * サンプル・コマンド */ int main() { Node<char>* trie(0); const char key[N][16] = {"array", "bold", "curry", "cute", "art", "alert", "dish"}; if (3 < N) trie = add(trie, key[3]); for (int i(0); i + 2 < N; ++i) trie = add(trie, key[i]); for (int i(0); i < N; ++i) { std::printf("%s: %s\n", key[i], find(trie, key[i]) ? "yes" : "no"); } clear(trie); return 0; }
/*
 * Demo driver for the Tableau helpers: seeds the random generator, fills
 * and prints a table, prints its minimum and product, shifts it, and
 * finally sorts it.
 */
int main(int argc, char **argv) {
    /* Seed with the current time. The original "(time)NULL" is not a call
     * to time(). */
    srand(time(NULL));

    /* sizeof yields a size_t; convert explicitly to match the %u format. */
    printf("Taille structure = %u\n", (unsigned)sizeof(Tableau));
    /* Author's note: (array size * size of int) + (size of the 'taille'
     * field) = 100*4 + 1 = 404 */

    /*
    int a = 0; // Arbitrary initial value for a
    printf("%d\n", alea(a));
    */

    /* initialise() receives the table by value, so give it a defined
     * (zeroed) one rather than passing T to its own initialiser. */
    Tableau T = {0};
    T = initialise(T);
    affiche(T);

    printf("Elément minimum du tableau : %d\n", minimum(T));

    /* Avoids printing a negative product (treated here as "too large"). */
    if (produit(T) < 0)
        printf("Produit trop grand");
    else
        printf("Produit des éléments du tableau : %d\n", produit(T));

    /* Shift */
    T = decalage(T);
    putchar('\n');
    printf("Tableau après décalage : \n");
    affiche(T);
    printf("Taille du tableau après le décalage = %d\n\n", T.taille);

    /* Sort */
    printf("Après trie du tableau : \n");
    T = trie(T);
    affiche(T);

    return 0;
}
int main(int argc, char **argv) { /// trie that associates a integer to strings /// 0 is the default value I want to receive when there is no match /// in trie edm::Trie<int> trie(0); typedef edm::TrieNode<int> Node; typedef Node const * pointer; // sigh.... typedef edm::TrieNodeIter<int> node_iterator; char tre[] = {'a','a','a'}; char quattro[] = {'c','a','a','a'}; for (int j=0;j<3;j++) { tre[2]='a'; quattro[3]='a'; for (int i=0;i<10;i++) { trie.insert(tre,3,i); trie.insert(quattro,4,i); tre[2]++; quattro[3]++; } tre[1]++; quattro[2]++; } std::cout << "get [aac] " << trie.find("aac", 3) << std::endl; std::cout << "get [caae] = " << trie.find("caae", 4) << std::endl; trie.setEntry("caag", 4, -2); std::cout << "get [caag] = " << trie.find("caag", 4) << std::endl; // no match std::cout << "get [abcd] = " << trie.find("abcd", 4) << std::endl; // no match std::cout << "get [ca] = " << trie.find("ca", 2) << std::endl; trie.display(std::cout); std::cout << std::endl; pointer pn = trie.node("ab",2); // if (pn) pn->display(std::cout,0,' '); std::cout << std::endl; { node_iterator e(pn,false); std::cout << "\n ab iteration" << std::endl; for (node_iterator p(pn,true); p!=e; p++) std::cout << "ab" << p.label() << " = " << p->value() << std::endl; } { std::cout << "\n ab iteration: string" << std::endl; pn = trie.node("ab"); Node::const_iterator p = (*pn).begin(); Node::const_iterator e = (*pn).end(); for (; p!=e; p++) std::cout << "ab" << p.label() << " = " << p->value() << std::endl; } { pn = trie.initialNode(); node_iterator e(pn,false); std::cout << "\ntop iteration"<< std::endl; for (node_iterator p(pn,true); p!=e; p++) std::cout << p.label() << " = " << p->value() << std::endl; std::cout << std::endl; } std::cout << "\nfull walk"<< std::endl; Print pr; edm::walkTrie(pr,*trie.initialNode()); std::cout << std::endl; std::cout << "\nleaves iteration"<< std::endl; edm::iterateTrieLeaves(pr,*trie.initialNode()); std::cout << std::endl; }
int main(int, char**) try { /// trie that associates a integer to strings /// 0 is the default value I want to receive when there is no match /// in trie edm::Trie<int> trie(0); typedef edm::TrieNode<int> Node; typedef Node const* pointer; // sigh.... typedef edm::TrieNodeIter<int> node_iterator; char tre[] = {'a', 'a', 'a'}; char quattro[] = {'c', 'a', 'a', 'a'}; for (int j = 0; j < 3; j++) { tre[2] = 'a'; quattro[3] = 'a'; for (int i = 0; i < 10; i++) { trie.insert(tre, 3, i); trie.insert(quattro, 4, i); tre[2]++; quattro[3]++; } tre[1]++; quattro[2]++; } std::cout << "get [aac] " << trie.find("aac", 3) << std::endl; std::cout << "get [caae] = " << trie.find("caae", 4) << std::endl; trie.setEntry("caag", 4, -2); std::cout << "get [caag] = " << trie.find("caag", 4) << std::endl; // no match std::cout << "get [abcd] = " << trie.find("abcd", 4) << std::endl; // no match std::cout << "get [ca] = " << trie.find("ca", 2) << std::endl; trie.display(std::cout); std::cout << std::endl; pointer pn = trie.node("ab", 2); if (pn) pn->display(std::cout, 0, ' '); std::cout << std::endl; node_iterator e; std::cout << "\n ab iteration" << std::endl; for (node_iterator p(trie.node("ab", 2)); p != e; p++) { std::cout << "ab" << p.label() << " = " << p->value() << std::endl; } std::cout << "\n ab iteration: string" << std::endl; pn = trie.node("ab"); e = pn->end(); for (Node::const_iterator p = pn->begin(); p != e; p++) { std::cout << "ab" << p.label() << " = " << p->value() << std::endl; } std::cout << "\ntop iteration" << std::endl; for (node_iterator p(trie.initialNode()); p != e; p++) { std::cout << p.label() << " = " << p->value() << std::endl; } std::cout << std::endl; std::cout << "\nfull walk" << std::endl; Print pr; edm::walkTrie(pr, *trie.initialNode()); std::cout << std::endl; std::cout << "\nleaves iteration" << std::endl; edm::iterateTrieLeaves(pr, *trie.initialNode()); std::cout << std::endl; return 0; } catch (cms::Exception const& e) { std::cerr << e.explainSelf() << 
std::endl; return 1; } catch (std::exception const& e) { std::cerr << e.what() << std::endl; return 1; }
bool UnicodeTournamentTrie::Preprocess( IImporter* importer, QString dir ) { QString filename = fileInDirectory( dir, "Unicode Tournament Trie" ); QFile subTrieFile( filename + "_sub" ); QFile wayFile( filename + "_ways" ); if ( !openQFile( &subTrieFile, QIODevice::WriteOnly ) ) return false; if ( !openQFile( &wayFile, QIODevice::WriteOnly ) ) return false; std::vector< IImporter::Place > inputPlaces; std::vector< IImporter::Address > inputAddress; std::vector< UnsignedCoordinate > inputWayBuffer; std::vector< QString > inputWayNames; if ( !importer->GetAddressData( &inputPlaces, &inputAddress, &inputWayBuffer, &inputWayNames ) ) return false; Timer time; std::sort( inputAddress.begin(), inputAddress.end() ); qDebug() << "Unicode Tournament Trie: sorted addresses by importance:" << time.restart() << "ms"; std::vector< UnsignedCoordinate > wayBuffer; std::vector< utt::Node > trie( 1 ); unsigned address = 0; // build address name index QMultiHash< unsigned, unsigned > addressByName; for ( ; address < inputAddress.size(); address++ ) { addressByName.insert( inputAddress[address].name, address ); } // compute way lengths QList< unsigned > uniqueNames = addressByName.uniqueKeys(); std::vector< std::pair< double, unsigned > > wayLengths; for ( unsigned name = 0; name < ( unsigned ) uniqueNames.size(); name++ ) { QList< unsigned > segments = addressByName.values( uniqueNames[name] ); double distance = 0; for( unsigned segment = 0; segment < ( unsigned ) segments.size(); segment++ ) { const IImporter::Address segmentAddress = inputAddress[segment]; for ( unsigned coord = 1; coord < segmentAddress.pathLength; ++coord ) { GPSCoordinate sourceGPS = inputWayBuffer[segmentAddress.pathID + coord - 1].ToProjectedCoordinate().ToGPSCoordinate(); GPSCoordinate targetGPS = inputWayBuffer[segmentAddress.pathID + coord].ToProjectedCoordinate().ToGPSCoordinate(); distance += sourceGPS.ApproximateDistance( targetGPS ); } } wayLengths.push_back( std::pair< double, unsigned >( distance, 
name ) ); } // sort ways by aggregate lengths std::sort( wayLengths.begin(), wayLengths.end() ); std::vector< unsigned > wayImportance( uniqueNames.size() ); for ( unsigned way = 0; way < wayLengths.size(); way++ ) wayImportance[wayLengths[way].second] = way; wayLengths.clear(); std::vector< utt::Node > subTrie( 1 ); for ( unsigned name = 0; name < ( unsigned ) uniqueNames.size(); name++ ) { QList< unsigned > segments = addressByName.values( uniqueNames[name] ); // build edge connector data structures std::vector< EdgeConnector< UnsignedCoordinate>::Edge > connectorEdges; std::vector< unsigned > resultSegments; std::vector< unsigned > resultSegmentDescriptions; std::vector< bool > resultReversed; for ( unsigned segment = 0; segment < ( unsigned ) segments.size(); segment++ ) { const IImporter::Address& segmentAddress = inputAddress[segments[segment]]; EdgeConnector< UnsignedCoordinate >::Edge newEdge; newEdge.source = inputWayBuffer[segmentAddress.pathID]; newEdge.target = inputWayBuffer[segmentAddress.pathID + segmentAddress.pathLength - 1]; newEdge.reverseable = true; connectorEdges.push_back( newEdge ); } EdgeConnector< UnsignedCoordinate >::run( &resultSegments, &resultSegmentDescriptions, &resultReversed, connectorEdges ); // string places with the same name together unsigned nextID = 0; for ( unsigned segment = 0; segment < resultSegments.size(); segment++ ) { utt::Data subEntry; subEntry.start = wayBuffer.size(); for ( unsigned description = 0; description < resultSegments[segment]; description++ ) { unsigned segmentID = resultSegmentDescriptions[nextID + description]; const IImporter::Address& segmentAddress = inputAddress[segments[segmentID]]; std::vector< UnsignedCoordinate > path; for ( unsigned pathID = 0; pathID < segmentAddress.pathLength; pathID++ ) path.push_back( inputWayBuffer[pathID + segmentAddress.pathID]); if ( resultReversed[segmentID] ) std::reverse( path.begin(), path.end() ); int skipFirst = description == 0 ? 
0 : 1; assert( skipFirst == 0 || wayBuffer.back() == path.front() ); wayBuffer.insert( wayBuffer.end(), path.begin() + skipFirst, path.end() ); } utt::PlaceData placeData; placeData.name = inputPlaces[inputAddress[segments[resultSegmentDescriptions[nextID]]].nearPlace].name; subEntry.length = wayBuffer.size() - subEntry.start; insert( &subTrie, wayImportance[name], inputWayNames[uniqueNames[name]], subEntry, placeData ); nextID += resultSegments[segment]; } } writeTrie( &subTrie, subTrieFile ); assert( address == inputAddress.size() ); qDebug() << "Unicode Tournament Trie: build tries and tournament trees:" << time.restart() << "ms"; for ( std::vector< UnsignedCoordinate >::const_iterator i = wayBuffer.begin(), e = wayBuffer.end(); i != e; ++i ) { wayFile.write( ( char* ) &i->x, sizeof( i->x ) ); wayFile.write( ( char* ) &i->y, sizeof( i->y ) ); } qDebug() << "Unicode Tournament Trie: wrote ways:" << time.restart() << "ms"; return true; }
/*
 * Plays one turn: an initial roll followed by up to two optional re-rolls.
 * After each of the first two rolls the sorted dice and their score are
 * shown and the player enters 0 to keep or any other value to re-roll.
 * The final dice and score are printed at the end.
 */
void tour_de_jeu()
{
    troisdes des;
    int points;
    int choix;

    print_newline();
    print_newline();
    print_newline();

    /* Initial roll */
    print_text("Lancer Initial : ");
    des = trie(lancer());
    aff_des(des);
    print_text("->");
    points = valeur(des);
    print_int(points);
    print_newline();
    print_newline();
    print_newline();

    print_text("Vous gardez [0] ou vous relancez [1]? [Oui/Non]: ");
    choix = read_int();
    print_newline();
    print_newline();
    print_newline();

    /* Second roll, only when the player did not keep the first one */
    if (choix != 0) {
        des = rejouer(des);
        print_newline();
        print_newline();
        print_newline();
        print_text("Deuxieme Lancer : ");
        des = trie(des);
        aff_des(des);
        print_text ("->");
        points = valeur(des);
        print_int(points);
        print_newline();
        print_newline();
        print_newline();

        print_text("Vous gardez [0] ou vous relancez [1] [Oui/Non]: ");
        choix = read_int();
        print_newline();
        print_newline();
        print_newline();
    }

    /* Final result: either the kept dice or one last re-roll */
    if (choix != 0) {
        des = rejouer(des);
        des = trie(des);
        print_newline();
        print_newline();
        aff_des(des);
        print_text("->");
        points = valeur(des);
        print_int(points);
        print_newline();
        print_newline();
    } else {
        aff_des(des);
        print_text("->");
        points = valeur(des);
        print_int(points);
        print_newline();
    }
}
int main(int argc, char** argv) { size_t nof_peptides = 10; if (argc < 4) { std::cout << "Usage: compute_distance <matrix> <trie> <input> [<nof_peptides>]" << std::endl; return -1; } //cout << argc << endl; if (argc > 4) // nof_peptides is optional { nof_peptides = atoi(argv[4]); } string matrix(argv[1]); string trie(argv[2]); string filename(argv[3]); //std::cout << matrix << ", " << trie << ", " << filename << ", " << outname << ", " << nof_peptides << std::endl; //std::cout << "Reading trie..." << std::endl; TrieArray ta; { std::ifstream ifs(trie.c_str()); //trie is a string containing the path and filename of the trie file. boost::archive::text_iarchive ia(ifs); ta.load(ia,1); } Matrix m(matrix); set<string> peptides; // Read petides! One peptide sequence per line { //std::cout << "Reading search peptides and additional information from file " << std::endl; ifstream is(filename.c_str()); if (not is) throw "Cannot open info File!"; string line; while (getline(is,line)) { string::size_type comment = line.find("#"); if (comment == string::npos) { peptides.insert(line); // std::cout << line << std::endl; } } is.close(); } //std::cout << "Computing distances..." << std::endl; //ofstream os( outname.c_str() ); for( set<string>::const_iterator iter = peptides.begin(); iter != peptides.end(); ++iter ) { string s = *iter; //std::cout << s << std::endl; //std::cout << "." ; flush(cout); Node n (0,0); //start at top of the trie Peptide p; // Peptide seq; //std::cout << s << std::endl; m.translate(s, seq); //translate peptide sequence to matrix indices. seq contains the translated peptide sequence. 
multiset<pair<double,string> > dist_min; multiset<pair<double,string> > dt; double dist = 0.0; dist_min = DFS_BnB_x_pair(ta,n,dist,m,p,seq,dt,nof_peptides); // os << s << "," << query_reactivity[s] << "," << query_affinity[s] << "," << query_virality[s] <<":"; //os << s << ":"; cout << s << ":"; for (multiset<pair<double,string> >::iterator it=dist_min.begin() ; it != dist_min.end(); it++ ) //{os << (*it).second <<"," << (*it).first << ";";} {cout << (*it).second <<"," << (*it).first << ";";} //cout << (*it).second << (*it).first << endl;} //{os << (*it).second <<"," << (*it).first << "," << affinities[(*it).second] << ";";} //os << std::endl; cout << std::endl; } //std::cout << std::endl; // os.close(); return 0; }
/// Constructs a SuffixTrie for the string "cacao" and exits; the command
/// line arguments are not used.
int main(int argc, char** argv)
{
    (void)argc;
    (void)argv;

    SuffixTrie trie("cacao");

    return 0;
}
int main(int argc, char** argv) { int min_word_length; int max_word_length; if (!(argc == 4 || (argc == 5 && strcmp(argv[1], "-t") == 0) || (argc == 6 && strcmp(argv[1], "-r") == 0) || (argc == 7 && strcmp(argv[1], "-l") == 0 && sscanf(argv[2], "%d", &min_word_length) == 1 && sscanf(argv[3], "%d", &max_word_length) == 1))) { printf("Usage: %s [-t | -r [reverse policy] |" " -l min_len max_len] word_list_file" " dawg_file unicharset_file\n", argv[0]); return 1; } tesseract::Classify *classify = new tesseract::Classify(); int argv_index = 0; if (argc == 5) ++argv_index; tesseract::Trie::RTLReversePolicy reverse_policy = tesseract::Trie::RRP_DO_NO_REVERSE; if (argc == 6) { ++argv_index; int tmp_int; sscanf(argv[++argv_index], "%d", &tmp_int); reverse_policy = static_cast<tesseract::Trie::RTLReversePolicy>(tmp_int); tprintf("Set reverse_policy to %s\n", tesseract::Trie::get_reverse_policy_name(reverse_policy)); } if (argc == 7) argv_index += 3; const char* wordlist_filename = argv[++argv_index]; const char* dawg_filename = argv[++argv_index]; const char* unicharset_file = argv[++argv_index]; tprintf("Loading unicharset from '%s'\n", unicharset_file); if (!classify->getDict().getUnicharset().load_from_file(unicharset_file)) { tprintf("Failed to load unicharset from '%s'\n", unicharset_file); delete classify; return 1; } const UNICHARSET &unicharset = classify->getDict().getUnicharset(); if (argc == 4 || argc == 6) { tesseract::Trie trie( // the first 3 arguments are not used in this case tesseract::DAWG_TYPE_WORD, "", SYSTEM_DAWG_PERM, kMaxNumEdges, unicharset.size(), classify->getDict().dawg_debug_level); tprintf("Reading word list from '%s'\n", wordlist_filename); if (!trie.read_word_list(wordlist_filename, unicharset, reverse_policy)) { tprintf("Failed to read word list from '%s'\n", wordlist_filename); exit(1); } tprintf("Reducing Trie to SquishedDawg\n"); tesseract::SquishedDawg *dawg = trie.trie_to_dawg(); if (dawg != NULL && dawg->NumEdges() > 0) { 
tprintf("Writing squished DAWG to '%s'\n", dawg_filename); dawg->write_squished_dawg(dawg_filename); } else { tprintf("Dawg is empty, skip producing the output file\n"); } delete dawg; } else if (argc == 5) { tprintf("Loading dawg DAWG from '%s'\n", dawg_filename); tesseract::SquishedDawg words( dawg_filename, // these 3 arguments are not used in this case tesseract::DAWG_TYPE_WORD, "", SYSTEM_DAWG_PERM, classify->getDict().dawg_debug_level); tprintf("Checking word list from '%s'\n", wordlist_filename); words.check_for_words(wordlist_filename, unicharset, true); } else if (argc == 7) { // Place words of different lengths in separate Dawgs. char str[CHARS_PER_LINE]; FILE *word_file = fopen(wordlist_filename, "rb"); if (word_file == NULL) { tprintf("Failed to open wordlist file %s\n", wordlist_filename); exit(1); } FILE *dawg_file = fopen(dawg_filename, "wb"); if (dawg_file == NULL) { tprintf("Failed to open dawg output file %s\n", dawg_filename); exit(1); } tprintf("Reading word list from '%s'\n", wordlist_filename); GenericVector<tesseract::Trie *> trie_vec; int i; for (i = min_word_length; i <= max_word_length; ++i) { trie_vec.push_back(new tesseract::Trie( // the first 3 arguments are not used in this case tesseract::DAWG_TYPE_WORD, "", SYSTEM_DAWG_PERM, kMaxNumEdges, unicharset.size(), classify->getDict().dawg_debug_level)); } while (fgets(str, CHARS_PER_LINE, word_file) != NULL) { chomp_string(str); // remove newline int badpos; if (!unicharset.encodable_string(str, &badpos)) { tprintf("String '%s' not compatible with unicharset. 
" "Bad chars here: '%s'\n", str, str + badpos); continue; } WERD_CHOICE word(str, unicharset); if ((reverse_policy == tesseract::Trie::RRP_REVERSE_IF_HAS_RTL && word.has_rtl_unichar_id()) || reverse_policy == tesseract::Trie::RRP_FORCE_REVERSE) { word.reverse_and_mirror_unichar_ids(); } if (word.length() >= min_word_length && word.length() <= max_word_length && !word.contains_unichar_id(INVALID_UNICHAR_ID)) { tesseract::Trie *curr_trie = trie_vec[word.length()-min_word_length]; if (!curr_trie->word_in_dawg(word)) { if (!curr_trie->add_word_to_dawg(word)) { tprintf("Failed to add the following word to dawg:\n"); word.print(); exit(1); } if (classify->getDict().dawg_debug_level > 1) { tprintf("Added word %s of length %d\n", str, word.length()); } if (!curr_trie->word_in_dawg(word)) { tprintf("Error: word '%s' not in DAWG after adding it\n", str); exit(1); } } } } fclose(word_file); tprintf("Writing fixed length dawgs to '%s'\n", dawg_filename); GenericVector<tesseract::SquishedDawg *> dawg_vec; for (i = 0; i <= max_word_length; ++i) { dawg_vec.push_back(i < min_word_length ? NULL : trie_vec[i-min_word_length]->trie_to_dawg()); } tesseract::Dict::WriteFixedLengthDawgs( dawg_vec, max_word_length - min_word_length + 1, classify->getDict().dawg_debug_level, dawg_file); fclose(dawg_file); dawg_vec.delete_data_pointers(); trie_vec.delete_data_pointers(); } else { // should never get here tprintf("Invalid command-line options\n"); exit(1); } delete classify; return 0; }
/*
 * Demo driver for the `liste` module: head/tail insertion, size and
 * emptiness queries, head/tail removal, sorting two random lists, merging
 * them, reversing the result (iteratively and recursively), and the two
 * palindrome tests.
 */
int main()
{
    int i;
    liste li = nouvListe();
    liste li2 = nouvListe();

    /* Head insertion, size, emptiness */
    adjt(1, li);
    adjt(5, li);
    adjt(-7, li);
    printf ("liste : ");
    affiche(li);
    printf("taille : %d\n", taille(li));
    printf("vide ? : %s\n", (estVide(li)?"oui":"non"));

    /* Tail insertion of the squares 1..100 */
    i = 1;
    while (i <= 10) {
        adjq(i*i, li2);
        ++i;
    }
    printf ("liste : ");
    affiche(li2);
    printf("tete : %d queue : %d\n", tete(li2), queue(li2));

    printf("====== suppressions =========\n");
    supt(li2);
    printf ("apres supt : ");
    affiche(li2);
    supq(li2);
    printf ("apres supq : ");
    affiche(li2);

    /* Build two lists from randomly chosen elements */
    printf("====== tris et renversement =========\n");
    srand(time(NULL)); /* seed the random sequence */

    printf("liste 11 : ");
    liste l1 = nouvListe();
    for (i = 0; i < 15; ++i)
        adjt(rand()%30, l1);
    affiche (l1);

    printf("liste 12 : ");
    liste l2 = nouvListe();
    for (i = 0; i < 10; ++i)
        adjt(rand()%30, l2);
    affiche (l2);

    liste l1t = trie(l1);
    liste l2t = trie(l2);
    printf("liste 11 apres trie : ");
    affiche(l1t);
    printf("liste 12 apres trie : ");
    affiche(l2t);

    liste l3t = interclasse(l1t,l2t);
    printf("interclassement : ");
    affiche(l3t);
    printf("renversement iter : ");
    affiche(renverse_iter(l3t));
    printf("renversement recur : ");
    affiche(renverse_recur(l3t));

    printf("====== palindrome =========\n");
    liste lpalin = nouvListe();
    adjt(1, lpalin);
    adjt(2, lpalin);
    adjq(2, lpalin);
    adjt(8, lpalin);
    adjq(8, lpalin);
    printf("liste : ");
    affiche(lpalin);
    printf("Palindrome (iter) ? %s\n", (palindrome_iter(lpalin)?"oui":"non"));
    printf("Palindrome (recur) ? %s\n", (palindrome_recur(lpalin)?"oui":"non"));

    /* Same tests after removing the head element */
    supt(lpalin);
    printf("liste : ");
    affiche(lpalin);
    printf("Palindrome (iter) ? %s\n", (palindrome_iter(lpalin)?"oui":"non"));
    printf("Palindrome (recur) ? %s\n", (palindrome_recur(lpalin)?"oui":"non"));

    return 0;
}