Пример #1
0
// Exercises BytesTrie::hasUniqueValue() on the trie returned by
// buildMonthsTrie() from several cursor positions: the root, directly after
// next(), after getValue(), and after first('a')/next('u').
// Each mismatch is reported through errln(); the test keeps running.
void BytesTrieTest::TestHasUniqueValue() {
    LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
    if(trie.isNull()) {
        return;  // buildTrie() reported an error
    }
    int32_t uniqueValue;
    // At the root the test expects that no unique value is reported.
    if(trie->hasUniqueValue(uniqueValue)) {
        errln("unique value at root");
    }
    trie->next('j');
    trie->next('a');
    trie->next('n');
    // hasUniqueValue() directly after next()
    if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
        errln("not unique value 1 after \"jan\"");
    }
    // first() is used here instead of next() to start over with 'j'.
    trie->first('j');
    trie->next('u');
    if(trie->hasUniqueValue(uniqueValue)) {
        errln("unique value after \"ju\"");
    }
    if(trie->next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
        errln("not normal value 6 after \"jun\"");
    }
    // hasUniqueValue() after getValue()
    if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
        errln("not unique value 6 after \"jun\"");
    }
    // hasUniqueValue() from within a linear-match node
    trie->first('a');
    trie->next('u');
    if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
        errln("not unique value 8 after \"au\"");
    }
}
Пример #2
0
// Advances the months trie by the bytes "janua", then constructs a
// BytesTrie::Iterator from that trie position (second constructor argument 0)
// and checks via checkIterator() that it yields exactly the suffixes listed
// in data[], both on the first pass and again after iter.reset().
void BytesTrieTest::TestIteratorFromLinearMatch() {
    LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
    if(trie.isNull()) {
        return;  // buildTrie() reported an error
    }
    // Go into a linear-match node.
    trie->next('j');
    trie->next('a');
    trie->next('n');
    trie->next('u');
    trie->next('a');
    IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
    BytesTrie::Iterator iter(*trie, 0, errorCode);
    if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
        return;
    }
    // Expected data: Same as in buildMonthsTrie(), except only the suffixes
    // following "janua".
    static const StringAndValue data[]={
        { "r", 1 },
        { "ry", 1 }
    };
    checkIterator(iter, data, UPRV_LENGTHOF(data));
    // Reset, and we should get the same result.
    logln("after iter.reset()");
    checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
}
Пример #3
0
// Builds a three-entry trie whose keys share the prefix "abcde", advances it
// by "abc", and iterates from there with the Iterator's second constructor
// argument set to 3. The expected[] table lists three-byte strings; the
// entries "dep" and "dey" carry the value -1 instead of a stored value.
// The same expectations are checked again after iter.reset().
void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
    static const StringAndValue data[]={
        { "abcdef", 10 },
        { "abcdepq", 200 },
        { "abcdeyz", 3000 }
    };
    LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
    if(trie.isNull()) {
        return;  // buildTrie() reported an error
    }
    // Go into a linear-match node.
    trie->next('a');
    trie->next('b');
    trie->next('c');
    IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
    // Truncate after the linear-match node.
    BytesTrie::Iterator iter(*trie, 3, errorCode);
    if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
        return;
    }
    static const StringAndValue expected[]={
        { "def", 10 },
        { "dep", -1 },
        { "dey", -1 }
    };
    checkIterator(iter, expected, UPRV_LENGTHOF(expected));
    // Reset, and we should get the same result.
    logln("after iter.reset()");
    checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
}
Пример #4
0
/*
 * Entry point: strfind <strpool> <strcheck> <result>
 *
 * Opens the two input files named by argv[1] and argv[2], creates the output
 * file "<result>_trie.dat", hands all three streams to trie(), and finally
 * prints the elapsed CPU time in seconds.
 *
 * Returns 0 after a successful run. The usage and "input file not found"
 * paths also return 0, as in the original program; the failure paths added
 * here (result name too long, output file not creatable) return 1.
 */
int main(int argc, char **argv)
{
	clock_t start=clock();
	char resultPath[4096];
	int pathLen;
	FILE *fpStrpool;
	FILE *fpCheckedstr;
	FILE *fpResult2;

	/* Validate the argument count BEFORE touching argv[1..3]. */
	if(argc != 4) {
		printf("USAGE: strfind strpool strcheck result!\n");
		return 0;
	}

	/*
	 * Build the output name in our own buffer. Appending to argv[3] in place
	 * with strcat() would write past the end of the argument string.
	 */
	pathLen = snprintf(resultPath, sizeof(resultPath), "%s_trie.dat", argv[3]);
	if(pathLen < 0 || (size_t)pathLen >= sizeof(resultPath)) {
		printf("Result file name too long!\n");
		return 1;
	}

	fpStrpool = fopen(argv[1],"r");
	fpCheckedstr = fopen(argv[2],"r");
	if(fpStrpool == NULL || fpCheckedstr == NULL){
		printf("Input file not found!\n");
		/* Release whichever input did open. */
		if(fpStrpool != NULL) fclose(fpStrpool);
		if(fpCheckedstr != NULL) fclose(fpCheckedstr);
		return 0;
	}

	/* The output file is only created once both inputs are known to exist. */
	fpResult2 = fopen(resultPath,"w");
	if(fpResult2 == NULL){
		printf("Cannot create result file!\n");
		fclose(fpStrpool);
		fclose(fpCheckedstr);
		return 1;
	}

	/*
	 * The bloom-filter pass is disabled; when it is re-enabled it must run
	 * before trie() and be followed by rewind() on both input streams:
	 *
	 *	bloom(fpStrpool,fpCheckedstr,fpResult);
	 *	rewind(fpStrpool);
	 *	rewind(fpCheckedstr);
	 */
	trie(fpStrpool,fpCheckedstr,fpResult2);

	fclose(fpStrpool);
	fclose(fpCheckedstr);
	fclose(fpResult2);
	printf("%f\n",(double)(clock()-start)/CLOCKS_PER_SEC);
	return 0;
}
Пример #5
0
// Looks up alias in the BytesTrie that starts at bytesTries+bytesTrieOffset.
// Returns the trie's value when containsName() reports a match, and
// UCHAR_INVALID_CODE otherwise.
int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
    BytesTrie nameTrie(bytesTries+bytesTrieOffset);
    if(!containsName(nameTrie, alias)) {
        return UCHAR_INVALID_CODE;
    }
    return nameTrie.getValue();
}
Пример #6
0
// Feeds the NUL-terminated string s (length argument -1) to a BytesTrie over
// nameTrieBytes. Returns the trie's value if the result of next() satisfies
// USTRINGTRIE_HAS_VALUE, and -1 otherwise.
static int32_t bytesTrieLookup(const char *s, const char *nameTrieBytes) {
    BytesTrie lookupTrie(nameTrieBytes);
    if(!USTRINGTRIE_HAS_VALUE(lookupTrie.next(s, -1))) {
        return -1;
    }
    return lookupTrie.getValue();
}
Пример #7
0
 void insert( const T &collection ) {
     trie *node = this;
     for( auto &c : collection ) {
         auto found = node->children.find( c );
         if( found == node->children.end() )
             node->children[c] = trie( node->value + c );
         node = &(node->children[c]);
     }
     node->flag = true;
 }
Пример #8
0
void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
    LocalPointer<BytesTrie> trie(buildTrie(data, dataLength, buildOption));
    if(trie.isNull()) {
        return;  // buildTrie() reported an error
    }
    checkFirst(*trie, data, dataLength);
    checkNext(*trie, data, dataLength);
    checkNextWithState(*trie, data, dataLength);
    checkNextString(*trie, data, dataLength);
    checkIterator(*trie, data, dataLength);
}
Пример #9
0
	/// \brief Inserts the word given by the iterator range [\p begin, \p end).
	///
	/// The first element is consumed (post-increment of \p begin) and used as
	/// an index into \c branches, which is grown when the index is out of
	/// range. The rest of the range is inserted recursively into that branch.
	/// \returns false for an empty range; true if the branch for the first
	/// element had to be created; otherwise the result of the recursive call.
	template <typename Iterator> bool insert(Iterator && begin, Iterator && end) {
		if (begin == end) return false;

		const size_t index = *begin++;
		if (branches.size() <= index) branches.resize(index + 1);

		auto & slot = branches[index];
		if (!slot) {
			// Fresh branch: whatever the recursion reports, something was added.
			slot = trie();
			slot->insert(begin, end);
			return true;
		}
		return slot->insert(begin, end);
	}
Пример #10
0
 // Inserts every element of `collection` as a path below this node and
 // returns a reference to the `leaf` member of the node reached last,
 // after setting that node's flag to true.
 // A missing child is created from a copy of the parent's `branch` with the
 // element appended through std::back_inserter.
 V& insert( const K &collection ) {
     trie *cursor = this;
     for( auto &element : collection ) {
         auto &kids = cursor->children;
         if( kids.find( element ) == kids.end() ) {
             auto path = cursor->branch;
             std::back_inserter( path ) = element;
             kids[element] = trie( path );
         }
         cursor = &kids[element];
     }
     cursor->flag = true;
     return cursor->leaf;
 }
Пример #11
0
// Demo: opens "actrie.bin", folds over the key "01234567" and prints the
// result, then enumerates the trie once with ct::up and once with ct::down.
int main() {
    trie_t trie("actrie.bin");

    // Fold through the nodes matching the key; `match` stays null when the
    // fold does not set it.
    const char *match = 0;
    trie.fold_full("01234567", match, fun);
    std::cout << "lookup result: ";
    if (match) {
        std::cout << match;
    } else {
        std::cout << "not found";
    }
    std::cout << std::endl;

    // Traverse all the nodes in both directions.
    trie.foreach<ct::up, std::string>(enumerate);
    trie.foreach<ct::down, std::string>(enumerate);

    return 0;
}
Пример #12
0
// Exercises BytesTrie::getNextBytes() on the months trie at several cursor
// positions (root, after "jan", after getValue(), after "janu", after
// "janua", and after "january"). At each position the returned count, the
// sink's NumberOfBytesAppended() and the bytes written into buffer are
// compared with the expected set of next bytes.
void BytesTrieTest::TestGetNextBytes() {
    LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
    if(trie.isNull()) {
        return;  // buildTrie() reported an error
    }
    char buffer[40];
    CheckedArrayByteSink sink(buffer, UPRV_LENGTHOF(buffer));
    int32_t count=trie->getNextBytes(sink);
    if(count!=2 || sink.NumberOfBytesAppended()!=2 || buffer[0]!='a' || buffer[1]!='j') {
        errln("months getNextBytes()!=[aj] at root");
    }
    trie->next('j');
    trie->next('a');
    trie->next('n');
    // getNextBytes() directly after next()
    count=trie->getNextBytes(sink.Reset());
    // NUL-terminate so that buffer can be compared with strcmp() below.
    buffer[count]=0;
    if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
        errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"");
    }
    // getNextBytes() after getValue()
    trie->getValue();  // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
    // Zero the whole buffer so the bytes written next are NUL-terminated.
    memset(buffer, 0, sizeof(buffer));
    count=trie->getNextBytes(sink.Reset());
    if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
        errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
    }
    // getNextBytes() from a linear-match node
    trie->next('u');
    memset(buffer, 0, sizeof(buffer));
    count=trie->getNextBytes(sink.Reset());
    if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='a') {
        errln("months getNextBytes()!=[a] after \"janu\"");
    }
    trie->next('a');
    memset(buffer, 0, sizeof(buffer));
    count=trie->getNextBytes(sink.Reset());
    if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='r') {
        errln("months getNextBytes()!=[r] after \"janua\"");
    }
    trie->next('r');
    trie->next('y');
    // getNextBytes() after a final match
    count=trie->getNextBytes(sink.Reset());
    if(count!=0 || sink.NumberOfBytesAppended()!=0) {
        errln("months getNextBytes()!=[] after \"january\"");
    }
}
Пример #13
0
// Advances the months trie by "jan", constructs a BytesTrie::Iterator from
// that position (second constructor argument 0), and checks via
// checkIterator() that it yields exactly the suffix/value pairs in data[],
// both on the first pass and again after iter.reset().
void BytesTrieTest::TestIteratorFromBranch() {
    LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
    if(trie.isNull()) {
        return;  // buildTrie() reported an error
    }
    // Go to a branch node.
    trie->next('j');
    trie->next('a');
    trie->next('n');
    IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
    BytesTrie::Iterator iter(*trie, 0, errorCode);
    if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
        return;
    }
    // Expected data: Same as in buildMonthsTrie(), except only the suffixes
    // following "jan".
    static const StringAndValue data[]={
        { "", 1 },
        { ".", 1 },
        { "a", 1 },
        { "bb", 1 },
        { "c", 1 },
        { "ddd", 1 },
        { "ee", 1 },
        { "ef", 1 },
        { "f", 1 },
        { "gg", 1 },
        { "h", 1 },
        { "iiii", 1 },
        { "j", 1 },
        { "kk", 1 },
        { "kl", 1 },
        { "kmm", 1 },
        { "l", 1 },
        { "m", 1 },
        { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
        { "o", 1 },
        { "pp", 1 },
        { "qqq", 1 },
        { "r", 1 },
        { "uar", 1 },
        { "uary", 1 }
    };
    checkIterator(iter, data, UPRV_LENGTHOF(data));
    // Reset, and we should get the same result.
    logln("after iter.reset()");
    checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
}
Пример #14
0
// Builds a BytesTrie from data[0..dataLength) using the shared builder_.
//
// The items are added in a permuted order that depends on dataLength, the
// trie is serialized once with buildStringPiece() and once with build(), and
// two builder properties are checked along the way: add() after build() must
// set U_NO_WRITE_PERMISSION, and buildStringPiece() after build() must return
// a different array than the one returned before build().
//
// Returns NULL if errorCode holds a failure at the end. Otherwise returns the
// trie from build() when dataLength is odd, or a newly allocated BytesTrie
// over the second buildStringPiece() result when dataLength is even.
// Callers in this file wrap the result in a LocalPointer.
BytesTrie *BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
                                    UStringTrieBuildOption buildOption) {
    IcuTestErrorCode errorCode(*this, "buildTrie()");
    // Add the items to the trie builder in an interesting (not trivial, not random) order.
    int32_t index, step;
    if(dataLength&1) {
        // Odd number of items.
        index=dataLength/2;
        step=2;
    } else if((dataLength%3)!=0) {
        // Not a multiple of 3.
        index=dataLength/5;
        step=3;
    } else {
        index=dataLength-1;
        step=-1;
    }
    builder_->clear();
    for(int32_t i=0; i<dataLength; ++i) {
        builder_->add(data[index].s, data[index].value, errorCode);
        // With step==-1 the index only becomes negative after the last
        // add(), because the loop starts at dataLength-1 and runs dataLength times.
        index=(index+step)%dataLength;
    }
    // Serialize before build() so the two arrays can be compared further down.
    StringPiece sp=builder_->buildStringPiece(buildOption, errorCode);
    LocalPointer<BytesTrie> trie(builder_->build(buildOption, errorCode));
    if(!errorCode.logIfFailureAndReset("add()/build()")) {
        // Adding after build() is expected to fail with U_NO_WRITE_PERMISSION.
        builder_->add("zzz", 999, errorCode);
        if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
            errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
        }
    }
    logln("serialized trie size: %ld bytes\n", (long)sp.length());
    StringPiece sp2=builder_->buildStringPiece(buildOption, errorCode);
    if(sp.data()==sp2.data()) {
        errln("builder.buildStringPiece() before & after build() returned same array");
    }
    if(errorCode.isFailure()) {
        return NULL;
    }
    // Tries from either build() method should be identical but
    // BytesTrie does not implement equals().
    // We just return either one.
    if((dataLength&1)!=0) {
        return trie.orphan();
    } else {
        // The LocalPointer deletes the build() result when it goes out of scope.
        return new BytesTrie(sp2.data());
    }
}
// Tokenizer driver: <program> <trie-file> [input-file | -]
//
// Loads the trie named by the first argument and tokenizes either the file
// named by the second argument or, when that argument is missing or "-",
// standard input. Tokens are written to std::cout through apertium_printer.
//
// Returns 1 (with a message on std::cerr) when the trie argument is missing
// or the input file cannot be opened; otherwise falls off the end of main,
// which yields exit status 0.
int main(int argc, char *argv[]) {
	std::vector<std::string> args(argv, argv + argc);
	std::cin.sync_with_stdio(false);
	std::cout.sync_with_stdio(false);

	// args[1] is read unconditionally below, so it must be present.
	if (args.size() < 2) {
		std::cerr << "Usage: " << (args.empty() ? std::string("tokenizer") : args[0])
		          << " <trie-file> [input-file | -]" << std::endl;
		return 1;
	}

	trie_t trie(args[1].c_str());

	tdc::trie_tokenizer<> tokenizer(trie);

	if (args.size() > 2 && args[2] != "-") {
		std::ifstream in(args[2].c_str(), std::ios::binary);
		// Report an unreadable input file instead of tokenizing a failed stream.
		if (!in) {
			std::cerr << "Cannot open input file '" << args[2] << "'" << std::endl;
			return 1;
		}
		tokenizer.tokenize(in, apertium_printer(std::cout));
	}
	else {
		tokenizer.tokenize(std::cin, apertium_printer(std::cout));
	}
}
Пример #16
0
// Iterates over the months trie from the root with the Iterator's second
// constructor argument set to 4 and checks via checkIterator() that the
// strings and values in data[] are produced, both on the first pass and
// again after iter.reset(). Several expected entries carry -1 as their value.
void BytesTrieTest::TestTruncatingIteratorFromRoot() {
    LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
    if(trie.isNull()) {
        return;  // buildTrie() reported an error
    }
    IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
    BytesTrie::Iterator iter(*trie, 4, errorCode);
    if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
        return;
    }
    // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
    // of each string, and no string duplicates from the truncation.
    static const StringAndValue data[]={
        { "augu", -1 },
        { "jan", 1 },
        { "jan.", 1 },
        { "jana", 1 },
        { "janb", -1 },
        { "janc", 1 },
        { "jand", -1 },
        { "jane", -1 },
        { "janf", 1 },
        { "jang", -1 },
        { "janh", 1 },
        { "jani", -1 },
        { "janj", 1 },
        { "jank", -1 },
        { "janl", 1 },
        { "janm", 1 },
        { "jann", -1 },
        { "jano", 1 },
        { "janp", -1 },
        { "janq", -1 },
        { "janr", 1 },
        { "janu", -1 },
        { "july", 7 },
        { "jun", 6 },
        { "jun.", 6 },
        { "june", 6 }
    };
    checkIterator(iter, data, UPRV_LENGTHOF(data));
    // Reset, and we should get the same result.
    logln("after iter.reset()");
    checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
}
Пример #17
0
// Exercises UCharsTrie::nextForCodePoint() / firstForCodePoint() with
// BMP and supplementary code points.
//
// Each of the four checks walks one code point sequence and verifies, after
// every step, that the returned result has the expected value and equals
// trie->current(); for complete keys it also verifies getValue().
// NOTE(review): the keys in data[] are written with escaped \u / \U
// sequences; presumably buildTrie() unescapes them -- confirm there.
void UCharsTrieTest::TestNextForCodePoint() {
    static const StringAndValue data[]={
        { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 },
        { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 },
        { "\\u4dff\\U000103ff", 99999 }
    };
    LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
    if(trie.isNull()) {
        return;  // buildTrie() reported an error
    }
    UStringTrieResult result;
    // Full walk of data[0], starting from the freshly built trie.
    if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
        (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
        (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
        (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
        (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
        (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
        trie->getValue()!=2000000000
    ) {
        errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s);
    }
    // Walk of data[1]; firstForCodePoint() is used for the first step instead of reset().
    if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
        (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
        (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
        (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
        trie->getValue()!=44444
    ) {
        errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s);
    }
    // A sequence that is not in data[]: the last step must report NO_MATCH.
    if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
        (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
        (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
        (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current()  // no match for trail surrogate
    ) {
        errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222");
    }
    // Walk of data[2] after reset().
    if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
        (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
        trie->getValue()!=99999
    ) {
        errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s);
    }
}
Пример #18
0
// Word-list <-> DAWG tool.
//
//   prog word_list_file dawg_file unicharset_file
//       Reads the word list into a Trie, reduces it to a SquishedDawg and
//       writes that to dawg_file.
//   prog -t word_list_file dawg_file unicharset_file
//       Loads dawg_file and calls check_for_words() with the word list.
//
// Returns 1 on a usage error, when the unicharset cannot be loaded, or when
// the word list cannot be read; 0 otherwise.
int main(int argc, char** argv) {
  if (!(argc == 4 || (argc == 5 && strcmp(argv[1], "-t") == 0))) {
    // Terminate the usage line so the shell prompt does not end up glued to it.
    printf("Usage: %s [-t] word_list_file dawg_file unicharset_file\n", argv[0]);
    return 1;
  }
  tesseract::Classify classify;
  // With "-t" present the positional arguments start one slot later.
  int argv_index = 0;
  if (argc == 5) ++argv_index;
  const char* wordlist_filename = argv[++argv_index];
  const char* dawg_filename = argv[++argv_index];
  const char* unicharset_file = argv[++argv_index];
  if (!classify.getDict().getUnicharset().load_from_file(unicharset_file)) {
    tprintf("Failed to load unicharset from '%s'\n", unicharset_file);
    return 1;
  }
  const UNICHARSET &unicharset = classify.getDict().getUnicharset();
  if (argc == 4) {
    tesseract::Trie trie(
        // the first 3 arguments are not used in this case
        tesseract::DAWG_TYPE_WORD, "", SYSTEM_DAWG_PERM,
        kMaxNumEdges, unicharset.size());
    printf("Reading word list from '%s'\n", wordlist_filename);
    if (!trie.read_word_list(wordlist_filename, unicharset)) {
      printf("Failed to read word list from '%s'\n", wordlist_filename);
      // return (rather than exit) so that trie and classify are destroyed.
      return 1;
    }
    printf("Reducing Trie to SquishedDawg\n");
    tesseract::SquishedDawg *dawg = trie.trie_to_dawg();
    printf("Writing squished DAWG to '%s'\n", dawg_filename);
    dawg->write_squished_dawg(dawg_filename);
    delete dawg;
  } else {
    printf("Loading dawg DAWG from '%s'\n", dawg_filename);
    tesseract::SquishedDawg words(
        dawg_filename,
        // these 3 arguments are not used in this case
        tesseract::DAWG_TYPE_WORD, "", SYSTEM_DAWG_PERM);
    printf("Checking word list from '%s'\n", wordlist_filename);
    words.check_for_words(wordlist_filename, unicharset, true);
  }
  return 0;
}
Пример #19
0
/**
 * サンプル・コマンド
 */
int
main()
{
	Node<char>* trie(0);
	const char key[N][16] = {"array",
							 "bold",
							 "curry",
							 "cute",
							 "art",
							 "alert",
							 "dish"};

	if (3 < N) trie = add(trie, key[3]);
	for (int i(0); i + 2 < N; ++i) trie = add(trie, key[i]);

	for (int i(0); i < N; ++i) {
		std::printf("%s: %s\n", key[i], find(trie, key[i]) ? "yes" : "no");
	}

	clear(trie);

	return 0;
}
Пример #20
0
int main(int argc, char **argv)
{
	srand((time)NULL);
	
	
	printf("Taille structure = %u\n", sizeof(Tableau)); // (taille du tableau * taille int) + (taille du champs 'taille') = 100*4 + 1 = 404
	

/*	int a = 0; // On initialise arbitrairement la variable a
	printf("%d\n", alea(a));	
*/
	
	Tableau T = initialise(T);
	
	affiche(T);
	printf("Elément minimum du tableau : %d\n", minimum(T));
	if (produit(T) < 0) printf("Produit trop grand"); // Evite d'afficher un produit négatif sur des entiers non signés
	else printf("Produit des éléments du tableau : %d\n", produit(T));
	
	
	// Décalage //
	T = decalage(T); 
	putchar('\n'); 
	printf("Tableau après décalage : \n"); 
	affiche(T);	
	printf("Taille du tableau après le décalage = %d\n\n", T.taille);
	
	
	// Trie //
	printf("Après trie du tableau : \n");
	T = trie(T);
	affiche(T);	
	
	
	return 0;
}
Пример #21
0
// Demo driver for edm::Trie<int>: fills a trie with three-letter keys
// starting with 'a' and four-letter keys starting with 'c', prints a few
// lookups, displays the trie, and then iterates over the children of the
// "ab" node and of the initial node in several ways.
int main(int argc, char **argv)
{
  /// trie that associates a integer to strings
  /// 0 is the default value I want to receive when there is no match
  /// in trie
  edm::Trie<int>	trie(0);
  typedef edm::TrieNode<int> Node;
  typedef Node const * pointer; // sigh....
  typedef edm::TrieNodeIter<int> node_iterator;

  // Key buffers without a terminating NUL; they are always passed together
  // with their explicit length.
  char tre[] = {'a','a','a'};
  char quattro[] = {'c','a','a','a'};

  // 3 x 10 insertions per buffer: the outer loop advances the second-to-last
  // character, the inner loop advances the last one; the stored value is i.
  for (int j=0;j<3;j++) {
    tre[2]='a';
    quattro[3]='a';
    for (int i=0;i<10;i++) {
       trie.insert(tre,3,i);
       trie.insert(quattro,4,i);
       tre[2]++;
       quattro[3]++;
    }
    tre[1]++;
    quattro[2]++;
  }

 
  std::cout << "get [aac] " << trie.find("aac", 3) << std::endl;
  std::cout << "get [caae] = " << trie.find("caae", 4) << std::endl;

  // Overwrite the value stored for "caag" and read it back.
  trie.setEntry("caag", 4, -2);
  std::cout << "get [caag] = " << trie.find("caag", 4) << std::endl;

  // no match
  std::cout << "get [abcd] = " << trie.find("abcd", 4) << std::endl;
  // no match
  std::cout << "get [ca] = " << trie.find("ca", 2) << std::endl;

  trie.display(std::cout);
  std::cout << std::endl;

  // NOTE(review): pn is used below without a null check -- confirm that
  // node() cannot return a null pointer for "ab" here.
  pointer pn = trie.node("ab",2);
  // if (pn) pn->display(std::cout,0,' ');
  std::cout << std::endl;

  {
    // Iterate with explicitly constructed iterators: (pn,true) is used as
    // the start and (pn,false) as the end of the range.
    node_iterator e(pn,false);
    std::cout << "\n ab iteration" << std::endl;
    for (node_iterator p(pn,true); p!=e; p++)
      std::cout << "ab" << p.label() << " = " << p->value() << std::endl;
  }

  {
    // Same node, this time looked up by string and iterated via begin()/end().
    std::cout << "\n ab iteration: string" << std::endl;
    pn = trie.node("ab");
    Node::const_iterator p = (*pn).begin();
    Node::const_iterator e = (*pn).end();
    
    for (; p!=e; p++)
      std::cout << "ab" << p.label() << " = " << p->value() << std::endl;
  }

  {
    // Iterate starting from the trie's initial node.
    pn = trie.initialNode();
    node_iterator e(pn,false);   
    std::cout << "\ntop iteration"<< std::endl;
    for (node_iterator p(pn,true); p!=e; p++)
      std::cout << p.label() << " = " << p->value() << std::endl;
    std::cout << std::endl;
  }

  // Hand the Print functor to the two traversal helpers.
  std::cout << "\nfull walk"<< std::endl;
  Print pr;
  edm::walkTrie(pr,*trie.initialNode());
  std::cout << std::endl;

  std::cout << "\nleaves iteration"<< std::endl;
  edm::iterateTrieLeaves(pr,*trie.initialNode());
  std::cout << std::endl;

}
Пример #22
0
// Demo driver for edm::Trie<int>, written as a function-try-block: fills a
// trie with three-letter keys starting with 'a' and four-letter keys
// starting with 'c', prints a few lookups, displays the trie and the "ab"
// node, and iterates over node children in several ways.
// Returns 0 on success and 1 when a cms::Exception or std::exception is
// caught (its message is printed to std::cerr).
int main(int, char**) try {
  /// trie that associates a integer to strings
  /// 0 is the default value I want to receive when there is no match
  /// in trie
  edm::Trie<int> trie(0);
  typedef edm::TrieNode<int> Node;
  typedef Node const* pointer;  // sigh....
  typedef edm::TrieNodeIter<int> node_iterator;

  // Key buffers without a terminating NUL; they are always passed together
  // with their explicit length.
  char tre[] = {'a', 'a', 'a'};
  char quattro[] = {'c', 'a', 'a', 'a'};

  // 3 x 10 insertions per buffer: the outer loop advances the second-to-last
  // character, the inner loop advances the last one; the stored value is i.
  for (int j = 0; j < 3; j++) {
    tre[2] = 'a';
    quattro[3] = 'a';
    for (int i = 0; i < 10; i++) {
      trie.insert(tre, 3, i);
      trie.insert(quattro, 4, i);
      tre[2]++;
      quattro[3]++;
    }
    tre[1]++;
    quattro[2]++;
  }

  std::cout << "get [aac] " << trie.find("aac", 3) << std::endl;
  std::cout << "get [caae] = " << trie.find("caae", 4) << std::endl;

  // Overwrite the value stored for "caag" and read it back.
  trie.setEntry("caag", 4, -2);
  std::cout << "get [caag] = " << trie.find("caag", 4) << std::endl;

  // no match
  std::cout << "get [abcd] = " << trie.find("abcd", 4) << std::endl;
  // no match
  std::cout << "get [ca] = " << trie.find("ca", 2) << std::endl;

  trie.display(std::cout);
  std::cout << std::endl;

  pointer pn = trie.node("ab", 2);
  if (pn)
    pn->display(std::cout, 0, ' ');
  std::cout << std::endl;

  // A default-constructed node_iterator is used as the end of the range.
  node_iterator e;
  std::cout << "\n ab iteration" << std::endl;
  for (node_iterator p(trie.node("ab", 2)); p != e; p++) {
    std::cout << "ab" << p.label() << " = " << p->value() << std::endl;
  }

  std::cout << "\n ab iteration: string" << std::endl;
  // NOTE(review): unlike above, pn is dereferenced here without a null check.
  pn = trie.node("ab");
  e = pn->end();
  for (Node::const_iterator p = pn->begin(); p != e; p++) {
    std::cout << "ab" << p.label() << " = " << p->value() << std::endl;
  }
  // This loop still compares against the e assigned from pn->end() above.
  std::cout << "\ntop iteration" << std::endl;
  for (node_iterator p(trie.initialNode()); p != e; p++) {
    std::cout << p.label() << " = " << p->value() << std::endl;
  }
  std::cout << std::endl;

  // Hand the Print functor to the two traversal helpers.
  std::cout << "\nfull walk" << std::endl;
  Print pr;
  edm::walkTrie(pr, *trie.initialNode());
  std::cout << std::endl;

  std::cout << "\nleaves iteration" << std::endl;
  edm::iterateTrieLeaves(pr, *trie.initialNode());
  std::cout << std::endl;
  return 0;
} catch (cms::Exception const& e) {
  std::cerr << e.explainSelf() << std::endl;
  return 1;
} catch (std::exception const& e) {
  std::cerr << e.what() << std::endl;
  return 1;
}
Пример #23
0
// Builds the on-disk data for the Unicode Tournament Trie plugin.
//
// Reads places, addresses, way coordinates and way names from the importer,
// groups the address segments by name, ranks the names by the summed length
// of their segments, chains the segments of each name with EdgeConnector,
// and inserts one entry per chain into a sub-trie. The sub-trie is written
// to "<file>_sub" and the concatenated way coordinates to "<file>_ways",
// where <file> is "Unicode Tournament Trie" inside dir.
//
// Returns false if either output file cannot be opened or the importer
// cannot provide the address data; true otherwise.
bool UnicodeTournamentTrie::Preprocess( IImporter* importer, QString dir )
{
	QString filename = fileInDirectory( dir, "Unicode Tournament Trie" );

	QFile subTrieFile( filename + "_sub" );
	QFile wayFile( filename + "_ways" );

	if ( !openQFile( &subTrieFile, QIODevice::WriteOnly ) )
		return false;
	if ( !openQFile( &wayFile, QIODevice::WriteOnly ) )
		return false;

	std::vector< IImporter::Place > inputPlaces;
	std::vector< IImporter::Address > inputAddress;
	std::vector< UnsignedCoordinate > inputWayBuffer;
	std::vector< QString > inputWayNames;
	if ( !importer->GetAddressData( &inputPlaces, &inputAddress, &inputWayBuffer, &inputWayNames ) )
		return false;

	Timer time;

	std::sort( inputAddress.begin(), inputAddress.end() );
	qDebug() << "Unicode Tournament Trie: sorted addresses by importance:" << time.restart() << "ms";

	std::vector< UnsignedCoordinate > wayBuffer;
	std::vector< utt::Node > trie( 1 );
	unsigned address = 0;

	// build address name index
	QMultiHash< unsigned, unsigned > addressByName;
	for ( ; address < inputAddress.size(); address++ ) {
		addressByName.insert( inputAddress[address].name, address );
	}

	// compute way lengths
	QList< unsigned > uniqueNames = addressByName.uniqueKeys();
	std::vector< std::pair< double, unsigned > > wayLengths;
	for ( unsigned name = 0; name < ( unsigned ) uniqueNames.size(); name++ ) {
		QList< unsigned > segments = addressByName.values( uniqueNames[name] );
		double distance = 0;
		for( unsigned segment = 0; segment < ( unsigned ) segments.size(); segment++ ) {
			// segments holds indices into inputAddress, so the address has to
			// be looked up through segments[segment]; indexing inputAddress
			// with the loop counter itself measures unrelated addresses.
			const IImporter::Address& segmentAddress = inputAddress[segments[segment]];
			for ( unsigned coord = 1; coord < segmentAddress.pathLength; ++coord ) {
				GPSCoordinate sourceGPS = inputWayBuffer[segmentAddress.pathID + coord - 1].ToProjectedCoordinate().ToGPSCoordinate();
				GPSCoordinate targetGPS = inputWayBuffer[segmentAddress.pathID + coord].ToProjectedCoordinate().ToGPSCoordinate();
				distance += sourceGPS.ApproximateDistance( targetGPS );
			}
		}
		wayLengths.push_back( std::pair< double, unsigned >( distance, name ) );
	}

	// sort ways by aggregate lengths; a name's importance is its rank in
	// ascending order of length
	std::sort( wayLengths.begin(), wayLengths.end() );
	std::vector< unsigned > wayImportance( uniqueNames.size() );
	for ( unsigned way = 0; way < wayLengths.size(); way++ )
		wayImportance[wayLengths[way].second] = way;
	wayLengths.clear();

	std::vector< utt::Node > subTrie( 1 );

	for ( unsigned name = 0; name < ( unsigned ) uniqueNames.size(); name++ ) {
		QList< unsigned > segments = addressByName.values( uniqueNames[name] );

		// build edge connector data structures
		std::vector< EdgeConnector< UnsignedCoordinate>::Edge > connectorEdges;
		std::vector< unsigned > resultSegments;
		std::vector< unsigned > resultSegmentDescriptions;
		std::vector< bool > resultReversed;

		// one edge per segment, from the first to the last coordinate of its path
		for ( unsigned segment = 0; segment < ( unsigned ) segments.size(); segment++ ) {
			const IImporter::Address& segmentAddress = inputAddress[segments[segment]];
			EdgeConnector< UnsignedCoordinate >::Edge newEdge;
			newEdge.source = inputWayBuffer[segmentAddress.pathID];
			newEdge.target = inputWayBuffer[segmentAddress.pathID + segmentAddress.pathLength - 1];
			newEdge.reverseable = true;
			connectorEdges.push_back( newEdge );
		}

		EdgeConnector< UnsignedCoordinate >::run( &resultSegments, &resultSegmentDescriptions, &resultReversed, connectorEdges );

		// string places with the same name together
		// resultSegments[i] is the number of entries of
		// resultSegmentDescriptions that belong to chain i; nextID is the
		// offset of the current chain's first entry.
		unsigned nextID = 0;
		for ( unsigned segment = 0; segment < resultSegments.size(); segment++ ) {
			utt::Data subEntry;
			subEntry.start = wayBuffer.size();

			for ( unsigned description = 0; description < resultSegments[segment]; description++ ) {
				unsigned segmentID = resultSegmentDescriptions[nextID + description];
				const IImporter::Address& segmentAddress = inputAddress[segments[segmentID]];
				std::vector< UnsignedCoordinate > path;
				for ( unsigned pathID = 0; pathID < segmentAddress.pathLength; pathID++ )
					path.push_back( inputWayBuffer[pathID + segmentAddress.pathID]);
				if ( resultReversed[segmentID] )
					std::reverse( path.begin(), path.end() );
				// every path after the first starts where the previous one
				// ended, so its first coordinate is skipped
				int skipFirst = description == 0 ? 0 : 1;
				assert( skipFirst == 0 || wayBuffer.back() == path.front() );
				wayBuffer.insert( wayBuffer.end(), path.begin() + skipFirst, path.end() );
			}
			
			// the chain is labelled with the place near its first segment
			utt::PlaceData placeData;
			placeData.name = inputPlaces[inputAddress[segments[resultSegmentDescriptions[nextID]]].nearPlace].name;

			subEntry.length = wayBuffer.size() - subEntry.start;
			insert( &subTrie, wayImportance[name], inputWayNames[uniqueNames[name]], subEntry, placeData );

			nextID += resultSegments[segment];
		}
	}

	writeTrie( &subTrie, subTrieFile );

	assert( address == inputAddress.size() );
	qDebug() << "Unicode Tournament Trie: build tries and tournament trees:" << time.restart() << "ms";

	// dump the x and y members of every coordinate as raw bytes
	for ( std::vector< UnsignedCoordinate >::const_iterator i = wayBuffer.begin(), e = wayBuffer.end(); i != e; ++i ) {
		wayFile.write( ( char* ) &i->x, sizeof( i->x ) );
		wayFile.write( ( char* ) &i->y, sizeof( i->y ) );
	}
	qDebug() << "Unicode Tournament Trie: wrote ways:" << time.restart() << "ms";

	return true;
}
Пример #24
0
/*
 * Plays one turn: an initial roll followed by up to two re-rolls.
 * After the first and second roll the player is asked whether to keep (0)
 * or re-roll (anything else); the dice and their score are printed after
 * every roll and once more at the end when the player keeps them.
 */
void tour_de_jeu(){
  troisdes des;
  int score;
  int choix;
  int relance;
  int k;

  /* Initial roll */
  for (k = 0; k < 3; k++) print_newline();
  print_text("Lancer Initial : ");
  des = trie(lancer());
  aff_des(des);
  print_text("->");
  score = valeur(des);
  print_int(score);
  for (k = 0; k < 3; k++) print_newline();
  print_text("Vous gardez [0] ou vous relancez [1]? [Oui/Non]: ");
  choix = read_int();
  for (k = 0; k < 3; k++) print_newline();

  /* Second roll, only when the player did not keep the first one */
  if (choix != 0) {
    des = rejouer(des);
    for (k = 0; k < 3; k++) print_newline();
    print_text("Deuxieme Lancer : ");
    des = trie(des);
    aff_des(des);
    print_text ("->");
    score = valeur(des);
    print_int(score);
    for (k = 0; k < 3; k++) print_newline();
    print_text("Vous gardez [0] ou vous relancez [1] [Oui/Non]: ");
    choix = read_int();
    for (k = 0; k < 3; k++) print_newline();
  }

  /* Final display; preceded by a last re-roll unless the player kept the dice */
  relance = (choix != 0);
  if (relance) {
    des = trie(rejouer(des));
    print_newline();
    print_newline();
  }
  aff_des(des);
  print_text("->");
  score = valeur(des);
  print_int(score);
  print_newline();
  if (relance) {
    print_newline();
  }
}
Пример #25
0
// compute_distance <matrix> <trie> <input> [<nof_peptides>]
//
// Loads a serialized TrieArray from <trie> and a Matrix from <matrix>, reads
// one peptide per line from <input> (lines containing '#' are skipped), and
// for every distinct peptide prints one line of the form
//     <peptide>:<hit>,<distance>;<hit>,<distance>;...
// with the entries returned by DFS_BnB_x_pair(). nof_peptides defaults to 10.
//
// Returns 0 on success and -1 on a usage error, a negative nof_peptides, or
// when the trie or input file cannot be opened.
int main(int argc, char** argv)
{	
	size_t nof_peptides = 10;
	if (argc < 4)
	{
		std::cout << "Usage: compute_distance <matrix> <trie> <input> [<nof_peptides>]" << std::endl;
		return -1;
	}
	if (argc > 4) // nof_peptides is optional
	{
		// A negative count would otherwise wrap around to a huge size_t.
		const int requested = atoi(argv[4]);
		if (requested < 0)
		{
			std::cerr << "nof_peptides must not be negative!" << std::endl;
			return -1;
		}
		nof_peptides = static_cast<size_t>(requested);
	}

	string matrix(argv[1]);
	string trie(argv[2]);
	string filename(argv[3]);

	TrieArray ta;
	{
		std::ifstream ifs(trie.c_str()); //trie is a string containing the path and filename of the trie file.
		// Fail with a message instead of deserializing from a stream that never opened.
		if (!ifs)
		{
			std::cerr << "Cannot open trie File!" << std::endl;
			return -1;
		}
		boost::archive::text_iarchive ia(ifs);
		ta.load(ia,1);
	}

	Matrix m(matrix);	 

	// Read peptides: one peptide sequence per line; the set removes duplicates.
	set<string> peptides;
	{ 
		ifstream is(filename.c_str());
		if (not is)
		{
			// Reported and returned rather than thrown: nothing catches in main.
			std::cerr << "Cannot open info File!" << std::endl;
			return -1;
		}
		string line;
		while (getline(is,line))
		{	
			// Any line that contains '#' is treated as a comment and skipped.
			string::size_type comment = line.find("#");
			if (comment == string::npos)
			{	
				peptides.insert(line);
			}
		}
		is.close();
	}

	for( set<string>::const_iterator iter = peptides.begin(); iter != peptides.end(); ++iter ) 
	{
		string s = *iter;

		flush(cout);		
		Node n (0,0); //start at top of the trie
		Peptide p;
		Peptide seq;
		m.translate(s, seq); //translate peptide sequence to matrix indices. seq contains the translated peptide sequence. 

		multiset<pair<double,string> > dist_min;
		multiset<pair<double,string> > dt;
		double dist = 0.0;
		dist_min = DFS_BnB_x_pair(ta,n,dist,m,p,seq,dt,nof_peptides);	

		// Output: "<peptide>:" followed by "<hit>,<distance>;" per result.
		cout << s << ":";
		for (multiset<pair<double,string> >::iterator it=dist_min.begin() ; it != dist_min.end(); ++it )
		{
			cout << (*it).second <<"," << (*it).first << ";";
		}
		cout << std::endl;
	}
	return 0;
}
Пример #26
0
// Minimal driver: constructs a SuffixTrie from the literal "cacao" and exits
// with status 0. The command-line arguments are not used.
int main(int argc, char** argv)
{
    // The trie is never queried; the only work done here is whatever the
    // SuffixTrie constructor performs for this input string.
    SuffixTrie trie("cacao");

    return 0;
}
Пример #27
0
// Command-line entry point that converts a word list into dawg file(s) or
// checks a word list against an existing dawg.
//
// Accepted forms (anything else prints the usage text and returns 1):
//   prog word_list_file dawg_file unicharset_file
//       Read the word list into a Trie, reduce it to a SquishedDawg and
//       write it to dawg_file.
//   prog -t word_list_file dawg_file unicharset_file
//       Load dawg_file and run check_for_words() on the word list.
//   prog -r <policy> word_list_file dawg_file unicharset_file
//       Same as the first form, with <policy> (an integer cast to
//       Trie::RTLReversePolicy) passed to read_word_list().
//   prog -l <min_len> <max_len> word_list_file dawg_file unicharset_file
//       Build one Trie per word length in [min_len, max_len] and write them
//       with Dict::WriteFixedLengthDawgs().
//
// Returns 0 on success and 1 on invalid arguments or when the unicharset
// cannot be loaded; failures while processing the files call exit(1).
int main(int argc, char** argv) {
  int min_word_length = 0;
  int max_word_length = 0;
  // The -l clause also parses both lengths, so they are set whenever
  // argc == 7 passes this check.
  if (!(argc == 4 || (argc == 5 && strcmp(argv[1], "-t") == 0) ||
      (argc == 6 && strcmp(argv[1], "-r") == 0) ||
      (argc == 7 && strcmp(argv[1], "-l") == 0 &&
         sscanf(argv[2], "%d", &min_word_length) == 1 &&
         sscanf(argv[3], "%d", &max_word_length) == 1))) {
    printf("Usage: %s [-t | -r [reverse policy] |"
           " -l min_len max_len] word_list_file"
           " dawg_file unicharset_file\n", argv[0]);
    return 1;
  }
  // Reject length ranges the -l branch cannot handle: a negative minimum
  // would allocate tries for negative lengths, and min > max would pass a
  // non-positive dawg count to WriteFixedLengthDawgs().
  if (argc == 7 &&
      (min_word_length < 0 || max_word_length < min_word_length)) {
    printf("Invalid word length range: min_len=%d max_len=%d\n",
           min_word_length, max_word_length);
    return 1;
  }
  tesseract::Classify *classify = new tesseract::Classify();
  // argv_index is advanced past the option flag and its arguments so that
  // the three pre-increments below land on the three file names.
  int argv_index = 0;
  if (argc == 5) ++argv_index;
  tesseract::Trie::RTLReversePolicy reverse_policy =
      tesseract::Trie::RRP_DO_NO_REVERSE;
  if (argc == 6) {
    ++argv_index;
    int tmp_int;
    // The policy must be a number; using tmp_int after a failed sscanf
    // would read an uninitialized value.
    if (sscanf(argv[++argv_index], "%d", &tmp_int) != 1) {
      tprintf("Invalid reverse policy '%s'\n", argv[argv_index]);
      delete classify;
      return 1;
    }
    reverse_policy = static_cast<tesseract::Trie::RTLReversePolicy>(tmp_int);
    tprintf("Set reverse_policy to %s\n",
            tesseract::Trie::get_reverse_policy_name(reverse_policy));
  }
  if (argc == 7) argv_index += 3;
  const char* wordlist_filename = argv[++argv_index];
  const char* dawg_filename = argv[++argv_index];
  const char* unicharset_file = argv[++argv_index];
  tprintf("Loading unicharset from '%s'\n", unicharset_file);
  if (!classify->getDict().getUnicharset().load_from_file(unicharset_file)) {
    tprintf("Failed to load unicharset from '%s'\n", unicharset_file);
    delete classify;
    return 1;
  }
  const UNICHARSET &unicharset = classify->getDict().getUnicharset();
  if (argc == 4 || argc == 6) {
    // Build a single dawg from the whole word list.
    tesseract::Trie trie(
        // the first 3 arguments are not used in this case
        tesseract::DAWG_TYPE_WORD, "", SYSTEM_DAWG_PERM,
        kMaxNumEdges, unicharset.size(),
        classify->getDict().dawg_debug_level);
    tprintf("Reading word list from '%s'\n", wordlist_filename);
    if (!trie.read_word_list(wordlist_filename, unicharset, reverse_policy)) {
      tprintf("Failed to read word list from '%s'\n", wordlist_filename);
      exit(1);
    }
    tprintf("Reducing Trie to SquishedDawg\n");
    tesseract::SquishedDawg *dawg = trie.trie_to_dawg();
    if (dawg != NULL && dawg->NumEdges() > 0) {
      tprintf("Writing squished DAWG to '%s'\n", dawg_filename);
      dawg->write_squished_dawg(dawg_filename);
    } else {
      tprintf("Dawg is empty, skip producing the output file\n");
    }
    delete dawg;
  } else if (argc == 5) {
    // -t: verify the word list against an existing dawg file.
    tprintf("Loading dawg DAWG from '%s'\n", dawg_filename);
    tesseract::SquishedDawg words(
        dawg_filename,
        // these 3 arguments are not used in this case
        tesseract::DAWG_TYPE_WORD, "", SYSTEM_DAWG_PERM,
        classify->getDict().dawg_debug_level);
    tprintf("Checking word list from '%s'\n", wordlist_filename);
    words.check_for_words(wordlist_filename, unicharset, true);
  } else if (argc == 7) {
    // Place words of different lengths in separate Dawgs.
    char str[CHARS_PER_LINE];
    FILE *word_file = fopen(wordlist_filename, "rb");
    if (word_file == NULL) {
      tprintf("Failed to open wordlist file %s\n", wordlist_filename);
      exit(1);
    }
    FILE *dawg_file = fopen(dawg_filename, "wb");
    if (dawg_file == NULL) {
      tprintf("Failed to open dawg output file %s\n", dawg_filename);
      exit(1);
    }
    tprintf("Reading word list from '%s'\n", wordlist_filename);
    // trie_vec[k] collects the words of length min_word_length + k.
    GenericVector<tesseract::Trie *> trie_vec;
    int i;
    for (i = min_word_length; i <= max_word_length; ++i) {
      trie_vec.push_back(new tesseract::Trie(
          // the first 3 arguments are not used in this case
          tesseract::DAWG_TYPE_WORD, "", SYSTEM_DAWG_PERM,
          kMaxNumEdges, unicharset.size(),
          classify->getDict().dawg_debug_level));
    }
    while (fgets(str, CHARS_PER_LINE, word_file) != NULL) {
      chomp_string(str);  // remove newline
      int badpos;
      if (!unicharset.encodable_string(str, &badpos)) {
        tprintf("String '%s' not compatible with unicharset. "
                "Bad chars here: '%s'\n", str, str + badpos);
        continue;
      }
      WERD_CHOICE word(str, unicharset);
      // reverse_policy still holds its default here, because only the -r
      // form assigns it.
      if ((reverse_policy == tesseract::Trie::RRP_REVERSE_IF_HAS_RTL &&
          word.has_rtl_unichar_id()) ||
          reverse_policy == tesseract::Trie::RRP_FORCE_REVERSE) {
        word.reverse_and_mirror_unichar_ids();
      }
      // Words outside the requested length range are silently skipped.
      if (word.length() >= min_word_length &&
          word.length() <= max_word_length &&
          !word.contains_unichar_id(INVALID_UNICHAR_ID)) {
        tesseract::Trie *curr_trie = trie_vec[word.length()-min_word_length];
        if (!curr_trie->word_in_dawg(word)) {
          if (!curr_trie->add_word_to_dawg(word)) {
            tprintf("Failed to add the following word to dawg:\n");
            word.print();
            exit(1);
          }
          if (classify->getDict().dawg_debug_level > 1) {
            tprintf("Added word %s of length %d\n", str, word.length());
          }
          // Sanity check: the word must be found right after insertion.
          if (!curr_trie->word_in_dawg(word)) {
            tprintf("Error: word '%s' not in DAWG after adding it\n", str);
            exit(1);
          }
        }
      }
    }
    fclose(word_file);
    tprintf("Writing fixed length dawgs to '%s'\n", dawg_filename);
    // dawg_vec is indexed by word length; lengths below min_word_length get
    // a NULL entry.
    GenericVector<tesseract::SquishedDawg *> dawg_vec;
    for (i = 0; i <= max_word_length; ++i) {
      dawg_vec.push_back(i < min_word_length ? NULL :
                         trie_vec[i-min_word_length]->trie_to_dawg());
    }
    tesseract::Dict::WriteFixedLengthDawgs(
        dawg_vec, max_word_length - min_word_length + 1,
        classify->getDict().dawg_debug_level, dawg_file);
    fclose(dawg_file);
    dawg_vec.delete_data_pointers();
    trie_vec.delete_data_pointers();
  } else {  // should never get here
    tprintf("Invalid command-line options\n");
    exit(1);
  }
  delete classify;
  return 0;
}
Пример #28
0
int main() {
	int i;
	liste li = nouvListe(), li2 = nouvListe();
	
	adjt(1, li);
	adjt(5, li);
	adjt(-7, li);
		
	printf ("liste : "); affiche(li);
	
	printf("taille : %d\n", taille(li));	
	printf("vide ? : %s\n", (estVide(li)?"oui":"non"));	

	for(i=1; i <= 10; ++i) {
		adjq(i*i, li2);
	} 
	printf ("liste : "); affiche(li2);
	printf("tete : %d queue : %d\n", tete(li2), queue(li2));
	printf("====== suppressions =========\n");
	supt(li2);
	printf ("apres supt : "); affiche(li2);
	supq(li2);
	printf ("apres supq : "); affiche(li2);

	// creation de deux listes avec des elements choisis au hasard
	printf("====== tris et renversement =========\n");

	srand(time(NULL));	// initialisation de la suite aleatoire
	printf("liste 11 : ");
	liste l1 = nouvListe();
	for(i=0; i < 15; ++i) {
	  adjt(rand()%30, l1);
	}
	affiche (l1);
	
	printf("liste 12 : ");
	liste l2 = nouvListe();
	for(i=0; i < 10; ++i) {
	  adjt(rand()%30, l2);
	}
	affiche (l2);

	liste l1t = trie(l1);
	liste l2t = trie(l2);
	printf("liste 11 apres trie : "); affiche(l1t);
	printf("liste 12 apres trie : "); affiche(l2t);
	liste l3t = interclasse(l1t,l2t);
	printf("interclassement : "); affiche(l3t);
	
	printf("renversement iter : "); affiche(renverse_iter(l3t));
	printf("renversement recur : "); affiche(renverse_recur(l3t));

	printf("====== palindrome =========\n");

	
	liste lpalin = nouvListe();
	adjt(1, lpalin); 
	adjt(2, lpalin); adjq(2, lpalin);
	adjt(8, lpalin); adjq(8, lpalin);
	printf("liste : "); affiche(lpalin);
	printf("Palindrome (iter) ? %s\n", (palindrome_iter(lpalin)?"oui":"non"));
	printf("Palindrome (recur) ? %s\n", (palindrome_recur(lpalin)?"oui":"non"));
	supt(lpalin);
	printf("liste : "); affiche(lpalin);
	printf("Palindrome (iter) ? %s\n", (palindrome_iter(lpalin)?"oui":"non"));
	printf("Palindrome (recur) ? %s\n", (palindrome_recur(lpalin)?"oui":"non"));
	
	return 0;
}