Esempio n. 1
0
SuffixTree::SuffixTree(int left, int right, std::vector <int> &buffer)
{
    // Append buffer[left], ..., buffer[right] (inclusive on both ends) to `line`.
    // Nothing is appended when left > right.
    int index = left;
    while (index <= right) {
        line.push_back(buffer[index]);
        ++index;
    }

    // Run the class's initialization step, then build the tree.
    initialization();
    buildSuffixTree();
}
Esempio n. 2
0
/*
 * Reads whitespace-delimited strings from standard input. A suffix tree is
 * built for the first string; every following string gets its own tree, which
 * is passed to intersectTrees() together with the first tree and is then
 * deleted. Finally the value returned by calcLongestMatch() for the first
 * tree is printed. With DEBUG defined, the `count` value and the elapsed CPU
 * time are printed as well.
 *
 * Returns 0 on success, 1 when no input string could be read.
 */
int main(int argc, char** args)
{
#ifdef DEBUG
	clock_t start = clock();
#endif

	/* Without a first string there is nothing to build a tree from. */
	/* NOTE(review): "%s" has no field width, so an over-long token can overflow
	 * firstString/otherString; their sizes are not visible here - confirm and
	 * add a width limit. */
	if (scanf("%s\n", firstString) != 1)
	{
		fprintf(stderr, "expected at least one input string\n");
		return 1;
	}

	Node* root = buildSuffixTree(firstString);

	/* Process every remaining string until input is exhausted. */
	while(scanf("%s\n", otherString) == 1)
	{
#ifdef DEBUG
		printf("creating otherTree - #nodes: %ld\n", count);
#endif

		Node* otherRoot = buildSuffixTree(otherString);

#ifdef DEBUG
		printf("intersecting trees - #nodes: %ld\n", count);
#endif

		intersectTrees(root, firstString, otherRoot, otherString);

#ifdef DEBUG
		printf("deleting unused nodes - #nodes: %ld\n", count);
#endif

		/* The per-string tree is only needed for the intersection step. */
		deleteNode(otherRoot);

#ifdef DEBUG
		printf("round done - #nodes: %ld\n", count);
#endif
	}

	printf("%ld\n", calcLongestMatch(root));
	deleteNode(root);

#ifdef DEBUG
	clock_t end = clock();
	double cpu_time = ((double)( end - start )) / CLOCKS_PER_SEC;
	printf("%.3fsecs\n", cpu_time);
#endif

	return 0;
}
//--------------------------------------------------------------------------------------------
// Driver program: for each sample text, copies it into the global `text`
// buffer, builds the suffix tree, calls getLongestRepeatedSubstring() and
// then frees the tree again.
int main(int argc, char *argv[])
{
    // Sample inputs, processed in this order; each one ends with '$'.
    static const char *const samples[] = {
        "GEEKSFORGEEKS$",
        "AAAAAAAAAA$",
        "ABCDEFG$",
        "ABABABA$",
        "ATCGATCGA$",
        "banana$",
        "abcpqrabpqpq$",
        "pqrpqpqabab$",
    };
    const int sampleCount = (int)(sizeof(samples) / sizeof(samples[0]));
    int i;

    for (i = 0; i < sampleCount; i++)
    {
        strcpy(text, samples[i]);
        buildSuffixTree();
        getLongestRepeatedSubstring();
        //Free the dynamically allocated memory
        freeSuffixTreeByPostOrder(root);
    }

    return 0;
}
Esempio n. 4
0
// Driver program: copies one sample text into the global `text` buffer and
// builds its suffix tree.
int main(int argc, char *argv[])
{
    /*
     * Other sample inputs that were tried and are currently disabled:
     *   "abc", "xabxac#", "xabxa", "xabxa$", "geeksforgeeks$",
     *   "THIS IS A TEST TEXT$", "AABAACAADAABAAABAA$"
     */
    strcpy(text, "abc$");
    buildSuffixTree();

    return 0;
}
Esempio n. 5
0
// Driver program. As written it only runs a short allocation/HASH debugging
// loop and then returns; the longest-common-substring cases below the early
// return are unreachable.
int main(int argc, char *argv[])
{
    for(int i=0;i<10;i++)
    {
        test* p=new test();
        // NOTE(review): `print` is kept exactly as found; confirm it is a real
        // helper and not a typo for printf.
        print("%d",*((int*)(p)));
        printf("%d\n",HASH(p));
        // p comes from scalar `new`, so it must be released with scalar
        // `delete`; `delete[]` here would be undefined behavior.
        delete p;
    }
    // NOTE(review): this early return disables every case below. Remove it
    // (together with the debugging loop above) to run them again.
    return 0;

    // In each case below, size1 is set to the length of the first string
    // including its '#' separator.
    size1 = 7;
    printf("Longest Common Substring in xabxac and abcabxabcd is: ");
    strcpy(text, "xabxac#abcabxabcd$"); buildSuffixTree();
    getLongestCommonSubstring();
    //Free the dynamically allocated memory
    freeSuffixTreeByPostOrder(root);

    size1 = 10;
    printf("Longest Common Substring in xabxaabxa and babxba is: ");
    strcpy(text, "xabxaabxa#babxba$"); buildSuffixTree();
    getLongestCommonSubstring();
    //Free the dynamically allocated memory
    freeSuffixTreeByPostOrder(root);

    size1 = 14;
    printf("Longest Common Substring in GeeksforGeeks and GeeksQuiz is: ");
    strcpy(text, "GeeksforGeeks#GeeksQuiz$"); buildSuffixTree();
    getLongestCommonSubstring();
    //Free the dynamically allocated memory
    freeSuffixTreeByPostOrder(root);

    size1 = 26;
    printf("Longest Common Substring in OldSite:GeeksforGeeks.org");
    printf(" and NewSite:GeeksQuiz.com is: ");
    strcpy(text, "OldSite:GeeksforGeeks.org#NewSite:GeeksQuiz.com$");
    buildSuffixTree();
    getLongestCommonSubstring();
    //Free the dynamically allocated memory
    freeSuffixTreeByPostOrder(root);

    size1 = 6;
    printf("Longest Common Substring in abcde and fghie is: ");
    strcpy(text, "abcde#fghie$"); buildSuffixTree();
    getLongestCommonSubstring();
    //Free the dynamically allocated memory
    freeSuffixTreeByPostOrder(root);

    size1 = 6;
    printf("Longest Common Substring in pqrst and uvwxyz is: ");
    strcpy(text, "pqrst#uvwxyz$"); buildSuffixTree();
    getLongestCommonSubstring();
    //Free the dynamically allocated memory
    freeSuffixTreeByPostOrder(root);

    return 0;
}
Esempio n. 6
0
// Argp parser callback: processes command line options and arguments.
//
//   key   - option key ('t', 'p', 'l', 'a') or an ARGP_KEY_* event
//   arg   - text of the current positional argument (used for ARGP_KEY_ARG)
//   state - argp parser state; state->input is read as a pointer to an int
//           counter that is decremented once per positional argument
//
// Always returns 0. Option 't' runs the self tests and exits the process.
static int
parse_opt (int key, char *arg, struct argp_state *state)
{
        int *arg_count = (int *)state->input;

        switch (key)
        {
                case 't': //Run the self test suite
                {
                        self_test();
                        exit(0);
                }
                case 'p': //Print the generated tree
                {
                        opt_print_tree = 1;
                        break;
                }
                case 'l': //Find Longest Common Substring (LCS)
                {
                        opt_lcs = 1;
                        break;
                }
                case 'a': //Find All Common Substrings (ACS)
                {
                        opt_acs = 1;
                        break;
                }
                case ARGP_KEY_ARG: //Process the command line arguments
                {
                        // The counter value after decrementing selects the slot:
                        // 3 -> first text, 2 -> second text.
                        // NOTE(review): this presumes the caller starts the
                        // counter at 4 - confirm against the argp_parse call.
                        (*arg_count)--;
                        if (*arg_count == 3){
                                input_text = (unsigned char*)arg;
                        }
                        else if (*arg_count == 2){
                                input_text2 = (unsigned char*)arg;
                        }

                }
                break;
                case ARGP_KEY_END:
                {
                        printf ("\n");
                        if (*arg_count >= 4){
                                argp_failure (state, 1, 0, "too few arguments");
                        }
                        else if (*arg_count < 0){
                                argp_failure (state, 1, 0, "too many arguments");
                        }
                        else {
                                if (opt_print_tree){
                                        // Construct the tree and process based on supplied options
                                        buildSuffixTree(input_text, input_text2, opt_print_tree);
                                        freeSuffixTreeByPostOrder(root);
                                        // The rendered tree contains user-supplied text, so it
                                        // must be passed as data, never as the format string.
                                        printf("%s", tree_string);
                                }
                                if (opt_lcs){
                                        char *lcs;
                                        if(!input_text2){
                                                argp_failure (state, 1, 0, "missing comparison string");
                                        }
                                        printf("Longest Common Substring in %s and %s is: ",
                                               input_text,
                                               input_text2);
                                        lcs = getLongestCommonSubstring(input_text,
                                                                        input_text2,
                                                                        opt_print_tree);

                                        // Result is derived from user input: print it as data.
                                        printf("%s", lcs);

                                }
                                if (opt_acs){
                                        char *acs;
                                        if(!input_text2){
                                                argp_failure (state, 1, 0, "missing comparison string");
                                        }
                                        printf("All Common Substrings in %s and %s are: ",
                                               input_text,
                                               input_text2);
                                        acs = getAllCommonSubstrings(input_text,
                                                                     input_text2,
                                                                     opt_print_tree);

                                        // Result is derived from user input: print it as data.
                                        printf("%s", acs);

                                }
                        }
                }
                break;
        }
        return 0;
}
Esempio n. 7
0
/**
 * Run the self tests.
 *
 * Exercises suffix tree construction, longest common substring, substring
 * lookup, all-occurrences lookup and longest repeated substring on fixed
 * inputs, and checks every result with assert(). Progress is printed to
 * standard output.
 */
void self_test(){
        printf("Running Self Tests... \n");

        printf("Build suffix tree test: \n");
        const char *tree_output =       "$ [3]\n"
                                        "abc$ [0]\n"
                                        "bc$ [1]\n"
                                        "c$ [2]\n"; //Expected output
        printf("Building suffix tree for string: abc \n");
        buildSuffixTree((unsigned char *)"abc", NULL, 1); //Build Suffix tree for this string
        freeSuffixTreeByPostOrder(root);
        // Print results as data ("%s"), never as the format string itself.
        printf("%s", tree_string);
        int compare_result = strcmp(tree_string, tree_output);
        assert(compare_result == 0);
        memset(tree_string,0,strlen(tree_string)); //clear string from previous test
        const char *tree_output2 =      "$ [10]\n"
                                        "ab [-1]\n"
                                        "c [-1]\n"
                                        "abxabcd$ [0]\n"
                                        "d$ [6]\n"
                                        "xabcd$ [3]\n"
                                        "b [-1]\n"
                                        "c [-1]\n"
                                        "abxabcd$ [1]\n"
                                        "d$ [7]\n"
                                        "xabcd$ [4]\n"
                                        "c [-1]\n"
                                        "abxabcd$ [2]\n"
                                        "d$ [8]\n"
                                        "d$ [9]\n"
                                        "xabcd$ [5]\n"; //Expected output
        printf("Building suffix tree for string: abcabxabcd \n");
        buildSuffixTree((unsigned char *)"abcabxabcd", NULL, 1); //Build Suffix tree for this string
        freeSuffixTreeByPostOrder(root);
        printf("%s", tree_string);
        int compare_result2 = strcmp(tree_string, tree_output2);
        assert(compare_result2 == 0);
        memset(tree_string,0,strlen(tree_string)); //clear string from previous test
        printf("Suffix tree build test: Passed\n\n");

        printf("Longest substrings test: \n");
        char *lcs;

        printf("Longest Common Substring in xabxac and abcabxabcd is: ");
        lcs = getLongestCommonSubstring((unsigned char *)"xabxac",
                                        (unsigned char *)"abcabxabcd",
                                        0);
        printf("%s", lcs);
        int compare_result3 = strcmp(lcs, "abxa, of length: 4\n");
        assert(compare_result3 == 0);
        memset(lcs,0,strlen(lcs)); //clear string from previous test

        printf("Longest Common Substring in xabxaabxa and babxba is: ");
        lcs = getLongestCommonSubstring((unsigned char *)"xabxaabxa",
                                        (unsigned char *)"babxba",
                                        0);

        printf("%s", lcs);
        int compare_result4 = strcmp(lcs, "abx, of length: 3\n");
        assert(compare_result4 == 0);
        memset(lcs,0,strlen(lcs)); //clear string from previous test
        printf("Longest Common Substring test: Passed\n\n");

        printf("Substrings test: \n");
        int is_substring = 0;

        printf("\"test\" is a substring of \"this is a test\" \n");
        is_substring = checkForSubString((unsigned char *)"test",
                                         (unsigned char *)"this is a test");
        assert(is_substring == 1);

        // Negative case: the message now matches the expected result (0).
        printf("\"foo\" is not a substring of \"this is a test\" \n");
        is_substring = checkForSubString((unsigned char *)"foo",
                                         (unsigned char *)"this is a test");
        assert(is_substring == 0);
        printf("Substrings test: Passed\n\n");

        printf("All occurrences of Substring test: \n");
        // Element 0 of each result holds the match count; the positions follow.
        int *all_substrings;
        int i = 0;

        printf("Text: AABAACAADAABAAABAA, Pattern to search: AABA\n");
        all_substrings = checkAllSubStringOccurences((unsigned char *)"AABA",
                                                     (unsigned char *)"AABAACAADAABAAABAA");
        assert(all_substrings[0] == 3);
        assert(all_substrings[1] == 13);
        assert(all_substrings[2] == 9);
        assert(all_substrings[3] == 0);

        printf("Substring found count: %d\n", all_substrings[0]);
        printf("Substrings found at positions: ");
        for( i = 1; i <all_substrings[0] + 1; i++)
        {
            printf ("%d,",all_substrings[i]);
        }
        free(all_substrings);

        printf("\n\nText: AABAACAADAABAAABAA, Pattern to search: AABAACAAD\n");
        all_substrings = checkAllSubStringOccurences((unsigned char *)"AABAACAAD",
                                                     (unsigned char *)"AABAACAADAABAAABAA");
        assert(all_substrings[0] == 1);
        assert(all_substrings[1] == 0);

        printf("Substring found count: %d\n", all_substrings[0]);
        printf("Substrings found at position: %d", all_substrings[1]);
        free(all_substrings);

        printf("\n\nText: AABAACAADAABAAABAA, Pattern to search: AA\n");
        all_substrings = checkAllSubStringOccurences((unsigned char *)"AA",
                                                     (unsigned char *)"AABAACAADAABAAABAA");
        assert(all_substrings[0] == 7);
        assert(all_substrings[1] == 16);
        assert(all_substrings[2] == 12);
        assert(all_substrings[3] == 13);
        assert(all_substrings[4] == 9);
        assert(all_substrings[5] == 0);
        assert(all_substrings[6] == 3);
        assert(all_substrings[7] == 6);

        printf("Substring found count: %d\n", all_substrings[0]);
        printf("Substrings found at positions: ");
        for( i = 1; i <all_substrings[0] + 1; i++)
        {
            printf ("%d,",all_substrings[i]);
        }
        free(all_substrings);


        printf("\n\nText: AABAACAADAABAAABAA, Pattern to search: ZZ\n");
        all_substrings = checkAllSubStringOccurences((unsigned char *)"ZZ",
                                                     (unsigned char *)"AABAACAADAABAAABAA");
        assert(all_substrings[0] == 0);
        printf("No Substrings found\n");
        // Release this result too, as is done for the other three searches.
        free(all_substrings);
        printf("\nAll occurences of Substring test: Passed\n\n");

        printf("Longest Repeated Substring test: \n");
        char * lrs;
        int lcs_compare;

        printf("Longest Repeated Substring in AAAAAAAAAA is: ");
        lrs = getLongestRepeatedSubstring((unsigned char *)"AAAAAAAAAA");
        lcs_compare = strcmp(lrs, "AAAAAAAAA");
        assert(lcs_compare == 0);
        printf("%s", lrs);
        memset(lrs,0,strlen(lrs)); //clear string from previous test

        printf("\nLongest Repeated Substring in ABABABA is: ");
        lrs = getLongestRepeatedSubstring((unsigned char *)"ABABABA");
        lcs_compare = strcmp(lrs, "ABABA");
        assert(lcs_compare == 0);
        printf("%s", lrs);
        memset(lrs,0,strlen(lrs)); //clear string from previous test

        printf("\nLongest Repeated Substring in ABCDEFG is: ");
        lrs = getLongestRepeatedSubstring((unsigned char *)"ABCDEFG");
        lcs_compare = strcmp(lrs, "No repeated substring");
        assert(lcs_compare == 0);
        printf("%s", lrs);
        memset(lrs,0,strlen(lrs)); //clear string from previous test

        printf("\nLongest Repeated Substring in pqrpqpqabab is: ");
        lrs = getLongestRepeatedSubstring((unsigned char *)"pqrpqpqabab");
        lcs_compare = strcmp(lrs, "ab");
        assert(lcs_compare == 0);
        printf("%s", lrs);
        memset(lrs,0,strlen(lrs)); //clear string from previous test

        printf("\nLongest Repeated Substring in abcpqrabpqpq is: ");
        lrs = getLongestRepeatedSubstring((unsigned char *)"abcpqrabpqpq");
        lcs_compare = strcmp(lrs, "ab");
        assert(lcs_compare == 0);
        printf("%s", lrs);
        memset(lrs,0,strlen(lrs)); //clear string from previous test

        printf("\nLongest Repeated Substring test: Passed\n\n");

//        printf("All Common substrings test: \n");
//
//        printf("All Common Substrings in orangeisatypeoffruit and fruitsomestugfruitgoeshereorange are: \n");
//        getAllCommonSubstrings((unsigned char *)"orangeisatypeoffruit",
//                                        (unsigned char *)"fruitsomestugfruitgoeshereorange",
//                                        0);

        printf("done");

}
Esempio n. 8
0
/**
 * Compresses the input text and writes the compressed data to a file.
 *
 * The input is processed in `parts` consecutive pieces. For each piece the
 * reversed text is stored in the global `text`, a tree is built for it, and
 * the piece length (4 bytes, most significant byte first), the tree and the
 * encoded data are written to the output file.
 *
 * @param[in] filename name and path of the file to compress.
 * @param[in] compressed name and path of the compressed output file; when
 *            NULL, the input file name with ".ctx" appended is used.
 * @param[in] algorithm the algorithm that will be used to build the suffix
 *            tree: UKKONEN selects the Ukkonen path, any other value uses
 *            buildSTree().
 * @param[in] parts number of pieces the input is split into; also written to
 *            the output as a single byte.
 * @param[in] see passed on unchanged to encode().
 *
 * Exits the process when the input cannot be read or the output file cannot
 * be opened.
 */
static void zip(char *filename, char *compressed, BOOL algorithm, int parts, BOOL see) {
  Uchar *origText, *prevText = NULL;
  Uint origTextLen, partTextLen, currentTextLen;
  FILE *compressed_file;
  int i, part;
  fsmTree_t stree = NULL, prevTree = NULL;
  BOOL alloc = False;

#ifdef WIN32
  HANDLE hndl;
  origText = (Uchar *) file2String(filename, &origTextLen, &hndl);
#else
  origText = (Uchar *) file2String(filename, &origTextLen);
#endif

  if(origText == NULL) {
    fprintf(stderr,"Cannot open file %s\n", filename);
    exit(EXIT_FAILURE);
  }
  /*if(textLen > MAXTEXTLEN)
    {
    fprintf(stderr,"Sorry, textlen = %lu is larger than maximal textlen = %lu\n",
    (Showuint) textLen,(Showuint) MAXTEXTLEN);
    exit(EXIT_FAILURE);
    }*/

  /* No output name given: derive "<filename>.ctx" (4 chars + terminator). */
  if (!compressed) {
    CALLOC(compressed, Uchar, strlen(filename) + 5);
    strcpy(compressed, filename);
    strcat(compressed, ".ctx");
    alloc = True;
  }

  compressed_file = fopen(compressed, "wb");
  if (!compressed_file) {
    /* Report on stderr, like the input-file failure above. */
    fprintf(stderr, "Could not open output file %s\n", compressed);
    exit(EXIT_FAILURE);
  }
  /* The derived name is only needed for fopen. */
  if (alloc) FREE(compressed);

  buildAlpha(origText, origTextLen);
  printf ("Alphasize: %ld\n", alphasize);
  printf("Algorithm %d\n", algorithm);

  setMaxCount();

  /* write magic number */
  putc(MAGIC >> 8, compressed_file);
  putc(MAGIC, compressed_file);
  /* write # of parts */
  putc(parts, compressed_file);

  initialize_output_bitstream();
  initialize_arithmetic_encoder();

  writeAlphabet(compressed_file);

  currentTextLen = 0;
  for (part = 1; part <= parts; part++) {
    /* Integer division already rounds down; no floating point needed. */
    const Uint basePartLen = origTextLen / parts;

    printf("---------- part %d ---------------\n", part);
    if (part != parts) {
      partTextLen = basePartLen;
    }
    else {
      /* The last part takes whatever is left after the equal-sized parts. */
      partTextLen = origTextLen - (basePartLen * (Uint)(parts - 1));
    }

    /* NOTE(review): prevText/prevTree are recorded but never used or freed
     * (the code that consumed them is commented out below), so the previous
     * part's text buffer and tree are leaked when parts > 1. Confirm whether
     * they can be released here. */
    if (part > 1) {
      prevText = text;
      prevTree = stree;
    }

    textlen = partTextLen;
    CALLOC(text, Uchar, textlen);
    reversestring(origText + currentTextLen, textlen, text);

    if (algorithm == UKKONEN) {
      suffixTree_t tree = initSuffixTree();
      buildSuffixTree(tree);
      printf("Tree built\n");
      pruneSuffixTree(tree);
      stree = fsmSuffixTree(tree);
    }
    else {
      stree = buildSTree();
      printf("Tree built\n");
    }

    /*if (part > 1) {
      copyStatistics(prevTree, stree, prevText);
      FREE(prevText);
      freeFsmTree(prevTree);
    }*/

    DEBUGCODE(printf("gamma hits: %d gamma Misses: %d\n", getHits(), getMisses()));
    printf("height: %ld\n", getHeight(stree));

    /* write textlen */
    for (i=3; i>=0; i--) {
      writeByte(textlen >> (8 * i), compressed_file);
    }
    printf ("Textlen: %ld\n", textlen);
    writeFsmTree(stree, compressed_file);
    printf("FSM...\n");
    makeFsm(stree);
    DEBUGCODE(printFsmTree(stree));
    printf("Encoding...\n");

    encode(stree, compressed_file, origText + currentTextLen, partTextLen, see);

    currentTextLen += partTextLen;
  }

  /* Release the last part's text buffer and tree. */
  FREE(text);
  freeFsmTree(stree);

  flush_arithmetic_encoder(compressed_file);
  flush_output_bitstream(compressed_file);

#ifdef WIN32
  freetextspace(origText, hndl);
#else
  freetextspace(origText, origTextLen);
#endif

  fclose(compressed_file);
}