/**
 * Constructs a suffix tree from a slice of `buffer`.
 *
 * The elements buffer[left] .. buffer[right] (both ends inclusive) are
 * appended to the member `line`, after which initialization() and
 * buildSuffixTree() are invoked in that order.
 *
 * @param left   index of the first element to copy.
 * @param right  index of the last element to copy; nothing is copied when
 *               right < left.
 * @param buffer source sequence; it is only read here.
 */
SuffixTree::SuffixTree(int left, int right, std::vector <int> &buffer) {
    int pos = left;
    while (pos <= right) {
        line.push_back(buffer[pos]);
        ++pos;
    }

    initialization();
    buildSuffixTree();
}
int main(int argc, char** args) { #ifdef DEBUG clock_t start = clock(); #endif scanf("%s\n", firstString); Node* root = buildSuffixTree(firstString); while(scanf("%s\n", otherString) == 1) { #ifdef DEBUG printf("creating otherTree - #nodes: %ld\n", count); #endif Node* otherRoot = buildSuffixTree(otherString); #ifdef DEBUG printf("intersecting trees - #nodes: %ld\n", count); #endif intersectTrees(root, firstString, otherRoot, otherString); #ifdef DEBUG printf("deleting unused nodes - #nodes: %ld\n", count); #endif deleteNode(otherRoot); #ifdef DEBUG printf("round done - #nodes: %ld\n", count); #endif } printf("%ld\n", calcLongestMatch(root)); deleteNode(root); #ifdef DEBUG clock_t end = clock(); double cpu_time = ((double)( end - start )) / CLOCKS_PER_SEC; printf("%.3fsecs\n", cpu_time); #endif return 0; }
//-------------------------------------------------------------------------------------------- // driver program to test above functions int main(int argc, char *argv[]) { strcpy(text, "GEEKSFORGEEKS$"); buildSuffixTree(); getLongestRepeatedSubstring(); //Free the dynamically allocated memory freeSuffixTreeByPostOrder(root); strcpy(text, "AAAAAAAAAA$"); buildSuffixTree(); getLongestRepeatedSubstring(); //Free the dynamically allocated memory freeSuffixTreeByPostOrder(root); strcpy(text, "ABCDEFG$"); buildSuffixTree(); getLongestRepeatedSubstring(); //Free the dynamically allocated memory freeSuffixTreeByPostOrder(root); strcpy(text, "ABABABA$"); buildSuffixTree(); getLongestRepeatedSubstring(); //Free the dynamically allocated memory freeSuffixTreeByPostOrder(root); strcpy(text, "ATCGATCGA$"); buildSuffixTree(); getLongestRepeatedSubstring(); //Free the dynamically allocated memory freeSuffixTreeByPostOrder(root); strcpy(text, "banana$"); buildSuffixTree(); getLongestRepeatedSubstring(); //Free the dynamically allocated memory freeSuffixTreeByPostOrder(root); strcpy(text, "abcpqrabpqpq$"); buildSuffixTree(); getLongestRepeatedSubstring(); //Free the dynamically allocated memory freeSuffixTreeByPostOrder(root); strcpy(text, "pqrpqpqabab$"); buildSuffixTree(); getLongestRepeatedSubstring(); //Free the dynamically allocated memory freeSuffixTreeByPostOrder(root); return 0; }
// driver program to test above functions int main(int argc, char *argv[]) { // strcpy(text, "abc"); buildSuffixTree(); // strcpy(text, "xabxac#"); buildSuffixTree(); // strcpy(text, "xabxa"); buildSuffixTree(); // strcpy(text, "xabxa$"); buildSuffixTree(); strcpy(text, "abc$"); buildSuffixTree(); // strcpy(text, "geeksforgeeks$"); buildSuffixTree(); // strcpy(text, "THIS IS A TEST TEXT$"); buildSuffixTree(); // strcpy(text, "AABAACAADAABAAABAA$"); buildSuffixTree(); return 0; }
// driver program to test above functions int main(int argc, char *argv[]) { for(int i=0;i<10;i++) { test* p=new test(); print("%d",*((int*)(p))); printf("%d\n",HASH(p)); delete[] p; } return 0; size1 = 7; printf("Longest Common Substring in xabxac and abcabxabcd is: "); strcpy(text, "xabxac#abcabxabcd$"); buildSuffixTree(); getLongestCommonSubstring(); //Free the dynamically allocated memory freeSuffixTreeByPostOrder(root); size1 = 10; printf("Longest Common Substring in xabxaabxa and babxba is: "); strcpy(text, "xabxaabxa#babxba$"); buildSuffixTree(); getLongestCommonSubstring(); //Free the dynamically allocated memory freeSuffixTreeByPostOrder(root); size1 = 14; printf("Longest Common Substring in GeeksforGeeks and GeeksQuiz is: "); strcpy(text, "GeeksforGeeks#GeeksQuiz$"); buildSuffixTree(); getLongestCommonSubstring(); //Free the dynamically allocated memory freeSuffixTreeByPostOrder(root); size1 = 26; printf("Longest Common Substring in OldSite:GeeksforGeeks.org"); printf(" and NewSite:GeeksQuiz.com is: "); strcpy(text, "OldSite:GeeksforGeeks.org#NewSite:GeeksQuiz.com$"); buildSuffixTree(); getLongestCommonSubstring(); //Free the dynamically allocated memory freeSuffixTreeByPostOrder(root); size1 = 6; printf("Longest Common Substring in abcde and fghie is: "); strcpy(text, "abcde#fghie$"); buildSuffixTree(); getLongestCommonSubstring(); //Free the dynamically allocated memory freeSuffixTreeByPostOrder(root); size1 = 6; printf("Longest Common Substring in pqrst and uvwxyz is: "); strcpy(text, "pqrst#uvwxyz$"); buildSuffixTree(); getLongestCommonSubstring(); //Free the dynamically allocated memory freeSuffixTreeByPostOrder(root); return 0; }
// Process command line options and arguments static int parse_opt (int key, char *arg, struct argp_state *state) { int *arg_count = state->input; switch (key) { case 't': //Run the self test suite { self_test(); exit(0); } case 'p': //Print the generated tree { opt_print_tree = 1; break; } case 'l': //Find Longest Common Substring (LCS) { opt_lcs = 1; break; } case 'a': //Find Longest Common Substring (LCS) { opt_acs = 1; break; } case ARGP_KEY_ARG: //Process the command line arguments { (*arg_count)--; if (*arg_count == 3){ input_text = (unsigned char*)arg; } else if (*arg_count == 2){ input_text2 = (unsigned char*)arg; } } break; case ARGP_KEY_END: { printf ("\n"); if (*arg_count >= 4){ argp_failure (state, 1, 0, "too few arguments"); } else if (*arg_count < 0){ argp_failure (state, 1, 0, "too many arguments"); } else { if (opt_print_tree){ // Construct the tree and process based on supplied options buildSuffixTree(input_text, input_text2, opt_print_tree); freeSuffixTreeByPostOrder(root); printf(tree_string, 's'); } if (opt_lcs){ char *lcs; if(!input_text2){ argp_failure (state, 1, 0, "missing comparison string"); } printf("Longest Common Substring in %s and %s is: ", input_text, input_text2); lcs = getLongestCommonSubstring(input_text, input_text2, opt_print_tree); printf(lcs, 's'); } if (opt_acs){ char *acs; if(!input_text2){ argp_failure (state, 1, 0, "missing comparison string"); } printf("All Common Substrings in %s and %s are: ", input_text, input_text2); acs = getAllCommonSubstrings(input_text, input_text2, opt_print_tree); printf(acs, 's'); } } } break; } return 0; }
/** * run self tests */ void self_test(){ printf("Running Self Tests... \n"); printf("Build suffix tree test: \n"); char *tree_output = "$ [3]\n" "abc$ [0]\n" "bc$ [1]\n" "c$ [2]\n"; //Expected output printf("Building suffix tree for string: abc \n"); buildSuffixTree((unsigned char *)"abc", NULL, 1); //Build Suffix tree for this string freeSuffixTreeByPostOrder(root); printf(tree_string, 's'); int compare_result = strcmp(tree_string, tree_output); assert(compare_result == 0); memset(tree_string,0,strlen(tree_string)); //clear string from previous test char *tree_output2 = "$ [10]\n" "ab [-1]\n" "c [-1]\n" "abxabcd$ [0]\n" "d$ [6]\n" "xabcd$ [3]\n" "b [-1]\n" "c [-1]\n" "abxabcd$ [1]\n" "d$ [7]\n" "xabcd$ [4]\n" "c [-1]\n" "abxabcd$ [2]\n" "d$ [8]\n" "d$ [9]\n" "xabcd$ [5]\n"; //Expected output printf("Building suffix tree for string: abcabxabcd \n"); buildSuffixTree((unsigned char *)"abcabxabcd", NULL, 1); //Build Suffix tree for this string freeSuffixTreeByPostOrder(root); printf(tree_string, 's'); int compare_result2 = strcmp(tree_string, tree_output2); assert(compare_result2 == 0); memset(tree_string,0,strlen(tree_string)); //clear string from previous test printf("Suffix tree build test: Passed\n\n"); printf("Longest substrings test: \n"); char *lcs; printf("Longest Common Substring in xabxac and abcabxabcd is: "); lcs = getLongestCommonSubstring((unsigned char *)"xabxac", (unsigned char *)"abcabxabcd", 0); printf(lcs, 's'); int compare_result3 = strcmp(lcs, "abxa, of length: 4\n"); assert(compare_result3 == 0); memset(lcs,0,strlen(lcs)); //clear string from previous test; //clear string from previous test printf("Longest Common Substring in xabxaabxa and babxba is: "); lcs = getLongestCommonSubstring((unsigned char *)"xabxaabxa", (unsigned char *)"babxba", 0); printf(lcs, 's'); int compare_result4 = strcmp(lcs, "abx, of length: 3\n"); assert(compare_result4 == 0); memset(lcs,0,strlen(lcs)); //clear string from previous test; printf("Longest Common Substring 
test: Passed\n\n"); printf("Substrings test: \n"); int is_substring = 0; printf("\"test\" is a substring of \"this is a test\" \n"); is_substring = checkForSubString((unsigned char *)"test", (unsigned char *)"this is a test"); assert(is_substring == 1); printf("\"foo\" is a substring of \"this is a test\" \n"); is_substring = checkForSubString((unsigned char *)"foo", (unsigned char *)"this is a test"); assert(is_substring == 0); printf("Substrings test: Passed\n\n"); printf("All occurrences of Substring test: \n"); int *all_substrings; size_t i = 0; printf("Text: AABAACAADAABAAABAA, Pattern to search: AABA\n"); all_substrings = checkAllSubStringOccurences((unsigned char *)"AABA", (unsigned char *)"AABAACAADAABAAABAA"); assert(all_substrings[0] == 3); assert(all_substrings[1] == 13); assert(all_substrings[2] == 9); assert(all_substrings[3] == 0); printf("Substring found count: %d\n", all_substrings[0]); printf("Substrings found at positions: "); for( i = 1; i <all_substrings[0] + 1; i++) { printf ("%d,",all_substrings[i]); } free(all_substrings); printf("\n\nText: AABAACAADAABAAABAA, Pattern to search: AABAACAAD\n"); all_substrings = checkAllSubStringOccurences((unsigned char *)"AABAACAAD", (unsigned char *)"AABAACAADAABAAABAA"); assert(all_substrings[0] == 1); assert(all_substrings[1] == 0); printf("Substring found count: %d\n", all_substrings[0]); printf("Substrings found at position: %d", all_substrings[1]); free(all_substrings); printf("\n\nText: AABAACAADAABAAABAA, Pattern to search: AA\n"); all_substrings = checkAllSubStringOccurences((unsigned char *)"AA", (unsigned char *)"AABAACAADAABAAABAA"); assert(all_substrings[0] == 7); assert(all_substrings[1] == 16); assert(all_substrings[2] == 12); assert(all_substrings[3] == 13); assert(all_substrings[4] == 9); assert(all_substrings[5] == 0); assert(all_substrings[6] == 3); assert(all_substrings[7] == 6); printf("Substring found count: %d\n", all_substrings[0]); printf("Substrings found at positions: "); for( i = 
1; i <all_substrings[0] + 1; i++) { printf ("%d,",all_substrings[i]); } free(all_substrings); printf("\n\nText: AABAACAADAABAAABAA, Pattern to search: ZZ\n"); all_substrings = checkAllSubStringOccurences((unsigned char *)"ZZ", (unsigned char *)"AABAACAADAABAAABAA"); assert(all_substrings[0] == 0); printf("No Substrings found\n"); printf("\nAll occurences of Substring test: Passed\n\n"); printf("Longest Repeated Substring test: \n"); char * lrs; int lcs_compare; printf("Longest Repeated Substring in AAAAAAAAAA is: "); lrs = getLongestRepeatedSubstring((unsigned char *)"AAAAAAAAAA"); lcs_compare = strcmp(lrs, "AAAAAAAAA"); assert(lcs_compare == 0); printf(lrs, 's'); memset(lrs,0,strlen(lrs)); //clear string from previous test; printf("\nLongest Repeated Substring in ABABABA is: "); lrs = getLongestRepeatedSubstring((unsigned char *)"ABABABA"); lcs_compare = strcmp(lrs, "ABABA"); assert(lcs_compare == 0); printf(lrs, 's'); memset(lrs,0,strlen(lrs)); //clear string from previous test; printf("\nLongest Repeated Substring in ABCDEFG is: "); lrs = getLongestRepeatedSubstring((unsigned char *)"ABCDEFG"); lcs_compare = strcmp(lrs, "No repeated substring"); assert(lcs_compare == 0); printf(lrs, 's'); memset(lrs,0,strlen(lrs)); //clear string from previous test; printf("\nLongest Repeated Substring in pqrpqpqabab is: "); lrs = getLongestRepeatedSubstring((unsigned char *)"pqrpqpqabab"); lcs_compare = strcmp(lrs, "ab"); assert(lcs_compare == 0); printf(lrs, 's'); memset(lrs,0,strlen(lrs)); //clear string from previous test; printf("\nLongest Repeated Substring in abcpqrabpqpq is: "); lrs = getLongestRepeatedSubstring((unsigned char *)"abcpqrabpqpq"); lcs_compare = strcmp(lrs, "ab"); assert(lcs_compare == 0); printf(lrs, 's'); memset(lrs,0,strlen(lrs)); //clear string from previous test; printf("\nLongest Repeated Substring test: Passed\n\n"); // printf("All Common substrings test: \n"); // // printf("All Common Substrings in orangeisatypeoffruit and 
fruitsomestugfruitgoeshereorange are: \n"); // getAllCommonSubstrings((unsigned char *)"orangeisatypeoffruit", // (unsigned char *)"fruitsomestugfruitgoeshereorange", // 0); printf("done"); }
/**
 * Compresses the input text and writes the compressed data to a file.
 *
 * The input is split into `parts` consecutive pieces. For each piece the
 * reversed text is stored in `text`, a tree is built with the selected
 * algorithm, and the piece length, the tree and the encoded data are
 * written to the output file.
 *
 * @param[in] filename name and path of the file to compress.
 * @param[in] compressed name and path of the compressed output file; when
 *            NULL, the output name is filename with ".ctx" appended.
 * @param[in] algorithm the algorithm that will be used to build the suffix
 *            tree (Ukkonen or Kurtz).
 * @param[in] parts number of pieces the input is split into. It is written
 *            to the output as a single byte, so it must be in 1..255;
 *            any other value terminates the program with an error.
 * @param[in] see if see will be used.
 */
static void zip(char *filename, char *compressed, BOOL algorithm, int parts, BOOL see)
{
  Uchar *origText, *prevText = NULL;
  Uint origTextLen, partTextLen, currentTextLen, basePartLen;
  FILE *compressed_file;
  int i, part;
  fsmTree_t stree = NULL, prevTree = NULL;
  BOOL alloc = False;

#ifdef WIN32
  HANDLE hndl;
#endif

  /* parts is used as a divisor and stored in one byte of the header:
     reject values that would divide by zero or be truncated by putc. */
  if (parts < 1 || parts > 255)
  {
    fprintf(stderr, "Invalid number of parts %d (must be between 1 and 255)\n", parts);
    exit(EXIT_FAILURE);
  }

#ifdef WIN32
  origText = (Uchar *) file2String(filename, &origTextLen, &hndl);
#else
  origText = (Uchar *) file2String(filename, &origTextLen);
#endif

  if(origText == NULL)
  {
    fprintf(stderr,"Cannot open file %s\n", filename);
    exit(EXIT_FAILURE);
  }

  /*if(textLen > MAXTEXTLEN)
  {
    fprintf(stderr,"Sorry, textlen = %lu is larger than maximal textlen = %lu\n",
            (Showuint) textLen,(Showuint) MAXTEXTLEN);
    exit(EXIT_FAILURE);
  }*/

  if (!compressed)
  {
    /* room for filename + ".ctx" + terminating NUL */
    CALLOC(compressed, Uchar, strlen(filename) + 5);
    strcpy(compressed, filename);
    strcat(compressed, ".ctx");
    alloc = True;
  }

  compressed_file = fopen(compressed, "wb");
  if (!compressed_file)
  {
    printf( "Could not open output file");
    exit(1);
  }
  /* the generated name is only needed to open the file */
  if (alloc) FREE(compressed);

  buildAlpha(origText, origTextLen);
  printf ("Alphasize: %ld\n", alphasize);
  printf("Algorithm %d\n", algorithm);

  setMaxCount();

  /* write magic number (high byte first) */
  putc(MAGIC >> 8, compressed_file);
  putc(MAGIC, compressed_file);
  /* write # of parts */
  putc(parts, compressed_file);

  initialize_output_bitstream();
  initialize_arithmetic_encoder();

  writeAlphabet(compressed_file);

  /* every part but the last gets this many symbols; the last part takes
     whatever remains */
  basePartLen = origTextLen / (Uint) parts;

  currentTextLen = 0;
  for (part = 1; part <= parts; part++)
  {
    printf("---------- part %d ---------------\n", part);
    if (part != parts)
    {
      partTextLen = basePartLen;
    }
    else
    {
      partTextLen = origTextLen - (basePartLen * (Uint) (parts - 1));
    }

    if (part > 1)
    {
      /* NOTE(review): prevText / prevTree are recorded here, but the only
         code that used and released them is the disabled block below, so
         the previous part's text buffer and tree are never freed. */
      prevText = text;
      prevTree = stree;
    }

    textlen = partTextLen;
    CALLOC(text, Uchar, textlen);
    reversestring(origText + currentTextLen, textlen, text);

    if (algorithm == UKKONEN)
    {
      suffixTree_t tree = initSuffixTree();
      buildSuffixTree(tree);
      printf("Tree built\n");
      pruneSuffixTree(tree);
      stree = fsmSuffixTree(tree);
    }
    else
    {
      stree = buildSTree();
      printf("Tree built\n");
    }

    /*if (part > 1) {
      copyStatistics(prevTree, stree, prevText);
      FREE(prevText);
      freeFsmTree(prevTree);
    }*/

    DEBUGCODE(printf("gamma hits: %d gamma Misses: %d\n", getHits(), getMisses()));
    printf("height: %ld\n", getHeight(stree));

    /* write textlen as four bytes, most significant first */
    for (i=3; i>=0; i--)
    {
      writeByte(textlen >> (8 * i), compressed_file);
    }
    printf ("Textlen: %ld\n", textlen);
    writeFsmTree(stree, compressed_file);
    printf("FSM...\n");
    makeFsm(stree);
    DEBUGCODE(printFsmTree(stree));
    printf("Encoding...\n");

    encode(stree, compressed_file, origText + currentTextLen, partTextLen, see);

    currentTextLen += partTextLen;
  }

  /* release the buffer and tree of the last part */
  FREE(text);
  freeFsmTree(stree);

  flush_arithmetic_encoder(compressed_file);
  flush_output_bitstream(compressed_file);

#ifdef WIN32
  freetextspace(origText, hndl);
#else
  freetextspace(origText, origTextLen);
#endif

  fclose(compressed_file);
}