Example #1
0
BWT* Load_Indexes(char *BWTINDEX,char *OCCFILE, char *SAFILE, MMPool* & mmPool)
{
        int PoolSize = 524288;
	MMMasterInitialize(3, 0, FALSE, NULL);
	mmPool = MMPoolCreate(PoolSize);
	return BWTLoad(mmPool, BWTINDEX, OCCFILE, SAFILE, NULL, NULL, NULL);//Load FM index
}
Example #2
0
Idx2BWT * BWTLoad2BWT(const char * indexFilePrefix, const char * saFileNameExtension) {

    Idx2BWT * idx2BWT = malloc(sizeof(Idx2BWT));
    BWT * bwt;
    BWT * rev_bwt;
    HSP * hsp;
    
    char bwtFilename[MAX_INDEX_FILENAME_LENGTH]; 
    char bwtOccFilename[MAX_INDEX_FILENAME_LENGTH]; 
    char saFilename[MAX_INDEX_FILENAME_LENGTH]; 
    char rev_bwtFilename[MAX_INDEX_FILENAME_LENGTH]; 
    char rev_bwtOccFilename[MAX_INDEX_FILENAME_LENGTH];
    char packedDnaFilename[MAX_INDEX_FILENAME_LENGTH]; 
    char annotationFilename[MAX_INDEX_FILENAME_LENGTH]; 
    char ambiguityFilename[MAX_INDEX_FILENAME_LENGTH]; 
    char translateFilename[MAX_INDEX_FILENAME_LENGTH]; 
    
    strcpy(bwtFilename,indexFilePrefix);
    strcpy(bwtOccFilename,indexFilePrefix);
    strcpy(saFilename,indexFilePrefix);
    strcpy(rev_bwtFilename,indexFilePrefix);
    strcpy(rev_bwtOccFilename,indexFilePrefix);
    strcpy(packedDnaFilename,indexFilePrefix);
    strcpy(annotationFilename,indexFilePrefix);
    strcpy(ambiguityFilename,indexFilePrefix);
    strcpy(translateFilename,indexFilePrefix);
    
    strcat(bwtFilename,".bwt");
    strcat(bwtOccFilename,".fmv");
    strcat(saFilename,saFileNameExtension);
    strcat(rev_bwtFilename,".rev.bwt");
    strcat(rev_bwtOccFilename,".rev.fmv");
    strcat(packedDnaFilename,".pac");
    strcat(annotationFilename,".ann");
    strcat(ambiguityFilename,".amb");
    strcat(translateFilename,".tra");
    
    MMMasterInitialize(3, 0, FALSE, NULL);
    MMPool * mmPool = MMPoolCreate(2097152);
    
    bwt = BWTLoad(mmPool, bwtFilename, bwtOccFilename, saFilename, NULL, NULL, NULL);
    rev_bwt = BWTLoad(mmPool, rev_bwtFilename, rev_bwtOccFilename, NULL, NULL, NULL, NULL);
    hsp = HSPLoad(mmPool, packedDnaFilename, annotationFilename, ambiguityFilename,translateFilename, 1);
    
    HSPFillCharMap(idx2BWT->charMap);
    HSPFillComplementMap(idx2BWT->complementMap);
     
    idx2BWT->bwt = bwt;
    idx2BWT->rev_bwt = rev_bwt;
    idx2BWT->hsp = hsp;
    idx2BWT->mmPool = mmPool;
    
    return idx2BWT;
}
Example #3
0
int main(int argc, char** argv) {

    char c;
    unsigned int i, j, k;
    MMPool *mmPool;
    dictionary *programInput, *ini;
    char argumentText[11] = "argument:0";
    double startTime;
    double elapsedTime = 0, totalElapsedTime = 0;

    BWT *bwt;
    HSP *hsp;

    unsigned char charMap[256];

    unsigned char *convertedKey;
    unsigned int *packedKey;

    unsigned int found;
    unsigned int numberOfPattern, numberOfPatternFound;
    unsigned int saIndexLeft, saIndexRight;
    SaIndexGroupNew *saIndexGroup;
    SaIndexList *tempSaIndex1, *tempSaIndex2;
    unsigned int numberOfSaIndexGroup;
    HitList *hitList;
    unsigned int textPositionMatched, textPositionRetrieved;
    unsigned long long totalTextPositionMatched, totalTextPositionRetrieved;
    BWTSaRetrievalStatistics BWTSaRetrievalStatistics;

    FILE *logFile;
    

    init(textPositionRetrieved);    // to avoid compiler warning only
    init(textPositionMatched);        // to avoid compiler warning only

    programInput = ParseInput(argc, argv);
    ini = ParseIniFile();

    ProcessIni();
    ValidateIni();

    PrintIni();

    if (Confirmation == TRUE) {
        printf("BWT Search. Press Y to go or N to cancel. ");
        c = (char)getchar();
        while (c != 'y' && c != 'Y' && c != 'n' && c!= 'N') {
            c = (char)getchar();
        }
        if (c == 'n' || c == 'N') {
            exit(0);
        }
    }

    startTime = setStartTime();

    MMMasterInitialize(1, 0, FALSE, NULL);
    mmPool = MMPoolCreate(PoolSize);

    // Load Database
    printf("Loading Database..");
    fflush(stdout);

    bwt = BWTLoad(mmPool, BWTCodeFileName, BWTOccValueFileName, SaValueFileName, NULL, SaIndexFileName, NULL);
    HSPFillCharMap(charMap);
    hsp = HSPLoad(mmPool, PackedDNAFileName, AnnotationFileName, AmbiguityFileName, MAX_SEARCH_PATTERN_LENGTH / CHAR_PER_WORD);
    if (bwt->textLength != hsp->dnaLength) {
        fprintf(stderr, "BWT-BLAST: Database length inconsistent!\n");
        exit(1);
    }


    if (LogFileName[0] != '\0') {
        logFile = fopen64(LogFileName, "w");
        if (logFile == NULL) {
            fprintf(stderr, "Cannot open log file!\n");
            exit(1);
        }
    } else {
        logFile = NULL;
    }

    packedKey = MMPoolDispatch(mmPool, WordPackedLengthFromText(MAX_SEARCH_PATTERN_LENGTH, BIT_PER_CHAR));
    convertedKey = MMPoolDispatch(mmPool, MAX_SEARCH_PATTERN_LENGTH);

    saIndexGroup = MMUnitAllocate(MaxNumberOfHitGroups * sizeof(SaIndexGroup));
    hitList = MMUnitAllocate(MaxNumberOfTextPosition * sizeof(HitList));
    tempSaIndex1 = MMUnitAllocate(MaxNumberOfTextPosition * sizeof(SaIndexList));
    tempSaIndex2 = MMUnitAllocate(MaxNumberOfTextPosition * sizeof(SaIndexList));

    elapsedTime = getElapsedTime(startTime) - totalElapsedTime;
    printf("Elapsed time = ");
    printElapsedTime(stdout, FALSE, FALSE, TRUE, 4, elapsedTime);
    totalElapsedTime += elapsedTime;
    printf("\n");

    // Process search pattern files
    for (i=1; i<=NumberOfSearchPatternFile; i++) {

        argumentText[9] = '0' + (char)(i + 2);
        iniparser_copystring(programInput, argumentText, PatternFileName, PatternFileName, MAX_FILENAME_LEN);

        printf("Loading search pattern : %s\n", PatternFileName);
        numberOfPattern = ProcessSearchPattern();
        printf("Finished loading search pattern.\n");
        elapsedTime = getElapsedTime(startTime) - totalElapsedTime;
        printf("Elapsed time = ");
        printElapsedTime(stdout, FALSE, FALSE, TRUE, 4, elapsedTime);
        totalElapsedTime += elapsedTime;
        printf("\n");

        if (LogFileName[0] != '\0') {
            fprintf(logFile, "Searching for pattern : %s\n", PatternFileName);
        }

        if (SABinarySearch == TRUE) {

            printf("Start forward search with SA index.\n");
            if (LogFileName[0] != '\0') {
                fprintf(logFile, "Forward search with SA index.\n");
            }

            numberOfPatternFound = 0;
            totalTextPositionMatched = 0;
            totalTextPositionRetrieved = 0;
            BWTInitializeSaRetrievalStatistics(&BWTSaRetrievalStatistics);

            for (j=0; j<numberOfPattern; j++) {
                //ConvertTextToWordPacked(searchPattern + searchPatternPosition[j], packedKey, charMap, ALPHABET_SIZE, 
                //                    searchPatternPosition[j+1] - searchPatternPosition[j]);
                //found = BWTForwardSearchSaIndex(packedKey, searchPatternPosition[j+1] - searchPatternPosition[j],
                //                         bwt, hsp->packedDNA, &saIndexLeft, &saIndexRight);

                ConvertTextToCode(searchPattern + searchPatternPosition[j], convertedKey, charMap, 
                                    searchPatternPosition[j+1] - searchPatternPosition[j]);
                found = BWTSaBinarySearch(convertedKey, searchPatternPosition[j+1] - searchPatternPosition[j],
                                         bwt, hsp->packedDNA, &saIndexLeft, &saIndexRight, packedKey);

                if (found) {
                    numberOfPatternFound++;
                    textPositionMatched = saIndexRight - saIndexLeft + 1;
                    totalTextPositionMatched += textPositionMatched;

                    if (FindTextPosition) {
                        saIndexGroup->startSaIndex = saIndexLeft;
                        if (textPositionMatched <= MaxNumberOfTextPosition) {
                            saIndexGroup->numOfMatch = textPositionMatched;
                        } else {
                            saIndexGroup->numOfMatch = MaxNumberOfTextPosition;
                            printf("Max no of text positions reached! Only %u out of %u text positions retrieved.\n", MaxNumberOfTextPosition, textPositionMatched);
                        }
                        saIndexGroup->posQuery = 0;
                        saIndexGroup->info = 0;
                        textPositionRetrieved = BWTTextPosition(bwt, saIndexGroup, 1, hitList, tempSaIndex1, tempSaIndex2, &BWTSaRetrievalStatistics, FALSE);
                        totalTextPositionRetrieved += textPositionRetrieved;
                    }
                }

                if (LogFileName[0] != '\0') {
                    if (found) {
                        fprintf(logFile, "Pattern number %u found. SA Index from %u to %u.  ", 
                                j + 1, saIndexLeft, saIndexRight);
                        if (FindTextPosition) {
                            fprintf(logFile, "%u matches of %u located. ", textPositionRetrieved, textPositionMatched);
                            for (k=0; k<textPositionRetrieved; k++) {
                                fprintf(logFile, "%u ", hitList[k].posText);
                            }
                        }
                    } else {
                        fprintf(logFile, "Pattern number %u not found.", j + 1); 
                    }
                    fprintf(logFile, "\n");
                }
            }

            printf("Finished forward search with SA index.\n");
            printf("%u patterns out of %u found. %llu of %llu matches located.\n", numberOfPatternFound, numberOfPattern, totalTextPositionRetrieved, totalTextPositionMatched);
            if (FindTextPosition) {
                printf("Hits: BWT/Cached/Diag/Dup      : %u/%u/%u/%u\n", 
                              BWTSaRetrievalStatistics.bwtSaRetrieved, BWTSaRetrievalStatistics.cachedSaRetrieved, 
                              BWTSaRetrievalStatistics.saDiagonalLinked, BWTSaRetrievalStatistics.saDuplicated);
            }
            elapsedTime = getElapsedTime(startTime) - totalElapsedTime;
            printf("Elapsed time = ");
            printElapsedTime(stdout, FALSE, FALSE, TRUE, 4, elapsedTime);
            totalElapsedTime += elapsedTime;
            printf("\n");
            if (LogFileName[0] != '\0') {
                fprintf(logFile, "Finished forward search with SA index.\n");
                fprintf(logFile, "%u patterns out of %u found. %llu of %llu matches located.\n", numberOfPatternFound, numberOfPattern, totalTextPositionRetrieved, totalTextPositionMatched);
                if (FindTextPosition) {
                    fprintf(logFile, "Hits: BWT/Cached/Diag/Dup      : %u/%u/%u/%u\n", 
                                  BWTSaRetrievalStatistics.bwtSaRetrieved, BWTSaRetrievalStatistics.cachedSaRetrieved, 
                                  BWTSaRetrievalStatistics.saDiagonalLinked, BWTSaRetrievalStatistics.saDuplicated);
                }
                fprintf(logFile, "Elapsed time = ");
                printElapsedTime(logFile, FALSE, FALSE, TRUE, 4, elapsedTime);
                fprintf(logFile, "\n");
            }

        }

        if (BackwardSearch == TRUE) {

            printf("Start backward search.\n");
            if (LogFileName[0] != '\0') {
                fprintf(logFile, "Backward search.\n");
            }

            numberOfPatternFound = 0;
            totalTextPositionMatched = 0;
            totalTextPositionRetrieved = 0;
            BWTInitializeSaRetrievalStatistics(&BWTSaRetrievalStatistics);

            for (j=0; j<numberOfPattern; j++) {
                ConvertTextToCode(searchPattern + searchPatternPosition[j], convertedKey, charMap, 
                                    searchPatternPosition[j+1] - searchPatternPosition[j]);
                found = BWTBackwardSearch(convertedKey, searchPatternPosition[j+1] - searchPatternPosition[j],
                                         bwt, &saIndexLeft, &saIndexRight);
                if (found) {
                    numberOfPatternFound++;
                    textPositionMatched = saIndexRight - saIndexLeft + 1;
                    totalTextPositionMatched += textPositionMatched;

                    if (FindTextPosition) {
                        saIndexGroup->startSaIndex = saIndexLeft;
                        if (textPositionMatched <= MaxNumberOfTextPosition) {
                            saIndexGroup->numOfMatch = textPositionMatched;
                        } else {
                            saIndexGroup->numOfMatch = MaxNumberOfTextPosition;
                            printf("Max no of text positions reached! Only %u out of %u text positions retrieved.\n", MaxNumberOfTextPosition, textPositionMatched);
                        }
                        saIndexGroup->posQuery = 0;
                        saIndexGroup->info = 0;
                        textPositionRetrieved = BWTTextPosition(bwt, saIndexGroup, 1, hitList, tempSaIndex1, tempSaIndex2, &BWTSaRetrievalStatistics, FALSE);
                        totalTextPositionRetrieved += textPositionRetrieved;
                    }
                }

                if (LogFileName[0] != '\0') {
                    if (found) {
                        fprintf(logFile, "Pattern number %u found. SA Index from %u to %u. Total %u occurrences ", j + 1, saIndexLeft, saIndexRight, saIndexRight - saIndexLeft + 1);
                        if (FindTextPosition) {
                            fprintf(logFile, "%u matches of %u located. ", textPositionRetrieved, textPositionMatched);
                            for (k=0; k<textPositionRetrieved; k++) {
                                fprintf(logFile, "%u ", hitList[k].posText);
                            }
                        }
                    } else {
                        fprintf(logFile, "Pattern number %u not found.", j + 1); 
                    }
                    fprintf(logFile, "\n");
                }
            }

            printf("Finished backward search.\n");
            printf("%u patterns out of %u found. %llu of %llu matches located.\n", numberOfPatternFound, numberOfPattern, totalTextPositionRetrieved, totalTextPositionMatched);
            if (FindTextPosition) {
                printf("Hits: BWT/Cached/Diag/Dup      : %u/%u/%u/%u\n", 
                              BWTSaRetrievalStatistics.bwtSaRetrieved, BWTSaRetrievalStatistics.cachedSaRetrieved, 
                              BWTSaRetrievalStatistics.saDiagonalLinked, BWTSaRetrievalStatistics.saDuplicated);
            }
            elapsedTime = getElapsedTime(startTime) - totalElapsedTime;
            printf("Elapsed time = ");
            printElapsedTime(stdout, FALSE, FALSE, TRUE, 4, elapsedTime);
            totalElapsedTime += elapsedTime;
            printf("\n");
            if (LogFileName[0] != '\0') {
                fprintf(logFile, "Finished backward search.\n");
                fprintf(logFile, "%u patterns out of %u found. %llu of %llu matches located.\n", numberOfPatternFound, numberOfPattern, totalTextPositionRetrieved, totalTextPositionMatched);
                if (FindTextPosition) {
                    fprintf(logFile, "Hits: BWT/Cached/Diag/Dup      : %u/%u/%u/%u\n", 
                                  BWTSaRetrievalStatistics.bwtSaRetrieved, BWTSaRetrievalStatistics.cachedSaRetrieved, 
                                  BWTSaRetrievalStatistics.saDiagonalLinked, BWTSaRetrievalStatistics.saDuplicated);
                }
                fprintf(logFile, "Elapsed time = ");
                printElapsedTime(logFile, FALSE, FALSE, TRUE, 4, elapsedTime);
                fprintf(logFile, "\n");
            }

        }

        if (BackwardSearchCheck == TRUE) {

            printf("Start backward search with text.\n");
            if (LogFileName[0] != '\0') {
                fprintf(logFile, "Backward search with text.\n");
            }

            numberOfPatternFound = 0;
            totalTextPositionMatched = 0;
            totalTextPositionRetrieved = 0;
            BWTInitializeSaRetrievalStatistics(&BWTSaRetrievalStatistics);

            for (j=0; j<numberOfPattern; j++) {
                ConvertTextToCode(searchPattern + searchPatternPosition[j], convertedKey, charMap, 
                                    searchPatternPosition[j+1] - searchPatternPosition[j]);
                ConvertTextToWordPacked(searchPattern + searchPatternPosition[j], packedKey, charMap, ALPHABET_SIZE, 
                                    searchPatternPosition[j+1] - searchPatternPosition[j]);
                found = BWTBackwardSearchCheckWithText(convertedKey, packedKey, searchPatternPosition[j+1] - searchPatternPosition[j],
                                                  bwt, hsp->packedDNA, TextCheckCostFactor, MaxNumberOfTextPosition, 
                                                  hitList, &saIndexLeft, &saIndexRight);
                if (found) {
                    numberOfPatternFound++;
                    textPositionMatched = saIndexRight - saIndexLeft + 1;
                    totalTextPositionMatched += textPositionMatched;

                    // Find text position if text check not used
                    if (FindTextPosition && saIndexLeft != 0) {
                        saIndexGroup->startSaIndex = saIndexLeft;
                        if (textPositionMatched <= MaxNumberOfTextPosition) {
                            saIndexGroup->numOfMatch = textPositionMatched;
                        } else {
                            saIndexGroup->numOfMatch = MaxNumberOfTextPosition;
                            printf("Max no of text positions reached! Only %u out of %u text positions retrieved.\n", MaxNumberOfTextPosition, textPositionMatched);
                        }
                        saIndexGroup->posQuery = 0;
                        saIndexGroup->info = 0;
                        textPositionRetrieved = BWTTextPosition(bwt, saIndexGroup, 1, hitList, tempSaIndex1, tempSaIndex2, &BWTSaRetrievalStatistics, FALSE);
                        totalTextPositionRetrieved += textPositionRetrieved;
                    } else {
                        textPositionRetrieved = textPositionMatched;
                        totalTextPositionRetrieved += textPositionRetrieved;
                    }
                }

                if (LogFileName[0] != '\0') {
                    if (found) {
                        fprintf(logFile, "Pattern number %u found. ", j + 1);
                        if (FindTextPosition) {
                            fprintf(logFile, "%u matches of %u located. ", textPositionRetrieved, textPositionMatched);
                            for (k=0; k<textPositionRetrieved; k++) {
                                fprintf(logFile, "%u ", hitList[k].posText);
                            }
                        }
                    } else {
                        fprintf(logFile, "Pattern number %u not found.", j + 1); 
                    }
                    fprintf(logFile, "\n");
                }
            }

            printf("Finished backward search with text.\n");
            printf("%u patterns out of %u found. %llu of %llu matches located.\n", numberOfPatternFound, numberOfPattern, totalTextPositionRetrieved, totalTextPositionMatched);
            if (FindTextPosition) {
                printf("Hits: BWT/Cached/Diag/Dup      : %u/%u/%u/%u\n", 
                              BWTSaRetrievalStatistics.bwtSaRetrieved, BWTSaRetrievalStatistics.cachedSaRetrieved, 
                              BWTSaRetrievalStatistics.saDiagonalLinked, BWTSaRetrievalStatistics.saDuplicated);
            }
            elapsedTime = getElapsedTime(startTime) - totalElapsedTime;
            printf("Elapsed time = ");
            printElapsedTime(stdout, FALSE, FALSE, TRUE, 4, elapsedTime);
            totalElapsedTime += elapsedTime;
            printf("\n");
            if (LogFileName[0] != '\0') {
                fprintf(logFile, "Finished backward search with text.\n");
                fprintf(logFile, "%u patterns out of %u found. %llu of %llu matches located.\n", numberOfPatternFound, numberOfPattern, totalTextPositionRetrieved, totalTextPositionMatched);
                if (FindTextPosition) {
                    fprintf(logFile, "Hits: BWT/Cached/Diag/Dup      : %u/%u/%u/%u\n", 
                                  BWTSaRetrievalStatistics.bwtSaRetrieved, BWTSaRetrievalStatistics.cachedSaRetrieved, 
                                  BWTSaRetrievalStatistics.saDiagonalLinked, BWTSaRetrievalStatistics.saDuplicated);
                }
                fprintf(logFile, "Elapsed time = ");
                printElapsedTime(logFile, FALSE, FALSE, TRUE, 4, elapsedTime);
                fprintf(logFile, "\n");
            }

        }

        if (HammingDistSearch == TRUE) {

            printf("Start hamming distance %u approximate match.\n", MaxErrorAllowed);
            if (LogFileName[0] != '\0') {
                fprintf(logFile, "Hamming distance %u approximate match.\n", MaxErrorAllowed);
            }

            numberOfPatternFound = 0;
            totalTextPositionMatched = 0;
            totalTextPositionRetrieved = 0;
            BWTInitializeSaRetrievalStatistics(&BWTSaRetrievalStatistics);

            for (j=0; j<numberOfPattern; j++) {
                ConvertTextToCode(searchPattern + searchPatternPosition[j], convertedKey, charMap, 
                                    searchPatternPosition[j+1] - searchPatternPosition[j]);
                numberOfSaIndexGroup = BWTHammingDistMatchOld(convertedKey, searchPatternPosition[j+1] - searchPatternPosition[j],
                                                           bwt, MaxErrorAllowed, saIndexGroup, MaxNumberOfHitGroups, 0, 0);
                if (numberOfSaIndexGroup) {
                    numberOfPatternFound++;
                    textPositionMatched = 0;
                    for (k=0; k<numberOfSaIndexGroup; k++) {
                        textPositionMatched += saIndexGroup[k].numOfMatch;
                    }
                    if (textPositionMatched > MaxNumberOfTextPosition) {
                        textPositionMatched = 0;
                        for (k=0; k<numberOfSaIndexGroup && textPositionMatched < MaxNumberOfTextPosition; k++) {
                            textPositionMatched += saIndexGroup[k].numOfMatch;
                        }
                        numberOfSaIndexGroup = k - 1;
                        saIndexGroup[numberOfSaIndexGroup].numOfMatch -= textPositionMatched - MaxNumberOfTextPosition;
                        for (; k<numberOfSaIndexGroup; k++) {
                            textPositionMatched += saIndexGroup[k].numOfMatch;
                        }
                        printf("Max no of text positions reached! Only %u out of %u text positions retrieved.\n", MaxNumberOfTextPosition, textPositionMatched);
                    }
                    totalTextPositionMatched += textPositionMatched;

                    if (FindTextPosition) {
                        textPositionRetrieved = BWTTextPosition(bwt, saIndexGroup, numberOfSaIndexGroup, hitList, tempSaIndex1, tempSaIndex2, &BWTSaRetrievalStatistics, FALSE);
                        totalTextPositionRetrieved += textPositionRetrieved;
                    }
                }

                if (LogFileName[0] != '\0') {
                    if (numberOfSaIndexGroup) {
                        fprintf(logFile, "Pattern number %u found. %u matches", j + 1, textPositionMatched);
                        if (FindTextPosition) {
                            fprintf(logFile, "%u matches of %u located. ", textPositionRetrieved, textPositionMatched);
                            for (k=0; k<textPositionRetrieved; k++) {
                                fprintf(logFile, "%u ", hitList[k].posText);
                            }
                        }
                    } else {
                        fprintf(logFile, "Pattern number %u not found.", j + 1); 
                    }
                    fprintf(logFile, "\n");
                }
            }

            printf("Finished hamming distance search.\n");
            printf("%u patterns out of %u found. %llu of %llu matches located.\n", numberOfPatternFound, numberOfPattern, totalTextPositionRetrieved, totalTextPositionMatched);
            if (FindTextPosition) {
                printf("Hits: BWT/Cached/Diag/Dup      : %u/%u/%u/%u\n", 
                              BWTSaRetrievalStatistics.bwtSaRetrieved, BWTSaRetrievalStatistics.cachedSaRetrieved, 
                              BWTSaRetrievalStatistics.saDiagonalLinked, BWTSaRetrievalStatistics.saDuplicated);
            }
            elapsedTime = getElapsedTime(startTime) - totalElapsedTime;
            printf("Elapsed time = ");
            printElapsedTime(stdout, FALSE, FALSE, TRUE, 4, elapsedTime);
            totalElapsedTime += elapsedTime;
            printf("\n");
            if (LogFileName[0] != '\0') {
                fprintf(logFile, "Finished hamming distance search.\n");
                fprintf(logFile, "%u patterns out of %u found. %llu of %llu matches located.\n", numberOfPatternFound, numberOfPattern, totalTextPositionRetrieved, totalTextPositionMatched);
                if (FindTextPosition) {
                    fprintf(logFile, "Hits: BWT/Cached/Diag/Dup      : %u/%u/%u/%u\n", 
                                  BWTSaRetrievalStatistics.bwtSaRetrieved, BWTSaRetrievalStatistics.cachedSaRetrieved, 
                                  BWTSaRetrievalStatistics.saDiagonalLinked, BWTSaRetrievalStatistics.saDuplicated);
                }
                fprintf(logFile, "Elapsed time = ");
                printElapsedTime(logFile, FALSE, FALSE, TRUE, 4, elapsedTime);
                fprintf(logFile, "\n");
            }

        }

        if (EditDistSearch == TRUE) {

            printf("Start edit distance %u approximate match.\n", MaxErrorAllowed);
            if (LogFileName[0] != '\0') {
                fprintf(logFile, "Edit distance %u approximate match.\n", MaxErrorAllowed);
            }

            numberOfPatternFound = 0;
            totalTextPositionMatched = 0;
            totalTextPositionRetrieved = 0;
            BWTInitializeSaRetrievalStatistics(&BWTSaRetrievalStatistics);

            for (j=0; j<numberOfPattern; j++) {
                ConvertTextToCode(searchPattern + searchPatternPosition[j], convertedKey, charMap, 
                                    searchPatternPosition[j+1] - searchPatternPosition[j]);
                numberOfSaIndexGroup = BWTEditDistMatchOld(convertedKey, searchPatternPosition[j+1] - searchPatternPosition[j],
                                                        bwt, MaxErrorAllowed, (SaIndexGroupWithLengthError*)saIndexGroup, MaxNumberOfHitGroups);
                if (numberOfSaIndexGroup > MaxNumberOfHitGroups) {
                    fprintf(stderr, "numberOfSaIndexGroup > MaxNumberOfHitGroups!\n");
                }
                if (numberOfSaIndexGroup) {
                    numberOfPatternFound++;
                    textPositionMatched = 0;
                    for (k=0; k<numberOfSaIndexGroup; k++) {
                        textPositionMatched += saIndexGroup[k].numOfMatch;
                    }
                    if (textPositionMatched > MaxNumberOfTextPosition) {
                        textPositionMatched = 0;
                        for (k=0; k<numberOfSaIndexGroup && textPositionMatched < MaxNumberOfTextPosition; k++) {
                            textPositionMatched += saIndexGroup[k].numOfMatch;
                        }
                        numberOfSaIndexGroup = k - 1;
                        saIndexGroup[numberOfSaIndexGroup].numOfMatch -= textPositionMatched - MaxNumberOfTextPosition;
                        for (; k<numberOfSaIndexGroup; k++) {
                            textPositionMatched += saIndexGroup[k].numOfMatch;
                        }
                        printf("Max no of text positions reached! Only %u out of %u text positions retrieved.\n", MaxNumberOfTextPosition, textPositionMatched);
                    }
                    totalTextPositionMatched += textPositionMatched;

                    if (FindTextPosition) {
                        textPositionRetrieved = BWTTextPosition(bwt, saIndexGroup, numberOfSaIndexGroup, hitList, tempSaIndex1, tempSaIndex2, &BWTSaRetrievalStatistics, FALSE);
                        totalTextPositionRetrieved += textPositionRetrieved;
                    }
                }

                if (LogFileName[0] != '\0') {
                    if (numberOfSaIndexGroup) {
                        fprintf(logFile, "Pattern number %u found. %u matches. ", j + 1, textPositionMatched);
                        if (FindTextPosition) {
                            fprintf(logFile, "%u matches of %u located. ", textPositionRetrieved, textPositionMatched);
                            for (k=0; k<textPositionRetrieved; k++) {
                                fprintf(logFile, "%u ", hitList[k].posText);
                            }
                        }
                    } else {
                        fprintf(logFile, "Pattern number %u not found.", j + 1); 
                    }
                    fprintf(logFile, "\n");
                }
            }

            printf("Finished edit distance search.\n");
            printf("%u patterns out of %u found. %llu of %llu matches located.\n", numberOfPatternFound, numberOfPattern, totalTextPositionRetrieved, totalTextPositionMatched);
            if (FindTextPosition) {
                printf("Hits: BWT/Cached/Diag/Dup      : %u/%u/%u/%u\n", 
                              BWTSaRetrievalStatistics.bwtSaRetrieved, BWTSaRetrievalStatistics.cachedSaRetrieved, 
                              BWTSaRetrievalStatistics.saDiagonalLinked, BWTSaRetrievalStatistics.saDuplicated);
            }
            elapsedTime = getElapsedTime(startTime) - totalElapsedTime;
            printf("Elapsed time = ");
            printElapsedTime(stdout, FALSE, FALSE, TRUE, 4, elapsedTime);
            totalElapsedTime += elapsedTime;
            printf("\n");
            if (LogFileName[0] != '\0') {
                fprintf(logFile, "Finished edit distance search.\n");
                fprintf(logFile, "%u patterns out of %u found. %llu of %llu matches located.\n", numberOfPatternFound, numberOfPattern, totalTextPositionRetrieved, totalTextPositionMatched);
                if (FindTextPosition) {
                    fprintf(logFile, "Hits: BWT/Cached/Diag/Dup      : %u/%u/%u/%u\n", 
                                  BWTSaRetrievalStatistics.bwtSaRetrieved, BWTSaRetrievalStatistics.cachedSaRetrieved, 
                                  BWTSaRetrievalStatistics.saDiagonalLinked, BWTSaRetrievalStatistics.saDuplicated);
                }
                fprintf(logFile, "Elapsed time = ");
                printElapsedTime(logFile, FALSE, FALSE, TRUE, 4, elapsedTime);
                fprintf(logFile, "\n");
            }

        }

    }

    MMPoolReturn(mmPool, packedKey, WordPackedLengthFromText(MAX_SEARCH_PATTERN_LENGTH, BIT_PER_CHAR));
    MMPoolReturn(mmPool, convertedKey, MAX_SEARCH_PATTERN_LENGTH);

    HSPFree(mmPool, hsp, MAX_SEARCH_PATTERN_LENGTH / CHAR_PER_WORD);
    BWTFree(mmPool, bwt);

    if (searchPattern != NULL) {
        MMUnitFree(searchPattern, searchPatternAllocated);
    }
    if (searchPatternPosition != NULL) {
        MMUnitFree(searchPatternPosition, searchPatternPositionAllocated);
    }
    MMUnitFree(saIndexGroup, MaxNumberOfHitGroups * sizeof(unsigned int));
    MMUnitFree(hitList, MaxNumberOfTextPosition * sizeof(unsigned int));

    MMPoolFree(mmPool);

    iniparser_freedict(programInput);
    iniparser_freedict(ini);

    return 0;

}
Example #4
0
int main(int argc, char** argv) {

	unsigned int* __restrict address;
	unsigned int* __restrict memory;

	unsigned int i, j;
	unsigned int t=0;
	unsigned int numberOfMemoryLocation;

	dictionary *programInput;
	double startTime;
	double elapsedTime = 0, totalElapsedTime = 0;
	double initializationTime, experimentTime;

	programInput = ParseInput(argc, argv);
	ValidateParameters();

	// Initialize memory manager
	MMMasterInitialize(0, 0, FALSE, NULL);

	numberOfMemoryLocation = MemorySize / sizeof(int);

	// Allocate memory
	address = MMUnitAllocate((NumberOfAccess + 8) * sizeof(unsigned int));
	memory = MMUnitAllocate((numberOfMemoryLocation + 32) * sizeof(unsigned int));

	// Set random seed
	r250_init(getRandomSeed());

	// Set start time
	startTime = setStartTime();

	printf("Initialize memory pointers with random values..");


	for (i=0; i<numberOfMemoryLocation + 32; i++) {
		memory[i] = r250();
	}
	// Initialize address and memory
	for (i=0; i<NumberOfAccess + 8; i++) {
		address[i] = (r250() % numberOfMemoryLocation) / 4 * 4;
	}

	printf("finished.\n");

	elapsedTime = getElapsedTime(startTime) - totalElapsedTime;
	totalElapsedTime += elapsedTime;
	initializationTime = elapsedTime;

	// Test memory speed for read
	if (ReadWrite[0] == 'R' || ReadWrite[0] == 'r') {
		for (i=0; i<NumberOfIteration; i++) {
			for (j=0; j<NumberOfAccess; j++) {


				t += memory[address[j]];

			}
		}
	}
	// Test memory speed for write
	if (ReadWrite[0] == 'W' || ReadWrite[0] == 'w') {
		for (i=0; i<NumberOfIteration; i++) {
			for (j=0; j<NumberOfAccess; j++) {
				memory[address[j]] += address[j];
			}
		}
	}

	elapsedTime = getElapsedTime(startTime) - totalElapsedTime;
	totalElapsedTime += elapsedTime;
	experimentTime = elapsedTime;

	// So that compiler does not remove code for variables t and r
	if (t==0) {
		printf("\n");
	}

	printf("Experiment completed.\n");

	printf("Initialization time   = ");
	printElapsedTime(stdout, FALSE, FALSE, TRUE, 2, initializationTime);
	printf("Experiment time       = ");
	printElapsedTime(stdout, FALSE, FALSE, TRUE, 2, experimentTime);
	
	MMUnitFree(address, (NumberOfAccess + 8) * sizeof(unsigned int));
	MMUnitFree(memory, (numberOfMemoryLocation + 32) * sizeof(unsigned int));

	iniparser_freedict(programInput);

	return 0;

}