Ejemplo n.º 1
0
/*

    Function:   iSrchFilterGetSearchBitmapFromFilter()

    Purpose:    This function collects and searches filters and returns
                a bit map if there are any filters.
                
                The filter can either be a list name (mapping to a file),
                or a list of terms. If it is a list of terms, then each term has to be
                comma separated and must not contain any spaces.
                
                A search bitmap will not be allocated if no terms were found
                and an existing bitmap was not passed

    Parameters: pssSrchSearch           search structure
                psiSrchIndex            index structure
                uiLanguageID            language ID
                pspfSrchParserFilter    search parser string filter
                uiStartDocumentID       start document ID restriction (0 for no restriction)
                uiEndDocumentID         end document ID restriction (0 for no restriction)
                ppsbSrchBitmap          return pointer for the search bitmap structure 

    Globals:    none

    Returns:    SRCH error code

*/
static int iSrchFilterGetSearchBitmapFromFilter
(
    struct srchSearch *pssSrchSearch,
    struct srchIndex *psiSrchIndex,
    unsigned int uiLanguageID,
    struct srchParserFilter *pspfSrchParserFilter, 
    unsigned int uiStartDocumentID,
    unsigned int uiEndDocumentID,
    struct srchBitmap **ppsbSrchBitmap
)
{

    int                         iError = SRCH_NoError;
    void                        *pvLngStemmer = NULL;
    
    unsigned int                uiSrchParserSearchCacheID = SRCH_PARSER_MODIFIER_UNKNOWN_ID;
    
    unsigned char               pucConfigValue[SRCH_INFO_SYMBOL_MAXIMUM_LENGTH + 1] = {'\0'};
    unsigned char               pucFilterFilePath[UTL_FILE_PATH_MAX + 1] = {'\0'};
    FILE                        *pfFilterFile = NULL;
    off_t                       zFilterFileLength = 0;
    unsigned char               *pucFilterFileMappingPtr = NULL;
    time_t                      tFilterFileLastUpdate = (time_t)0;

    wchar_t                     *pwcTerms = NULL;
    wchar_t                     *pwcNormalizedExcludedTermsStrtokPtr = NULL;

    struct srchTermDictInfo     *pstdiSrchTermDictInfos = NULL;
    struct srchTermDictInfo     *pstdiSrchTermDictInfosPtr = NULL;
    unsigned int                uiSrchTermDictInfosLength = 0;
    unsigned int                uiI = 0;
    
    unsigned int                uiNormalizedTermsLength = 0;

    wchar_t                     *pwcTerm = NULL;
    unsigned char               *pucTerm = NULL;
    
    unsigned char               *pucFieldIDBitmap = NULL;

    

/*     iUtlLogDebug(UTL_LOG_CONTEXT, "iSrchFilterGetSearchBitmapFromFilter - pspfSrchParserFilter->pwcFilter: '%ls'", pspfSrchParserFilter->pwcFilter); */


    ASSERT(pssSrchSearch != NULL);
    ASSERT(psiSrchIndex != NULL);
    ASSERT(uiLanguageID >= 0);
    ASSERT(pspfSrchParserFilter != NULL);
    ASSERT(uiStartDocumentID >= 0);
    ASSERT(uiEndDocumentID >= 0);
    ASSERT(ppsbSrchBitmap != NULL);


    /* Create the stemmer */
    if ( (iError = iLngStemmerCreateByID(psiSrchIndex->uiStemmerID, uiLanguageID, &pvLngStemmer)) != LNG_NoError ) {
        iUtlLogError(UTL_LOG_CONTEXT, "Failed to create a stemmer, lng error: %d.", iError);
        iError = SRCH_FilterCreateStemmerFailed;
        goto bailFromiSrchFilterGetSearchBitmapFromFilter;
    }

    /* Get the parser search cache ID */
    if ( (iError = iSrchParserGetModifierID(pssSrchSearch->pvSrchParser, SRCH_PARSER_MODIFIER_SEARCH_CACHE_ID, &uiSrchParserSearchCacheID)) != SRCH_NoError ) {
        iUtlLogError(UTL_LOG_CONTEXT, "Failed to get the parser search cache ID, srch error: %d.", iError);
        goto bailFromiSrchFilterGetSearchBitmapFromFilter;
    }


    /* Terms based filter */
    if ( pspfSrchParserFilter->uiFilterTypeID == SRCH_PARSER_FILTER_TYPE_TERMS_ID ) {
        ;
    }
    
    /* List based filter */
    else if ( pspfSrchParserFilter->uiFilterTypeID == SRCH_PARSER_FILTER_TYPE_LIST_ID ) {

        unsigned char   *pucFilterDirectoryPath = NULL;
        unsigned char   *pucFilterName = NULL;
        unsigned char   pucFilterFileName[UTL_FILE_PATH_MAX + 1] = {'\0'};


        /* Check for the filter file if there is a location for these files */
        if ( (iError = iUtlConfigGetValue(pssSrchSearch->pvUtlConfig, SRCH_SEARCH_CONFIG_FILTER_FILES_LOCATION, pucConfigValue, SRCH_INFO_SYMBOL_MAXIMUM_LENGTH + 1)) != UTL_NoError ) {
            iUtlLogError(UTL_LOG_CONTEXT, "Failed to get the file file location from the search configuration, symbol name: '%s', utl error: %d.", 
                    SRCH_SEARCH_CONFIG_FILTER_FILES_LOCATION, iError); 
            iError = SRCH_FilterInvalidFilterFileLocation;
            goto bailFromiSrchFilterGetSearchBitmapFromFilter;
        }

        /* Get a pointer to the filter directory path, exclude the file protocol url if it is there */
        pucFilterDirectoryPath = (s_strncasecmp(pucConfigValue, SRCH_FILTER_FILE_PROTOCOL_URL, s_strlen(SRCH_FILTER_FILE_PROTOCOL_URL)) == 0) ? 
                pucConfigValue + s_strlen(SRCH_FILTER_FILE_PROTOCOL_URL) : pucConfigValue;

        /* Convert the filter from wide characters to utf-8, pucFilterName is allocated */
        if ( (iError = iLngConvertWideStringToUtf8_d(pspfSrchParserFilter->pwcFilter, 0, &pucFilterName)) != LNG_NoError ) {
            iUtlLogError(UTL_LOG_CONTEXT, "Failed to convert a filter name from wide characters to utf-8, lng error: %d.", iError);
            iError = SRCH_FilterCharacterSetConvertionFailed;
            goto bailFromiSrchFilterGetSearchBitmapFromFilter;
        }

        /* Create the filter file name */
        snprintf(pucFilterFileName, UTL_FILE_PATH_MAX + 1, "%s%s", pucFilterName, SRCH_FILTER_FILENAME_EXTENSION);
        
        /* Free the utf-8 filter file name */
        s_free(pucFilterName);

        /* Create the filter file path */
        if ( (iError = iUtlFileMergePaths(pucFilterDirectoryPath, pucFilterFileName, pucFilterFilePath, UTL_FILE_PATH_MAX + 1)) != UTL_NoError ) {
            iUtlLogError(UTL_LOG_CONTEXT, "Failed to create the filter file path, file file name: '%s', filter directory path: '%s', utl error: %d", 
                    pucFilterFileName, pucFilterDirectoryPath, iError); 
            iError = SRCH_FilterInvalidFilterFile;
            goto bailFromiSrchFilterGetSearchBitmapFromFilter;
        }
    
        /* Check if the filter file is there */
        if ( bUtlFileIsFile(pucFilterFilePath) == false ) {
                iUtlLogError(UTL_LOG_CONTEXT, "Failed to find the filter file: '%s'.", pucFilterFilePath); 
            iError = SRCH_FilterInvalidFilterFile;
            goto bailFromiSrchFilterGetSearchBitmapFromFilter;
        }

        /* Open the filter file */
        if ( (pfFilterFile = s_fopen(pucFilterFilePath, "r")) == NULL ) {
            iUtlLogError(UTL_LOG_CONTEXT, "Failed to open the filter file: '%s'.", pucFilterFilePath); 
            iError = SRCH_FilterInvalidFilterFile;
            goto bailFromiSrchFilterGetSearchBitmapFromFilter;
        }

        /* Get the filter file modification date */            
        if ( (iError = iUtlFileGetFileModificationTimeT(pfFilterFile, &tFilterFileLastUpdate)) != UTL_NoError ) {
            iUtlLogError(UTL_LOG_CONTEXT, "Failed to get the last modification time of the filter file: '%s', utl error: %d.", pucFilterFilePath, iError); 
            iError = SRCH_FilterInvalidFilterFile;
            goto bailFromiSrchFilterGetSearchBitmapFromFilter;
        }
    }


    /* Get the bitmap from the search cache, note that we dont need to differentiate between terms or lists 
    ** since terms will have a tFilterFileLastUpdate of 0 and lists will have a non-zero tFilterFileLastUpdate 
    */
    if ( uiSrchParserSearchCacheID == SRCH_PARSER_MODIFIER_SEARCH_CACHE_ENABLE_ID ) {

        if ( (iError = iSrchCacheGetSearchBitmap(pssSrchSearch->pvSrchCache, psiSrchIndex, pspfSrchParserFilter->pwcFilter, 
                tFilterFileLastUpdate, ppsbSrchBitmap)) == SRCH_NoError ) {

            /* We are all set */
            goto bailFromiSrchFilterGetSearchBitmapFromFilter;
        }
    }

    
    /* Terms based filter */
    if ( pspfSrchParserFilter->uiFilterTypeID == SRCH_PARSER_FILTER_TYPE_TERMS_ID ) {
    
        /* Make a straight copy of the terms */
        if ( (pwcTerms = s_wcsdup(pspfSrchParserFilter->pwcFilter)) == NULL ) {
            iError = SRCH_MemError;
            goto bailFromiSrchFilterGetSearchBitmapFromFilter;
        }
        
        iSrchReportAppend(pssSrchSearch->pvSrchReport, "%s The search filtered on the following terms: '%ls'\n", REP_SEARCH_RESTRICTION, pspfSrchParserFilter->pwcFilter);

    }
    /* List based filter */
    else if ( pspfSrchParserFilter->uiFilterTypeID == SRCH_PARSER_FILTER_TYPE_LIST_ID ) {

        /* Get the file length */
        if ( (iError = iUtlFileGetFileLength(pfFilterFile, &zFilterFileLength)) != UTL_NoError ) {
            iUtlLogError(UTL_LOG_CONTEXT, "Failed to get the length of the filter file: '%s', utl error: %d.", pucFilterFilePath, iError); 
            iError = SRCH_FilterInvalidFilterFile;
            goto bailFromiSrchFilterGetSearchBitmapFromFilter;
        }
        
        /* Map in the entire file */
        if ( (iError = iUtlFileMemoryMap(fileno(pfFilterFile), 0, zFilterFileLength, PROT_READ, (void **)&pucFilterFileMappingPtr) != UTL_NoError) ) {
            iUtlLogError(UTL_LOG_CONTEXT, "Failed to map in the filter file: '%s', utl error: %d.", pucFilterFilePath, iError);
            iError = SRCH_FilterInvalidFilterFile;
            goto bailFromiSrchFilterGetSearchBitmapFromFilter;
        }

        /* Convert the terms from utf-8 to wide characters, pwcTerms is allocated */
        if ( (iError = iLngConvertUtf8ToWideString_d(pucFilterFileMappingPtr, zFilterFileLength, &pwcTerms)) != LNG_NoError ) {
            iUtlLogError(UTL_LOG_CONTEXT, "Failed to convert the terms from utf-8 to wide characters, lng error: %d.", iError);
            iError = SRCH_FilterCharacterSetConvertionFailed;
            goto bailFromiSrchFilterGetSearchBitmapFromFilter;
        }

        /* Unmap the filter file */
        iUtlFileMemoryUnMap(pucFilterFileMappingPtr, zFilterFileLength);
        pucFilterFileMappingPtr = NULL;

        /* Close the filter file */
        s_fclose(pfFilterFile);
        pfFilterFile = NULL;

        /* Convert newlines to commas */
        iUtlStringsReplaceCharacterInWideString(pwcTerms, L'\n', L',');

        iSrchReportAppend(pssSrchSearch->pvSrchReport, "%s The search filtered on the following list: '%ls'\n", REP_SEARCH_RESTRICTION, pspfSrchParserFilter->pwcFilter);
    }



    /* Allocate a field ID bitmap only if there are any fields other than field ID 0 */
    if ( psiSrchIndex->uiFieldIDMaximum > 0 ) {

        /* Allocate the field ID bitmap - field ID 0 is not a field */
        if ( (pucFieldIDBitmap = (unsigned char *)s_malloc(sizeof(unsigned char) * UTL_BITMAP_GET_BITMAP_BYTE_LENGTH(psiSrchIndex->uiFieldIDMaximum))) == NULL ) {
            iError = SRCH_MemError;
            goto bailFromiSrchFilterGetSearchBitmapFromFilter;
        }
    }


    /* Loop parsing the terms */
    for ( pwcTerm = s_wcstok(pwcTerms, SRCH_FILTER_TERM_SEPARATORS, (wchar_t **)&pwcNormalizedExcludedTermsStrtokPtr); pwcTerm != NULL; 
            pwcTerm = s_wcstok(NULL, SRCH_FILTER_TERM_SEPARATORS, (wchar_t **)&pwcNormalizedExcludedTermsStrtokPtr) ) {

        /* Trim the string if needed, we do this so that we can handle lists like 'bats, lions, elephants' */
        iUtlStringsTrimWideString(pwcTerm);

        /* Term contains spaces, treat as a phrase */
        if ( s_wcschr(pwcTerm, L' ') != NULL ) {

            wchar_t                     *pwcTermStrtokPtr = NULL;
            wchar_t                     *pwcSubTerm = NULL;
            struct srchPostingsList     *psplSrchPostingsList = NULL;

            /* Get the first subterm - wcstok_r along spaces */
            pwcSubTerm = s_wcstok(pwcTerm, L" ", (wchar_t **)&pwcTermStrtokPtr);

            /* Loop while there are subterm */
            while ( pwcSubTerm != NULL ) {

                boolean     bFieldIDBitmapSet = false;

                /* Clear the bitmap */
                if ( pucFieldIDBitmap != NULL ) {
                    UTL_BITMAP_CLEAR_POINTER(pucFieldIDBitmap, psiSrchIndex->uiFieldIDMaximum);
                }

                /* Process the term, this returns a pointer to the processed term and sets the bitmap as needed */
                if ( (iError = iSrchFilterProcessTerm(pssSrchSearch, psiSrchIndex, pvLngStemmer, pwcSubTerm, pucFieldIDBitmap, psiSrchIndex->uiFieldIDMaximum, 
                        &pwcSubTerm, &bFieldIDBitmapSet)) != SRCH_NoError ) {
                    goto bailFromiSrchFilterGetSearchBitmapFromFilter;
                }

                /* Skip the term if it was stemmed out of existence */
                if ( bUtlStringsIsWideStringNULL(pwcSubTerm) == false ) {

                    unsigned char               *pucSubTerm = NULL;
                    struct srchPostingsList     *psplSrchPostingsTempList = NULL;


                    /* Convert the subterm from wide characters to utf-8, pucSubTerm is allocated */
                    if ( (iError = iLngConvertWideStringToUtf8_d(pwcSubTerm, 0, &pucSubTerm)) != LNG_NoError ) {
                        iUtlLogError(UTL_LOG_CONTEXT, "Failed to convert a filter term from wide characters to utf-8, lng error: %d.", iError);
                        iError = SRCH_FilterCharacterSetConvertionFailed;
                        goto bailFromiSrchFilterGetSearchBitmapFromFilter;
                    }

                    /* Search to postings list */
                    iError = iSrchTermSearchGetSearchPostingsListFromTerm(pssSrchSearch, psiSrchIndex, pucSubTerm, SRCH_SEARCH_TERM_WEIGHT_DEFAULT, 
                            (bFieldIDBitmapSet == true) ? pucFieldIDBitmap : NULL, (bFieldIDBitmapSet == true) ? psiSrchIndex->uiFieldIDMaximum : 0, 
                            0, 0, 0, &psplSrchPostingsTempList);

                    /* Free the subterm */
                    s_free(pucSubTerm);

                    /* Check the returned error */
                    if ( iError != SRCH_NoError ) {
                        goto bailFromiSrchFilterGetSearchBitmapFromFilter;
                    }

                    
                    /* Break here if there was no terms returned as this will kill the phrase */
                    if ( (psplSrchPostingsTempList == NULL) || (psplSrchPostingsTempList->uiSrchPostingsLength == 0) ) {
                        
                        iSrchPostingFreeSrchPostingsList(psplSrchPostingsTempList);
                        psplSrchPostingsTempList = NULL;

                        iSrchPostingFreeSrchPostingsList(psplSrchPostingsList);
                        psplSrchPostingsList = NULL;

                        break;
                    }


                    /* Merge the postings lists, ADJ with a distance of 1 and a strict boolean match */
                    if ( psplSrchPostingsList != NULL ) {
                        if ( (iError = iSrchPostingMergeSrchPostingsListsADJ(psplSrchPostingsList, psplSrchPostingsTempList, 1, SRCH_PARSER_MODIFIER_BOOLEAN_OPERATION_STRICT_ID, &psplSrchPostingsList)) != SRCH_NoError ) {
                            goto bailFromiSrchFilterGetSearchBitmapFromFilter;
                        }
                    }
                    else {
                        psplSrchPostingsList = psplSrchPostingsTempList;
                    }
                }
            
                /* Get the next subterm */
                pwcSubTerm = s_wcstok(NULL, L" ", (wchar_t **)&pwcTermStrtokPtr);
            }
            
            
            /* Set the search bitmap structure from the search postings list structure */
            if ( psplSrchPostingsList != NULL ) {

                struct srchPosting      *pspSrchPostingsPtr = NULL;
                struct srchPosting      *pspSrchPostingsEnd = NULL;
                unsigned int            uiLastDocumentID = 0;
                
                unsigned char           *pucBitmapPtr = NULL;

                /* We only process the search postings list structure if there is something in it */
                if ( psplSrchPostingsList->uiSrchPostingsLength > 0 ) {
                
                    /* Allocate the search bitmap structure if it has not yet been allocated */
                    if ( *ppsbSrchBitmap == NULL ) {
                        if ( (iError = iSrchBitmapCreate(NULL, psiSrchIndex->uiDocumentCount + 1, false, ppsbSrchBitmap)) != SRCH_NoError ) {
                            iUtlLogError(UTL_LOG_CONTEXT, "Failed to create a new search bitmap, srch error: %d.", iError);
                            goto bailFromiSrchFilterGetSearchBitmapFromFilter;
                        }
                    }
    
    
                    /* Dereference the bitmap array */
                    pucBitmapPtr = (*ppsbSrchBitmap)->pucBitmap;
                
                    /* Set the bitmap for these documents  */
                    for ( pspSrchPostingsPtr = psplSrchPostingsList->pspSrchPostings, pspSrchPostingsEnd = psplSrchPostingsList->pspSrchPostings + psplSrchPostingsList->uiSrchPostingsLength; 
                            pspSrchPostingsPtr < pspSrchPostingsEnd; pspSrchPostingsPtr++ ) {
                        
                        /* Set the document in the bitmap if it has not already been set */
                        if ( pspSrchPostingsPtr->uiDocumentID != uiLastDocumentID ) {
                            UTL_BITMAP_SET_BIT_IN_POINTER(pucBitmapPtr, pspSrchPostingsPtr->uiDocumentID);
                            uiLastDocumentID = pspSrchPostingsPtr->uiDocumentID;
                        }
                    }
                }
                
                /* Free the postings list */
                iSrchPostingFreeSrchPostingsList(psplSrchPostingsList);
                psplSrchPostingsList = NULL;
            }
        }

        /* Term does not contain spaces, treat as a term */
        else {

            boolean     bFieldIDBitmapSet = false;

            /* Clear the bitmap */
            if ( pucFieldIDBitmap != NULL ) {
                UTL_BITMAP_CLEAR_POINTER(pucFieldIDBitmap, psiSrchIndex->uiFieldIDMaximum);
            }

            /* Process the term, this returns a pointer to the processed term and sets the bitmap as needed */
            if ( (iError = iSrchFilterProcessTerm(pssSrchSearch, psiSrchIndex, pvLngStemmer, pwcTerm, pucFieldIDBitmap, psiSrchIndex->uiFieldIDMaximum, 
                    &pwcTerm, &bFieldIDBitmapSet)) != SRCH_NoError ) {
                goto bailFromiSrchFilterGetSearchBitmapFromFilter;
            }
    
            /* Skip the term if it was stemmed out of existence */
            if ( bUtlStringsIsWideStringNULL(pwcTerm) == false ) {

                /* Convert the term from wide characters to utf-8, pucTerm is allocated */
                if ( (iError = iLngConvertWideStringToUtf8_d(pwcTerm, 0, &pucTerm)) != LNG_NoError ) {
                    iUtlLogError(UTL_LOG_CONTEXT, "Failed to convert a filter term from wide characters to utf-8, lng error: %d.", iError);
                    iError = SRCH_FilterCharacterSetConvertionFailed;
                    goto bailFromiSrchFilterGetSearchBitmapFromFilter;
                }

                /* Expand the term if it contains a wildcard (or two) */
                if ( s_strpbrk(pucTerm, SRCH_PARSER_WILDCARDS_STRING) != NULL ) {
                
                    /* Expand the term */
                    iError = iSrchTermDictLookupWildCard(psiSrchIndex, pucTerm, (bFieldIDBitmapSet == true) ? pucFieldIDBitmap : NULL, 
                            (bFieldIDBitmapSet == true) ? psiSrchIndex->uiFieldIDMaximum : 0, &pstdiSrchTermDictInfos, &uiSrchTermDictInfosLength);
                    
                    /* Check for non recoverable errors */
                    if ( !((iError == SRCH_NoError) || (iError == SRCH_TermDictTermBadRange) || (iError == SRCH_TermDictTermBadWildCard) ||
                            (iError == SRCH_IndexHasNoTerms) || (iError == SRCH_TermDictTermNotFound) || (iError == SRCH_TermDictTermDoesNotOccur)) ) {
                        goto bailFromiSrchFilterGetSearchBitmapFromFilter;
                    }
                    
                    /* Reset the error */
                    iError = SRCH_NoError;


                    /* Loop through all the terms in the term list */
                    for ( pstdiSrchTermDictInfosPtr = pstdiSrchTermDictInfos, uiI = 0; uiI < uiSrchTermDictInfosLength; pstdiSrchTermDictInfosPtr++, uiI++ ) {

                        /* Search to bitmap */
                        if ( (iError = iSrchTermSearchGetSearchBitmapFromTerm(pssSrchSearch, psiSrchIndex, pstdiSrchTermDictInfosPtr->pucTerm, 
                                (bFieldIDBitmapSet == true) ? pucFieldIDBitmap : NULL, ((bFieldIDBitmapSet == true) ? psiSrchIndex->uiFieldIDMaximum : 0), 
                                0, uiStartDocumentID, uiEndDocumentID, ppsbSrchBitmap)) != SRCH_NoError ) {
                            goto bailFromiSrchFilterGetSearchBitmapFromFilter;
                        }
                    }
                }
                else {
                
                    /* Search to bitmap */
                    if ( (iError = iSrchTermSearchGetSearchBitmapFromTerm(pssSrchSearch, psiSrchIndex, pucTerm, (bFieldIDBitmapSet == true) ? pucFieldIDBitmap : NULL, 
                            ((bFieldIDBitmapSet == true) ? psiSrchIndex->uiFieldIDMaximum : 0), 0, uiStartDocumentID, uiEndDocumentID, ppsbSrchBitmap)) != SRCH_NoError )  {
                        goto bailFromiSrchFilterGetSearchBitmapFromFilter;
                    }
                }


                /* Free the term */
                s_free(pucTerm);
            }
        }
    }



    /* Store the bipmap to cache if the cache is enabled */
    if ( uiSrchParserSearchCacheID == SRCH_PARSER_MODIFIER_SEARCH_CACHE_ENABLE_ID ) {
        if ( *ppsbSrchBitmap != NULL ) {
            iSrchCacheSaveSearchBitmap(pssSrchSearch->pvSrchCache, psiSrchIndex, pspfSrchParserFilter->pwcFilter, tFilterFileLastUpdate, *ppsbSrchBitmap);
        }    
    }    



    /* Bail label */
    bailFromiSrchFilterGetSearchBitmapFromFilter:


    /* Free the stemmer */
    iLngStemmerFree(pvLngStemmer);
    pvLngStemmer = NULL;

    /* List based filter */
    if ( pspfSrchParserFilter->uiFilterTypeID == SRCH_PARSER_FILTER_TYPE_LIST_ID ) {

        /* Unmap the terms file */
        iUtlFileMemoryUnMap(pucFilterFileMappingPtr, zFilterFileLength);
        pucFilterFileMappingPtr = NULL;
    
        /* Close the terms file */
        s_fclose(pfFilterFile);
    }
        
    /* Free the term information structure */
    if ( pstdiSrchTermDictInfos != NULL ) {
        for ( pstdiSrchTermDictInfosPtr = pstdiSrchTermDictInfos, uiI = 0; uiI < uiSrchTermDictInfosLength; pstdiSrchTermDictInfosPtr++, uiI++ ) {
            s_free(pstdiSrchTermDictInfosPtr->pucTerm);
        }
        s_free(pstdiSrchTermDictInfos);
    }

    s_free(pucFieldIDBitmap);

    s_free(pucTerm);
    s_free(pwcTerms);


    /* Handle the error */
    if ( iError != SRCH_NoError ) {
        iSrchBitmapFree(*ppsbSrchBitmap);
        *ppsbSrchBitmap = NULL;
    }


    return (iError);

}
Ejemplo n.º 2
0
/*
 * Build the "Phrase" and "Sentence" relations of an utterance from its
 * "Word" relation.
 *
 * The "Word" relation must already be present in the utterance. Words are
 * visited in order and each one is added as a daughter of the current phrase
 * item. A phrase item is created with the name "NB" and is itself added as a
 * daughter of a freshly appended sentence item, so sentences and phrases are
 * one-to-one here. The current phrase is renamed "BB" and closed when the
 * word is the last daughter of its token, the next token carries an
 * "IsPunctuation" feature greater than 0, and s_strpbrk finds one of the
 * end_punc symbols in that token's name.
 *
 * self:  the utterance processor, used to look up the phrasing symbols.
 * utt:   the utterance to read the "Word" relation from and add the new
 *        relations to.
 * error: error code, cleared on entry and set on failure.
 *
 * On failure, any "Phrase" and "Sentence" relations created by this call are
 * deleted from the utterance again.
 */
static void Run(const SUttProcessor *self, SUtterance *utt,
				s_erc *error)
{
	const SRelation *wordRel;
	SItem *wordItem;
	const SItem *tokenItem;
	s_bool is_present;
	const char *end_punc;
	SRelation *phraseRelation = NULL;
	SItem *phraseItem = NULL;
	const char *post_punc;
	SRelation *sentenceRelation = NULL;
	SItem *sentenceItem = NULL;


	S_CLR_ERR(error);

	/* we require the word relation */
	is_present = SUtteranceRelationIsPresent(utt, "Word", error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SUtteranceRelationIsPresent\" failed"))
		goto quit_error;

	if (!is_present)
	{
		S_CTX_ERR(error, S_FAILURE,
				  "Run",
				  "Failed to find 'Word' relation in utterance");
		goto quit_error;
	}

	wordRel = SUtteranceGetRelation(utt, "Word", error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SUtteranceGetRelation\" failed"))
		goto quit_error;

	/* get phrasing symbols */
	s_get_phrasing_symbols(self, &end_punc, error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"s_get_phrasing_symbols\" failed"))
		goto quit_error;

	/* create Phrase relation */
	phraseRelation = SUtteranceNewRelation(utt, "Phrase", error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SUtteranceNewRelation\" failed"))
		goto quit_error;

	/* create Sentence relation */
	sentenceRelation = SUtteranceNewRelation(utt, "Sentence", error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SUtteranceNewRelation\" failed"))
		goto quit_error;

	/* start at the first item in the word relation, cast away
	 * const, we want to add daughter items
	 */
	wordItem = (SItem*)SRelationHead(wordRel, error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SRelationHead\" failed"))
		goto quit_error;

	while (wordItem != NULL)
	{
		SItem *lastWordInToken;
		SItem *wordAsToken;


		if (phraseItem == NULL)
		{
			/* No open phrase: this happens in the first pass and
			 * after every phrase break. Start a new sentence item
			 * (NULL shared content) in the sentence relation; for
			 * now sentences are one-to-one with phrases.
			 */
			sentenceItem = SRelationAppend(sentenceRelation, NULL, error);
			if (S_CHK_ERR(error, S_CONTERR,
						  "Run",
						  "Call to \"SRelationAppend\" failed"))
				goto quit_error;

			/* create a new phrase item (NULL shared content) that
			 * is appended to the phrase relation
			 */
			phraseItem = SRelationAppend(phraseRelation, NULL, error);
			if (S_CHK_ERR(error, S_CONTERR,
						  "Run",
						  "Call to \"SRelationAppend\" failed"))
				goto quit_error;

			/* add an item name, NB, no break */
			SItemSetString(phraseItem, "name", "NB", error);
			if (S_CHK_ERR(error, S_CONTERR,
						  "Run",
						  "Call to \"SItemSetString\" failed"))
				goto quit_error;

			/* hang the new phrase under the new sentence */
			SItemAddDaughter(sentenceItem, phraseItem, error);
			if (S_CHK_ERR(error, S_CONTERR,
						  "Run",
						  "Call to \"SItemAddDaughter\" failed"))
				goto quit_error;
		}

		/* Create a daughter item for the phrase item. Shared content
		 * is the word item.
		 */
		SItemAddDaughter(phraseItem, wordItem, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Call to \"SItemAddDaughter\" failed"))
			goto quit_error;

		/* get word as in Token relation */
		wordAsToken = SItemAs(wordItem, "Token", error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Failed to get word item's as in Token relation"))
			goto quit_error;

		/*
		 * get word's token which is the parent of wordAsToken.
		 */
		tokenItem = SItemParent(wordAsToken, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Failed to get word item's token item"))
			goto quit_error;

		/* get last word in token */
		lastWordInToken = SItemLastDaughter(tokenItem, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Failed to get last daughter of token item"))
			goto quit_error;

		/* check if the next token is punctuation; from here on
		 * is_present means "the next token exists and has an
		 * IsPunctuation feature greater than 0"
		 */
		is_present = FALSE;
		tokenItem = SItemNext(tokenItem, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Call to \"SItemNext\" failed"))
			goto quit_error;
		if (tokenItem != NULL)
		{
			is_present = SItemFeatureIsPresent(tokenItem, "IsPunctuation", error);
			if (S_CHK_ERR(error, S_CONTERR,
						  "Run",
						  "Call to \"SItemFeatureIsPresent\" failed"))
				goto quit_error;
			if (is_present)
			{
				sint32 value = SItemGetInt(tokenItem, "IsPunctuation", error);
				if (S_CHK_ERR(error, S_CONTERR,
							  "Run",
							  "Call to \"SItemGetInt\" failed"))
					goto quit_error;
				is_present = (value > 0);
			}
		}

		/* only the last word of a token can be followed by a break */
		if ((is_present) && (wordAsToken == lastWordInToken))
		{
			char *ptr;


			post_punc = SItemGetName(tokenItem, error);
			if (S_CHK_ERR(error, S_CONTERR,
						  "Run",
						  "Call to \"SItemGetName\" failed"))
				goto quit_error;

			/* check if it is in the end_punc list */
			ptr = s_strpbrk(post_punc, end_punc, error);
			if (S_CHK_ERR(error, S_CONTERR,
						  "Run",
						  "Call to \"s_strpbrk\" failed"))
				goto quit_error;

			if (ptr != NULL)
			{
				/* add a phrase break */
				SItemSetString(phraseItem, "name", "BB", error);
				if (S_CHK_ERR(error, S_CONTERR,
							  "Run",
							  "Call to \"SItemSetString\" failed"))
					goto quit_error;

				/* set to NULL so that a new phrase item (and its
				 * sentence item) is created for the next word
				 */
				phraseItem = NULL;
			}
		}

		wordItem = SItemNext(wordItem, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Call to \"SItemNext\" failed"))
			goto quit_error;
	}

	/* here all is OK */
	return;

	/* error clean-up code: remove every relation this function added, so
	 * that a failed run does not leave a partially built relation behind
	 */
quit_error:
	if (phraseRelation != NULL)
	{
		SUtteranceDelRelation(utt, "Phrase", error);
		S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SUtteranceDelRelation\" failed");
	}

	if (sentenceRelation != NULL)
	{
		SUtteranceDelRelation(utt, "Sentence", error);
		S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SUtteranceDelRelation\" failed");
	}
}