/**
Parses URL from a token. Is used by SearchUrlL method and if a URL
was found it's appended to item array. Note that parsing for generic URIs 
is done with SearchGenericUriL -method.

@param aType  a Type of URL to seach, i.e.
                  www.
                  wap.
                  IP e.g.127.0.0.1
@param        aTokenPtr Pointer to token that will be parsed
@param        aTextOffset Offset of the token (start position in the whole text)
@leave KErrNone, if successful; otherwise one of the other system-wide error codes.
@return ETrue if the parameter for phone number is valid, else returns EFalse
*/
TBool CTulAddressStringTokenizer::ParseUrlL(const TDesC& aType, const TPtrC& aTokenPtr, TInt aTextOffset)
    {
    TBool wasValidUrl = EFalse;
    TLex url;
    
    TInt position = aTokenPtr.FindF( aType ); 
    if ( position != KErrNotFound )
        { // address start found
        url = aTokenPtr.Right( aTokenPtr.Length() - position );
        url.Inc( aType.Length() );

        while( IsValidUrlChar( url.Peek() ) && !(url.Eos()) )
            {
            if( url.Peek() == ':' )
                {
                url.Inc();
                if ( !url.Peek().IsDigit() )
                    {
                    url.UnGet();
                    break;
                    }
                }
            else
                url.Inc();
            }

        // If a period or question mark was followed by a whitespace remove it
        if ( url.Eos() ) // Can't be followed by white space if it's
            { // the last character at token
            url.UnGet();
            if ( url.Peek() != '.' && url.Peek() != '?' && url.Peek() != ',' )	// If it wasn't a period or question mark
                url.Inc();
            }
        
        url.Mark();
        wasValidUrl = ETrue;
        }

    if ( wasValidUrl && ( url.MarkedOffset() > aType.Length() ) )
        {
        AddItemL( aTextOffset - aTokenPtr.Length() + position, url.MarkedOffset(), EFindItemSearchURLBin );
        return ETrue;
        }

    return EFalse;
    }
Пример #2
0
/*
-------------------------------------------------------------------------------

    Class: CStifSectionParser

    Method: GotoEndOfLine

    Description: Goes end of the line.

    Parameters: TLex& lex: inout: Parsed line.

    Return Values: TInt: Last item's end position.

    Errors/Exceptions: None

    Status: Proposal

-------------------------------------------------------------------------------
*/
TInt CStifSectionParser::GotoEndOfLine( TLex& lex )
    {
    // End position of the last token(Initialized with current position)
    TInt lastItemPosition( lex.Offset() );

    // LINE BREAK NOTE:
    // Line break in SOS, WIN:  '\r\n'
    // Line break in UNIX:      '\n'

    do
        {
        // Peek next character(10 or '\n' in UNIX style )
        if( lex.Peek() == 0x0A )
            {
            lex.Inc();
            break;
            }

        // Peek next character(13 or '\r' in Symbian OS)
        if ( lex.Peek() == 0x0D )
            {
            // Increment the lex position
            lex.Inc();
            // Peek next character(10 or '\n' in Symbian OS)
            if ( lex.Peek() == 0x0A )
                {
                // End of the section is found and increment the lex position
                lex.Inc();
                break;
                }
            // 0x0A not found, decrement position
            lex.UnGet();
            }
        // Peek for tabulator(0x09) and space(0x20)
        else if ( lex.Peek() == 0x09 || lex.Peek() == 0x20 )
            {
            // Increment the lex position
            lex.Inc();
            continue;
            }
        
        // If white spaces not found take next token
        lex.NextToken();
        lastItemPosition = lex.Offset();    
        
        } while ( !lex.Eos() );
        
    return lastItemPosition;

    }
Пример #3
0
// ----------------------------------------------------
// CheckIPv6Address()
// IPv6address = hexpart [ ":" IPv4address ]
// ----------------------------------------------------
//
LOCAL_C TBool CheckIPv6Address( TLex& aLex )
    {
    TBool result( CheckIPv6HexPart( aLex ) );
    if( aLex.Peek() == '.' )
        {
        while( aLex.Peek() != ':' && aLex.Offset() > 0 )
            {
            aLex.UnGet();
            }
        aLex.Inc();
        result = CheckIPv4Address( aLex );
        }

    return result && aLex.Eos();
    }
/**
Search algorithm for searching phone numbers

@param aText Text that will be parsed
@return ETrue if any Phone Number items were found else returns EFalse
@leave KErrNone, if successful; otherwise one of the other system-wide error codes.
@panic ETulPanicDescriptorLength in debug build if item's position 
and/or length is out of the document's range.
*/
TBool CTulAddressStringTokenizer::SearchPhoneNumberL( const TDesC& aText )
    {
    TLexMark startMark; // Points to the start of the found phone number
    TLexMark endMark; // Points to the end of the found phone number
    TLexMark mark;
    const TInt end = aText.Length();

    TLex number = aText;

    while ( !(number.Eos()) )
        {
        TInt numberCount = 0; // How many real numbers (1234567890)
        TInt bracketsOpen = 0; // How many brackets are currently open
        TInt brackets = 0; // How many brackets overall

        TChar charac = number.Peek();

        while( (!(IsValidPhoneNumberChar( charac ) || charac == '+'
               || charac == '(' ) || charac == '-' || charac == '.' || charac == '/') 
			   && !(number.Eos()) && number.Offset() < end )
            {
            number.Inc();
            charac = number.Peek();
            }

        if ( number.Offset() >= end )
            break;
        
        if ( number.Peek() == '#' )
			{
			number.Inc();
			if (number.Peek() == '.' )
				continue;

			number.UnGet();
			}

        if ( number.Peek() == '+' )
            { // '+' has to be followed by a number (not # or * ...)
            number.Inc();
            if ( !(number.Peek().IsDigit()) )
                continue;

            number.UnGet();
            }

        if ( number.Peek() == '(' )
            { // '(' has to be followed by valid phone number 
              // character (whitespaces are allowed before) or '+' is a next character
            number.Inc();
            if ( !(number.Peek() == '+') )
                {
                number.Mark(mark);
                number.SkipSpace();
                charac = number.Peek();
                if ( !( IsValidPhoneNumberChar(charac) || charac == '+' 
                    || charac == '(' ) || charac == '-' || charac == '.' || charac == '/')
                    {
                    number.Inc();
                    continue;
                    }
                else
                    {
                    number.UnGetToMark(mark);
                    number.UnGet();
                    number.Mark(startMark);
                    }
                } 
            else
                {
                number.UnGet();
                number.Mark(startMark);
                number.Inc();
                }

            bracketsOpen++;
            brackets++;
            }
        else
            number.Mark(startMark);

        if ( number.Peek().IsDigit() )	// If the character was a number
            numberCount++;
        else if ( bracketsOpen > 0 ) 
            { 
            number.Inc();
            TChar next  = number.Peek();
            TInt bracketsOpen2 = bracketsOpen;
            while( (IsValidPhoneNumberChar( next ) || next.IsSpace()
                || next == '(' || next == ')' || next == 'p' || next == '+'
                || next == 'w' ) && !(number.Eos()) && number.Offset() < end)
                {
                if ( next == '(' )
                    bracketsOpen2++;
                else if ( next == ')' )
                    bracketsOpen2--;
                
                if ( bracketsOpen2 == 0 )
                    break;

                number.Inc();
                next = number.Peek();
                }

            number.UnGetToMark(startMark);
            if ( bracketsOpen2 != 0 )
                {
                number.Inc();
                continue;
                }
            }

        number.Inc();
        while ( number.Peek() == '(' && !(number.Eos()) && bracketsOpen > 0 )
            {
            number.Inc();
            bracketsOpen++;
            }

        if ( number.Peek() == '+' && bracketsOpen > 0 )
            number.Inc();

        // a Valid first character has been found. Let's go forward as long as valid characters are found.
        charac = number.Peek();

        while( (IsValidPhoneNumberChar( charac ) || charac.IsSpace()
            || charac == '(' || charac == ')' || charac == 'p'
            || charac == 'w' ) && !(number.Eos()) && number.Offset() < end
            && charac != KCharLinefeed && charac != KCharFormfeed 
            && charac != KCharCarriageReturn
            && charac != KCharLineSeparator
            && charac != KCharParagraphSeparator )
            {
            if ( number.Peek() == '(' )
                { // '(' can't be the last character in phone number
                number.Mark(mark);
                number.Inc();
                
                TChar spaceJump = number.Peek();
                while ( !number.Eos() && spaceJump.IsSpace() && spaceJump != KCharLinefeed 
                        && spaceJump != KCharFormfeed && spaceJump != KCharCarriageReturn 
                        && charac != KCharLineSeparator && spaceJump != KCharParagraphSeparator)
                    {
                    number.Inc();
                    spaceJump = number.Peek();
                    }
                
                if ( !(IsValidPhoneNumberChar(number.Peek())) && number.Peek() != ')'
                     && number.Peek() != '(' )
                    {
                    number.UnGetToMark(mark);
                    break;
                    }

                TChar next  = number.Peek();
                TInt bracketsOpen2 = bracketsOpen + 1;
                while( (IsValidPhoneNumberChar( next ) || next.IsSpace()
                    || next == '(' || next == ')' || next == 'p'
                    || next == 'w' ) && !(number.Eos()) && number.Offset() < end)
                    {
                    if ( next == '(' )
                        bracketsOpen2++;
                    else if ( next == ')' )
                        bracketsOpen2--;

                    if ( bracketsOpen2 == 0 )
                        break;

                    number.Inc();
                    next = number.Peek();
                    }

                number.UnGetToMark(mark);

                if ( bracketsOpen2 != 0 )
                    break;

                bracketsOpen++;
                brackets++;
                }
            else if ( number.Peek() == ')' )
                {
                if ( bracketsOpen <= 0 )	// there has to be equal number of brackets
                    break;

                bracketsOpen--;
				number.Mark(mark);
                number.Inc();
				if ( number.Peek() == '.' )	// '.' is not allowed after ')'
                    break;

                number.UnGetToMark(mark);
                }
            else if ( number.Peek() == '-' || number.Peek() == 'w' 
                        || number.Peek() == 'p' || number.Peek() == '.' || number.Peek() == '/')
                { // Hyphen mark and 'p' & 'w' chars must be followed by a number
				TChar last = number.Peek();
                number.Mark(mark);
                number.Inc();
                
                TChar spaceJump = number.Peek();
                while ( !number.Eos() && spaceJump.IsSpace() && spaceJump != KCharLinefeed 
                        && spaceJump != KCharFormfeed && spaceJump != KCharCarriageReturn 
                        && charac != KCharLineSeparator && spaceJump != KCharParagraphSeparator )
                    {
                    number.Inc();
                    spaceJump = number.Peek();
                    }
                    
                if ( !(number.Peek().IsDigit()) )
                    {
					if (last == '.' && number.Peek() == ')' && bracketsOpen > 0 )
						continue;
					else
						{
						number.UnGetToMark(mark);
						break;
						}
                    }

                number.UnGetToMark(mark);
                }
            else if ( number.Peek().IsDigit() )
                numberCount++;

            number.Inc();
            charac = number.Peek();
            }

        // Get rid of whitespaces from the end
        number.UnGet();
        while( number.Peek().IsSpace() && !(number.Eos()))
            number.UnGet();

        number.Inc();
        // ------------------------------------
        number.Mark(endMark);

        // If they exist, remove brackets from the beginning and the end
        number.Mark(mark); // Let's mark where to continue the search
        TBool endBrackets = ETrue;
        do
            {
            number.UnGet();

            if ( number.Peek() == ')' )
                {
                number.UnGetToMark(startMark);
                if ( number.Peek() == '(' )
                    {
                    // If there's more than one pair of brackets -> don't strip them.
                    if ( brackets > 1 )
                        break;

                    number.Inc();
                    number.Mark(startMark);
                    number.UnGetToMark(endMark);
                    number.UnGet();
                    number.Mark(endMark);
                    // Get rid of whitespaces and periods from the end and from the beginning
					number.UnGet();
                    while ( (number.Peek().IsSpace() || number.Peek() == '.') 
                            && number.Offset() > number.MarkedOffset(startMark) )     
                        { // from the end
                        number.UnGet();
                        }
					number.Inc();
                    number.Mark(endMark);
                    number.UnGetToMark(startMark);
                    while ( (number.Peek().IsSpace() || number.Peek() == '.') 
                            && number.Offset() < number.MarkedOffset(endMark) )     
                        { // from the beginning
                        number.Inc();
                        }
                    number.Mark(startMark);
                    number.UnGetToMark(endMark);
                    // ----
                    }
                else
                    endBrackets = EFalse;
                }
            else
                endBrackets = EFalse;
            }
        while ( endBrackets );

        number.UnGetToMark(mark);
        // ----------------        

        if ( numberCount <= KFindItemMaxNumbers && numberCount >= iMinNumbers )
            {
			TPtrC tokenPtr = number.MarkedToken(startMark);
			TInt tokensEnd = tokenPtr.Length();
			TInt numbers = 0;
			TInt partialNumber = 0;
			TBool wasValidPhoneNumber = ETrue;
			TInt i = 0;

			for ( ; i < tokensEnd; i++ )
				{
				if ( tokenPtr[i] == '.' )
					partialNumber = 0;
				else if ( ((TChar)tokenPtr[i]).IsDigit() )
					{
					numbers++;
					partialNumber++;
					}

				if ( ( partialNumber == 1 || partialNumber == 2 ) && i + 1 < tokensEnd )
					{
					if ( tokenPtr[i + 1] == '.' )
						wasValidPhoneNumber = EFalse;
					}
				}
				
			if (!wasValidPhoneNumber && numbers > 6)
				wasValidPhoneNumber = ETrue;
			
			if (wasValidPhoneNumber)
				{
	            __ASSERT_DEBUG( number.MarkedOffset(startMark) + number.MarkedOffset(endMark) 
	                            - number.MarkedOffset(startMark) <= aText.Length(), 
	                            Panic(ETulPanicDescriptorLength) );

	            AddItemL( number.MarkedOffset(startMark), 
	                      number.MarkedOffset(endMark) - number.MarkedOffset(startMark), 
	                      EFindItemSearchPhoneNumberBin );
				}
			}

        }

    return (iFoundItems->Count() > 0);
    }
/**
Search algorithm for searching e-mail addresses

@param aText Text that will be parsed
@return ETrue if any EMail items were found else returns EFalse
@leave KErrNone, if successful; otherwise one of the other system-wide error codes.
@panic ETulPanicDescriptorLength in debug build if item's position 
and/or length is out of the document's range.
*/
TBool CTulAddressStringTokenizer::SearchMailAddressL( const TDesC& aText )
    {
    TInt searchStart = 0;
    TInt searchResult = 0;
    const TInt end = aText.Length(); // end of document

    do
        {
        TPtrC segment = aText.Right( end - searchStart );
        searchResult = segment.LocateF('@');

        if (searchResult != KErrNotFound)
            { // @ found
            // There should be valid characters (not a period) before and after the @ character
            if ( searchResult == 0 // first char
                || (searchResult >= segment.Length() - 1) // last char 
                || !(IsValidEmailChar(segment[searchResult - 1])) 
                || !(IsValidEmailHostChar(segment[searchResult + 1]))
                || segment[searchResult - 1] == '.' 
                || segment[searchResult + 1] == '.'
               )
                {
                searchStart += searchResult + 1;
                continue;
                }

            TBool wasPeriod = EFalse; // To prevent sequential periods
            // Get TLex from the pointer to get a better API for parsing
            TLexMark startPos;
            TLexMark endPos;
            TLex token = segment;
            
            // Go to searchResult and un-get until the beginning of e-mail address is reached
            token.Inc( searchResult );
            token.Mark();
            do
                {
                token.UnGet();
                if ( token.Peek() == '.' )
                    { // If it was a period
                    if (wasPeriod)	// and if the former was also -> break
                        break;
                    else	// else mark that this one was a period
                        wasPeriod = ETrue;
                    }
                else
                    wasPeriod = EFalse;
                }
            while (token.Offset() > 0 && IsValidEmailChar(token.Peek()));
            
            if (token.Offset() != 0 || !IsValidEmailChar(token.Peek()))
                token.Inc();

            // Get rid of periods from the start of address
            // Does it have to start with a number or char(abc...).
            // If it does, the loop should check that it gets rid of all special chars also.
            while (token.Peek() == '.')
                token.Inc();

            token.Mark( startPos ); // Mark the beginning of address
            token.UnGetToMark();
            wasPeriod = EFalse;
            
            do	// Go forward until a nonvalid character
                {
                token.Inc();
                if ( token.Peek() == '.' )
                    { // If it was a period
                    if ( wasPeriod )	// and if the former was also -> break
                        break;
                    else	// else mark that this one was a period
                        wasPeriod = ETrue;
                    }
                else
                    wasPeriod = EFalse;
                }
            while ( !token.Eos() && IsValidEmailHostChar( token.Peek() ) );
            
            // If address ends with a period take it away
            token.UnGet();
            if (token.Peek() != '.')
                token.Inc();

            token.Mark( endPos ); // Mark the beginning of address

            // Append the found string to the array
            __ASSERT_DEBUG( searchStart + token.MarkedOffset( startPos ) 
                            + token.MarkedOffset( endPos ) 
                            - token.MarkedOffset( startPos ) <= aText.Length(), 
                            Panic(ETulPanicDescriptorLength) );
            AddItemL( searchStart + token.MarkedOffset( startPos ), 
                      token.MarkedOffset( endPos ) - token.MarkedOffset( startPos ), 
                      EFindItemSearchMailAddressBin);
            searchStart += token.MarkedOffset( endPos ) + 1;
            }
        }
    while ( searchResult != KErrNotFound && searchStart < end );

    return (iFoundItems->Count() > 0);
    }
/**
Search algorithm for searching generic URIs

@param aText Text that will be parsed
@return ETrue if any generic URI is found else returns EFalse
@leave KErrNone, if successful; otherwise one of the other system-wide error codes.
*/
TBool CTulAddressStringTokenizer::SearchGenericUriL( const TDesC& aText )
    {
    // Detect generic URI within the token
    const TDesC& schemeStartArray = KURISchemeStartCharacters;
    const TDesC& schemeBodyArray = KURISchemeBodyCharacters;
    const TDesC& schemeTerminatorArray = KURISchemeTerminator;
    const TDesC& URIArray = KURICharacters;

    TBool wasValidUri = EFalse;
    TLex text = aText;

    while ( !text.Eos() )
        {
        // Discard characters until URI scheme terminator is found
        while( !(text.Eos()) && schemeTerminatorArray.Locate(text.Peek()) == KErrNotFound )
            text.Inc();

        // if at end of the text, no legit URI found
        if ( !text.Eos() )
            {
            // Store the schema end offset (+1 to skip ':')
            TInt schemeEndOffset = text.Offset() + 1;

            // Scheme must be at least 1 character long at the beginning of the text to be valid
            if ( text.Offset() > 0 ) 
                {
                // Un-get last scheme character to begin examination
                text.UnGet();

                // Rewind until beginning of the URI
                while ( text.Offset() > 0 && schemeBodyArray.Locate(text.Peek().GetLowerCase()) != KErrNotFound )
                    text.UnGet();
        
                // Now text pointer is at first character of the URI
                // Do go back through the scheme until a legal beginning character for URI 
                // is found or back to the (schemeEndOffset - 1) i.e. URI scheme terminator
                while ( schemeStartArray.Locate(text.Peek().GetLowerCase()) == KErrNotFound && (text.Offset() + 1) < schemeEndOffset )
                    text.Inc();

                // check if terminated because a valid start character was found when
                // scheme terminator was reached.
                if ( schemeStartArray.Locate(text.Peek().GetLowerCase()) != KErrNotFound )
                    {
                    // First character is a valid URI char, so the scheme is valid -> 
                    // marks the beginning of the array
                    text.Mark();
            
                    // fast forward to the end of the scheme
                    while( text.Offset() < schemeEndOffset )    
                        text.Inc();
            
                    // Get characters until end of schema
                    while( !(text.Eos()) && URIArray.Locate( text.Peek().GetLowerCase() ) != KErrNotFound )
                        text.Inc();

                    // remove certain punctuation from end of the URI, as it is likely 
                    // to be part of the surrounding text.
                    text.UnGet();

                    //special processing for bracket
                    //only remove the end bracket if there is no open bracket in the uri
                    //not counting bracket pairs for efficiency
                    if (text.Peek()!=')' || text.MarkedToken().Locate(TChar('('))!=-1)
                        text.Inc();

                    text.UnGet();
                    if ( text.Peek() != '.' && text.Peek() != '?' && text.Peek() != ',')
                        text.Inc();
                
                    // URI cannot contain only scheme, so check that pointer was increased 
                    // by at least one character
                    if ( schemeEndOffset != text.Offset() )
                        {
                        // Append found text to item array (it is now known to be 
                        // syntactically valid URI as it contains characters after the scheme)
                        AddItemL( text.MarkedOffset(), text.Offset() - text.MarkedOffset(), EFindItemSearchScheme );
                        wasValidUri = ETrue;
                        }
                    }
                else // First character of scheme is not legit, fast forward to end of the 
                     // scheme anyway to continue search
                    {
                    while( text.Offset() < schemeEndOffset )        
                        text.Inc();
                    }
                }
            else
 				text.Inc();
            }
        }

    return wasValidUri;
    }