/**
Tries to extract a URL that starts with a given prefix from a single token.
When a URL is found it is reported through AddItemL() with the type
EFindItemSearchURLBin. Generic URIs (scheme based) are not handled here.

@param aType        Prefix that identifies the URL kind to search for
                    (matched with FindF(), i.e. folded comparison).
@param aTokenPtr    The token to be examined.
@param aTextOffset  Offset used to compute the item position; the item start
                    is calculated as aTextOffset - aTokenPtr.Length() + the
                    position of the prefix inside the token.
@return ETrue if the prefix was found and is followed by at least one
        accepted character (an item was added), otherwise EFalse.
@leave  May leave if AddItemL() leaves.
*/
TBool CTulAddressStringTokenizer::ParseUrlL(const TDesC& aType, const TPtrC& aTokenPtr, TInt aTextOffset)
    {
    const TInt prefixPos = aTokenPtr.FindF( aType );
    if ( prefixPos == KErrNotFound )
        return EFalse;  // the token does not contain the prefix at all

    // Lexer over the part of the token that starts at the prefix.
    TLex scanner( aTokenPtr.Right( aTokenPtr.Length() - prefixPos ) );
    scanner.Inc( aType.Length() );

    // Consume characters for as long as they are acceptable in a URL.
    for ( ;; )
        {
        if ( !IsValidUrlChar( scanner.Peek() ) || scanner.Eos() )
            break;

        if ( scanner.Peek() != ':' )
            {
            scanner.Inc();
            continue;
            }

        // A colon is only accepted when a digit follows it;
        // otherwise the URL ends just before the colon.
        scanner.Inc();
        if ( !scanner.Peek().IsDigit() )
            {
            scanner.UnGet();
            break;
            }
        }

    // When the URL runs to the very end of the token, a trailing
    // period, question mark or comma is left out of the URL.
    if ( scanner.Eos() )
        {
        scanner.UnGet();
        const TChar lastChar = scanner.Peek();
        if ( lastChar != '.' && lastChar != '?' && lastChar != ',' )
            scanner.Inc();
        }

    scanner.Mark();
    const TInt urlLength = scanner.MarkedOffset();

    // The prefix on its own does not count as a URL.
    if ( urlLength <= aType.Length() )
        return EFalse;

    AddItemL( aTextOffset - aTokenPtr.Length() + prefixPos, urlLength, EFindItemSearchURLBin );
    return ETrue;
    }
/**
Returns the part of aText that lies between aCurrentItemStart and the
position currently marked in aInput.

@param aText             Text the value is cut from.
@param aInput            Lexer whose marked offset denotes the end of the value.
@param aCurrentItemStart Offset in aText where the value starts.
@return Pointer descriptor to the value, or an empty descriptor (KNullDesC)
        when the marked offset is not beyond aCurrentItemStart.
*/
TPtrC CScriptFile::ParseValue(const TDesC& aText, const TLex& aInput, TInt aCurrentItemStart) const
    {
    const TInt valueLength = aInput.MarkedOffset() - aCurrentItemStart;

    // Nothing between the start and the mark -> empty value.
    if ( valueLength <= 0 )
        return TPtrC( KNullDesC );

    return aText.Mid( aCurrentItemStart, valueLength );
    }
/**
Scans a text for e-mail addresses. Every '@' character is used as an anchor:
the local part is collected by walking backwards from it and the host part by
walking forwards. Each address found is reported through AddItemL() with the
type EFindItemSearchMailAddressBin.

@param aText Text to be scanned.
@return ETrue if iFoundItems contains at least one item when the scan has
        finished, otherwise EFalse.
@leave  May leave if AddItemL() leaves.
@panic  ETulPanicDescriptorLength in debug builds if the end of a found item
        lies outside the text.
*/
TBool CTulAddressStringTokenizer::SearchMailAddressL( const TDesC& aText )
    {
    const TInt textLength = aText.Length();
    TInt scanFrom = 0;  // offset in aText where the current search round starts

    do
        {
        TPtrC remainder = aText.Right( textLength - scanFrom );
        const TInt atPos = remainder.LocateF('@');
        if ( atPos == KErrNotFound )
            break;  // no more candidates in the rest of the text

        // The '@' must have a neighbour on both sides.
        if ( atPos == 0 || atPos >= remainder.Length() - 1 )
            {
            scanFrom += atPos + 1;
            continue;
            }

        // Both neighbours must be acceptable characters and neither may be a period.
        const TChar before = remainder[atPos - 1];
        const TChar after = remainder[atPos + 1];
        if ( !IsValidEmailChar( before )
            || !IsValidEmailHostChar( after )
            || before == '.'
            || after == '.' )
            {
            scanFrom += atPos + 1;
            continue;
            }

        TLexMark addressStart;
        TLexMark addressEnd;
        TLex cursor( remainder );

        // Remember where the '@' is (default mark), then walk backwards
        // over the local part. Two periods in a row end the walk.
        cursor.Inc( atPos );
        cursor.Mark();

        TBool previousWasPeriod = EFalse;
        for ( ;; )
            {
            cursor.UnGet();
            const TBool isPeriod = ( cursor.Peek() == '.' );
            if ( isPeriod && previousWasPeriod )
                break;
            previousWasPeriod = isPeriod;

            if ( cursor.Offset() <= 0 || !IsValidEmailChar( cursor.Peek() ) )
                break;
            }

        // Unless the walk stopped on a valid character at the very beginning
        // of the segment, the cursor sits one character too far back.
        if ( cursor.Offset() != 0 || !IsValidEmailChar( cursor.Peek() ) )
            cursor.Inc();

        // An address does not start with periods.
        while ( cursor.Peek() == '.' )
            cursor.Inc();

        cursor.Mark( addressStart );

        // Jump back to the '@' and walk forwards over the host part,
        // again stopping at two consecutive periods.
        cursor.UnGetToMark();
        previousWasPeriod = EFalse;
        for ( ;; )
            {
            cursor.Inc();
            const TBool isPeriod = ( cursor.Peek() == '.' );
            if ( isPeriod && previousWasPeriod )
                break;
            previousWasPeriod = isPeriod;

            if ( cursor.Eos() || !IsValidEmailHostChar( cursor.Peek() ) )
                break;
            }

        // A trailing period is not part of the address.
        cursor.UnGet();
        if ( cursor.Peek() != '.' )
            cursor.Inc();
        cursor.Mark( addressEnd );

        const TInt itemStart = cursor.MarkedOffset( addressStart );
        const TInt itemEnd = cursor.MarkedOffset( addressEnd );

        // The item must end inside the text.
        __ASSERT_DEBUG( scanFrom + itemEnd <= textLength, Panic(ETulPanicDescriptorLength) );
        AddItemL( scanFrom + itemStart, itemEnd - itemStart, EFindItemSearchMailAddressBin );

        // Continue after the address that was just reported.
        scanFrom += itemEnd + 1;
        }
    while ( scanFrom < textLength );

    return ( iFoundItems->Count() > 0 );
    }
/**
Scans a text for phone numbers. Candidates are reported through AddItemL()
with the type EFindItemSearchPhoneNumberBin.

Outline of one round of the outer loop:
 1. Skip forward to a character that may start a phone number.
 2. Validate the first character ('#', '+', '(' have extra rules) and set
    startMark.
 3. Walk forward while characters are acceptable, keeping track of digits and
    brackets. The walk never continues over a line feed, form feed, carriage
    return, line separator or paragraph separator.
 4. Trim trailing whitespace, set endMark, and strip one enclosing pair of
    brackets if there is exactly one pair overall.
 5. Accept the candidate if the digit count is within
    [iMinNumbers, KFindItemMaxNumbers] and the period check passes.

@param aText Text to be scanned.
@return ETrue if iFoundItems contains at least one item when the scan has
        finished, otherwise EFalse.
@leave  May leave if AddItemL() leaves.
@panic  ETulPanicDescriptorLength in debug builds if the end of a found item
        lies outside the text.
*/
TBool CTulAddressStringTokenizer::SearchPhoneNumberL( const TDesC& aText )
    {
    TLexMark startMark; // Points to the start of the found phone number
    TLexMark endMark; // Points to the end of the found phone number
    TLexMark mark; // Scratch mark used for look-ahead and for resuming the search
    const TInt end = aText.Length();

    TLex number = aText;

    while ( !(number.Eos()) )
        {
        TInt numberCount = 0; // How many real numbers (1234567890)
        TInt bracketsOpen = 0; // How many brackets are currently open
        TInt brackets = 0; // How many brackets overall

        TChar charac = number.Peek();

        // Skip everything that cannot start a phone number. '-', '.' and '/'
        // are skipped as well: they may appear inside a number but not first.
        while( (!(IsValidPhoneNumberChar( charac ) || charac == '+' || charac == '(' ) || charac == '-' || charac == '.' || charac == '/') && !(number.Eos()) && number.Offset() < end )
            {
            number.Inc();
            charac = number.Peek();
            }

        if ( number.Offset() >= end )
            break;

        if ( number.Peek() == '#' )
            { // '#' directly followed by '.' does not start a number
            number.Inc();
            if (number.Peek() == '.' )
                continue;

            number.UnGet();
            }

        if ( number.Peek() == '+' )
            { // '+' has to be followed by a number (not # or * ...)
            number.Inc();
            if ( !(number.Peek().IsDigit()) )
                continue;

            number.UnGet();
            }

        if ( number.Peek() == '(' )
            { // '(' has to be followed by valid phone number
              // character (whitespaces are allowed before) or '+' is a next character
            number.Inc();
            if ( !(number.Peek() == '+') )
                {
                number.Mark(mark);
                number.SkipSpace();
                charac = number.Peek();
                if ( !( IsValidPhoneNumberChar(charac) || charac == '+' || charac == '(' ) || charac == '-' || charac == '.' || charac == '/')
                    {
                    number.Inc();
                    continue;
                    }
                else
                    {
                    // Back to the '(' itself; the number starts there.
                    number.UnGetToMark(mark);
                    number.UnGet();
                    number.Mark(startMark);
                    }
                }
            else
                {
                // "(+": the number starts at the '(' and the cursor stays on the '+'.
                number.UnGet();
                number.Mark(startMark);
                number.Inc();
                }

            bracketsOpen++;
            brackets++;
            }
        else
            number.Mark(startMark);

        if ( number.Peek().IsDigit() ) // If the character was a number
            numberCount++;
        else if ( bracketsOpen > 0 )
            {
            // Look ahead to verify that the opening bracket gets closed.
            number.Inc();
            TChar next = number.Peek();
            TInt bracketsOpen2 = bracketsOpen;
            while( (IsValidPhoneNumberChar( next ) || next.IsSpace() || next == '(' || next == ')' || next == 'p' || next == '+' || next == 'w' ) && !(number.Eos()) && number.Offset() < end)
                {
                if ( next == '(' )
                    bracketsOpen2++;
                else if ( next == ')' )
                    bracketsOpen2--;

                if ( bracketsOpen2 == 0 )
                    break;

                number.Inc();
                next = number.Peek();
                }

            number.UnGetToMark(startMark);
            if ( bracketsOpen2 != 0 )
                { // Unbalanced brackets: give up on this start position
                number.Inc();
                continue;
                }
            }

        number.Inc();

        while ( number.Peek() == '(' && !(number.Eos()) && bracketsOpen > 0 )
            {
            number.Inc();
            bracketsOpen++;
            }

        if ( number.Peek() == '+' && bracketsOpen > 0 )
            number.Inc();

        // a Valid first character has been found. Let's go forward as long as valid characters are found.
        charac = number.Peek();

        while( (IsValidPhoneNumberChar( charac ) || charac.IsSpace() || charac == '(' || charac == ')' || charac == 'p' || charac == 'w' ) && !(number.Eos()) && number.Offset() < end
                && charac != KCharLinefeed && charac != KCharFormfeed && charac != KCharCarriageReturn
                && charac != KCharLineSeparator && charac != KCharParagraphSeparator )
            {
            if ( number.Peek() == '(' )
                { // '(' can't be the last character in phone number
                number.Mark(mark);
                number.Inc();

                // Skip spaces after the bracket, but never across a line break.
                // Fix: the line separator test now checks spaceJump (the character
                // being skipped) instead of charac, which made the test ineffective.
                TChar spaceJump = number.Peek();
                while ( !number.Eos() && spaceJump.IsSpace() && spaceJump != KCharLinefeed && spaceJump != KCharFormfeed
                        && spaceJump != KCharCarriageReturn && spaceJump != KCharLineSeparator && spaceJump != KCharParagraphSeparator)
                    {
                    number.Inc();
                    spaceJump = number.Peek();
                    }

                if ( !(IsValidPhoneNumberChar(number.Peek())) && number.Peek() != ')' && number.Peek() != '(' )
                    {
                    number.UnGetToMark(mark);
                    break;
                    }

                // Look ahead to verify that this bracket gets closed.
                TChar next = number.Peek();
                TInt bracketsOpen2 = bracketsOpen + 1;
                while( (IsValidPhoneNumberChar( next ) || next.IsSpace() || next == '(' || next == ')' || next == 'p' || next == 'w' ) && !(number.Eos()) && number.Offset() < end)
                    {
                    if ( next == '(' )
                        bracketsOpen2++;
                    else if ( next == ')' )
                        bracketsOpen2--;

                    if ( bracketsOpen2 == 0 )
                        break;

                    number.Inc();
                    next = number.Peek();
                    }

                number.UnGetToMark(mark);

                if ( bracketsOpen2 != 0 )
                    break;

                bracketsOpen++;
                brackets++;
                }
            else if ( number.Peek() == ')' )
                {
                if ( bracketsOpen <= 0 ) // there has to be equal number of brackets
                    break;

                bracketsOpen--;
                number.Mark(mark);
                number.Inc();
                if ( number.Peek() == '.' ) // '.' is not allowed after ')'
                    break;

                number.UnGetToMark(mark);
                }
            else if ( number.Peek() == '-' || number.Peek() == 'w' || number.Peek() == 'p' || number.Peek() == '.' || number.Peek() == '/')
                { // Hyphen mark and 'p' & 'w' chars must be followed by a number
                TChar last = number.Peek();
                number.Mark(mark);
                number.Inc();

                // Skip spaces after the separator, but never across a line break.
                // Fix: the line separator test now checks spaceJump instead of charac.
                TChar spaceJump = number.Peek();
                while ( !number.Eos() && spaceJump.IsSpace() && spaceJump != KCharLinefeed && spaceJump != KCharFormfeed
                        && spaceJump != KCharCarriageReturn && spaceJump != KCharLineSeparator && spaceJump != KCharParagraphSeparator )
                    {
                    number.Inc();
                    spaceJump = number.Peek();
                    }

                if ( !(number.Peek().IsDigit()) )
                    {
                    // A period may precede a closing bracket; in that case carry
                    // on from the ')' without rewinding.
                    if (last == '.' && number.Peek() == ')' && bracketsOpen > 0 )
                        continue;
                    else
                        {
                        number.UnGetToMark(mark);
                        break;
                        }
                    }

                number.UnGetToMark(mark);
                }
            else if ( number.Peek().IsDigit() )
                numberCount++;

            number.Inc();
            charac = number.Peek();
            }

        // Get rid of whitespaces from the end
        number.UnGet();
        while( number.Peek().IsSpace() && !(number.Eos()))
            number.UnGet();

        number.Inc();
        // ------------------------------------
        number.Mark(endMark);

        // If they exist, remove brackets from the beginning and the end
        number.Mark(mark); // Let's mark where to continue the search

        TBool endBrackets = ETrue;
        do
            {
            number.UnGet();

            if ( number.Peek() == ')' )
                {
                number.UnGetToMark(startMark);
                if ( number.Peek() == '(' )
                    {
                    // If there's more than one pair of brackets -> don't strip them.
                    if ( brackets > 1 )
                        break;

                    // Move startMark past the '(' and endMark before the ')'.
                    number.Inc();
                    number.Mark(startMark);
                    number.UnGetToMark(endMark);
                    number.UnGet();
                    number.Mark(endMark);

                    // Get rid of whitespaces and periods from the end and from the beginning
                    number.UnGet();
                    while ( (number.Peek().IsSpace() || number.Peek() == '.') && number.Offset() > number.MarkedOffset(startMark) )
                        { // from the end
                        number.UnGet();
                        }
                    number.Inc();
                    number.Mark(endMark);
                    number.UnGetToMark(startMark);
                    while ( (number.Peek().IsSpace() || number.Peek() == '.') && number.Offset() < number.MarkedOffset(endMark) )
                        { // from the beginning
                        number.Inc();
                        }
                    number.Mark(startMark);
                    number.UnGetToMark(endMark);
                    // ----
                    }
                else
                    endBrackets = EFalse;
                }
            else
                endBrackets = EFalse;
            } while ( endBrackets );

        // Resume the search from where the candidate ended (before bracket stripping).
        number.UnGetToMark(mark);
        // ----------------

        if ( numberCount <= KFindItemMaxNumbers && numberCount >= iMinNumbers )
            {
            TPtrC tokenPtr = number.MarkedToken(startMark);
            TInt tokensEnd = tokenPtr.Length();
            TInt numbers = 0; // Digits in the whole token
            TInt partialNumber = 0; // Digits seen since the last period
            TBool wasValidPhoneNumber = ETrue;
            TInt i = 0;

            // Period check: the candidate is rejected when a period follows at a
            // point where only one or two digits have been seen since the
            // previous period (or since the start of the token).
            for ( ; i < tokensEnd; i++ )
                {
                if ( tokenPtr[i] == '.' )
                    partialNumber = 0;
                else if ( ((TChar)tokenPtr[i]).IsDigit() )
                    {
                    numbers++;
                    partialNumber++;
                    }

                if ( ( partialNumber == 1 || partialNumber == 2 ) && i + 1 < tokensEnd )
                    {
                    if ( tokenPtr[i + 1] == '.' )
                        wasValidPhoneNumber = EFalse;
                    }
                }

            // A candidate with more than six digits is accepted regardless of
            // the period check.
            if (!wasValidPhoneNumber && numbers > 6)
                wasValidPhoneNumber = ETrue;

            if (wasValidPhoneNumber)
                {
                __ASSERT_DEBUG( number.MarkedOffset(startMark) + number.MarkedOffset(endMark) - number.MarkedOffset(startMark) <= aText.Length(), Panic(ETulPanicDescriptorLength) );
                AddItemL( number.MarkedOffset(startMark), number.MarkedOffset(endMark) - number.MarkedOffset(startMark), EFindItemSearchPhoneNumberBin );
                }
            }
        }

    return (iFoundItems->Count() > 0);
    }
/**
Scans a text for generic URIs of the form <scheme><terminator><rest>.
The scan looks for a scheme terminator character, walks backwards to find
where the scheme begins, and then forwards over the URI characters. Each URI
found is reported through AddItemL() with the type EFindItemSearchScheme.

@param aText Text to be scanned.
@return ETrue if at least one URI was reported during this call, otherwise EFalse.
@leave  May leave if AddItemL() leaves.
*/
TBool CTulAddressStringTokenizer::SearchGenericUriL( const TDesC& aText )
    {
    // Character sets that drive the scan
    const TDesC& startChars = KURISchemeStartCharacters;
    const TDesC& bodyChars = KURISchemeBodyCharacters;
    const TDesC& terminatorChars = KURISchemeTerminator;
    const TDesC& uriChars = KURICharacters;

    TBool foundAny = EFalse;
    TLex lexer( aText );

    while ( !lexer.Eos() )
        {
        // Move forward to the next scheme terminator.
        while ( !lexer.Eos() && terminatorChars.Locate( lexer.Peek() ) == KErrNotFound )
            lexer.Inc();

        if ( lexer.Eos() )
            break;  // no terminator left, so no further URI is possible

        // Offset of the first character after the terminator.
        const TInt afterTerminator = lexer.Offset() + 1;

        // A terminator at the very beginning has no scheme in front of it.
        if ( lexer.Offset() <= 0 )
            {
            lexer.Inc();
            continue;
            }

        // Walk backwards over the characters that may belong to a scheme.
        lexer.UnGet();
        while ( lexer.Offset() > 0
                && bodyChars.Locate( lexer.Peek().GetLowerCase() ) != KErrNotFound )
            lexer.UnGet();

        // Then forwards again until a character that may start a scheme is
        // found, going no further than the terminator itself.
        while ( startChars.Locate( lexer.Peek().GetLowerCase() ) == KErrNotFound
                && ( lexer.Offset() + 1 ) < afterTerminator )
            lexer.Inc();

        if ( startChars.Locate( lexer.Peek().GetLowerCase() ) == KErrNotFound )
            {
            // No legal start character: step over the terminator and search on.
            while ( lexer.Offset() < afterTerminator )
                lexer.Inc();
            continue;
            }

        // The scheme is valid; this is where the URI begins.
        lexer.Mark();

        // Go past the scheme and its terminator...
        while ( lexer.Offset() < afterTerminator )
            lexer.Inc();

        // ...and take every following character that is allowed in a URI.
        while ( !lexer.Eos()
                && uriChars.Locate( lexer.Peek().GetLowerCase() ) != KErrNotFound )
            lexer.Inc();

        // Trailing punctuation most likely belongs to the surrounding text.
        // A closing bracket is dropped only if the URI collected so far has no
        // opening bracket (bracket pairs are deliberately not counted).
        lexer.UnGet();
        if ( lexer.Peek() != ')'
            || lexer.MarkedToken().Locate( TChar('(') ) != KErrNotFound )
            lexer.Inc();

        // One trailing period, question mark or comma is dropped as well.
        lexer.UnGet();
        const TChar tail = lexer.Peek();
        if ( tail != '.' && tail != '?' && tail != ',' )
            lexer.Inc();

        // A scheme with nothing after it is not a URI.
        if ( afterTerminator != lexer.Offset() )
            {
            AddItemL( lexer.MarkedOffset(), lexer.Offset() - lexer.MarkedOffset(), EFindItemSearchScheme );
            foundAny = ETrue;
            }
        }

    return foundAny;
    }