class Bodypart * Message::bodypart( const EString & s, bool create ) { uint b = 0; Bodypart * bp = 0; while ( b < s.length() ) { uint e = b; while ( s[e] >= '0' && s[e] <= '9' ) e++; if ( e < s.length() && s[e] != '.' ) return 0; bool inrange = false; uint n = s.mid( b, e-b ).number( &inrange ); b = e + 1; if ( !inrange || n == 0 ) return 0; List<Bodypart> * c = children(); if ( bp ) c = bp->children(); List<Bodypart>::Iterator i( c ); while ( i && i->number() < n ) ++i; if ( i && i->number() == n ) { if ( n == 1 && !i->header() ) { // it's possible that i doesn't have a header of its // own, and that the parent message's header functions // as such. link it in if that's the case. Header * h = header(); if ( bp && bp->message() ) h = bp->message()->header(); if ( h && ( !h->contentType() || h->contentType()->type() != "multipart" ) ) i->setHeader( h ); } bp = i; } else if ( create ) { Bodypart * child = 0; if ( bp ) child = new Bodypart( n, bp ); else child = new Bodypart( n, this ); c->insert( i, child ); bp = child; } else { return 0; } } return bp; }
// Parses the bytes of \a rfc2822 from \a start up to (not including)
// \a end as the body of one bodypart whose header is \a h, and returns
// a newly allocated Bodypart whose parent is \a parent.
//
// \a h is modified along the way: duplicate Content-Transfer-Encoding
// fields may be removed, a default Content-Type may be added, the
// charset parameter and the Content-Transfer-Encoding are rewritten to
// match what is stored, and h->simplify() is called at the end.
//
// Text bodies are converted to Unicode (guessing a codec where the
// declared one is missing or fails); other leaf bodies are stored as
// data; multipart and message/rfc822 bodies are parsed recursively.
Bodypart * Bodypart::parseBodypart( uint start, uint end,
                                    const EString & rfc2822,
                                    Header * h, Multipart * parent )
{
    // skip at most one CR and one LF in front of the body
    if ( rfc2822[start] == 13 )
        start++;
    if ( rfc2822[start] == 10 )
        start++;

    Bodypart * bp = new Bodypart;
    bp->setParent( parent );
    bp->setHeader( h );

    EString body;
    if ( end > start )
        body = rfc2822.mid( start, end-start );
    if ( !body.contains( '=' ) ) {
        // sometimes people send c-t-e: q-p _and_ c-t-e: 7bit or 8bit.
        // if they are equivalent we can accept it.
        uint i = 0;
        bool any = false;
        HeaderField * f = 0;
        while ( (f=h->field(HeaderField::ContentTransferEncoding,i)) != 0 ) {
            if ( ((ContentTransferEncoding*)f)->encoding() == EString::QP )
                any = true;
            i++;
        }
        if ( any && i > 1 )
            h->removeField( HeaderField::ContentTransferEncoding );
    }

    EString::Encoding e = EString::Binary;
    ContentTransferEncoding * cte = h->contentTransferEncoding();
    if ( cte )
        e = cte->encoding();
    if ( !body.isEmpty() ) {
        if ( e == EString::Base64 || e == EString::Uuencode )
            body = body.decoded( e );
        else
            body = body.crlf().decoded( e );
    }

    ContentType * ct = h->contentType();
    if ( !ct ) {
        switch ( h->defaultType() ) {
        case Header::TextPlain:
            h->add( "Content-Type", "text/plain" );
            break;
        case Header::MessageRfc822:
            h->add( "Content-Type", "message/rfc822" );
            break;
        }
        ct = h->contentType();
    }
    // NOTE(review): ct is dereferenced below without a null check.
    // That presumably relies on defaultType() always selecting one of
    // the two cases above - confirm.

    if ( ct->type() == "text" ) {
        bool specified = false;
        bool unknown = false;
        Codec * c = 0;

        if ( ct ) {
            EString csn = ct->parameter( "charset" );
            if ( csn.lower() == "default" )
                csn = "";
            if ( !csn.isEmpty() )
                specified = true;
            c = Codec::byName( csn );
            if ( !c )
                unknown = true;
            if ( c && c->name().lower() == "us-ascii" ) {
                // Some MTAs appear to say this in case there is no
                // Content-Type field - without checking whether the
                // body actually is ASCII. If it isn't, we'd better
                // call our charset guesser.
                (void)c->toUnicode( body );
                if ( !c->valid() )
                    specified = false;
                // Not pretty.
            }
        }

        if ( !c )
            c = new AsciiCodec;

        bp->d->hasText = true;
        bp->d->text = c->toUnicode( body.crlf() );

        if ( c->name() == "GB2312" || c->name() == "ISO-2022-JP" ||
             c->name() == "KS_C_5601-1987" ) {
            // undefined code point usage in GB2312 spam is much too
            // common. (GB2312 spam is much too common, but that's
            // another matter.) Gb2312Codec turns all undefined code
            // points into U+FFFD, so here, we can take the unicode
            // form and say it's the canonical form. when a client
            // later reads the message, it gets the text in unicode,
            // including U+FFFD.

            bool bad = !c->valid();

            // the header may contain some unencoded gb2312. we bang
            // it by hand, ignoring errors.
            List<HeaderField>::Iterator hf( h->fields() );
            while ( hf ) {
                if ( !hf->valid() &&
                     hf->type() == HeaderField::Subject ) {
                    // is it right to bang only Subject?
                    c->reset();
                    hf->setValue( c->toUnicode( hf->unparsedValue() ) );
                }
                ++hf;
            }

            // if the body was bad, we prefer the (unicode) in
            // bp->d->text and pretend it arrived as UTF-8:
            if ( bad ) {
                c = new Utf8Codec;
                body = c->fromUnicode( bp->d->text );
            }
        }

        if ( ( !specified && ( !c->wellformed() ||
                               ct->subtype() == "html" ) ) ||
             ( specified && ( !c->valid() ) ) ) {
            Codec * g = 0;
            if ( ct->subtype() == "html" )
                g = guessHtmlCodec( body );
            else
                g = guessTextCodec( body );
            UString guessed;
            if ( g )
                guessed = g->toUnicode( body.crlf() );
            if ( !g ) {
                // if we couldn't guess anything, keep what we had if
                // it's valid or explicitly specified, else use
                // unknown-8bit.
                if ( !specified && !c->valid() ) {
                    c = new Unknown8BitCodec;
                    bp->d->text = c->toUnicode( body.crlf() );
                }
            }
            else {
                // if we could guess something, is our guess better
                // than what we had?
                if ( g->wellformed() && !c->wellformed() ) {
                    c = g;
                    bp->d->text = guessed;
                }
            }
        }

        if ( specified && c->state() == Codec::Invalid ) {
            // the codec was specified, and the specified codec
            // resulted in an error, but did not abort conversion. we
            // respond by forgetting the error, using the conversion
            // result (probably including one or more U+FFFD) and
            // labelling the message as UTF-8.
            c = new Utf8Codec;
            body = c->fromUnicode( bp->d->text );
        }
        else if ( !specified && c->state() == Codec::Invalid ) {
            // the codec was not specified, and we couldn't find
            // anything. we call it unknown-8bit.
            c = new Unknown8BitCodec;
            bp->d->text = c->toUnicode( body );
        }

        // if we ended up using a 16-bit codec and were using q-p, we
        // need to reevaluate without any trailing CRLF
        if ( e == EString::QP && c->name().startsWith( "UTF-16" ) )
            bp->d->text = c->toUnicode( body.stripCRLF() );

        if ( !c->valid() && bp->d->error.isEmpty() ) {
            bp->d->error = "Could not convert body to Unicode";
            if ( specified ) {
                EString cs;
                if ( ct )
                    cs = ct->parameter( "charset" );
                if ( cs.isEmpty() )
                    cs = c->name();
                bp->d->error.append( " from " + cs );
            }
            if ( specified && unknown )
                bp->d->error.append( ": Character set not implemented" );
            else if ( !c->error().isEmpty() )
                bp->d->error.append( ": " + c->error() );
        }

        // record the codec we ended up with as the charset, except
        // that us-ascii is left implicit
        if ( c->name().lower() != "us-ascii" )
            ct->addParameter( "charset", c->name().lower() );
        else if ( ct )
            ct->removeParameter( "charset" );

        body = c->fromUnicode( bp->d->text );
        bool qp = body.needsQP();

        // make the c-t-e field agree with whether q-p is needed
        if ( cte ) {
            if ( !qp ) {
                h->removeField( HeaderField::ContentTransferEncoding );
                cte = 0;
            }
            else if ( cte->encoding() != EString::QP ) {
                cte->setEncoding( EString::QP );
            }
        }
        else if ( qp ) {
            h->add( "Content-Transfer-Encoding", "quoted-printable" );
            cte = h->contentTransferEncoding();
        }
    }
    else {
        bp->d->data = body;
        if ( ct->type() != "multipart" && ct->type() != "message" ) {
            e = EString::Base64;
            // there may be exceptions. cases where some format really
            // needs another content-transfer-encoding:
            if ( ct->type() == "application" &&
                 ct->subtype().startsWith( "pgp-" ) &&
                 !body.needsQP() ) {
                // seems some PGP things need "Version: 1" unencoded
                e = EString::Binary;
            }
            else if ( ct->type() == "application" &&
                      ct->subtype() == "octet-stream" &&
                      body.contains( "BEGIN PGP MESSAGE" ) ) {
                // mutt cannot handle PGP in base64 (what a crock)
                e = EString::Binary;
            }
            // change c-t-e to match the encoding decided above
            if ( e == EString::Binary ) {
                h->removeField( HeaderField::ContentTransferEncoding );
                cte = 0;
            }
            else if ( cte ) {
                cte->setEncoding( e );
            }
            else {
                h->add( "Content-Transfer-Encoding", "base64" );
                cte = h->contentTransferEncoding();
            }
        }
    }

    if ( ct->type() == "multipart" ) {
        parseMultipart( start, end, rfc2822,
                        ct->parameter( "boundary" ),
                        ct->subtype() == "digest",
                        bp->children(), bp, false );
    }
    else if ( ct->type() == "message" && ct->subtype() == "rfc822" ) {
        // There are sometimes blank lines before the message. We stop
        // skipping at end: going past it would make the unsigned
        // end-start wrap around, and the nested message would then
        // be given bytes that lie beyond this bodypart.
        while ( start < end &&
                ( rfc2822[start] == 13 || rfc2822[start] == 10 ) )
            start++;
        uint len = 0;
        if ( end > start )
            len = end - start;
        Message * m = new Message;
        m->setParent( bp );
        m->parse( rfc2822.mid( start, len ) );
        // the nested message's parts become children of this bodypart
        List<Bodypart>::Iterator it( m->children() );
        while ( it ) {
            bp->children()->append( it );
            it->setParent( bp );
            ++it;
        }
        bp->setMessage( m );
        body = m->rfc822( false );
    }

    bp->d->numBytes = body.length();
    if ( cte )
        body = body.encoded( cte->encoding(), 72 );
    bp->d->numEncodedBytes = body.length();
    if ( bp->d->hasText ||
         ( ct->type() == "message" && ct->subtype() == "rfc822" ) ) {
        // count the lines of the encoded form; a final line without
        // a trailing LF counts too
        uint n = 0;
        uint i = 0;
        uint l = body.length();
        while ( i < l ) {
            if ( body[i] == '\n' )
                n++;
            i++;
        }
        if ( l && body[l-1] != '\n' )
            n++;
        bp->setNumEncodedLines( n );
    }

    h->simplify();

    return bp;
}
void SieveCommand::parse( const EString & previous ) { if ( identifier().isEmpty() ) setError( "Command name is empty" ); bool test = false; bool blk = false; EString i = identifier(); if ( i == "if" || i == "elsif" ) { test = true; blk = true; if ( i == "elsif" && previous != "if" && previous != "elsif" ) setError( "elsif is only permitted after if/elsif" ); } else if ( i == "else" ) { blk = true; if ( previous != "if" && previous != "elsif" ) setError( "else is only permitted after if/elsif" ); } else if ( i == "require" ) { arguments()->numberRemainingArguments(); UStringList::Iterator i( arguments()->takeStringList( 1 ) ); EStringList a; EStringList e; while ( i ) { if ( supportedExtensions()->contains( i->ascii() ) ) a.append( i->ascii().quoted() ); else e.append( i->ascii().quoted() ); ++i; } if ( !e.isEmpty() ) setError( "Each string must be a supported " "sieve extension. " "These are not: " + e.join( ", " ) ); if ( !d->require ) setError( "require is only permitted as the first command." 
); else if ( parent() ) parent()->addExtensions( &a ); } else if ( i == "stop" ) { // nothing needed } else if ( i == "reject" ) { require( "reject" ); if ( arguments()->arguments()->isEmpty() ) { // we accept reject without reason } else { // if there is an argument, it must be a string arguments()->numberRemainingArguments(); (void)arguments()->takeString( 1 ); } } else if ( i == "ereject" ) { require( "reject" ); arguments()->numberRemainingArguments(); (void)arguments()->takeString( 1 ); } else if ( i == "fileinto" ) { require( "fileinto" ); if ( arguments()->findTag( ":copy" ) ) require( "copy" ); if ( arguments()->findTag( ":flags" ) ) { require( "imap4flags" ); (void)arguments()->takeTaggedStringList( ":copy" ); } arguments()->numberRemainingArguments(); UString mailbox = arguments()->takeString( 1 ); UString p; p.append( "/" ); p.append( mailbox ); if ( !Mailbox::validName( mailbox ) && !Mailbox::validName( p ) ) { setError( "Expected mailbox name, but got: " + mailbox.utf8() ); } else if ( mailbox.startsWith( "INBOX." ) ) { // a sieve script which wants to reference a // mailbox called INBOX.X must use lower case // (inbox.x). UString aox = UStringList::split( '.', mailbox.mid( 6 ) )->join( "/" ); setError( mailbox.utf8().quoted() + " is Cyrus syntax. 
Archiveopteryx uses " + aox.utf8().quoted() ); } } else if ( i == "redirect" ) { (void)arguments()->findTag( ":copy" ); arguments()->numberRemainingArguments(); EString s = arguments()->takeString( 1 ).utf8(); AddressParser ap( s ); ap.assertSingleAddress(); if ( !ap.error().isEmpty() ) setError( "Expected one normal address (local@domain), but got: " + s ); } else if ( i == "keep" ) { // nothing needed } else if ( i == "discard" ) { // nothing needed } else if ( i == "vacation" ) { // vacation [":days" number] [":subject" string] // [":from" string] [":addresses" string-list] // [":mime"] [":handle" string] <reason: string> require( "vacation" ); // :days uint days = 7; if ( arguments()->findTag( ":days" ) ) days = arguments()->takeTaggedNumber( ":days" ); if ( days < 1 || days > 365 ) arguments()->tagError( ":days", "Number must be 1..365" ); // :subject (void)arguments()->takeTaggedString( ":subject" ); // anything is acceptable, right? // :from if ( arguments()->findTag( ":from" ) ) { parseAsAddress( arguments()->takeTaggedString( ":from" ), ":from" ); // XXX we don't enforce its being a local address. 
} // :addresses if ( arguments()->findTag( ":addresses" ) ) { UStringList * addresses = arguments()->takeTaggedStringList( ":addresses" ); UStringList::Iterator i( addresses ); while ( i ) { parseAsAddress( *i, ":addresses" ); ++i; } } // :mime bool mime = false; if ( arguments()->findTag( ":mime" ) ) mime = true; // :handle (void)arguments()->takeTaggedString( ":handle" ); // reason arguments()->numberRemainingArguments(); UString reason = arguments()->takeString( 1 ); if ( mime ) { if ( !reason.isAscii() ) setError( ":mime bodies must be all-ASCII, " "8-bit text is not permitted" ); // so says the RFC EString x = reason.utf8(); uint i = 0; Header * h = Message::parseHeader( i, x.length(), x, Header::Mime ); Bodypart * bp = Bodypart::parseBodypart( i, x.length(), x, h, 0 ); if ( !h->error().isEmpty() ) setError( "While parsing MIME header: " + h->error() ); else if ( !bp->error().isEmpty() ) setError( "While parsing MIME bodypart: " + bp->error() ); List<HeaderField>::Iterator f( h->fields() ); while ( f ) { if ( !f->name().startsWith( "Content-" ) ) setError( "Header field not permitted: " + f->name() ); ++f; } if ( bp->children()->isEmpty() && bp->text().isEmpty() ) setError( "Vacation reply does not contain any text" ); } else { if ( reason.isEmpty() ) setError( "Empty vacation text does not make sense" ); } } else if ( i == "setflag" || i == "addflags" || i == "removeflag" ) { arguments()->numberRemainingArguments(); (void)arguments()->takeStringList( 1 ); } else if ( i == "notify" ) { require( "enotify" ); UString from; if ( arguments()->findTag( ":from" )) from = arguments()->takeTaggedString( ":from" ); UString importance; importance.append( "2" ); if ( arguments()->findTag( ":importance" ) ) importance = arguments()->takeTaggedString( ":from" ); uint c = importance[0]; if ( c < '1' || c > '3' ) arguments()->tagError( ":importance", "Importance must be 1, 2 or 3" ); UStringList * options; if ( arguments()->findTag( ":options" ) ) options = 
arguments()->takeTaggedStringList( ":options" ); UString message; if ( arguments()->findTag( ":message" ) ) message = arguments()->takeTaggedString( ":message" ); arguments()->numberRemainingArguments(); UString method = arguments()->takeString( 1 ); SieveNotifyMethod * m = new SieveNotifyMethod( method, arguments()->takeArgument( 1 ), this ); if ( m->valid() ) { if ( arguments()->findTag( ":from" ) ) m->setFrom( from, arguments()->findTag( ":from" ) ); if ( arguments()->findTag( ":message" ) ) m->setMessage( message, arguments()->findTag( ":message" ) ); } } else { setError( "Command unknown: " + identifier() ); } arguments()->flagUnparsedAsBad(); if ( test ) { // we must have a test if ( !arguments() || arguments()->tests()->count() != 1 ) setError( "Command " + identifier() + " requires one test" ); if ( arguments() ) { List<SieveTest>::Iterator i( arguments()->tests() ); while ( i ) { i->parse(); if ( blk && block() ) { if ( i->ihaveFailed() ) block()->setIhaveFailed(); else block()->addExtensions( i->addedExtensions() ); } ++i; } } } else { // we cannot have a test if ( arguments() && arguments()->tests()->isEmpty() ) { List<SieveTest>::Iterator i( arguments()->tests() ); while ( i ) { i->setError( "Command " + identifier() + " does not use tests" ); ++i; } } } if ( blk ) { // we must have a subsidiary block if ( !block() ) { setError( "Command " + identifier() + " requires a subsidiary {..} block" ); } else { EString prev; List<SieveCommand>::Iterator i( block()->commands() ); while ( i ) { i->parse( prev ); prev = i->identifier(); ++i; } } } else { // we cannot have a subsidiary block if ( block() ) block()->setError( "Command " + identifier() + " does not use a subsidiary command block" ); // in this case we don't even bother syntax-checking the test // or block } }