// . parse an incoming request // . return false and set g_errno on error // . CAUTION: we destroy "req" by replacing it's last char with a \0 // . last char must be \n or \r for it to be a proper request anyway bool HttpRequest::set ( char *origReq , long origReqLen , TcpSocket *sock ) { // reset number of cgi field terms reset(); if ( ! m_reqBuf.reserve ( origReqLen + 1 ) ) { log("http: failed to copy request: %s",mstrerror(g_errno)); return false; } // copy it to avoid mangling it m_reqBuf.safeMemcpy ( origReq , origReqLen ); // NULL term m_reqBuf.pushChar('\0'); m_reqBufValid = true; // and point to that char *req = m_reqBuf.getBufStart(); long reqLen = m_reqBuf.length() - 1; // save this m_userIP = 0; if ( sock ) m_userIP = sock->m_ip; m_isSSL = 0; if ( sock ) m_isSSL = (bool)sock->m_ssl; // TcpServer should always give us a NULL terminated request if ( req[reqLen] != '\0' ) { char *xx = NULL; *xx = 0; } // how long is the first line, the primary request long i; // for ( i = 0 ; i<reqLen && i<MAX_REQ_LEN && // req[i]!='\n' && req[i]!='\r'; i++); // . now fill up m_buf, used to log the request // . make sure the url was encoded correctly // . we don't want assholes encoding every char so we can't see what // url they are submitting to be spidered/indexed // . also, don't de-code encoded ' ' '+' '?' '=' '&' because that would // change the meaning of the url // . and finally, non-ascii chars that don't display correctly // . this should NULL terminate m_buf, too // . turn this off for now, just try to log a different way // m_bufLen = urlNormCode ( m_buf , MAX_REQ_LEN - 1 , req , i ); // ensure it's big enough to be a valid request if ( reqLen < 5 ) { log("http: got reqlen<5 = %s",req); g_errno = EBADREQUEST; return false; } // or if first line too long //if ( i >= 1024 ) { g_errno = EBADREQUEST; return false; } // get the type, must be GET or HEAD if ( strncmp ( req , "GET " , 4 ) == 0 ) m_requestType = 0; // these means a compressed reply was requested. use by query // compression proxies. else if ( strncmp ( req , "ZET " , 4 ) == 0 ) m_requestType = 0; else if ( strncmp ( req , "HEAD " , 5 ) == 0 ) m_requestType = 1; else if ( strncmp ( req , "POST " , 5 ) == 0 ) m_requestType = 2; else { log("http: got bad request cmd: %s",req); g_errno = EBADREQUEST; return false; } // . NULL terminate the request (a destructive operation!) // . this removes the last \n in the trailing \r\n // . shit, but it f***s up POST requests if ( m_requestType != 2 ) { req [ reqLen - 1 ] = '\0'; reqLen--; } // POST requests can be absolutely huge if you are injecting a 100MB // file, so limit our strstrs to the end of the mime char *d = NULL; char dc; // check for body if it was a POST request if ( m_requestType == 2 ) { d = strstr ( req , "\r\n\r\n" ); if ( d ) { dc = *d; *d = '\0'; } else log("http: Got POST request without \\r\\n\\r\\n."); } // . point to the file path // . skip over the "GET " long filenameStart = 4 ; // skip over extra char if it's a "HEAD " request if ( m_requestType == 1 || m_requestType == 2 ) filenameStart++; // are we a redirect? i = filenameStart; m_redirLen = 0; if ( strncmp ( &req[i] , "/?redir=" , 8 ) == 0 ) { for ( long k = i+8; k<reqLen && m_redirLen<126 ; k++) { if ( req[k] == '\r' ) break; if ( req[k] == '\n' ) break; if ( req[k] == '\t' ) break; if ( req[k] == ' ' ) break; m_redir[m_redirLen++] = req[k]; } } m_redir[m_redirLen] = '\0'; // find a \n space \r or ? that delimits the filename for ( i = filenameStart ; i < reqLen ; i++ ) { if ( is_wspace_a ( req [ i ] ) ) break; if ( req [ i ] == '?' ) break; } // now calc the filename length m_filenameLen = i - filenameStart; // return false and set g_errno if it's 0 if ( m_filenameLen <= 0 ) { log("http: got filenameLen<=0: %s",req); g_errno = EBADREQUEST; return false; } // . bitch if too big // . leave room for strcatting "index.html" below if ( m_filenameLen >= MAX_HTTP_FILENAME_LEN - 10 ) { log("http: got filenameLen>=max"); g_errno = EBADREQUEST; return false; } // . decode the filename into m_filename and reassign it's length // . decode %2F to / , etc... m_filenameLen = urlDecode(m_filename,req+filenameStart,m_filenameLen); // NULL terminate m_filename m_filename [ m_filenameLen ] = '\0'; // does it have a file extension AFTER the last / in the filename? bool hasExtension = false; for ( long j = m_filenameLen-1 ; j >= 0 ; j-- ) { if ( m_filename[j] == '.' ) { hasExtension = true; break; } if ( m_filename[j] == '/' ) break; } // if it has no file extension append a /index.html if ( ! hasExtension && m_filename [ m_filenameLen - 1 ] == '/' ) { strcat ( m_filename , "index.html" ); m_filenameLen = gbstrlen ( m_filename ); } // set file offset/size defaults m_fileOffset = 0; // -1 means ALL the file from m_fileOffset onwards m_fileSize = -1; // "e" points to where the range actually starts, if any //char *e; // . TODO: speed up by doing one strstr for Range: and maybe range: // . do they have a Range: 0-100\n in the mime denoting a partial get? //char *s = strstr ( req ,"Range:bytes=" ); //e = s + 12; // try alternate formats //if ( ! s ) { s = strstr ( req ,"Range: bytes=" ); e = s + 13; } //if ( ! s ) { s = strstr ( req ,"Range: " ); e = s + 7; } // parse out the range if we got one //if ( s ) { // long x = 0; // sscanf ( e ,"%li-%li" , &m_fileOffset , &x ); // // get all file if range's 2nd number is non-existant // if ( x == 0 ) m_fileSize = -1; // else m_fileSize = x - m_fileOffset; // // ensure legitimacy // if ( m_fileOffset < 0 ) m_fileOffset = 0; //} // reset our hostname m_hostLen = 0; // assume request is NOT from local network //m_isAdmin = false; m_isLocal = false; // get the virtual hostname they want to use char *s = strstr ( req ,"Host:" ); // try alternate formats if ( ! s ) s = strstr ( req , "host:" ); // must be on its own line, otherwise it's not valid if ( s && s > req && *(s-1) !='\n' ) s = NULL; // parse out the host if we got one if ( s ) { // skip field name, host: s += 5; // skip e to beginning of the host name after "host:" while ( *s==' ' || *s=='\t' ) s++; // find end of the host name char *end = s; while ( *end && !is_wspace_a(*end) ) end++; // . now *end should be \0, \n, \r, ' ', ... // . get host len m_hostLen = end - s; // truncate if too big if ( m_hostLen >= 255 ) m_hostLen = 254; // copy into hostname memcpy ( m_host , s , m_hostLen ); } // NULL terminate it m_host [ m_hostLen ] = '\0'; // get Referer: field s = strstr ( req ,"Referer:" ); // find another if ( ! s ) s = strstr ( req ,"referer:" ); // must be on its own line, otherwise it's not valid if ( s && s > req && *(s-1) !='\n' ) s = NULL; // assume no referer m_refLen = 0; // parse out the referer if we got one if ( s ) { // skip field name, referer: s += 8; // skip e to beginning of the host name after ':' while ( *s==' ' || *s=='\t' ) s++; // find end of the host name char *end = s; while ( *end && !is_wspace_a(*end) ) end++; // . now *end should be \0, \n, \r, ' ', ... // . get len m_refLen = end - s; // truncate if too big if ( m_refLen >= 255 ) m_refLen = 254; // copy into m_ref memcpy ( m_ref , s , m_refLen ); } // NULL terminate it m_ref [ m_refLen ] = '\0'; // get User-Agent: field s = strstr ( req ,"User-Agent:" ); // find another if ( ! s ) s = strstr ( req ,"user-agent:" ); // must be on its own line, otherwise it's not valid if ( s && s > req && *(s-1) !='\n' ) s = NULL; // assume empty long len = 0; // parse out the referer if we got one if ( s ) { // skip field name, referer: s += 11; // skip e to beginning of the host name after ':' while ( *s==' ' || *s=='\t' ) s++; // find end of the agent name char *end = s; while ( *end && *end!='\n' && *end!='\r' ) end++; // . now *end should be \0, \n, \r, ' ', ... // . get agent len len = end - s; // truncate if too big if ( len > 127 ) len = 127; // copy into m_userAgent memcpy ( m_userAgent , s , len ); } // NULL terminate it m_userAgent [ len ] = '\0'; m_isMSIE = false; if ( strstr ( m_userAgent , "MSIE" ) ) m_isMSIE = true; // get Cookie: field s = strstr ( req, "Cookie:" ); // find another if ( !s ) s = strstr ( req, "cookie:" ); // must be on its own line, otherwise it's not valid if ( s && s > req && *(s-1) != '\n' ) s = NULL; // assume empty // m_cookieBufLen = 0; m_cookiePtr = s; // parse out the cookie if we got one if ( s ) { // skip field name, Cookie: s += 7; // skip s to beginning of cookie after ':' while ( *s == ' ' || *s == '\t' ) s++; // find end of the cookie char *end = s; while ( *end && *end != '\n' && *end != '\r' ) end++; // save length m_cookieLen = end - m_cookiePtr; // get cookie len //m_cookieBufLen = end - s; // trunc if too big //if (m_cookieBufLen > 1023) m_cookieBufLen = 1023; // copy into m_cookieBuf //memcpy(m_cookieBuf, s, m_cookieBufLen); } // NULL terminate it if ( m_cookiePtr ) m_cookiePtr[m_cookieLen] = '\0'; //m_cookieBuf[m_cookieBufLen] = '\0'; // convert every '&' in cookie to a \0 for parsing the fields // for ( long j = 0 ; j < m_cookieBufLen ; j++ ) // if ( m_cookieBuf[j] == '&' ) m_cookieBuf[j] = '\0'; // mark it as cgi if it has a ? bool isCgi = ( req [ i ] == '?' ) ; // reset m_filename length to exclude the ?* stuff if ( isCgi ) { // skip over the '?' i++; // find a space the delmits end of cgi long j; for ( j = i; j < reqLen; j++) if (is_wspace_a(req[j])) break; // now add it if ( ! addCgi ( &req[i] , j-i ) ) return false; // update i i = j; } // . set path ptrs // . the whole /cgi/14.cgi?coll=xxx&..... thang m_path = req + filenameStart; m_plen = i - filenameStart; // we're local if hostname is 192.168.[0|1].y //if ( strncmp(iptoa(sock->m_ip),"192.168.1.",10) == 0) { // m_isAdmin = true; m_isLocal = true; } //if ( strncmp(iptoa(sock->m_ip),"192.168.0.",10) == 0) { // m_isAdmin = true; m_isLocal = true; } //if(strncmp(iptoa(sock->m_ip),"192.168.1.",10) == 0) m_isLocal = true; //if(strncmp(iptoa(sock->m_ip),"192.168.0.",10) == 0) m_isLocal = true; if ( sock && strncmp(iptoa(sock->m_ip),"192.168.",8) == 0) m_isLocal = true; if ( sock && strncmp(iptoa(sock->m_ip),"10.",3) == 0) m_isLocal = true; // steve cook's comcast at home: // if ( sock && strncmp(iptoa(sock->m_ip),"68.35.100.143",13) == 0) // m_isLocal = true; // procog's ip // if ( sock && strncmp(iptoa(sock->m_ip),"216.168.36.21",13) == 0) // m_isLocal = true; // roadrunner ip // if ( sock && strncmp(iptoa(sock->m_ip),"66.162.42.131",13) == 0) // m_isLocal = true; // cnsp ip //if ( sock && strncmp(iptoa(sock->m_ip),"67.130.216.27",13) == 0) // m_isLocal = true; // emily parker //if ( sock && strncmp(iptoa(sock->m_ip),"69.92.68.202",12) == 0) //m_isLocal = true; // 127.0.0.1 if ( sock && sock->m_ip == 16777343 ) m_isLocal = true; // steve cook's webserver //if ( sock && strncmp(iptoa(sock->m_ip),"216.168.36.21",13) == 0) // m_isLocal = true; // . also if we're coming from lenny at my house consider it local // . this is a security risk, however... TODO: FIX!!! //if ( sock->m_ip == atoip ("68.35.105.199" , 13 ) ) m_isAdmin = true; // . TODO: now add any cgi data from a POST..... // . look after the mime //char *d = NULL; // check for body if it was a POST request //if ( m_requestType == 2 ) d = strstr ( req , "\r\n\r\n" ); // now put d's char back, just in case... does it really matter? if ( d ) *d = dc; // return true now if no cgi stuff to parse if ( d ) { char *post = d + 4; long postLen = reqLen-(d+4-req) ; // post sometimes has a \r or\n after it while ( postLen > 0 && post[postLen-1]=='\r' ) postLen--; // add it to m_cgiBuf, filter and everything if ( ! addCgi ( post , postLen ) ) return false; } // sometimes i don't want to be admin //if ( getLong ( "admin" , 1 ) == 0 ) m_isAdmin = false; // success ///// // Handle Extra parms... char *ep = g_conf.m_extraParms; char *epend = g_conf.m_extraParms + g_conf.m_extraParmsLen; char *qstr = m_cgiBuf; long qlen = m_cgiBufLen; while (ep < epend){ char buf[AUTOBAN_TEXT_SIZE]; long bufLen = 0; // get next substring while (*ep && ep < epend && *ep != ' ' && *ep != '\n'){ buf[bufLen++] = *ep++; } // skip whitespace while (*ep && ep < epend && *ep == ' '){ ep++; } // null terminate buf[bufLen] = '\0'; // No match if (!bufLen || !strnstr(qstr, qlen, buf)){ // skip to end of line while (*ep && ep < epend && *ep != '\n') ep++; // skip newline while (*ep && ep < epend && *ep == '\n') ep++; // try next substr continue; } // found a match... // get parm string bufLen = 0; while (*ep && ep < epend && *ep != '\n'){ buf[bufLen++] = *ep++; } buf[bufLen] = '\0'; // skip newline while (*ep && ep < epend && *ep == '\n') ep++; logf(LOG_DEBUG, "query: appending \"%s\" to query", buf); long newSize = m_cgiBuf2Size + bufLen+1; char *newBuf = (char*)mmalloc(newSize, "extraParms"); if (!newBuf){ return log("query: unable to allocate %ld bytes " "for extraParms", newSize); } char *p = newBuf; if (m_cgiBuf2Size) { memcpy(newBuf, m_cgiBuf2, m_cgiBuf2Size); p += m_cgiBuf2Size-1; mfree(m_cgiBuf2, m_cgiBuf2Size, "extraParms"); m_cgiBuf2 = NULL; m_cgiBuf2Size = 0; } memcpy(p, buf, bufLen); m_cgiBuf2 = newBuf; m_cgiBuf2Size = newSize; p += bufLen; *p = '\0'; } // Put '\0' back into the HttpRequest buffer... if (m_cgiBuf){ // do not mangle the "ucontent"! long cgiBufLen = m_cgiBufLen; cgiBufLen -= m_ucontentLen; char *buf = m_cgiBuf; for (long i = 0; i < cgiBufLen ; i++) if (buf[i] == '&') buf[i] = '\0'; // don't decode the ucontent= field! long decodeLen = m_cgiBufLen; // so subtract that if ( m_ucontent ) decodeLen -= m_ucontentLen; // decode everything long len = urlDecode ( m_cgiBuf , m_cgiBuf , decodeLen ); // we're parsing crap after the null if the last parm // has no value //memset(m_cgiBuf+len, '\0', m_cgiBufLen-len); m_cgiBufLen = len; // ensure that is null i guess if ( ! m_ucontent ) m_cgiBuf[len] = '\0'; } if (m_cgiBuf2){ char *buf = m_cgiBuf2; for (long i = 0; i < m_cgiBuf2Size-1 ; i++) if (buf[i] == '&') buf[i] = '\0'; long len = urlDecode ( m_cgiBuf2 , m_cgiBuf2 , m_cgiBuf2Size); memset(m_cgiBuf2+len, '\0', m_cgiBuf2Size-len); } // . parse the fields after the ? in a cgi filename // . or fields in the content if it's a POST // . m_cgiBuf must be and is NULL terminated for this parseFields ( m_cgiBuf , m_cgiBufLen ); // Add extra parms to the request. if (m_cgiBuf2Size){ parseFields(m_cgiBuf2, m_cgiBuf2Size); } // urldecode the cookie buf too!! if ( m_cookiePtr ) { char *p = m_cookiePtr; for (long i = 0; i < m_cookieLen ; i++) { //if (p[i] == '&') p[i] = '\0'; // cookies are separated with ';' in the request only if (p[i] == ';') p[i] = '\0'; // a hack for the metacookie=.... // which uses &'s to separate its subcookies // this is a hack for msie's limit of 50 cookies if ( p[i] == '&' ) p[i] = '\0'; // set m_metaCookie to start of meta cookie if ( p[i] == 'm' && p[i+1] == 'e' && strncmp(p,"metacookie",10) == 0 ) m_metaCookie = p; } long len = urlDecode ( m_cookiePtr , m_cookiePtr, m_cookieLen ); // we're parsing crap after the null if the last parm // has no value memset(m_cookiePtr+len, '\0', m_cookieLen-len); m_cookieLen = len; } return true; }
// . parse an incoming request // . return false and set g_errno on error // . CAUTION: we destroy "req" by replacing it's last char with a \0 // . last char must be \n or \r for it to be a proper request anyway bool HttpRequest::set ( char *origReq , int32_t origReqLen , TcpSocket *sock ) { // reset number of cgi field terms reset(); if ( ! m_reqBuf.reserve ( origReqLen + 1 ) ) { log("http: failed to copy request: %s",mstrerror(g_errno)); return false; } // copy it to avoid mangling it m_reqBuf.safeMemcpy ( origReq , origReqLen ); // NULL term m_reqBuf.pushChar('\0'); m_reqBufValid = true; // and point to that char *req = m_reqBuf.getBufStart(); if( !req ) { log(LOG_ERROR, "http: req is NULL"); g_errno = EBADREQUEST; return false; } int32_t reqLen = m_reqBuf.length() - 1; // save this m_userIP = sock ? sock->m_ip : 0; m_isSSL = sock ? (sock->m_ssl!=NULL) : false; // TcpServer should always give us a NULL terminated request if ( req[reqLen] != '\0' ) { g_process.shutdownAbort(true); } // how long is the first line, the primary request // int32_t i; // for ( i = 0 ; i<reqLen && i<MAX_REQ_LEN && // req[i]!='\n' && req[i]!='\r'; i++); // . now fill up m_buf, used to log the request // . make sure the url was encoded correctly // . we don't want assholes encoding every char so we can't see what // url they are submitting to be spidered/indexed // . also, don't de-code encoded ' ' '+' '?' '=' '&' because that would // change the meaning of the url // . and finally, non-ascii chars that don't display correctly // . this should NULL terminate m_buf, too // . turn this off for now, just try to log a different way // m_bufLen = urlNormCode ( m_buf , MAX_REQ_LEN - 1 , req , i ); // ensure it's big enough to be a valid request if ( reqLen < 5 ) { log(LOG_WARN, "http: got reqlen %" PRId32"<5 = %s",reqLen,req); g_errno = EBADREQUEST; return false; } int32_t cmdLen = 0; // or if first line too long //if ( i >= 1024 ) { g_errno = EBADREQUEST; return false; } // get the type, must be GET or HEAD if ( strncmp ( req , "GET " , 4 ) == 0 ) { m_requestType = RT_GET; cmdLen = 3; } // these means a compressed reply was requested. use by query // compression proxies. else if ( strncmp ( req , "ZET " , 4 ) == 0 ) { m_requestType = RT_GET; cmdLen = 3; } else if ( strncmp ( req , "HEAD " , 5 ) == 0 ) { m_requestType = RT_HEAD; cmdLen = 4; } else if ( strncmp ( req , "POST " , 5 ) == 0 ) { m_requestType = RT_POST; cmdLen = 4; } else if ( strncmp ( req , "CONNECT " , 8 ) == 0 ) { // take this out until it stops losing descriptors and works //m_requestType = RT_CONNECT; //cmdLen = 7; // we no longer insert section info. emmanuel gets section // info when injecting a doc now i think in PageInject.cpp. // we do not proxy https requests because we can't // decrypt the page contents to cache them or to insert // the sectiondb voting markup, so it's kinda pointless... // and i'm not aiming to be a full-fledge squid proxy. log("http: CONNECT request not supported because we " "can't insert section markup and we can't cache: %s",req); g_errno = EBADREQUEST; return false; } else { log("http: got bad request cmd: %s",req); g_errno = EBADREQUEST; return false; } // . NULL terminate the request (a destructive operation!) // . this removes the last \n in the trailing \r\n // . shit, but it f***s up POST requests if ( m_requestType != RT_POST ) { req [ reqLen - 1 ] = '\0'; reqLen--; } // POST requests can be absolutely huge if you are injecting a 100MB // file, so limit our strstrs to the end of the mime char *d = NULL; char dc; // check for body if it was a POST request if ( m_requestType == RT_POST ) { d = strstr ( req , "\r\n\r\n" ); if ( d ) { dc = *d; *d = '\0'; } else log("http: Got POST request without \\r\\n\\r\\n."); } // is it a proxy request? m_isSquidProxyRequest = false; if ( strncmp ( req + cmdLen + 1, "http://" ,7) == 0 || strncmp ( req + cmdLen + 1, "https://",8) == 0 ) { m_isSquidProxyRequest = true; // set url parms for it m_squidProxiedUrl = req + cmdLen + 1; char *p = m_squidProxiedUrl + 7; if ( *p == '/' ) p++; // https:// ? // stop at whitespace or \0 for ( ; *p && ! is_wspace_a(*p) ; p++ ); // that's the length of it m_squidProxiedUrlLen = p - m_squidProxiedUrl; } else if ( m_requestType == RT_CONNECT ) { m_isSquidProxyRequest = true; // set url parms for it m_squidProxiedUrl = req + cmdLen + 1; // usually its like CONNECT diffbot.com:443 char *p = m_squidProxiedUrl; // stop at whitespace or \0 for ( ; *p && ! is_wspace_a(*p) ; p++ ); // that's the length of it m_squidProxiedUrlLen = p - m_squidProxiedUrl; } // check authentication char *auth = NULL; if ( m_isSquidProxyRequest && req ) auth = strstr(req,"Proxy-authorization: Basic "); //if ( m_isSquidProxyRequest && ! auth ) { // log("http: no auth in proxy request %s",req); // g_errno = EBADREQUEST; // return false; //} SafeBuf tmp; if ( auth ) { // find end of it char *p = auth; for ( ; *p && *p != '\r' && *p != '\n' ; p++ ); tmp.base64Decode ( auth , p - auth ); } // assume incorrect username/password bool matched = false; if ( m_isSquidProxyRequest ) { // now try to match in g_conf.m_proxyAuth safebuf of // username:password space-separated list char *p = g_conf.m_proxyAuth.getBufStart(); // loop over those for ( ; p && *p ; ) { // skip initial white space for ( ; *p && is_wspace_a(*p); p++ ); // skip to end of username:password thing char *end = p; for ( ; *end && !is_wspace_a(*end); end++); // save char *start = p; // advance p = end; // this is always a match if ( end-start == 3 && strncmp(start,"*:*",3) == 0 ) { matched = true; break; } // compare now if ( tmp.length() != end-start ) continue; if ( strncmp(tmp.getBufStart(),start,end-start) != 0 ) continue; // we got a match matched = true; break; } } // incorrect username:passwrod? if ( m_isSquidProxyRequest && ! matched ) { log("http: bad username:password in proxy request %s",req); g_errno = EPERMDENIED; return false; } // if proxy request to download a url through us, we are done if ( m_isSquidProxyRequest ) return true; bool multipart = false; if ( m_requestType == 2 ) { // is POST? char *cd ; cd = gb_strcasestr(req,"Content-Type: multipart/form-data"); if ( cd ) multipart = true; } // . point to the file path // . skip over the "GET " int32_t filenameStart = 4 ; // skip over extra char if it's a "HEAD " request if ( m_requestType == RT_HEAD || m_requestType == RT_POST ) filenameStart++; // are we a redirect? int32_t i = filenameStart; m_redirLen = 0; if ( strncmp ( &req[i] , "/?redir=" , 8 ) == 0 ) { for ( int32_t k = i+8; k<reqLen && m_redirLen<126 ; k++) { if ( req[k] == '\r' ) break; if ( req[k] == '\n' ) break; if ( req[k] == '\t' ) break; if ( req[k] == ' ' ) break; m_redir[m_redirLen++] = req[k]; } } m_redir[m_redirLen] = '\0'; // find a \n space \r or ? that delimits the filename for ( i = filenameStart ; i < reqLen ; i++ ) { if ( is_wspace_a ( req [ i ] ) ) break; if ( req [ i ] == '?' ) break; } // now calc the filename length m_filenameLen = i - filenameStart; // return false and set g_errno if it's 0 if ( m_filenameLen <= 0 ) { log("http: got filenameLen<=0: %s",req); g_errno = EBADREQUEST; return false; } // . bitch if too big // . leave room for strcatting "index.html" below if ( m_filenameLen >= MAX_HTTP_FILENAME_LEN - 10 ) { log("http: got filenameLen>=max"); g_errno = EBADREQUEST; return false; } // . decode the filename into m_filename and reassign it's length // . decode %2F to / , etc... m_filenameLen = urlDecode(m_filename,req+filenameStart,m_filenameLen); // NULL terminate m_filename m_filename [ m_filenameLen ] = '\0'; // does it have a file extension AFTER the last / in the filename? bool hasExtension = false; for ( int32_t j = m_filenameLen-1 ; j >= 0 ; j-- ) { if ( m_filename[j] == '.' ) { hasExtension = true; break; } if ( m_filename[j] == '/' ) break; } // if it has no file extension append a /index.html if ( ! hasExtension && m_filename [ m_filenameLen - 1 ] == '/' ) { strcat ( m_filename , "index.html" ); m_filenameLen = strlen ( m_filename ); } // . uses the TcpSocket::m_readBuf // . if *p was ? then keep going m_origUrlRequest = origReq + filenameStart; char *p = origReq + m_filenameLen; for ( ; *p && ! is_wspace_a(*p) ; p++ ); m_origUrlRequestLen = p - m_origUrlRequest; // set file offset/size defaults m_fileOffset = 0; // -1 means ALL the file from m_fileOffset onwards m_fileSize = -1; // "e" points to where the range actually starts, if any //char *e; // . TODO: speed up by doing one strstr for Range: and maybe range: // . do they have a Range: 0-100\n in the mime denoting a partial get? //char *s = strstr ( req ,"Range:bytes=" ); //e = s + 12; // try alternate formats //if ( ! s ) { s = strstr ( req ,"Range: bytes=" ); e = s + 13; } //if ( ! s ) { s = strstr ( req ,"Range: " ); e = s + 7; } // parse out the range if we got one //if ( s ) { // int32_t x = 0; // sscanf ( e ,"%" PRId32"-%" PRId32 , &m_fileOffset , &x ); // // get all file if range's 2nd number is non-existant // if ( x == 0 ) m_fileSize = -1; // else m_fileSize = x - m_fileOffset; // // ensure legitimacy // if ( m_fileOffset < 0 ) m_fileOffset = 0; //} // reset our hostname m_hostLen = 0; // assume request is NOT from local network //m_isMasterAdmin = false; m_isLocal = false; // get the virtual hostname they want to use char *s = strstr ( req ,"Host:" ); // try alternate formats if ( ! s ) s = strstr ( req , "host:" ); // must be on its own line, otherwise it's not valid if ( s && s > req && *(s-1) !='\n' ) s = NULL; // parse out the host if we got one if ( s ) { // skip field name, host: s += 5; // skip e to beginning of the host name after "host:" while ( *s==' ' || *s=='\t' ) s++; // find end of the host name char *end = s; while ( *end && !is_wspace_a(*end) ) end++; // . now *end should be \0, \n, \r, ' ', ... // . get host len m_hostLen = end - s; // truncate if too big if ( m_hostLen >= 255 ) m_hostLen = 254; // copy into hostname gbmemcpy ( m_host , s , m_hostLen ); } // NULL terminate it m_host [ m_hostLen ] = '\0'; // get Referer: field s = strstr ( req ,"Referer:" ); // find another if ( ! s ) s = strstr ( req ,"referer:" ); // must be on its own line, otherwise it's not valid if ( s && s > req && *(s-1) !='\n' ) s = NULL; // assume no referer m_refLen = 0; // parse out the referer if we got one if ( s ) { // skip field name, referer: s += 8; // skip e to beginning of the host name after ':' while ( *s==' ' || *s=='\t' ) s++; // find end of the host name char *end = s; while ( *end && !is_wspace_a(*end) ) end++; // . now *end should be \0, \n, \r, ' ', ... // . get len m_refLen = end - s; // truncate if too big if ( m_refLen >= 255 ) m_refLen = 254; // copy into m_ref gbmemcpy ( m_ref , s , m_refLen ); } // NULL terminate it m_ref [ m_refLen ] = '\0'; // get User-Agent: field s = strstr ( req ,"User-Agent:" ); // find another if ( ! s ) s = strstr ( req ,"user-agent:" ); // must be on its own line, otherwise it's not valid if ( s && s > req && *(s-1) !='\n' ) s = NULL; // assume empty int32_t len = 0; // parse out the referer if we got one if ( s ) { // skip field name, referer: s += 11; // skip e to beginning of the host name after ':' while ( *s==' ' || *s=='\t' ) s++; // find end of the agent name char *end = s; while ( *end && *end!='\n' && *end!='\r' ) end++; // . now *end should be \0, \n, \r, ' ', ... // . get agent len len = end - s; // truncate if too big if ( len > 127 ) len = 127; // copy into m_userAgent gbmemcpy ( m_userAgent , s , len ); } // NULL terminate it m_userAgent [ len ] = '\0'; // get Cookie: field s = strstr ( req, "Cookie:" ); // find another if ( !s ) s = strstr ( req, "cookie:" ); // must be on its own line, otherwise it's not valid if ( s && s > req && *(s-1) != '\n' ) s = NULL; // assume empty // m_cookieBufLen = 0; m_cookiePtr = s; // parse out the cookie if we got one if ( s ) { // skip field name, Cookie: s += 7; // skip s to beginning of cookie after ':' while ( *s == ' ' || *s == '\t' ) s++; // find end of the cookie char *end = s; while ( *end && *end != '\n' && *end != '\r' ) end++; // save length m_cookieLen = end - m_cookiePtr; // get cookie len //m_cookieBufLen = end - s; // trunc if too big //if (m_cookieBufLen > 1023) m_cookieBufLen = 1023; // copy into m_cookieBuf //gbmemcpy(m_cookieBuf, s, m_cookieBufLen); } // NULL terminate it if ( m_cookiePtr ) m_cookiePtr[m_cookieLen] = '\0'; //m_cookieBuf[m_cookieBufLen] = '\0'; // convert every '&' in cookie to a \0 for parsing the fields // for ( int32_t j = 0 ; j < m_cookieBufLen ; j++ ) // if ( m_cookieBuf[j] == '&' ) m_cookieBuf[j] = '\0'; // mark it as cgi if it has a ? bool isCgi = ( req [ i ] == '?' ) ; // reset m_filename length to exclude the ?* stuff if ( isCgi ) { // skip over the '?' i++; // find a space the delmits end of cgi int32_t j; for ( j = i; j < reqLen; j++) if (is_wspace_a(req[j])) break; // now add it if ( ! addCgi ( &req[i] , j-i ) ) return false; // update i i = j; } // . set path ptrs // . the whole /cgi/14.cgi?coll=xxx&..... thang m_path = req + filenameStart; m_plen = i - filenameStart; // we're local if hostname is 192.168.[0|1].y //if ( strncmp(iptoa(sock->m_ip),"192.168.1.",10) == 0) { // m_isMasterAdmin = true; m_isLocal = true; } //if ( strncmp(iptoa(sock->m_ip),"192.168.0.",10) == 0) { // m_isMasterAdmin = true; m_isLocal = true; } //if(strncmp(iptoa(sock->m_ip),"192.168.1.",10) == 0) m_isLocal = true; //if(strncmp(iptoa(sock->m_ip),"192.168.0.",10) == 0) m_isLocal = true; if ( sock && strncmp(iptoa(sock->m_ip),"192.168.",8) == 0) m_isLocal = true; if ( sock && strncmp(iptoa(sock->m_ip),"10.",3) == 0) m_isLocal = true; // gotta scan all ips in hosts.conf as well... // if we are coming from any of our own hosts.conf c blocks // consider ourselves local uint32_t last = 0; for ( int32_t i = 0 ; i < g_hostdb.getNumHosts() ; i++ ) { Host *h = g_hostdb.getHost(i); // save time with this check if ( h->m_ip == last ) continue; // update it last = h->m_ip; // returns number of top bytes in comon int32_t nt = sock ? ipCmp ( sock->m_ip , h->m_ip ) : 0; // at least be in the same c-block as a host in hosts.conf if ( nt < 3 ) continue; m_isLocal = true; break; } // connectips/adminips // for ( int32_t i = 0 ; i < g_conf.m_numConnectIps ; i++ ) { // if ( sock->m_ip != g_conf.m_connectIps[i] ) continue; // m_isLocal = true; // break; // } // 127.0.0.1 if ( sock && sock->m_ip == 16777343 ) m_isLocal = true; // . TODO: now add any cgi data from a POST..... // . look after the mime //char *d = NULL; // check for body if it was a POST request //if ( m_requestType == RT_POST ) d = strstr ( req , "\r\n\r\n" ); // return true now if no cgi stuff to parse if ( d ) { // now put d's char back, just in case... does it really matter? *d = dc; char *post = d + 4; int32_t postLen = reqLen-(d+4-req) ; // post sometimes has a \r or\n after it while ( postLen > 0 && post[postLen-1]=='\r' ) postLen--; // add it to m_cgiBuf, filter and everything if ( ! addCgi ( post , postLen ) ) return false; } // Put '\0' back into the HttpRequest buffer... // crap, not if we are multi-part unencoded stuff... if ( m_cgiBuf && ! multipart ) { // do not mangle the "ucontent"! int32_t cgiBufLen = m_cgiBufLen; cgiBufLen -= m_ucontentLen; char *buf = m_cgiBuf; for (int32_t i = 0; i < cgiBufLen ; i++) if (buf[i] == '&') buf[i] = '\0'; // don't decode the ucontent= field! int32_t decodeLen = m_cgiBufLen; // so subtract that if ( m_ucontent ) decodeLen -= m_ucontentLen; // decode everything. fixed for %00 in &content= so it // doesn't set our parms when injecting. int32_t len = urlDecodeNoZeroes(m_cgiBuf,m_cgiBuf,decodeLen); // we're parsing crap after the null if the last parm // has no value //memset(m_cgiBuf+len, '\0', m_cgiBufLen-len); m_cgiBufLen = len; // ensure that is null i guess if ( ! m_ucontent ) m_cgiBuf[len] = '\0'; } if (m_cgiBuf2){ char *buf = m_cgiBuf2; for (int32_t i = 0; i < m_cgiBuf2Size-1 ; i++) if (buf[i] == '&') buf[i] = '\0'; // decode everything. fixed for %00 in &content= so it // doesn't set our parms when injecting. int32_t len = urlDecodeNoZeroes ( m_cgiBuf2 , m_cgiBuf2 , m_cgiBuf2Size); memset(m_cgiBuf2+len, '\0', m_cgiBuf2Size-len); } // . parse the fields after the ? in a cgi filename // . or fields in the content if it's a POST // . m_cgiBuf must be and is NULL terminated for this parseFields ( m_cgiBuf , m_cgiBufLen ); // Add extra parms to the request. if (m_cgiBuf2Size){ parseFields(m_cgiBuf2, m_cgiBuf2Size); } // urldecode the cookie buf too!! if ( m_cookiePtr ) { char *p = m_cookiePtr; for (int32_t i = 0; i < m_cookieLen ; i++) { //if (p[i] == '&') p[i] = '\0'; // cookies are separated with ';' in the request only if (p[i] == ';') p[i] = '\0'; // a hack for the metacookie=.... // which uses &'s to separate its subcookies // this is a hack for msie's limit of 50 cookies if ( p[i] == '&' ) p[i] = '\0'; // set m_metaCookie to start of meta cookie if ( p[i] == 'm' && p[i+1] == 'e' && strncmp(p,"metacookie",10) == 0 ) m_metaCookie = p; } int32_t len = urlDecode ( m_cookiePtr , m_cookiePtr, m_cookieLen ); // we're parsing crap after the null if the last parm // has no value memset(m_cookiePtr+len, '\0', m_cookieLen-len); m_cookieLen = len; } return true; }