// . parse the http mime in "mime", which is "mimeLen" bytes long
// . sets m_status from the first line, then scans every line and records
//   the fields we care about: Content-Length, Last-Modified, Content-Type,
//   Set-Cookie, Location and Content-Encoding
// . "url" is handed to m_locUrl.set() when a Location: field is seen. if it
//   is NULL then m_locUrl is left reset.
// . TEMPORARILY writes a '\0' over each line terminator in "mime" but puts
//   the original character back before moving to the next line
// . returns false on bad mime
bool HttpMime::parse ( char *mime , long mimeLen , Url *url ) {
	// clear out m_locUrl
	m_locUrl.reset();
	// an empty buffer can not hold a valid complete mime
	if ( mimeLen == 0 ) return false;
	// the status is on the first line, unknown until we read it
	m_status = -1;
	char *mimeEnd = mime + mimeLen;
	// step over the HTTP/x.x token, i.e. up to the first whitespace
	char *s = mime;
	for ( ; s < mimeEnd && ! is_wspace_a(*s) ; s++ ) { }
	// and over the whitespace right after it
	for ( ; s < mimeEnd &&   is_wspace_a(*s) ; s++ ) { }
	// nothing left? then there is no status to read
	if ( s == mimeEnd ) return false;
	// what remains starts with the http status
	m_status = atol2 ( s , mimeEnd - s );
	// assume html unless a Content-Type: field is provided
	m_contentType = CT_HTML;
	// no charset yet
	m_charset    = NULL;
	m_charsetLen = 0;
	// walk the mime line by line, starting over from the very first line
	char *line = mime;
	while ( line < mimeEnd ) {
		// find the \n or \r that ends this line
		char *eol = line;
		while ( eol < mimeEnd && *eol != '\n' && *eol != '\r' ) eol++;
		// . no \n or \r before the end of the buffer is an error
		// . MIMEs must always end in \n or \r
		if ( eol >= mimeEnd ) return false;
		// length of the line, not counting its terminator
		long lineLen = eol - line;
		// . make the line a NULL-terminated string for the calls below
		// . the overwritten \n or \r is restored further down
		char saved = *eol;
		*eol = '\0';
		// pick out the fields we know about
		if ( strncasecmp ( line , "Content-Length:" ,15) == 0 ) {
			m_contentLengthPos = line + 15;
			m_contentLen = atol( m_contentLengthPos);
		}
		else if ( strncasecmp ( line , "Last-Modified:"  ,14) == 0 ) {
			m_lastModifiedDate=atotime(line+14);
			// clamp dates in the future to the current time for
			// purposes of sorting by date using datedb
			// (see Msg16.cpp)
			time_t now = time(NULL);
			if (m_lastModifiedDate > now) m_lastModifiedDate = now;
		}
		else if ( strncasecmp ( line , "Content-Type:"   ,13) == 0 ) {
			m_contentType = getContentTypePrivate ( line + 13 );
		}
		else if ( strncasecmp ( line , "Set-Cookie: "   ,11) == 0 ) {
			// only the first 11 chars are compared, so the cookie
			// starts right after the colon
			m_cookie = line + 11;
			m_cookieLen = gbstrlen ( line + 11 );
		}
		else if ( strncasecmp ( line , "Location:"       , 9) == 0 ) {
			// skip up to two spaces after the colon
			char *loc = line + 9;
			for ( int k = 0 ; k < 2 && *loc == ' ' ; k++ ) loc++;
			// at least set this for Msg13.cpp to use
			m_locationField    = loc;
			m_locationFieldLen = gbstrlen(loc);
			// . we don't add the "www." because of slashdot.com
			// . we skip initial spaces in this Url::set() routine
			if ( url )
				m_locUrl.set ( url, line + 9, lineLen - 9,
					       false/*addWWW?*/);
		}
		else if ( strncasecmp ( line , "Content-Encoding:", 17) == 0 ) {
			// only gzip and deflate are recognized, it doesn't
			// seem like servers implement the other types much
			m_contentEncodingPos = line+17;
			if ( strstr(m_contentEncodingPos, "gzip") ) {
				m_contentEncoding = ET_GZIP;
			}
			else if ( strstr(m_contentEncodingPos, "deflate") ) {
				// zlib's compression
				m_contentEncoding = ET_DEFLATE;
			}
		}
		//else if ( strncasecmp ( line, "Cookie:", 7) == 0 )
		//	log (LOG_INFO, "mime: Got Cookie = %s", (line+7));
		// put back the character we replaced with a '\0'
		*eol = saved;
		// advance to the end of this line
		line = eol;
		// and past the \r and \n cruft to the start of the next one
		while ( line < mimeEnd && ( *line=='\r' || *line=='\n' ) ) line++;
	}
	return true;
}
// . when the Conf::m_proxyIps parm is updated we call this to rebuild
//   s_iptab, our table of SpiderProxy instances, which has the proxies and 
//   their performance statistics.
// . we try to maintain stats of ip/ports that did NOT change when rebuilding.
// . g_conf.m_proxyIps is scanned as a whitespace-separated list of
//   [http://][username:password@]ip[:port] entries. the port defaults to 80.
// . entries whose ip:port key is already in s_iptab are left untouched,
//   new ones are added, and keys in s_iptab that are no longer listed are
//   removed.
// . returns false, after logging, on the first malformed entry or if a hash
//   table insert fails. in that case entries parsed before the failure have
//   already been added to s_iptab and the removal pass is NOT run.
// . TEMPORARILY writes a '\0' into the m_proxyIps buffer when logging a bad
//   entry, then restores the original character.
bool buildProxyTable ( ) {

	// scan the NEW list of proxy ip/port pairs in g_conf
	char *p = g_conf.m_proxyIps.getBufStart();

	// holds the key of every ip:port in the new list so that we can tell
	// below which entries of s_iptab the user removed
	HashTableX tmptab;
	tmptab.set(8,0,16,NULL,0,false,"tmptab");

	// scan the user inputted space-separated list of ip:ports
	// (optional username:password@ip:port)
	// NOTE(review): guard against a NULL buffer start; treat it like an
	// empty list. confirm whether getBufStart() can return NULL here.
	for ( ; p && *p ; ) {
		// skip white space
		if ( is_wspace_a(*p) ) { p++; continue; }

		// skip http://
		if ( strncasecmp(p,"http://",7) == 0 ) { p += 7; continue; }

		// scan in an ip:port
		char *s = p; char *portStr = NULL;
		// counts of dots, colons (ports), digits and bad chars
		int32_t dc = 0, pc = 0, gc = 0, bc = 0;
		const char *msg;

		char *usernamePwd = NULL;
		int32_t usernamePwdLen = 0;
		char *ipStart = p;

		// scan all characters until we hit \0 or another whitespace
		for ( ; *s && !is_wspace_a(*s); s++) {

			if ( *s == '@' ) {
				// must be username:pwd, so exactly one colon
				// must have been seen so far
				if ( pc != 1 ) {
					msg = "bad username:password";
					goto hadError;
				}
				usernamePwd = p;
				usernamePwdLen = s - p;
				if ( usernamePwdLen >= MAXUSERNAMEPWD-2 ) {
					msg = "username:password too long";
					goto hadError;
				}
				// the counts so far described the credentials,
				// start over for the ip:port that follows
				dc = 0;
				gc = 0;
				bc = 0;
				pc = 0;
				portStr = NULL;
				ipStart = s+1;
				continue;
			}

			if ( *s == '.' ) { dc++; continue; }
			if ( *s == ':' ) { portStr=s; pc++; continue; }
			if ( is_digit(*s) ) { gc++; continue; }
			bc++;
			continue;
		}
		// ensure it is a legit ip:port combo. if several checks fail
		// the last failing one determines the logged message.
		msg = NULL;
		if ( gc < 4 ) 
			msg = "not enough digits for an ip";
		if ( pc > 1 )
			msg = "too many colons";
		if ( dc != 3 )
			msg = "need 3 dots for an ip address";
		if ( bc )
			msg = "got illegal char in ip:port listing";
		if ( msg ) {
		hadError:
			// terminate at the end of the scanned part so that
			// only this entry is logged, then restore the buffer
			char c = *s;
			*s = '\0';
			log("buf: %s for %s",msg,p);
			*s = c;
			return false;
		}

		// convert it. the ip ends at the port colon if there is one.
		int32_t iplen = s - ipStart;
		if ( portStr ) iplen = portStr - ipStart;
		int32_t ip = atoip(ipStart,iplen);
		// another sanity check
		if ( ip == 0 || ip == -1 ) {
			// log just this entry, not the rest of the buffer
			char c = *s;
			*s = '\0';
			log("spider: got bad proxy ip for %s",p);
			*s = c;
			return false;
		}

		// and the port default is 80
		int32_t port = 80;
		if ( portStr ) port = atol2(portStr+1,s-portStr-1);
		if ( port < 0 || port > 65535 ) {
			// log just this entry, not the rest of the buffer
			char c = *s;
			*s = '\0';
			log("spider: got bad proxy port for %s",p);
			*s = c;
			return false;
		}


		// . we got a legit ip:port
		// . see if already in our table
		// . key is the ip shifted up 16 bits with the port below it
		uint64_t ipKey = (uint32_t)ip;
		ipKey <<= 16;
		ipKey |= (uint16_t)(port & 0xffff);

		// . also store into tmptable to see what we need to remove
		// . if this fails we must bail, otherwise the removal pass
		//   below would purge a proxy that is still in the list
		if ( ! tmptab.addKey(&ipKey) )
			return false;

		// see if in table
		int32_t islot = s_iptab.getSlot( &ipKey);

		// advance p
		p = s;

		// if in there, keep it as is
		if ( islot >= 0 ) continue;

		// otherwise add new entry
		SpiderProxy newThing;
		memset ( &newThing , 0 , sizeof(SpiderProxy));
		newThing.m_ip = ip;
		newThing.m_port = port;
		newThing.m_lastDownloadTookMS = -1;
		newThing.m_lastSuccessfulTestMS = -1;

		// usernamePwd is NULL when no credentials were given; do not
		// hand a NULL source to the copy even for a zero length
		if ( usernamePwd )
			gbmemcpy(newThing.m_usernamePwd,
				 usernamePwd,usernamePwdLen);
		// ensure it is NULL terminated
		newThing.m_usernamePwd[usernamePwdLen] = '\0';

		if ( ! s_iptab.addKey ( &ipKey, &newThing ) )
			return false;
	}		

 redo:
	int32_t removed = 0;
	// scan all SpiderProxies in s_iptab
	for ( int32_t i = 0 ; i < s_iptab.getNumSlots() ; i++ ) {
		// skip empty buckets in hashtable s_iptab
		if ( ! s_iptab.m_flags[i] ) continue;
		// get the key
		int64_t key = *(int64_t *)s_iptab.getKeyFromSlot(i);
		// must also exist in tmptab, otherwise it got removed by user
		if ( tmptab.isInTable ( &key ) ) continue;
		// skip if not in table
		if ( s_iptab.getSlot ( &key ) < 0 ) {
			log("sproxy: iptable hashing messed up");
			continue;
		}
		// shoot, it got removed. not in the new list of ip:ports
		s_iptab.removeKey ( &key );
		removed++;
		// hashtable is messed up now, start over
		//goto redo;
	}
	// removing keys may have disturbed the slots we already scanned, so
	// rescan until a full pass removes nothing
	if ( removed ) goto redo;
	return true;
}
// . parse the http mime in "mime", which is "mimeLen" bytes long
// . reads m_status from the first line, resets the content type to CT_HTML
//   and the charset to none
// . calls getNextLine() once to step past the first line, then for every
//   further line for which getField() succeeds it tries the parse*()
//   routines in order until one of them returns true
// . "url" is only passed through to parseLocation()
// . returns false on bad mime
bool HttpMime::parse(char *mime, int32_t mimeLen, Url *url) {
#ifdef _VALGRIND_
	VALGRIND_CHECK_MEM_IS_DEFINED(mime,mimeLen);
#endif
	// clear out m_locUrl
	m_locUrl.reset();

	// an empty buffer can not hold a valid complete mime
	if (mimeLen == 0) return false;

	// the status is on the first line, unknown until we read it
	m_status = -1;

	char *const mimeEnd = mime + mimeLen;
	char *cursor = mime;

	// step over the HTTP/x.x token, i.e. up to the first whitespace
	for (; cursor < mimeEnd && !is_wspace_a(*cursor); ++cursor) {
	}

	// and over the whitespace right after it
	for (; cursor < mimeEnd && is_wspace_a(*cursor); ++cursor) {
	}

	// nothing left? then there is no status to read
	if (cursor == mimeEnd) return false;

	// what remains starts with the http status
	m_status = atol2(cursor, mimeEnd - cursor);

	// assume html unless a Content-Type: field is provided
	m_contentType = CT_HTML;

	// no charset yet
	m_charset = NULL;
	m_charsetLen = 0;

	// the first line was handled above, step past it
	getNextLine();

	while (getNextLine()) {
		const char *field = NULL;
		size_t fieldLen = 0;

		// nothing to do for a line we can not get a field from
		if (!getField(&field, &fieldLen)) {
			continue;
		}

		// try each parser in turn, short-circuiting on the first one
		// that returns true
		const bool handled =
			parseContentEncoding(field, fieldLen) ||
			parseContentLength(field, fieldLen) ||
			parseContentType(field, fieldLen) ||
			parseLocation(field, fieldLen, url) ||
			parseSetCookie(field, fieldLen);
		// add parsing of other header here (extend the chain above)

		// fields that no parser handled are simply ignored
		(void)handled;
	}

	return true;
}