// init our rdb
bool Titledb::init ( ) {

	// key sanity tests
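	// makeKey() packs the docId and the low 48 bits of the url hash
	// into a single 96-bit titledb key; getDocId() and getUrlHash48()
	// must invert that packing exactly, so abort on any mismatch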
	int64_t uh48  = 0x1234567887654321LL & 0x0000ffffffffffffLL;
	int64_t docId = 123456789;
	key96_t k = makeKey(docId,uh48,false);
	if ( getDocId(&k) != docId ) { g_process.shutdownAbort(true);}
	if ( getUrlHash48(&k) != uh48 ) { g_process.shutdownAbort(true);}

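	// parser consistency check: the full Url parser and the fast
	// getDomFast() routine must agree on the domain, even for a
	// malformed host that begins with a dot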
	const char *url = "http://.ezinemark.com/long-island-child-custody-attorneys-new-york-visitation-lawyers-melville-legal-custody-law-firm-45f00bbed18.html";
	Url uu;
	uu.set(url);
	const char *d1 = uu.getDomain();
	int32_t  dlen1 = uu.getDomainLen();
	int32_t dlen2 = 0;
	const char *d2 = getDomFast ( url , &dlen2 );
	if ( !d1 || !d2 ) { g_process.shutdownAbort(true); }
	if ( dlen1 != dlen2 ) { g_process.shutdownAbort(true); }

	// another one: a bare host with no recognized TLD, so neither
	// parser should find a domain at all
	url = "http://ok/";
	uu.set(url);
	const char *d1a = uu.getDomain();
	dlen1 = uu.getDomainLen();
	dlen2 = 0;
	const char *d2a = getDomFast ( url , &dlen2 );
	if ( d1a || d2a ) { g_process.shutdownAbort(true); }
	if ( dlen1 != dlen2 ) { g_process.shutdownAbort(true); }

	// . what's the max # of tree nodes?
	// . assuming the avg TitleRec (a compressed html doc) is about 1k,
	//   we get one node per 1k of tree memory
	// . NOTE: overhead is about 32 bytes per node
	int32_t maxTreeNodes  = g_conf.m_titledbMaxTreeMem / (1*1024);
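	// e.g. 128MB of tree mem yields 131072 nodes (128*1024*1024 / 1024)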

	// initialize our own internal rdb
	return m_rdb.init ( "titledb"                   ,
			    -1                          , // fixed record size
			    // min files to merge (was
			    // g_conf.m_titledbMinFilesToMerge);
			    // this should not really be changed...
			    -1                          ,
			    g_conf.m_titledbMaxTreeMem  ,
			    maxTreeNodes                ,
			    false                       , // half keys?
			    12                          , // key size
			    false                       , // isCollectionLess
			    false                       );// useIndexFile

	// validate
	//return verify ( );
}
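
// A minimal startup sketch (illustration only; assumes the global
// g_titledb instance referenced further below in main()):
//
//   if ( ! g_titledb.init() )
//       return log("db: titledb init failed.");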
int main ( int argc , char *argv[] ) {
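	// interactive url-inspection tool: reads urls from stdin, parses
	// each one, and prints its components and hashes; the -w flag
	// disables adding "www." and -s disables stripping session ids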
	bool addWWW = true;
	bool stripSession = true;
	// check for arguments
	for (int32_t i = 1; i < argc; i++) {
		if (strcmp(argv[i], "-w") == 0)
			addWWW = false;
		else if (strcmp(argv[i], "-s") == 0)
			stripSession = false;
	}
	// initialize
	//g_mem.init(100*1024);
	hashinit();
	//g_conf.m_tfndbExtBits = 23;
 loop:
	// read a url from stdin
	char sbuf[1024];
	if ( ! fgets ( sbuf , 1024 , stdin ) ) exit(1);
	char *s = sbuf;
	char fbuf[1024];
	// decode a url-encoded "http://" or "https://" prefix if present
	if ( strncmp(s,"http%3A%2F%2F" ,13) == 0 ||
	     strncmp(s,"https%3A%2F%2F",14) == 0 ) {
		urlDecode(fbuf,s,gbstrlen(s));
		s = fbuf;
	}
	// old url
	printf("###############\n");
	printf("old: %s",s);
	int32_t slen = gbstrlen(s);
	// remove any www. if !addWWW
	if (!addWWW) {
		if (slen >= 4 &&
		    strncasecmp(s, "www.", 4) == 0) {
			slen -= 4;
			memmove(s, &s[4], slen);
		}
		else {
			// get past a ://
			int32_t si = 0;
			while (si < slen &&
			       ( s[si] != ':' ||
				 s[si+1] != '/' ||
				 s[si+2] != '/' ) )
				si++;
			// remove the www.
			if (si + 7 < slen) {
				si += 3;
				if (strncasecmp(&s[si], "www.", 4) == 0) {
					slen -= 4;
					memmove(&s[si], &s[si+4], slen-si);
				}
			}
		}
	}
	// set it
	Url u;
	u.set ( s , slen ,
		addWWW   ,      /*add www?*/
		stripSession ); /*strip session ids?*/
	// print it
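	// the component getters return pointers into the url buffer that
	// are NOT NUL-terminated, so copy each piece into "out", terminate,
	// print, then restore the clobbered byte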
	char out[1024*4];
	char *p = out;
	p += sprintf(p,"tld: ");
	gbmemcpy ( p, u.getTLD(),u.getTLDLen());
	p += u.getTLDLen();
	char c = *p;
	*p = '\0';
	printf("%s\n",out);
	*p = c;

	// dom
	p = out;
	sprintf ( p , "dom: ");
	p += gbstrlen ( p );
	gbmemcpy ( p , u.getDomain() , u.getDomainLen() );
	p += u.getDomainLen();
	c = *p;
	*p = '\0';
	printf("%s\n",out);
	*p = c;
	// host
	p = out;
	sprintf ( p , "host: ");
	p += gbstrlen ( p );
	gbmemcpy ( p , u.getHost() , u.getHostLen() );
	p += u.getHostLen();
	c = *p;
	*p = '\0';
	printf("%s\n",out);
	*p = c;
	// then the whole url
	printf("url: %s\n", u.getUrl() );

	/*
	int32_t  siteLen;
	char *site = u.getSite ( &siteLen , NULL , false );
	if ( site ) {
		c = site[siteLen];
		site[siteLen] = '\0';
	}
	printf("site: %s\n", site );
	if ( site ) site[siteLen] = c;
	*/
	SiteGetter sg;
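	// no tag rec, collection, state or callback is supplied, so this
	// lookup is expected to complete synchronously and derive the site
	// from the url alone (an assumption about SiteGetter's behavior)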
	sg.getSite ( u.getUrl() ,
		     NULL , // tagrec
		     0 , // timestamp
		     NULL, // coll
		     0 , // niceness
		     //false , // addtags
		     NULL , // state
		     NULL ); // callback
	if ( sg.m_siteLen )
		printf("site: %s\n",sg.m_site);

	printf("isRoot: %"INT32"\n",(int32_t)u.isRoot());

	/*
	bool perm = ::isPermalink ( NULL        , // coll
				    NULL        , // Links ptr
				    &u          , // the url
				    CT_HTML     , // contentType
				    NULL        , // LinkInfo ptr
				    false       );// isRSS?
	printf ("isPermalink: %"INT32"\n",(int32_t)perm);
	*/

	// print the path too
	p = out;
	p += sprintf ( p , "path: " );
	gbmemcpy ( p , u.getPath(), u.getPathLen() );
	p += u.getPathLen();

	// append the filename if there is one
	if ( u.getFilename() ) {
		p += sprintf ( p , "\nfilename: " );
		gbmemcpy ( p , u.getFilename(), u.getFilenameLen() );
		p += u.getFilenameLen();
	}
	// terminate and print even when there is no filename
	*p = '\0';
	printf("%s\n", out );

	// encoded
	char dst[MAX_URL_LEN+200];
	urlEncode ( dst,MAX_URL_LEN+100,
				u.getUrl(), u.getUrlLen(), 
				false ); // are we encoding a request path?
	printf("encoded: %s\n",dst);

	// the probable docid
	int64_t pd = g_titledb.getProbableDocId(&u);
	printf("pdocid: %"UINT64"\n", pd );
	printf("dom8: 0x%"XINT32"\n", (int32_t)g_titledb.getDomHash8FromDocId(pd) );
	//printf("ext23: 0x%"XINT32"\n",g_tfndb.makeExt(&u));
	if ( u.isLinkLoop() ) printf("islinkloop: yes\n");
	else                  printf("islinkloop: no\n");
	int64_t hh64 = u.getHostHash64();
	printf("hosthash64: 0x%016"XINT64"\n",hh64);
	uint32_t hh32 = u.getHostHash32();
	printf("hosthash32: 0x%08"XINT32" (%"UINT32")\n",hh32,hh32);
	int64_t dh64 = u.getDomainHash64();
	printf("domhash64: 0x%016"XINT64"\n",dh64);
	int64_t uh64 = u.getUrlHash64();
	printf("urlhash64: 0x%016"XINT64"\n",uh64);
	//if(isUrlUnregulated(NULL ,0,&u)) printf("unregulated: yes\n");
	//else                            printf("unregulated: no\n");
	goto loop;
}