Esempio n. 1
0
/*
 * Adds one word to the global pagewords table.
 *
 * word  NUL-terminated word. It is lowercased IN PLACE before being hashed
 *       with crc32boitho().
 * puf   markup context of the word. It selects the offset added to the
 *       running position counter: 1000 for plain text, 100 for title text
 *       and 500 for any of h1..h6.
 *
 * Once pagewords.nr has reached maxWordForPage the word is silently dropped.
 */
void wordsAdd(char word[],enum parsed_unit_flag puf) {
	int i;
	int wordlLength;
	/* Start from 0 so a flag value without a case label below yields a
	   defined offset instead of an uninitialised read. */
	int wordTypeadd = 0;

	if (pagewords.nr >= maxWordForPage) {
		//printf("To many words in dokument\n");
		return;
	}

	switch (puf)
	{
		case puf_none:
			wordTypeadd = 1000;
			break;
		case puf_title:
			wordTypeadd = 100;
			break;
		case puf_h1:
		case puf_h2:
		case puf_h3:
		case puf_h4:
		case puf_h5:
		case puf_h6:
			wordTypeadd = 500;
			break;
		default:
			break;
	}

	wordlLength = strlen(word);

	/* Convert to lower case. The cast matters: tolower() is only defined for
	   values representable as unsigned char (or EOF), and bytes above 0x7f
	   are negative when plain char is signed. */
	for (i = 0; i < wordlLength; i++) {
		word[i] = (char)tolower((unsigned char)word[i]);
	}

	#ifdef DEBUG_ADULT
		/* NOTE(review): unbounded copy; the size of .word is not visible here. */
		strcpy(pagewords.words[pagewords.nr].word,word);
	#endif
	pagewords.words[pagewords.nr].WordID = crc32boitho(word);
	pagewords.words[pagewords.nr].position = (pagewords.nextPosition + wordTypeadd);

	++pagewords.nextPosition;
	++pagewords.nr;
}
Esempio n. 2
0
/*
 * Adds one word to the global pagewords table.
 *
 * word  NUL-terminated word. It is lowercased IN PLACE before being hashed
 *       with crc32boitho().
 *
 * The stored position is the current pagewords.nextPosition, which is then
 * advanced. Once pagewords.nr has reached maxWordForPage the word is
 * silently dropped.
 */
void wordsAdd(char word[]) {
	int i;
	int wordlLength;

	if (pagewords.nr >= maxWordForPage) {
		//printf("To many words in dokument\n");
		return;
	}

	wordlLength = strlen(word);

	/* Convert to lower case. The cast matters: tolower() is only defined for
	   values representable as unsigned char (or EOF), and bytes above 0x7f
	   are negative when plain char is signed. */
	for (i = 0; i < wordlLength; i++) {
		word[i] = (char)tolower((unsigned char)word[i]);
	}

	#ifdef DEBUG_ADULT
		/* NOTE(review): unbounded copy; the size of .word is not visible here. */
		strcpy(pagewords.words[pagewords.nr].word,word);
	#endif
	pagewords.words[pagewords.nr].WordID = crc32boitho(word);
	pagewords.words[pagewords.nr].position = pagewords.nextPosition;

	++pagewords.nextPosition;
	++pagewords.nr;
}
/*
 * Appends the hash of one attribute word to attrib.
 *
 * attrib  destination table; attribnr is the number of entries already used.
 * word    NUL-terminated word; lowercased IN PLACE by convert_to_lowercase()
 *         before it is hashed with crc32boitho().
 *
 * The stored position is always 0. When the table is full the word is
 * dropped (a message is printed in DEBUG builds only).
 */
void attribadd(struct IndexerRes_attrib *attrib, char word[]) {

	convert_to_lowercase((unsigned char *)word);

	#ifdef DEBUG
	printf("attribadd: got \"%s\"\n",word);
	#endif

	/* ">=" rather than ">": with ">" the entry at index maxAttribForPage was
	   still written. Assumes attrib[] holds maxAttribForPage entries (its
	   declaration is not visible here) - TODO confirm. */
	if (attrib->attribnr >= maxAttribForPage) {
		#ifdef DEBUG
			printf("more than maxAttribForPage words\n");
		#endif
		return;
	}

	attrib->attrib[attrib->attribnr].WordID = crc32boitho(word);
	attrib->attrib[attrib->attribnr].position = 0;

	++attrib->attribnr;
}
/*
 * Appends the hash of one ACL word to acl.
 *
 * acl   destination table; aclnr is the number of entries already used.
 * word  NUL-terminated word; lowercased IN PLACE by convert_to_lowercase()
 *       before it is hashed with crc32boitho().
 *
 * The stored position is always 0. When the table is full the word is
 * dropped (a message is printed in DEBUG builds only).
 */
void acladd(struct IndexerRes_acls *acl, char word[]) {

	convert_to_lowercase((unsigned char *)word);

	#ifdef DEBUG
	printf("acladd: got \"%s\"\n",word);
	#endif

	/* ">=" rather than ">": with ">" the entry at index maxAclForPage was
	   still written. Assumes acls[] holds maxAclForPage entries (its
	   declaration is not visible here) - TODO confirm. */
	if (acl->aclnr >= maxAclForPage) {
		#ifdef DEBUG
			printf("mor then maxAclForPage words\n");
		#endif
		return;
	}

	acl->acls[acl->aclnr].WordID = crc32boitho(word);
	acl->acls[acl->aclnr].position = 0;

	++acl->aclnr;
}
/*************************************************************************************
* slår opp i databasen for å finne DoCID for en url
*************************************************************************************/
int getDocIDFromUrl(char bdbfiledir[],char url[],unsigned int *DocID) {

        unsigned int crc32Value;
        int dbFileForUrl;
        int ret;
        DB *dbp;
	static char inited;
	static DB *dbp_store[nrOfUrlToDocIDFiles];

        DBT key, data;
        char fileName[256];
	
        crc32Value = crc32boitho(url);
        dbFileForUrl = (crc32Value % nrOfUrlToDocIDFiles);

	if (inited == 0) {
		int i;

		for(i = 0; i < nrOfUrlToDocIDFiles; i++) {
			sprintf(fileName,"%s%i.db",bdbfiledir,i);
			/* Create and initialize database object */
			if ((ret = db_create(&dbp, NULL, 0)) != 0) {
				fprintf(stderr, "%s: db_create: %s\n", "getDocIDFromUrl", db_strerror(ret));
				return (EXIT_FAILURE);
			}
			/* open the database. */
			//if ((ret = dbp->open(dbp, NULL, fileName, NULL, DB_BTREE, DB_CREATE, 0444)) != 0) {
			if ((ret = dbp->open(dbp, NULL, fileName, NULL, DB_BTREE, DB_RDONLY, 0444)) != 0) {
				dbp->err(dbp, ret, "%s: open", fileName);
				//goto err1;
			}
			dbp_store[i] = dbp;
		}

		inited = 1;
        }

	dbp = dbp_store[dbFileForUrl];


        //finner ut hvilken database vi skal opne
        //lager en has verdi slik at vi kan velge en av filene


        #ifdef DEBUG
                printf("Openig db %s\n",fileName);
        #endif


        /* Initialize the key/data pair so the flags aren't set. */
        memset(&key, 0, sizeof(key));
        memset(&data, 0, sizeof(data));


        key.data = url;
        key.size = strlen(url);


        /* Walk through the database and print out the key/data pairs. */
        if ((ret = dbp->get(dbp, NULL, &key, &data, 0)) == 0) {
                //printf("%s : %u-%i \n", key.data, *(int *)data.data,rLotForDOCid(*(int *)data.data));
                *DocID = *(int *)data.data;
                return 1;
        }
        else if (ret == DB_NOTFOUND) {
		#ifdef DEBUG
                dbp->err(dbp, ret, "DBcursor->get");
		#endif
                return 0;
        }
        else {
                dbp->err(dbp, ret, "DBcursor->get");
                return 0;
        }
}
Esempio n. 6
0
/*
 * Parser callback that prints a one-line description of a parsed unit.
 *
 * word       text of the unit; lowercased in place when pu is pu_word.
 * pos        position reported by the parser; printed for words.
 * pu         kind of unit.
 * puf        markup flag; only printed for pu_word.
 * pagewords  not used by this callback.
 */
void fn( char* word, int pos, enum parsed_unit pu, enum parsed_unit_flag puf, void* pagewords )
{
    const char *flagText = NULL;
    const char *unitText = "[...]";

#ifdef DEBUG
    printf("\t%s (%i) ", word, pos);
    printf("type %i ",pu);
#endif

    if (pu == pu_word) {
        /* Pick the label for the markup flag; unknown flags print nothing. */
        switch (puf)
        {
        case puf_none:  flagText = " none";   break;
        case puf_title: flagText = " +title"; break;
        case puf_h1:    flagText = " +h1";    break;
        case puf_h2:    flagText = " +h2";    break;
        case puf_h3:    flagText = " +h3";    break;
        case puf_h4:    flagText = " +h4";    break;
        case puf_h5:    flagText = " +h5";    break;
        case puf_h6:    flagText = " +h6";    break;
        }
        if (flagText != NULL) {
            fputs(flagText, stdout);
        }

        convert_to_lowercase(word);

        printf("[word] is now %s (crc32 %u, pos %i)", word, crc32boitho(word), pos);
        putchar('\n');
        return;
    }

    /* Every other unit kind is reported by a fixed tag only. */
    switch (pu)
    {
    case pu_linkword:         unitText = "[linkword]";         break;
    case pu_link:             unitText = "[link]";             break;
    case pu_baselink:         unitText = "[baselink]";         break;
    case pu_meta_keywords:    unitText = "[meta keywords]";    break;
    case pu_meta_description: unitText = "[meta description]"; break;
    case pu_meta_author:      unitText = "[meta author]";      break;
    default:                                                   break;
    }

    /* puts() appends the newline that ends the line */
    puts(unitText);
}
Esempio n. 7
0
/*
 * Loads the weighted adult-word list and the weighted adult-phrase list into
 * *adult and sorts both tables.
 *
 * Word file (AdultWordsVektetFile): one "<word> <weight>" pair per line.
 * Phrase file (AdultFraserVektetFile): one "<word1> <word2> <weight>" triple
 * per line; consecutive lines with the same word1 are collected into one
 * adultFraser entry.
 *
 * All input is lowercased. Malformed lines are reported on stdout and
 * skipped. The process exits with status 1 if either file cannot be opened.
 *
 * NOTE(review): the bounded copies below use sizeof on the .word members and
 * therefore assume they are char arrays, not pointers - confirm against the
 * struct declarations.
 */
void adultLoad (struct adultFormat *adult) {

	FILE *FH;
	char buff[128];
	int i;
	size_t x, len, wordLen;
	char *cpoint;
	char word1[128];
	char word2[128];
	int weight;
	unsigned long crc32tmp;

	/* ---- single words ---- */
	if ((FH = fopen(AdultWordsVektetFile,"r")) == NULL) {
		perror(AdultWordsVektetFile);
		exit(1);
	}

	i=0;
	while ((fgets(buff,sizeof(buff),FH) != NULL) && (i < maxAdultWords)) {
		/* Strip the trailing newline, but only if there is one: the last
		   line of a file may lack it, and chopping unconditionally would
		   eat the last digit of its weight. */
		len = strlen(buff);
		if (len > 0 && buff[len - 1] == '\n') {
			buff[--len] = '\0';
		}

		/* lowercase; cast because tolower() is undefined for negative char */
		for (x = 0; x < len; x++) {
			buff[x] = (char)tolower((unsigned char)buff[x]);
		}

		/* the word and its weight are separated by a space */
		cpoint = strchr(buff,' ');
		if (cpoint == NULL) {
			/* do not consume a table slot for a line we cannot parse */
			printf("bad AdultWordsVektetFile format: %s\n",buff);
			continue;
		}

		/* bounded, always terminated copy of the word part */
		wordLen = (size_t)(cpoint - buff);
		if (wordLen >= sizeof((*adult).AdultWords[i].word)) {
			wordLen = sizeof((*adult).AdultWords[i].word) - 1;
		}
		memcpy((*adult).AdultWords[i].word,buff,wordLen);
		(*adult).AdultWords[i].word[wordLen] = '\0';

		/* step past the space to reach the weight */
		++cpoint;
		(*adult).AdultWords[i].weight = atoi(cpoint);

		(*adult).AdultWords[i].crc32 = crc32boitho((*adult).AdultWords[i].word);

		++i;
	}
	(*adult).adultWordnr = i;

	fclose(FH);

	qsort((*adult).AdultWords, i , sizeof(struct adultWordFormat), compare_elements_adultWord);

	/* ---- phrases ---- */
	for(i=0;i<maxAdultWords;i++) {
		(*adult).adultFraser[i].adultWordCount = 0;
	}

	if ((FH = fopen(AdultFraserVektetFile,"r")) == NULL) {
		perror(AdultFraserVektetFile);
		exit(1);
	}

	/* i is the index of the phrase group being filled; -1 = none yet */
	i=-1;
	while (fgets(buff,sizeof(buff) -1,FH) != NULL) {
		len = strlen(buff);
		for (x = 0; x < len; x++) {
			buff[x] = (char)tolower((unsigned char)buff[x]);
		}

		/* word1/word2 are as large as buff, so %s cannot overflow them */
		if (sscanf(buff,"%s %s %i\n",word1,word2,&weight) != 3) {
			printf("bad AdultFraserVektetFile format: %s\n",buff);
			continue;
		}

		crc32tmp = crc32boitho(word1);

		/* A new group starts on the first line and whenever word1 differs
		   from the previous line's word1; otherwise we append to the
		   current group. */
		if ((i == -1) || (crc32tmp != (*adult).adultFraser[i].crc32)) {
			if (i + 1 >= maxAdultWords) {
				/* the table is full; starting another group would write
				   past its end */
				printf("more than %i adult phrase groups, ignoring the rest\n",(int)maxAdultWords);
				break;
			}
			++i;
		}

		snprintf((*adult).adultFraser[i].word,sizeof((*adult).adultFraser[i].word),"%s",word1);
		(*adult).adultFraser[i].crc32 = crc32tmp;

		(*adult).adultFraser[i].adultWord[(*adult).adultFraser[i].adultWordCount].weight = weight;
		snprintf((*adult).adultFraser[i].adultWord[(*adult).adultFraser[i].adultWordCount].word,
			sizeof((*adult).adultFraser[i].adultWord[(*adult).adultFraser[i].adultWordCount].word),
			"%s",word2);
		(*adult).adultFraser[i].adultWord[(*adult).adultFraser[i].adultWordCount].crc32 = crc32boitho(word2);

		/* When the group is full the last slot is simply overwritten by
		   later lines (unchanged behaviour). */
		if ((*adult).adultFraser[i].adultWordCount < MaxAdultWordCount -1) {
			++(*adult).adultFraser[i].adultWordCount;
		}
		else {
			printf("MaxAdultWordCount %i for %s\n",MaxAdultWordCount,buff);
		}
	}
	fclose(FH);

	/* i is the index of the last group, so the number of groups is i + 1.
	   Storing i itself left the last group out of the count and the sort. */
	(*adult).adultWordFrasernr = i + 1;
	qsort((*adult).adultFraser, (*adult).adultWordFrasernr , sizeof(struct adultWordFraserFormat), compare_elements_AdultFraser);

}
Esempio n. 8
0
/* Forces a char-array field that was filled from the network to be terminated. */
#define ISSUEADD_TERMINATE(field) ((field)[sizeof(field) - 1] = '\0')

/* Maps a NULL string (an SQL NULL column) to the empty string. */
static const char *issueAddText(const char *text) {
	return (text != NULL) ? text : "";
}

/* Copies src into dst (capacity dstSize), truncating if needed and always terminating. */
static void issueAddCopy(char *dst, size_t dstSize, const char *src) {
	if (dstSize > 0) {
		snprintf(dst, dstSize, "%s", issueAddText(src));
	}
}

/* Returns a malloc'ed SQL-escaped copy of text (caller frees), or NULL when out of memory. */
static char *issueAddEscape(MYSQL *db, const char *text) {
	size_t len = strlen(text);
	char *escaped = malloc(len * 2 + 1);

	if (escaped != NULL) {
		mysql_real_escape_string(db, escaped, text, (unsigned long)len);
	}
	return escaped;
}

/*
 * Inserts the issuedadds row for one served ad. Every string value is escaped
 * before it is placed in the statement. Returns 0 on success and -1 on
 * failure (a message has then been printed).
 */
static int issueAddLogIssued(MYSQL *db, const char *queryEscaped,
		const struct ppcPagesFormat *page, const struct queryNodeHederFormat *heder) {

	enum { nrOfFields = 7 };
	const char *fields[nrOfFields];
	char *escaped[nrOfFields];
	char *sql = NULL;
	size_t sqlSize;
	int k, written;
	int status = -1;

	fields[0] = page->uri;
	fields[1] = page->user;
	fields[2] = heder->search_user;
	fields[3] = heder->userip;
	fields[4] = heder->HTTP_ACCEPT_LANGUAGE;
	fields[5] = heder->HTTP_USER_AGENT;
	fields[6] = heder->HTTP_REFERER;

	for (k = 0; k < nrOfFields; k++) {
		escaped[k] = NULL;
	}

	/* 512 covers the fixed statement text and the three numbers */
	sqlSize = 512 + strlen(queryEscaped);
	for (k = 0; k < nrOfFields; k++) {
		escaped[k] = issueAddEscape(db, fields[k]);
		if (escaped[k] == NULL) {
			perror("malloc");
			goto done;
		}
		sqlSize += strlen(escaped[k]);
	}

	sql = malloc(sqlSize);
	if (sql == NULL) {
		perror("malloc");
		goto done;
	}

	/* The sixth value used to be '%s' paired with the int argument 0, which
	   is undefined behaviour; it is now the literal '0'. */
	written = snprintf(sql, sqlSize, "insert into issuedadds values(%s,'%s','%f','%s',%s,'0','%s','%s','%s','%s','%s','%s','%i','%i')",
		"NULL",
		queryEscaped,
		page->bid,
		escaped[0],
		"NOW()",
		escaped[1],
		escaped[2],
		escaped[3],
		escaped[4],
		escaped[5],
		escaped[6],
		page->keyword_id,
		(int)page->DocID
		);
	if (written < 0 || (size_t)written >= sqlSize) {
		printf("Cant insert into issuedadds: query too long\n");
		goto done;
	}

	if (mysql_real_query(db, sql, (unsigned long)written)) {
		printf("Cant insert into issuedadds: %s\nSql query vas %s\n",mysql_error(db),sql);
		goto done;
	}

	status = 0;

done:
	for (k = 0; k < nrOfFields; k++) {
		free(escaped[k]);
	}
	free(sql);
	return status;
}

/*
 * Connection handler for one ad request.
 *
 * arg  the connected socket descriptor, passed as an integer in the pointer.
 *
 * Steps:
 *   1. receive a struct queryNodeHederFormat from the socket,
 *   2. reply with net_CanDo,
 *   3. look the query up as a paid keyword in the MySQL ad database,
 *   4. record one issuedadds row per ad and point the ad's uri at the click
 *      tracking script,
 *   5. send a struct SiderHederFormat followed by one struct SiderFormat per ad,
 *   6. log one betaler_keywords_visninger row per ad shown.
 *
 * At most 10 ads, and never more than queryNodeHeder.MaxsHits, are handled.
 *
 * Returns (or passes to pthread_exit when WITH_THREAD is defined) NULL on
 * success and (void *)1 on failure. The socket is closed on every path.
 *
 * NOTE(review): the database credentials are hard-coded below; they belong in
 * configuration, not in source.
 * NOTE(review): the third-party XML ad feed (getPpcAds) was already disabled
 * in this function, so only ads from the local database are served.
 */
void *issueAdd(void *arg) {

	int     mysocfd = (int) arg;

	struct betaler_keywords_visninger_format {
		int kid;
		int betaler_side_id;
	};

	struct betaler_keywords_visninger_format betaler_keywords_visninger[10];
	struct ppcPagesFormat ppcPages[10];
	struct queryNodeHederFormat queryNodeHeder;
	struct SiderHederFormat SiderHeder;
	struct SiderFormat *Sider = NULL;
	struct timeval globalstart_time, globalend_time;

	char buff[1024];
	char queryEscaped[MaxQueryLen*2+1];
	char mysql_query[2048];
	char *strpointer;
	size_t queryLen, descLen;
	unsigned int addid;
	int siderType_ppctopNr,siderType_ppcsideNr;
	int i, n, net_status, showabal;
	int hitSlots, maxPages;
	int nrOfppcPages = 0;
	int nrOfBoithoAds = 0;
	void *exitStatus = (void *)1;

	/* One handle per call: a function-static handle would be shared by all
	   threads running this handler at the same time. */
	MYSQL demo_db;
	MYSQL_RES *mysqlres; /* To be used to fetch information into */
	MYSQL_ROW mysqlrow;

	gettimeofday(&globalstart_time, NULL);

	/* Zero everything that is later read or sent without being fully set. */
	memset(ppcPages, 0, sizeof(ppcPages));
	memset(&SiderHeder, 0, sizeof(SiderHeder));

	n = recv(mysocfd, &queryNodeHeder, sizeof(queryNodeHeder),MSG_WAITALL);
	if (n != (int)sizeof(queryNodeHeder)) {
		if (n == -1) {
			perror("recv");
		}
		else {
			printf("recv only %i of %i\n",n,(int)sizeof(queryNodeHeder));
		}
		goto out_socket;
	}

	/* The header comes straight from the network: make sure every string in
	   it is terminated before it is used as one. */
	ISSUEADD_TERMINATE(queryNodeHeder.query);
	ISSUEADD_TERMINATE(queryNodeHeder.search_user);
	ISSUEADD_TERMINATE(queryNodeHeder.userip);
	ISSUEADD_TERMINATE(queryNodeHeder.HTTP_ACCEPT_LANGUAGE);
	ISSUEADD_TERMINATE(queryNodeHeder.HTTP_USER_AGENT);
	ISSUEADD_TERMINATE(queryNodeHeder.HTTP_REFERER);
	ISSUEADD_TERMINATE(queryNodeHeder.GeoIPcontry);

	printf("Query %s\n",queryNodeHeder.query);

	hitSlots = queryNodeHeder.MaxsHits;
	if (hitSlots < 0) {
		hitSlots = 0;
	}
	/* never fill more result slots than were allocated, nor more than the
	   fixed-size ad tables hold */
	maxPages = (int)(sizeof(ppcPages) / sizeof(ppcPages[0]));
	if (maxPages > hitSlots) {
		maxPages = hitSlots;
	}

	/* calloc: checks the size multiplication and leaves no uninitialised
	   bytes in the records that are sent to the client */
	Sider = calloc((hitSlots > 0) ? (size_t)hitSlots : 1, sizeof(struct SiderFormat));
	if (Sider == NULL) {
		perror("calloc Sider");
		goto out_socket;
	}

	/* mark every page as deleted */
	for (i=0;i<hitSlots;i++) {
		Sider[i].deletet = 1;
	}

	/* answer right away that we can handle the request */
	net_status = net_CanDo;
	if ((n=sendall(mysocfd,&net_status, sizeof(net_status))) != sizeof(net_status)) {
		printf("send only %i of %i\n",n,(int)sizeof(net_status));
		perror("sendall net_status");
	}

	/********************************************************************************************/
	#ifdef DEBUG
		printf("sending query to ppc db\n");
	#endif

	if (mysql_init(&demo_db) == NULL) {
		printf("mysql_init failed\n");
		goto out_socket;
	}

	#ifdef WITH_THREAD
		my_thread_init(); // called mysql_thread_init() in mysql 5.0
	#endif

	if(!mysql_real_connect(&demo_db, "localhost", "boitho", "G7J7v5L5Y7", "boithoweb", 3306, NULL, 0)){
		/* "%s": the error text must never be used as the format itself */
		printf("%s", mysql_error(&demo_db));
		goto out_db;
	}

	/* escape the query; the length is capped to what queryEscaped can hold */
	queryLen = strlen(queryNodeHeder.query);
	if (queryLen > MaxQueryLen) {
		queryLen = MaxQueryLen;
	}
	mysql_real_escape_string(&demo_db,queryEscaped,queryNodeHeder.query,(unsigned long)queryLen);

	n = snprintf(mysql_query, sizeof(mysql_query), "select tittel,url,beskrivelse,betaler_sider.bruker_navn,betaler_keywords.betaler,betaler_keywords.kid,betaler_sider.id from betaler_keywords,betaler_sider where betaler_keywords.keyword ='%s' and betaler_keywords.betaler_side_id=betaler_sider.id order by betaler desc",queryEscaped);
	if (n < 0 || (size_t)n >= sizeof(mysql_query)) {
		printf("ppc query too long\n");
		goto out_db;
	}

	if(mysql_real_query(&demo_db, mysql_query, (unsigned long)n)){ /* Make query */
		printf("%s", mysql_error(&demo_db));
		goto out_db;
	}
	#ifdef DEBUG
		printf("sending query to ppc db end\n");
	#endif

	/********************************************************************************************/

	mysqlres=mysql_store_result(&demo_db); /* Download result from server */
	if (mysqlres == NULL) {
		printf("%s", mysql_error(&demo_db));
		goto out_db;
	}

	while ((nrOfppcPages < maxPages) && ((mysqlrow=mysql_fetch_row(mysqlres)) != NULL)) { /* Get a row from the results */

		issueAddCopy(ppcPages[nrOfppcPages].title,sizeof(ppcPages[nrOfppcPages].title),mysqlrow[0]);
		issueAddCopy(ppcPages[nrOfppcPages].url,sizeof(ppcPages[nrOfppcPages].url),mysqlrow[1]);
		issueAddCopy(ppcPages[nrOfppcPages].uri,sizeof(ppcPages[nrOfppcPages].uri),mysqlrow[1]);
		issueAddCopy(ppcPages[nrOfppcPages].description,sizeof(ppcPages[nrOfppcPages].description),mysqlrow[2]);
		issueAddCopy(ppcPages[nrOfppcPages].user,sizeof(ppcPages[nrOfppcPages].user),mysqlrow[3]);

		ppcPages[nrOfppcPages].thumbnail[0] = '\0';

		ppcPages[nrOfppcPages].bid = atof(issueAddText(mysqlrow[4]));
		ppcPages[nrOfppcPages].keyword_id = atoi(issueAddText(mysqlrow[5]));
		ppcPages[nrOfppcPages].DocID = strtoul(issueAddText(mysqlrow[6]), (char **)NULL, 10);

		ppcPages[nrOfppcPages].allrank = 10000;

		#ifdef DEBUG
		printf("aa bid %f\n",ppcPages[nrOfppcPages].bid);
		printf("\tUrl: %s\n",ppcPages[nrOfppcPages].url);
		printf("\tTitle: %s\n",ppcPages[nrOfppcPages].title);
		printf("keyword_id -%s-\n",issueAddText(mysqlrow[5]));
		#endif

		betaler_keywords_visninger[nrOfBoithoAds].kid = ppcPages[nrOfppcPages].keyword_id;
		betaler_keywords_visninger[nrOfBoithoAds].betaler_side_id = ppcPages[nrOfppcPages].DocID;
		++nrOfppcPages;
		++nrOfBoithoAds;
	}
	mysql_free_result(mysqlres);

	/*********************************/

	printf("contry: %s\n",queryNodeHeder.GeoIPcontry);

	showabal = 0;
	for (i=0;i<nrOfppcPages;i++) {

		/*********************************************/
		#ifdef DEBUG
		printf("issue add. keyword_id %i\n",ppcPages[i].keyword_id);
		printf("ppc user %s\naffuser %s\n",ppcPages[i].user,queryNodeHeder.search_user);
		#endif

		/* record the ad as issued; the row id becomes the click-tracking id */
		if (issueAddLogIssued(&demo_db, queryEscaped, &ppcPages[i], &queryNodeHeder) != 0) {
			goto out_db;
		}

		addid = (unsigned int)mysql_insert_id(&demo_db);

		#ifdef DEBUG
		printf("addid %u\n",addid);
		#endif

		/* from here on uri points at the tracking script; url keeps the target */
		snprintf(ppcPages[i].uri,sizeof(ppcPages[i].uri),"http://bbh-001.boitho.com/cgi-bin/addout.cgi?addid=%u&addurl=%s",addid,ppcPages[i].url);
		/*********************************************/

		if (strlen(ppcPages[i].title) == (sizeof(ppcPages[i].title) -1)) {
			/* the title filled its buffer, i.e. it was probably cut: keep
			   room for ".." (size - 2 leaves at most size - 3 characters) */
			snprintf(Sider[showabal].title,sizeof(Sider[showabal].title) -2,"%s",ppcPages[i].title);
			strcat(Sider[showabal].title,"..");
		}
		else {
			issueAddCopy(Sider[showabal].title,sizeof(Sider[showabal].title),ppcPages[i].title);
		}

		issueAddCopy(Sider[showabal].description,sizeof(Sider[showabal].description),ppcPages[i].description);

		issueAddCopy(Sider[showabal].url,sizeof(Sider[showabal].url),ppcPages[i].url);
		issueAddCopy(Sider[showabal].uri,sizeof(Sider[showabal].uri),ppcPages[i].uri);
		issueAddCopy(Sider[showabal].user,sizeof(Sider[showabal].user),ppcPages[i].user);

		/* domain and the thumbnail fields are not set for database ads;
		   they are empty here because ppcPages was zeroed above.
		   NOTE(review): assumes these members are char arrays - confirm. */
		strscpy(Sider[showabal].domain,ppcPages[i].domain,sizeof(Sider[showabal].domain));

		strscpy(Sider[showabal].thumbnale,ppcPages[i].thumbnail,sizeof(Sider[showabal].thumbnale));

		Sider[showabal].thumbnailwidth = atol(ppcPages[i].thumbnailwidth);
		Sider[showabal].thumbnailheight = atol(ppcPages[i].thumbnailheight);

		Sider[showabal].bid = ppcPages[i].bid;

		Sider[showabal].iindex.allrank = ppcPages[i].allrank;

		#ifdef DEBUG
		printf("%s\t%s\t%f\n",Sider[showabal].url,Sider[showabal].title,ppcPages[i].bid);
		#endif

		++showabal;
	}

	/*********************************/
	siderType_ppctopNr = 0;
	siderType_ppcsideNr = 0;

	for(i=0;i<showabal;i++) {

		#ifdef DEBUG
		printf("uri %s\n",Sider[i].uri);
		#endif

		Sider[i].DocumentIndex.crc32 = crc32boitho(Sider[i].description);
		Sider[i].deletet = 0;

		/* A long description is cut at its last space (or just after its
		   last ';') and ends in ".." instead of a chopped word. */
		if (strlen(Sider[i].description) >= 250) {
			if ((strpointer = strrchr(Sider[i].description,' ')) != NULL) {
				strpointer[0] = '\0';
			}
			else if ((strpointer = strrchr(Sider[i].description,';')) != NULL) {
				++strpointer; /* keep the semicolon itself */
				strpointer[0] = '\0';
			}
			/* append ".." without ever writing past the buffer */
			descLen = strlen(Sider[i].description);
			if (descLen + 3 > sizeof(Sider[i].description)) {
				descLen = sizeof(Sider[i].description) - 3;
			}
			memcpy(Sider[i].description + descLen,"..",3);
		}

		/* highlight the query word; skipped if the replacement text would
		   not fit in buff */
		n = snprintf(buff,sizeof(buff),"<b>%s</b>",queryNodeHeder.query);
		if (n >= 0 && (size_t)n < sizeof(buff)) {
			strcasesandr(Sider[i].description,sizeof(Sider[i].description),queryNodeHeder.query,buff);
		}

		/* the first two ads whose description mentions the query go on
		   top, all others at the side */
		if ((siderType_ppctopNr < 2) && (strcasestr(Sider[i].description,queryNodeHeder.query) != 0)) {
			Sider[i].type = siderType_ppctop;
			++siderType_ppctopNr;
		}
		else {
			Sider[i].type = siderType_ppcside;
			++siderType_ppcsideNr;
		}

	}

	gettimeofday(&globalend_time, NULL);
	SiderHeder.total_usecs = getTimeDifference(&globalstart_time,&globalend_time);

	SiderHeder.TotaltTreff = showabal;
	SiderHeder.showabal = showabal;
	SiderHeder.filtered = 0;
	SiderHeder.hiliteQuery[0] = '\0';
	snprintf(SiderHeder.servername,sizeof(SiderHeder.servername),"adserver.boitho.com");

	if ((n=sendall(mysocfd,&SiderHeder, sizeof(SiderHeder))) != sizeof(SiderHeder)) {
		printf("send only %i of %i\n",n,(int)sizeof(SiderHeder));
		perror("sendall SiderHeder");
	}

	for(i=0;i<SiderHeder.showabal;i++) {
		#ifdef DEBUG
		printf("sending %s, deletet %i\n",Sider[i].url,Sider[i].deletet);
		printf("bb: -%s-\n",Sider[i].title);
		printf("url: -%s-\n",Sider[i].url);
		#endif

		if ((n=sendall(mysocfd,&Sider[i], sizeof(struct SiderFormat))) != sizeof(struct SiderFormat)) {
			printf("send only %i of %i\n",n,(int)sizeof(struct SiderFormat));
			perror("sendall");
		}
	}

	/* The pages have been delivered; a failure while logging the views
	   below is still reported as success, as before. */
	exitStatus = NULL;

	/* log every view of our own ppc keywords */
	for (i=0;i<nrOfBoithoAds;i++) {
		n = snprintf(mysql_query, sizeof(mysql_query), "insert DELAYED into betaler_keywords_visninger values(NULL,'%i','%i',NOW())",betaler_keywords_visninger[i].kid,betaler_keywords_visninger[i].betaler_side_id);

		if(mysql_real_query(&demo_db, mysql_query, (unsigned long)n)){ /* Make query */
			printf("%s", mysql_error(&demo_db));
			goto out_db;
		}
	}

out_db:
	mysql_close(&demo_db);
	#ifdef WITH_THREAD
		my_thread_end(); // called mysql_thread_end() in mysql 5.0
	#endif

out_socket:
	free(Sider);
	close(mysocfd);

	#ifdef WITH_THREAD
		pthread_exit(exitStatus); /* exit with status */
	#endif

	printf("end\n");

	return exitStatus;
}
Esempio n. 9
0
/*
 * Builds the "Anchor" reverse-index input files for one lot from its anchors
 * file.
 *
 * Usage: ./anchorread lotnr subname
 *
 * Each anchor text is lowercased and split on spaces. For every word that is
 * neither empty, "www" nor a stop word, one record is appended to the
 * reverse-index file selected by WordID % NrOfDataDirectorys:
 *
 *     DocID (unsigned int), lang (unsigned char, always 0),
 *     WordID (unsigned long), nr (unsigned long, always 1),
 *     hit (unsigned short)
 *
 * The hit value is a per-document running counter that starts at 2000 (to
 * make anchor hits easy to tell apart from other hits), advances once per
 * anchor text and once per "www" or indexed word, and is capped at 65535.
 *
 * Returns 0 on completion; exits with status 1 on usage or setup errors.
 */
int main (int argc, char *argv[]) {

	int lotNr;
	int i;
	unsigned int DocID;
	char text[50];
	unsigned int radress;
	unsigned int rsize;
	char **Data;
	int TokCount;
	unsigned short hits;
	unsigned long WordID;
	int bucket;
	/* unsigned long because that is the width written to the file below;
	   as an int, the fwrite read past the end of the variable wherever
	   unsigned long is wider than int */
	unsigned long nr;
	FILE *revindexFilesHa[NrOfDataDirectorys];
	unsigned char lang;
	FILE *FH;
	unsigned int DocIDPlace;
	char *subname;
	int *nrOfLinkWordsToDocID;

	/* check that we were told which lot to use */
	if (argc < 3) {
		printf("Usage: ./anchorread lotnr subname\n\n");
		exit(1);
	}

	lotNr = atoi(argv[1]);
	subname = argv[2];

	nrOfLinkWordsToDocID = malloc(sizeof(int) * NrofDocIDsInLot);
	if (nrOfLinkWordsToDocID == NULL) {
		perror("malloc nrOfLinkWordsToDocID");
		exit(1);
	}

	for (i=0;i<NrofDocIDsInLot;i++) {
		/* start at 2000 so these are easy to tell apart visually from other hits */
		nrOfLinkWordsToDocID[i] = 2000;
	}

	if ( (FH = lotOpenFileNoCasheByLotNr(lotNr,"anchors","rb", 's',subname)) == NULL) {
		printf("lot dont have a anchors file\n");
		exit(1);
	}
	fclose(FH);

	revindexFilesOpenLocal(revindexFilesHa,lotNr,"Anchor","wb",subname);

	while (anchorGetNext(lotNr,&DocID,text,sizeof(text),&radress,&rsize,subname) ) {

		DocIDPlace = (DocID - LotDocIDOfset(rLotForDOCid(DocID)));
		if (DocIDPlace >= (unsigned int)NrofDocIDsInLot) {
			/* would index outside the counter table */
			printf("DocID %u is outside the lot, skipping\n",DocID);
			continue;
		}
		++nrOfLinkWordsToDocID[DocIDPlace];

		convert_to_lowercase((unsigned char *)text);

		#ifdef DEBUG
		if (DocID == 4999999) {
			printf("DocID %i, text: \"%s\", DocIDPlace %i, nrOfLinkWordsToDocID %i\n",DocID,text,DocIDPlace,nrOfLinkWordsToDocID[DocIDPlace]);
		}
		#endif

		if ((TokCount = split(text, " ", &Data)) == -1) {
			printf("canæt splitt \"%s\"\n",text);
			/* Data is not usable after a failed split */
			continue;
		}

		for (i = 0; Data[i] != NULL; i++) {

			if (Data[i][0] == '\0') {
				#ifdef DEBUG
					if (DocID == 4999999) {
						printf("emty data element\n");
					}
				#endif
				continue;
			}

			if (strcmp(Data[i],"www") == 0) {
				#ifdef DEBUG
					if (DocID == 4999999) {
						printf("www\n");
					}
				#endif
				/* "www" is not indexed but still takes up a position */
				++nrOfLinkWordsToDocID[DocIDPlace];
				continue;
			}

			if (isStoppWord(Data[i])) {
				#ifdef DEBUG
					if (DocID == 4999999) {
						printf("stopword \"%s\"\n",Data[i]);
					}
				#endif
				/* stop words neither get indexed nor take up a position */
				continue;
			}

			#ifdef DEBUG
				if (DocID == 4999999) {
					printf("\t\"%s\" %i\n",Data[i],nrOfLinkWordsToDocID[DocIDPlace]);
				}
			#endif

			WordID = crc32boitho(Data[i]);

			if (WordID == 0) {
				printf("got 0 as word id for \"%s\". Somthing may be wrong.\n",Data[i]);
			}

			bucket = WordID % NrOfDataDirectorys;

			/* the hit field is 16 bits wide: saturate instead of wrapping */
			if (nrOfLinkWordsToDocID[DocIDPlace] > 65535) {
				hits = 65535;
			}
			else {
				hits = nrOfLinkWordsToDocID[DocIDPlace];
			}

			#ifdef DEBUG
				if (DocID == 4999999) {
					printf("\thits %i: \"%s\": %hu, bucket %i\n",i,Data[i],hits,bucket);
				}
			#endif

			if (fwrite(&DocID,sizeof(unsigned int),1,revindexFilesHa[bucket]) != 1) {
				perror("fwrite DocID");
			}

			/* runarb, 13 May 2007: language is now stored as a number. It
			   should come from DocumentIndex when available, but it is not
			   stored there; see IndexRes for how it is handled. */
			lang = 0;
			nr = 1;
			if(fwrite(&lang,sizeof(unsigned char),1,revindexFilesHa[bucket]) != 1) {
				perror("fwrite lang");
			}

			if(fwrite(&WordID,sizeof(unsigned long),1,revindexFilesHa[bucket]) != 1) {
				perror("fwrite WordID");
			}

			if(fwrite(&nr,sizeof(unsigned long),1,revindexFilesHa[bucket]) != 1) {
				perror("fwrite nr");
			}

			if(fwrite(&hits,sizeof(unsigned short),1,revindexFilesHa[bucket]) != 1) {
				perror("fwrite hits");
			}

			++nrOfLinkWordsToDocID[DocIDPlace];
		}
		FreeSplitList(Data);

		#ifdef DEBUG
			if (DocID == 4999999) {
				printf("\n");
			}
		#endif
	}

	free(nrOfLinkWordsToDocID);

	return 0;
}
Esempio n. 10
0
/*
 * Appends one word to wordsPart.
 *
 * wordsPart  destination table; nr is the number of entries already used and
 *            nextPosition the running word position.
 * word       NUL-terminated word; lowercased IN PLACE by
 *            convert_to_lowercase() before it is hashed with crc32boitho().
 * puf        markup context of the word. It selects the offset added to the
 *            stored position: 1000 for plain text, 100 for title text, 500
 *            for h1..h6 and 0 (with a message) for any other value.
 *
 * When the table is full the word is dropped (a message is printed in DEBUG
 * builds only).
 */
void wordsAdd(struct pagewordsFormatPartFormat *wordsPart, char word[],enum parsed_unit_flag puf) {

	int wordTypeadd = 0;

	/* ">=" rather than ">": with ">" the entry at index maxWordForPage was
	   still written. Assumes words[] holds maxWordForPage entries (its
	   declaration is not visible here) - TODO confirm. */
	if (wordsPart->nr >= maxWordForPage){
		#ifdef DEBUG
			printf("mor then maxWordForPage words\n");
		#endif
		return;
	}

	switch (puf)
	{
		case puf_none:
			wordTypeadd = 1000;
			break;
		case puf_title:
			wordTypeadd = 100;
			break;
		case puf_h1:
		case puf_h2:
		case puf_h3:
		case puf_h4:
		case puf_h5:
		case puf_h6:
			wordTypeadd = 500;
			break;
		default:
			printf(" no catsh\n");
			break;
	}

	/* convert to lower case */
	convert_to_lowercase((unsigned char *)word);

	#ifdef PRESERVE_WORDS
		/* NOTE(review): unbounded copy; the size of .word is not visible here. */
		strcpy(wordsPart->words[wordsPart->nr].word,word);
	#endif

	wordsPart->words[wordsPart->nr].WordID = crc32boitho(word);

	#ifdef DEBUG
		printf(" (crc %s -> %u) ",word,wordsPart->words[wordsPart->nr].WordID);
	#endif

	wordsPart->words[wordsPart->nr].position = (wordsPart->nextPosition + wordTypeadd);
	/* Remember the insertion index as well, so the words before and after
	   this one can still be found; the position above is encoded. */
	wordsPart->words[wordsPart->nr].unsortetIndexPosition = wordsPart->nr;

	++wordsPart->nextPosition;
	++wordsPart->nr;
}