Пример #1
0
/*
 * Append one word of link text to the outlink currently being built.
 *
 * The current link is the most recently registered one, i.e.
 * pagewords->outlinks[pagewords->nrOfOutLinks - 1]. The word is lowercased
 * in place (the caller's buffer is modified), then appended to that link's
 * linktext followed by a single space; linktextlen is advanced accordingly.
 *
 * Nothing is appended when:
 *   - no link has been registered yet (nrOfOutLinks <= 0),
 *   - nrOfOutLinks has reached IndexerMaxLinks (otherwise every further
 *     word would pile up on the last stored link),
 *   - the current link is not flagged as good, or
 *   - linktext has no room for the word, the trailing space and the NUL.
 */
void linkwordadd(struct pagewordsFormat *pagewords, char word[]) {

	int wordLength = strlen(word);
	int current;

	convert_to_lowercase((unsigned char *)word);

	/* Without a registered link, "nrOfOutLinks - 1" would index before the array. */
	if (pagewords->nrOfOutLinks <= 0) {
		return;
	}

	if (pagewords->nrOfOutLinks >= IndexerMaxLinks) {
		#ifdef DEBUG
		printf("note: to many links. Has %i sow far\n",(*pagewords).nrOfOutLinks);
		#endif
		return;
	}

	/* -1 because we work on the current link, not the next free slot */
	current = pagewords->nrOfOutLinks - 1;

	if (!pagewords->outlinks[current].good) {
		/* bad link: its text is not collected */
		return;
	}

	/* +2 = the separating space plus the terminating NUL */
	if ((pagewords->outlinks[current].linktextlen + wordLength + 2) > sizeof(pagewords->outlinks[current].linktext)) {
		/* no more room for link text */
		return;
	}

	memcpy(pagewords->outlinks[current].linktext + pagewords->outlinks[current].linktextlen, word, wordLength);
	pagewords->outlinks[current].linktextlen += wordLength;

	pagewords->outlinks[current].linktext[pagewords->outlinks[current].linktextlen] = ' ';
	pagewords->outlinks[current].linktextlen += 1;

	/* keep linktext NUL-terminated; the terminator is not counted in linktextlen */
	pagewords->outlinks[current].linktext[pagewords->outlinks[current].linktextlen] = '\0';
}
Пример #2
0
/*
 * Format the lookup key selected by `type` into buf and lowercase it.
 *
 * Key layouts:
 *   TO            "<rcpt>"
 *   TO_IP         "<rcpt>:<ip>"
 *   TO_IP_FROM    "<rcpt>:<ip>:<from>"
 *   BOUNCE_TO     "<rcpt>:<>"
 *   BOUNCE_TO_IP  "<rcpt>:<ip>:<>"
 * where <ip> is priv->priv_ip and <from> is priv->priv_from. Any other
 * `type` formats nothing and yields length 0.
 *
 * buf/buflen: destination buffer and its capacity in bytes.
 * Returns the key length, or 0 when the key does not fit in buflen bytes.
 */
static int make_key(char *buf, size_t buflen, enum keytype type,
		struct mlfi_priv *priv, const char *rcpt)
{
	int keylen = 0;

	if (type == TO) {
		keylen = snprintf(buf, buflen, "%s", rcpt);
	} else if (type == TO_IP) {
		keylen = snprintf(buf, buflen, "%s:%s", rcpt, priv->priv_ip);
	} else if (type == TO_IP_FROM) {
		keylen = snprintf(buf, buflen, "%s:%s:%s", rcpt, priv->priv_ip,
				priv->priv_from);
	} else if (type == BOUNCE_TO) {
		keylen = snprintf(buf, buflen, "%s:<>", rcpt);
	} else if (type == BOUNCE_TO_IP) {
		keylen = snprintf(buf, buflen, "%s:%s:<>", rcpt, priv->priv_ip);
	}

	/*
	 * Truncated output is rejected. The comparison is done in size_t, so a
	 * negative snprintf() result turns into a very large value and is
	 * rejected by the same test.
	 */
	if ((size_t)keylen >= buflen)
		return 0;

	convert_to_lowercase (buf, keylen);
	return keylen;
}
Пример #3
0
/*
 * Record one attribute word in the per-page attribute list.
 *
 * The word is lowercased in place (the caller's buffer is modified), hashed
 * with crc32boitho() and stored in the next free slot with position 0;
 * attribnr is then advanced. Once the list is full the word is dropped
 * (reported only in DEBUG builds).
 */
void attribadd(struct IndexerRes_attrib *attrib, char word[]) {

	convert_to_lowercase((unsigned char *)word);

	#ifdef DEBUG
	printf("attribadd: got \"%s\"\n",word);
	#endif

	/*
	 * ">=" rather than ">": with ">" the slot attrib[maxAttribForPage] was
	 * still written. NOTE(review): assumes the attrib array holds
	 * maxAttribForPage entries - confirm against the struct declaration.
	 */
	if (attrib->attribnr >= maxAttribForPage) {
		#ifdef DEBUG
		printf("more than maxAttribForPage words\n");
		#endif
		return;
	}

	attrib->attrib[attrib->attribnr].WordID = crc32boitho(word);
	attrib->attrib[attrib->attribnr].position = 0;

	++attrib->attribnr;
}
Пример #4
0
/*
 * Record one ACL word in the per-page ACL list.
 *
 * The word is lowercased in place (the caller's buffer is modified), hashed
 * with crc32boitho() and stored in the next free slot with position 0;
 * aclnr is then advanced. Once the list is full the word is dropped
 * (reported only in DEBUG builds).
 */
void acladd(struct IndexerRes_acls *acl, char word[]) {

	convert_to_lowercase((unsigned char *)word);

	#ifdef DEBUG
	printf("acladd: got \"%s\"\n",word);
	#endif

	/*
	 * ">=" rather than ">": with ">" the slot acls[maxAclForPage] was still
	 * written. NOTE(review): assumes the acls array holds maxAclForPage
	 * entries - confirm against the struct declaration.
	 */
	if (acl->aclnr >= maxAclForPage) {
		#ifdef DEBUG
		printf("mor then maxAclForPage words\n");
		#endif
		return;
	}

	acl->acls[acl->aclnr].WordID = crc32boitho(word);
	acl->acls[acl->aclnr].position = 0;

	++acl->aclnr;
}
Пример #5
0
/*
 * Ask the suggest service for completions of the last word in `arg` and
 * print one suggestion per line on stdout.
 *
 * arg is split on spaces; the last token is lowercased and sent together
 * with `user` and `collection` (an empty string when collection is NULL).
 * Every returned name is printed prefixed by the untouched leading tokens.
 * When the last token is empty, only the leading tokens are printed once.
 * A failed call or a non-zero _errno in the reply prints nothing.
 *
 * Unless built with DEBUG, an RPC client for `host` is created over UDP;
 * failing to create it terminates the process with exit status 1.
 */
void
suggest_1(char *host, char *arg, char *user, char *collection)
{
	/* NULL so DEBUG builds, which never create a client, pass a defined value */
	CLIENT *clnt = NULL;
	numbest_res *result_1;
	struct senddata args;
	char **wordlist;
	int splitn;
	int i;

	splitn = split(arg, " ", &wordlist);

	/*
	 * Nothing to complete. A negative count is rejected as well: it would
	 * make wordlist[splitn-1] index before the start of the list.
	 * NOTE(review): whether split() allocates a list that must be freed
	 * when it returns 0 is not visible here - confirm.
	 */
	if (splitn <= 0)
		return;

	args.word = wordlist[splitn-1];
	convert_to_lowercase(args.word);
	args.user = user;
	args.collection = (collection ? collection : "");

#ifndef	DEBUG
	clnt = clnt_create (host, SUGGEST, SUGGESTVERS, "udp");
	if (clnt == NULL) {
		clnt_pcreateerror (host);
		exit (1);
	}
#endif	/* DEBUG */

	/* XXX: set lower timeout */
	result_1 = get_best_results_2(&args, clnt);

	/* A failed call (NULL result) is deliberately silent. */
	if (result_1 != NULL && result_1->_errno == 0) {
		if (args.word[0] != '\0') {
			namelist nl;

			for (nl = result_1->numbest_res_u.list; nl != NULL; nl = nl->next) {
				for (i = 0; i < splitn-1; i++)
					printf("%s ", wordlist[i]);
				printf("%s\n", nl->name);
			}
		} else {
			for (i = 0; i < splitn-1; i++)
				printf("%s ", wordlist[i]);
			puts("");
		}
	}

	FreeSplitList(wordlist);

#ifndef	DEBUG
	clnt_destroy (clnt);
#endif	 /* DEBUG */
}
Пример #6
0
/* Text printed for a word's markup flag, or NULL when the flag has no label. */
static const char *fn_flag_label(enum parsed_unit_flag puf)
{
    switch (puf)
    {
    case puf_none:  return " none";
    case puf_title: return " +title";
    case puf_h1:    return " +h1";
    case puf_h2:    return " +h2";
    case puf_h3:    return " +h3";
    case puf_h4:    return " +h4";
    case puf_h5:    return " +h5";
    case puf_h6:    return " +h6";
    }
    return NULL;
}

/* Tag printed for every parsed unit other than pu_word. */
static const char *fn_unit_tag(enum parsed_unit pu)
{
    switch (pu)
    {
    case pu_linkword:         return "[linkword]";
    case pu_link:             return "[link]";
    case pu_baselink:         return "[baselink]";
    case pu_meta_keywords:    return "[meta keywords]";
    case pu_meta_description: return "[meta description]";
    case pu_meta_author:      return "[meta author]";
    default:                  return "[...]";
    }
}

/*
 * Parser callback that prints one line on stdout describing a parsed unit.
 *
 * For pu_word the markup flag label is printed, the word is lowercased in
 * place, and the word is printed together with its crc32boitho() hash and
 * position. Every other unit type only prints a tag naming the type.
 * DEBUG builds prefix the line with the raw word, position and unit type.
 * pagewords is not used.
 */
void fn( char* word, int pos, enum parsed_unit pu, enum parsed_unit_flag puf, void* pagewords )
{
#ifdef DEBUG
    printf("\t%s (%i) ", word, pos);
    printf("type %i ",pu);
#endif

    if (pu == pu_word)
    {
        const char *flag = fn_flag_label(puf);

        if (flag != NULL)
        {
            fputs(flag, stdout);
        }

        convert_to_lowercase(word);

        printf("[word] is now %s (crc32 %u, pos %i)", word, crc32boitho(word), pos);
    }
    else
    {
        fputs(fn_unit_tag(pu), stdout);
    }

    printf("\n");
}
Пример #7
0
/*
 * anchorread: write the "Anchor" reverse-index files for one lot from the
 * lot's anchors file.
 *
 * Usage: ./anchorread lotnr subname
 *
 * Each anchor text returned by anchorGetNext() is lowercased and split on
 * spaces. For every token that is not empty, not "www" and not a stop word,
 * one record is appended to the bucket file chosen by WordID modulo
 * NrOfDataDirectorys:
 *
 *   DocID (unsigned int), lang (unsigned char, always 0),
 *   WordID (unsigned long), nr (unsigned long, always 1),
 *   hit position (unsigned short).
 *
 * The hit position is a per-document running counter that starts at 2000,
 * is bumped once per anchor text, once per "www" token and once per written
 * word, and is clamped to 65535 when stored.
 */
int main (int argc, char *argv[]) {

	int lotNr;
	int i;
	unsigned int DocID;
	char text[50];
	unsigned int radress;
	unsigned int rsize;
	char **Data;
	int TokCount;
	unsigned short hits;
	unsigned long WordID;
	/* unsigned long to match the sizeof(unsigned long) bytes written for it */
	unsigned long nr;
	int bucket;
	FILE *revindexFilesHa[NrOfDataDirectorys];
	unsigned char lang;
	FILE *FH;
	unsigned int DocIDPlace;
	char *subname;
	int *nrOfLinkWordsToDocID;

	/* check that we were told which lot to use */
	if (argc < 3) {
		printf("Usage: ./anchorread lotnr subname\n\n");
		exit(1);
	}

	lotNr = atoi(argv[1]);
	subname = argv[2];

	nrOfLinkWordsToDocID = malloc(sizeof(int) * NrofDocIDsInLot);
	if (nrOfLinkWordsToDocID == NULL) {
		perror("malloc nrOfLinkWordsToDocID");
		exit(1);
	}

	for (i=0;i<NrofDocIDsInLot;i++) {
		/* start at 2000 so these are easy to tell apart visually from other hits */
		nrOfLinkWordsToDocID[i] = 2000;
	}

	/* only probes that the lot has an anchors file; the handle itself is not used */
	if ( (FH = lotOpenFileNoCasheByLotNr(lotNr,"anchors","rb", 's',subname)) == NULL) {
		printf("lot dont have a anchors file\n");
		exit(1);
	}
	fclose(FH);

	revindexFilesOpenLocal(revindexFilesHa,lotNr,"Anchor","wb",subname);

	while (anchorGetNext(lotNr,&DocID,text,sizeof(text),&radress,&rsize,subname) ) {

		DocIDPlace = (DocID - LotDocIDOfset(rLotForDOCid(DocID)));

		/* the counter array has NrofDocIDsInLot entries; never index past it */
		if (DocIDPlace >= (unsigned int)NrofDocIDsInLot) {
			printf("DocID %u is outside the lot (place %u), skipping\n",DocID,DocIDPlace);
			continue;
		}

		++nrOfLinkWordsToDocID[DocIDPlace];

		convert_to_lowercase((unsigned char *)text);

		#ifdef DEBUG
		if (DocID == 4999999) {
			printf("DocID %i, text: \"%s\", DocIDPlace %i, nrOfLinkWordsToDocID %i\n",DocID,text,DocIDPlace,nrOfLinkWordsToDocID[DocIDPlace]);
		}
		#endif

		if ((TokCount = split(text, " ", &Data)) == -1) {
			printf("canæt splitt \"%s\"\n",text);
			/* Data is not known to be valid after a failed split */
			continue;
		}

		for (i = 0; Data[i] != NULL; i++) {

			if (Data[i][0] == '\0') {
				#ifdef DEBUG
				if (DocID == 4999999) {

					printf("emty data element\n");
				}
				#endif
				continue;
			}

			if (strcmp(Data[i],"www") == 0) {
				#ifdef DEBUG
				if (DocID == 4999999) {
					printf("www\n");
				}
				#endif
				/* "www" is not indexed but still takes up a position */
				++nrOfLinkWordsToDocID[DocIDPlace];
				continue;
			}

			if (isStoppWord(Data[i])) {
				#ifdef DEBUG
				if (DocID == 4999999) {
					printf("stopword \"%s\"\n",Data[i]);
				}
				#endif
				/* stop words are skipped without taking up a position */
				continue;
			}

			#ifdef DEBUG
			if (DocID == 4999999) {
				printf("\t\"%s\" %i\n",Data[i],nrOfLinkWordsToDocID[DocIDPlace]);
			}
			#endif

			WordID = crc32boitho(Data[i]);

			if (WordID == 0) {
				printf("got 0 as word id for \"%s\". Somthing may be wrong.\n",Data[i]);
			}

			bucket = WordID % NrOfDataDirectorys;

			/* the position is stored as an unsigned short, so clamp it */
			if (nrOfLinkWordsToDocID[DocIDPlace] > 65535) {
				hits = 65535;
			}
			else {
				hits = nrOfLinkWordsToDocID[DocIDPlace];
			}

			#ifdef DEBUG
			if (DocID == 4999999) {
				printf("\thits %i: \"%s\": %hu, bucket %i\n",i,Data[i],hits,bucket);
			}
			#endif

			if (fwrite(&DocID,sizeof(unsigned int),1,revindexFilesHa[bucket]) != 1) {
				perror("fwrite DocID");
			}

			/*
			 * runarb, 13 May 2007: the language is now stored as a number.
			 * It should be taken from the DocumentIndex if available, but
			 * it is not stored there; see how IndexRes handles it.
			 */
			lang = 0;
			nr = 1;
			if (fwrite(&lang,sizeof(unsigned char),1,revindexFilesHa[bucket]) != 1) {
				perror("fwrite lang");
			}

			if (fwrite(&WordID,sizeof(unsigned long),1,revindexFilesHa[bucket]) != 1) {
				perror("fwrite WordID");
			}

			if (fwrite(&nr,sizeof(unsigned long),1,revindexFilesHa[bucket]) != 1) {
				perror("fwrite nr");
			}

			if (fwrite(&hits,sizeof(unsigned short),1,revindexFilesHa[bucket]) != 1) {
				perror("fwrite hits");
			}

			++nrOfLinkWordsToDocID[DocIDPlace];
		}
		FreeSplitList(Data);

		#ifdef DEBUG
		if (DocID == 4999999) {
			printf("\n");
		}
		#endif
	}

	free(nrOfLinkWordsToDocID);

	return 0;
}
Пример #8
0
/*
 * Record one word in a page's word list.
 *
 * The word is lowercased in place (the caller's buffer is modified), hashed
 * with crc32boitho() and stored in the next free slot. Its position is the
 * running nextPosition plus an offset selected by the markup flag:
 * 1000 for puf_none, 100 for puf_title, 500 for puf_h1..puf_h6, and 0 for
 * any other flag (which also prints " no catsh"). nextPosition and nr are
 * advanced afterwards. Once the list is full the word is dropped (reported
 * only in DEBUG builds).
 */
void wordsAdd(struct pagewordsFormatPartFormat *wordsPart, char word[],enum parsed_unit_flag puf) {

	int wordTypeadd = 0;

	/*
	 * ">=" rather than ">": with ">" the slot words[maxWordForPage] was
	 * still written. NOTE(review): assumes the words array holds
	 * maxWordForPage entries - confirm against the struct declaration.
	 */
	if (wordsPart->nr >= maxWordForPage) {
		#ifdef DEBUG
		printf("mor then maxWordForPage words\n");
		#endif
		return;
	}

	switch (puf)
	{
		case puf_none:
			wordTypeadd = 1000;
			break;
		case puf_title:
			wordTypeadd = 100;
			break;
		case puf_h1:
		case puf_h2:
		case puf_h3:
		case puf_h4:
		case puf_h5:
		case puf_h6:
			wordTypeadd = 500;
			break;
		default:
			printf(" no catsh\n");
			break;
	}

	/* convert to lowercase */
	convert_to_lowercase((unsigned char *)word);

	#ifdef PRESERVE_WORDS
	/* NOTE(review): unbounded copy; the size of .word is not visible here - confirm it fits */
	strcpy(wordsPart->words[wordsPart->nr].word,word);
	#endif

	wordsPart->words[wordsPart->nr].WordID = crc32boitho(word);

	#ifdef DEBUG
	printf(" (crc %s -> %u) ",word,wordsPart->words[wordsPart->nr].WordID);
	#endif

	wordsPart->words[wordsPart->nr].position = (wordsPart->nextPosition + wordTypeadd);
	/* keep the insertion index so the words before and after can be found; position itself is encoded */
	wordsPart->words[wordsPart->nr].unsortetIndexPosition = wordsPart->nr;

	++wordsPart->nextPosition;

	++wordsPart->nr;
}