Ejemplo n.º 1
0
/* -------------------------------- mgsort ------------------------------- */
/*
 * mgsort: recursively sorts the elements of data at positions i..k
 * (inclusive) by sorting each half and then calling merge() on them.
 *
 *   data    - array being sorted; handed to merge() untouched.
 *   size    - forwarded unchanged to the recursive calls; not otherwise
 *             used here.
 *   esize   - size of one element, forwarded to merge().
 *   i, k    - first and last position of the range to sort.
 *   compare - comparison callback forwarded to merge().
 *
 * Returns 0 on success, or -1 as soon as a recursive call or merge()
 * reports a negative result.
 */
int mgsort(void *data, int size, int esize, int i, int k, int (*compare)
   (const void *key1, const void *key2)) {

    int                j;

    /* Stop the recursion when no more divisions can be made */
    if (i < k) {

       /* Determine where to divide the elements.  The midpoint is computed
          as an offset from i rather than as (i + k - 1) / 2 so that the
          intermediate sum cannot overflow int when both positions are
          large.  For non-negative positions the result is the same. */
       j = i + (k - i - 1) / 2;

       /* Recursively sort the two divisions */
       if (mgsort(data, size, esize, i, j, compare) < 0)
          return -1;

       if (mgsort(data, size, esize, j + 1, k, compare) < 0)
          return -1;

       /* Merge the two sorted divisions into a single sorted set */
       if (merge(data, esize, i, j, k, compare) < 0)
          return -1;

    }

    return 0;

}
Ejemplo n.º 2
0
/*
 * Checks that mgsort() reports success and leaves a reverse-sorted array of
 * ints in non-decreasing order.
 */
TEST(Sort_Merge, mgsort)
{
	int size = 5;
	int i;
	int result;
	int *data = (int *)malloc(sizeof(int) * size);

	/* Stop here rather than dereference NULL if the allocation failed. */
	ASSERT_TRUE(data != NULL);

	/* The input below is deterministic; this test never calls rand(). */
	srand(time(NULL));

	/* Fill with size, size - 1, ..., 1 so the input starts fully reversed. */
	for (i = 0; i < size; ++i)
		data[i] = size - i;

#ifdef DEBUG
	print_array(data, size);
#endif
	result = mgsort(data, size, sizeof(int), 0, size - 1, compare);
	EXPECT_EQ(0, result);

	/* Every element must be <= its successor. */
	for (i = 0; i < size - 1; ++i)
		EXPECT_TRUE(data[i] <= data[i + 1]);

#ifdef DEBUG
	print_array(data, size);
#endif

	/* Release the buffer so the test does not leak. */
	free(data);
}
Ejemplo n.º 3
0
/*
 * Merge sort over positions i..k (inclusive) of data.
 *
 * The range is split in two, each half is sorted recursively, and the halves
 * are combined by merge().  size is only passed along to the recursive
 * calls; esize and compare are forwarded to merge().
 *
 * Returns 0 on success and -1 if any recursive call or merge() returns a
 * negative value.
 */
int mgsort(void *data, int size, int esize, int i, int k, int (*compare)(const void *key1, const void *key2))
{
    int j;

    /* A range with fewer than two positions is already sorted. */
    if (i < k) {
        /*
         * Split point.  Computed relative to i instead of as
         * (i + k - 1) / 2 so the intermediate sum cannot overflow int for
         * large positions; the value is the same for non-negative positions.
         */
        j = i + (k - i - 1) / 2;

        if (mgsort(data, size, esize, i, j, compare) < 0)
            return -1;

        if (mgsort(data, size, esize, j + 1, k, compare) < 0)
            return -1;

        /* Combine the two sorted halves. */
        if (merge(data, esize, i, j, k, compare) < 0)
            return -1;
    }
    return 0;
}
Ejemplo n.º 4
0
/*
 * Indekser: converts a reverse-index file into an inverted-index file.
 *
 * Records are read from revindexPath into revIndexArray, sorted with
 * mgsort() using compare_elements, and written to iindexPath.  For each run
 * of records sharing a WordID the output holds the WordID and the number of
 * records in the run, followed by every record's DocID, nrOfHits and hits.
 *
 *   revindexPath  - input file, opened "rb".  If it cannot be opened the
 *                   error is reported with perror() and nothing is written.
 *   iindexPath    - output file, opened "wb".  The process exits with
 *                   status 1 if it cannot be opened.
 *   revIndexArray - caller-supplied work array; at most revIndexArraySize
 *                   entries are filled.
 *
 * Returns 0 after reporting a record whose nrOfHits exceeds
 * MaxsHitsInIndex.
 * NOTE(review): no return statement is reached on any other path, so callers
 * must not rely on the return value there.
 */
int Indekser(char revindexPath[],char iindexPath[],struct revIndexArrayFomat revIndexArray[]) {

	int i,y;
	FILE *REVINDEXFH;
	char recordSeperator[4];
	int count;
	unsigned int DocID;
	unsigned int lastWordID;
	char lang[4];
	unsigned int nrofDocIDsForWordID[revIndexArraySize];
	int forekomstnr;


	if ((REVINDEXFH = fopen(revindexPath,"rb")) == NULL) {
		perror(revindexPath);
	}
	else {
		count = 0;
		while (!feof(REVINDEXFH)) {

			fread(&DocID,sizeof(DocID),1,REVINDEXFH);
			fread(lang,sizeof(lang) -1,1,REVINDEXFH);
			lang[3] = '\0';

			/*
			A document may consist of DocID, lang and the record separator
			with no hits in between.  To detect that, the next three bytes
			are read and, if they are not the separator, the stream is
			rewound by the same amount.
			*/

			//keep going while there is room in the array; normally the loop
			//ends with a break when the record separator is reached
			while (count < revIndexArraySize) {

				//the next bytes are either a record separator or a hit record.
				//A failed read (end of file or truncated input) also ends the
				//document, so stale buffer contents are never interpreted.
				if (fread(recordSeperator,sizeof(recordSeperator) -1,1,REVINDEXFH) != 1) {
					break;
				}

				if ((recordSeperator[0] == '*') && (recordSeperator[1] == '*') && (recordSeperator[2] == '\n')) {
					//record separator: this DocID is finished
					break;
				}
				else {
					//a new record

					revIndexArray[count].DocID = DocID;
					memcpy(revIndexArray[count].lang,lang,sizeof(lang) -1);

					//three bytes were consumed while looking for the record
					//separator; step back so they are read as part of WordID
					fseek(REVINDEXFH,-3,SEEK_CUR);

					fread(&revIndexArray[count].WordID,sizeof(revIndexArray[count].WordID),1,REVINDEXFH);
					fread(&revIndexArray[count].nrOfHits,sizeof(revIndexArray[count].nrOfHits),1,REVINDEXFH);

					if (revIndexArray[count].nrOfHits > MaxsHitsInIndex) {
						printf("nrOfHits lager then MaxsHitsInIndex. Nr was %i for %s\n",revIndexArray[count].nrOfHits,revindexPath);
						//close the input before bailing out so the handle is not leaked
						fclose(REVINDEXFH);
						return 0;
					}

					//read the number of hits the record says it has
					fread(&revIndexArray[count].hits,revIndexArray[count].nrOfHits * sizeof(short),1,REVINDEXFH);

					++count;
				}
			}

			//stop when the array is full
			if (count == revIndexArraySize) {
				printf("revIndexArraySize hit\n");
				break;
			}

		}

		//NOTE(review): this discards the last record that was read; confirm
		//that is intended.  The guard keeps count from becoming negative
		//when no record was read at all.
		if (count > 0) {
			--count;
		}

		fclose(REVINDEXFH);

		if ((REVINDEXFH = fopen(iindexPath,"wb")) == NULL) {
			perror(iindexPath);
			exit(1);
		}

		//sort on WordID; the sort has to be stable
		mgsort(revIndexArray, count , sizeof(struct revIndexArrayFomat),compare_elements);

		//count the number of DocIDs per WordID.  The first record always
		//opens a new group, even when its WordID equals the initial value of
		//lastWordID; otherwise index forekomstnr - 1 would be -1.
		lastWordID = 0;
		forekomstnr = 0;
		for(i=0;i<count;i++) {
			if ((i == 0) || (lastWordID != revIndexArray[i].WordID)) {
				nrofDocIDsForWordID[forekomstnr] = 1;
				++forekomstnr;
			}
			else {
				++nrofDocIDsForWordID[forekomstnr -1];
			}
			lastWordID = revIndexArray[i].WordID;
		}

		lastWordID = 0;
		forekomstnr = 0;
		for(i=0;i<count;i++) {

			//same grouping rule as in the counting loop above, so that
			//forekomstnr indexes the matching counter
			if ((i == 0) || (lastWordID != revIndexArray[i].WordID)) {

				fwrite(&revIndexArray[i].WordID,sizeof(revIndexArray[i].WordID),1,REVINDEXFH);
				fwrite(&nrofDocIDsForWordID[forekomstnr],sizeof(int),1,REVINDEXFH);

				++forekomstnr;
			}
			lastWordID = revIndexArray[i].WordID;

			//write the DocID and how many hits it has
			fwrite(&revIndexArray[i].DocID,sizeof(revIndexArray[i].DocID),1,REVINDEXFH);
			fwrite(&revIndexArray[i].nrOfHits,sizeof(revIndexArray[i].nrOfHits),1,REVINDEXFH);

			//write all the hits
			for(y=0;y<revIndexArray[i].nrOfHits;y++) {
				fwrite(&revIndexArray[i].hits[y],sizeof(short),1,REVINDEXFH);
			}
		}

		fclose(REVINDEXFH);
	} //else branch of the input-file check
}
Ejemplo n.º 5
0
/*
 * Demonstration driver for the sorting routines.
 *
 * Loads fixed test data, then runs issort, qksort, mgsort, ctsort and rxsort
 * in turn, printing each array before and after it is sorted.  Returns 1 as
 * soon as one of the sorts reports a non-zero status, otherwise 0.
 */
int main(int argc, char **argv) {

    /* Source tables for the data that is sorted below. */
    static const int   unsorted[10] = {0, 5, 1, 7, 3, 2, 8, 9, 4, 6};
    static const int   wide[10] = {
        11111323, 99283743, 98298383, 99987444, 43985209,
        99911110, 11111324, 39842329, 97211029, 99272928
    };
    static const char *const words[10] = {
        "ebcde", "ghidj", "ghiea", "abaae", "abaaa",
        "abcde", "abbcd", "ddaab", "faeja", "aaaaa"
    };

    int                iarray[10];
    int                marray[10];
    int                qarray[10];
    int                carray[10];
    int                rarray[10];
    char               sarray[10][STRSIZ];
    int                size = 10;
    int                n;

    /* Load the arrays with data to sort. */
    memcpy(iarray, unsorted, sizeof(unsorted));
    memcpy(marray, iarray, size * sizeof(int));
    memcpy(qarray, iarray, size * sizeof(int));
    memcpy(carray, iarray, size * sizeof(int));
    memcpy(rarray, wide, sizeof(wide));

    for (n = 0; n < size; n++)
        strcpy(sarray[n], words[n]);

    /* Insertion sort on the integer data. */
    fputs("Before issort\n", stdout);
    print_idata(iarray, size);

    if (issort(iarray, size, sizeof(int), compare_int))
        return 1;

    fputs("After issort\n", stdout);
    print_idata(iarray, size);

    /* Quicksort on a copy of the same integers. */
    fputs("Before qksort\n", stdout);
    print_idata(qarray, size);

    if (qksort(qarray, size, sizeof(int), 0, size - 1, compare_int))
        return 1;

    fputs("After qksort\n", stdout);
    print_idata(qarray, size);

    /* Merge sort on the string data. */
    fputs("Before mgsort\n", stdout);
    print_sdata(sarray, size);

    if (mgsort(sarray, size, STRSIZ, 0, size - 1, compare_str))
        return 1;

    fputs("After mgsort\n", stdout);
    print_sdata(sarray, size);

    /* Counting sort on another copy of the integers. */
    fputs("Before ctsort\n", stdout);
    print_idata(carray, size);

    if (ctsort(carray, size, 10))
        return 1;

    fputs("After ctsort\n", stdout);
    print_idata(carray, size);

    /* Radix sort on the eight-digit values. */
    fputs("Before rxsort\n", stdout);
    print_idata(rarray, size);

    if (rxsort(rarray, size, 8, 10))
        return 1;

    fputs("After rxsort\n", stdout);
    print_idata(rarray, size);

    return 0;

}
Ejemplo n.º 6
0
/*
 * sortLinkdb: reads every linkdb_block record from the file named by
 * argv[1], sorts the records with mgsort() using compare_elements, and
 * writes them to the file named by argv[2].
 *
 * The output file must not exist beforehand.  Exits with status 0 after
 * printing the usage text when fewer than two file names are given, and with
 * status 1 on any I/O or allocation failure.  Returns 0 on success.
 */
int main (int argc, char *argv[]) {

    FILE *OLDLINKDBFILE;
    FILE *NEWLINKDBFILE;


    struct stat inode;      // filled in by fstat() with the input file's metadata

    struct linkdb_block linkdbPost;   // only used for its size
    struct linkdb_block *linkdbArray;
    int i,y;
    int maxRecords;

    if (argc < 3) {
        printf("Dette programet tar inn en linkdb fil og sorterer den.\n\n\tsortLinkdb old new\n\n");
        exit(0);
    }

    printf("sort %s -> %s\n",argv[1],argv[2]);

    if ((OLDLINKDBFILE = fopen(argv[1],"rb")) == NULL) {
        printf("Cant read linkdb ");
        perror(argv[1]);
        exit(1);
    }


    // make sure an existing file is not overwritten
    if ((NEWLINKDBFILE = fopen(argv[2],"rb")) != NULL) {
        printf("New file exsist. It shud not!\n");
        exit(1);
    }
    if ((NEWLINKDBFILE = fopen(argv[2],"wb")) == NULL) {
        printf("Cant read linkdb ");
        perror(argv[2]);
        exit(1);
    }

    // the input file's size determines how large a buffer is needed
    if (fstat(fileno(OLDLINKDBFILE),&inode) != 0) {
        perror("fstat");
        exit(1);
    }
    if ((linkdbArray = malloc(inode.st_size)) == NULL) {
        perror("malloc");
        exit(1);
    }

    // number of whole records that fit in the buffer
    maxRecords = (int)(inode.st_size / (off_t)sizeof(linkdbPost));

    // Count a record only when fread() actually delivered it.  Testing
    // feof() before reading would count one record more than was read,
    // because end-of-file is only flagged after a read has failed.
    i = 0;
    while ((i < maxRecords) && (fread(&linkdbArray[i],sizeof(linkdbPost),1,OLDLINKDBFILE) == 1)) {
        ++i;
    }

    mgsort(linkdbArray, i , sizeof(struct linkdb_block), compare_elements);

    for(y=0; y<i; y++) {
        if (fwrite(&linkdbArray[y],sizeof(linkdbPost),1,NEWLINKDBFILE) != 1) {
            perror(argv[2]);
            exit(1);
        }
    }

    fclose(OLDLINKDBFILE);

    // buffered write errors may only surface when the stream is closed
    if (fclose(NEWLINKDBFILE) != 0) {
        perror(argv[2]);
        exit(1);
    }

    free(linkdbArray);

    return 0;
}
Ejemplo n.º 7
0
/*
 * Indekser: builds an inverted-index file from one part of a reverse index.
 *
 * The input is opened with revindexFilesOpenLocalPart(lotNr, type, "rb",
 * subname, part).  Records are read into revIndexArray, sorted with mgsort()
 * using compare_elements, and written to iindexPath.  For each run of
 * records sharing a WordID the output holds the WordID and a DocID count,
 * followed by DocID, langnr, nrOfHits and the hits of every record that is
 * not tombstoned.
 *
 *   iindexPath         - output file, opened "wb"; the process exits with
 *                        status 1 if it cannot be opened.
 *   revIndexArray      - caller-supplied work array; at most
 *                        revIndexArraySize entries are filled.
 *   lotNr, type, part,
 *   subname            - forwarded to revindexFilesOpenLocalPart().
 *   optAllowDuplicates - when 0, a record with the same WordID and DocID as
 *                        its predecessor causes the predecessor to be
 *                        tombstoned, so only the later record is written.
 *
 * Returns 0 after reporting a record whose nrOfHits exceeds
 * MaxsHitsInIndex.
 * NOTE(review): no return statement is reached on any other path, so callers
 * must not rely on the return value there.
 */
static int Indekser(char iindexPath[],struct revIndexArrayFomat revIndexArray[],int lotNr,char type[],int part,char subname[], int optAllowDuplicates) {

	int i,y;
	FILE *REVINDEXFH;
	int count;
	unsigned int lastWordID;
	unsigned int lastDocID;
	unsigned int nrofDocIDsForWordID[revIndexArraySize];
	int forekomstnr;
	int newWord;

	if ((REVINDEXFH = revindexFilesOpenLocalPart(lotNr,type,"rb",subname,part)) == NULL) {
		perror("revindexFilesOpenLocalPart");
	}
	else {
		count = 0;
		while ((!feof(REVINDEXFH)) && (count < revIndexArraySize)) {

			if (fread(&revIndexArray[count].DocID,sizeof(revIndexArray[count].DocID),1,REVINDEXFH) != 1) {
				#ifdef DEBUG
				//this is where end of file is reached; that is perfectly normal
				printf("can't read any more data\n");
				perror("revindex");
				#endif
				break;
			}
			//v3
			fread(&revIndexArray[count].langnr,sizeof(char),1,REVINDEXFH);

			fread(&revIndexArray[count].WordID,sizeof(revIndexArray[count].WordID),1,REVINDEXFH);
			fread(&revIndexArray[count].nrOfHits,sizeof(revIndexArray[count].nrOfHits),1,REVINDEXFH);

			#ifdef DEBUG
				printf("%i\n",count);
				printf("\tDocID %u lang %i\n",revIndexArray[count].DocID,(int)revIndexArray[count].langnr);
				printf("\tread WordID: %u, nrOfHits %u\n",revIndexArray[count].WordID,revIndexArray[count].nrOfHits);
			#endif

			if (revIndexArray[count].nrOfHits > MaxsHitsInIndex) {
				printf("nrOfHits lager then MaxsHitsInIndex. Nr was %i\n",revIndexArray[count].nrOfHits);
				//close the input before bailing out so the handle is not leaked
				fclose(REVINDEXFH);
				return 0;
			}

			//read the number of hits the record says it has
			fread(&revIndexArray[count].hits,revIndexArray[count].nrOfHits * sizeof(short),1,REVINDEXFH);

			revIndexArray[count].tombstone = 0;

			//debug: hits
			#ifdef DEBUG
			printf("\tread hits: ");
			for (i=0;i<revIndexArray[count].nrOfHits;i++) {
				printf("%hu, ",revIndexArray[count].hits[i]);
			}
			printf("\n");
			#endif
			++count;

		}

		#ifdef DEBUG
		printf("Documents in index: %i\n",count);
		#endif

		fclose(REVINDEXFH);

		//sort on WordID; the sort has to be stable
		mgsort(revIndexArray, count , sizeof(struct revIndexArrayFomat),compare_elements);

		if ((REVINDEXFH = fopen(iindexPath,"wb")) == NULL) {
			perror(iindexPath);
			exit(1);
		}

		//count the number of DocIDs per WordID
		lastWordID = 0;
		forekomstnr = -1;
		lastDocID = 0;
		for(i=0;i<count;i++) {
			#ifdef DEBUG
			printf("WordID: %u, DocID %u\n",revIndexArray[i].WordID,revIndexArray[i].DocID);
			#endif

			//The first record always opens a new group, even when its WordID
			//equals the initial value of lastWordID; otherwise forekomstnr
			//would still be -1 when it is used as an index below.
			newWord = (i == 0) || (lastWordID != revIndexArray[i].WordID);

			if (newWord) {
				++forekomstnr;
				nrofDocIDsForWordID[forekomstnr] = 0;
				lastDocID = 0;
			}

			//A record can only duplicate its predecessor inside the same
			//group.  Without the newWord test a DocID equal to the reset
			//value of lastDocID would tombstone index i - 1, which is the
			//previous word's last record or, for i == 0, index -1.
			if ((optAllowDuplicates == 0) && (!newWord) && (revIndexArray[i].DocID == lastDocID)) {
				#ifdef DEBUG
				printf("DocID %u is same as last\n",revIndexArray[i].DocID);
				#endif

				revIndexArray[i -1].tombstone = 1;
			}
			else {
				++nrofDocIDsForWordID[forekomstnr];
			}

			lastWordID = revIndexArray[i].WordID;
			lastDocID = revIndexArray[i].DocID;
		}

		lastWordID = 0;
		forekomstnr = 0;
		for(i=0;i<count;i++) {

			#ifdef DEBUG
			printf("looking at  WordID %u, nr %u\n",revIndexArray[i].WordID,nrofDocIDsForWordID[forekomstnr]);
			#endif

			//same grouping rule as in the counting loop above, so that
			//forekomstnr indexes the matching counter
			if ((i == 0) || (lastWordID != revIndexArray[i].WordID)) {

				#ifdef DEBUG
					printf("write WordID %u, nr %u\n",revIndexArray[i].WordID,nrofDocIDsForWordID[forekomstnr]);
				#endif

				fwrite(&revIndexArray[i].WordID,sizeof(revIndexArray[i].WordID),1,REVINDEXFH);
				fwrite(&nrofDocIDsForWordID[forekomstnr],sizeof(int),1,REVINDEXFH);

				++forekomstnr;
			}
			lastWordID = revIndexArray[i].WordID;

			//skip records that were marked as deleted
			if (revIndexArray[i].tombstone) {
				#ifdef DEBUG
					printf("DocID %u is tombstoned\n",revIndexArray[i].DocID);
				#endif
				continue;
			}
			//write the DocID and how many hits it has
			fwrite(&revIndexArray[i].DocID,sizeof(revIndexArray[i].DocID),1,REVINDEXFH);
			//v3
			fwrite(&revIndexArray[i].langnr,sizeof(char),1,REVINDEXFH);

			fwrite(&revIndexArray[i].nrOfHits,sizeof(revIndexArray[i].nrOfHits),1,REVINDEXFH);

			//write all the hits
			for(y=0;y<revIndexArray[i].nrOfHits;y++) {
				#ifdef DEBUG
					printf("\t\thit %hu\n",revIndexArray[i].hits[y]);
				#endif
				fwrite(&revIndexArray[i].hits[y],sizeof(short),1,REVINDEXFH);
			}
			#ifdef DEBUG
			printf("write: DocID %u, WordID: %u, %u\n",revIndexArray[i].DocID,revIndexArray[i].WordID,revIndexArray[i].nrOfHits);
			#endif
		}

		fclose(REVINDEXFH);
	} //else branch of the input-file check
}