/* -------------------------------- mgsort ------------------------------- */

/*
 * mgsort -- recursively merge sort the elements data[i] .. data[k].
 *
 * data     array to sort in place
 * size     number of elements in data (only passed along to the recursive
 *          calls; not otherwise used here)
 * esize    size of one element in bytes
 * i, k     first and last index (both inclusive) of the range to sort
 * compare  comparison callback handed on to merge()
 *
 * Returns 0 on success, or -1 as soon as a recursive call or merge()
 * reports a negative result.
 */
int mgsort(void *data, int size, int esize, int i, int k,
           int (*compare)(const void *key1, const void *key2))
{
    int j;

    /* Stop the recursion when no more divisions can be made. */
    if (i < k) {
        /*
         * Determine where to divide the elements.  The split point is
         * written as i + (k - i - 1) / 2 instead of (i + k - 1) / 2 so the
         * intermediate sum cannot overflow int for large indices; for
         * 0 <= i < k both expressions give the same value.
         */
        j = i + (k - i - 1) / 2;

        /* Recursively sort the two divisions. */
        if (mgsort(data, size, esize, i, j, compare) < 0)
            return -1;

        if (mgsort(data, size, esize, j + 1, k, compare) < 0)
            return -1;

        /* Merge the two sorted divisions into a single sorted set. */
        if (merge(data, esize, i, j, k, compare) < 0)
            return -1;
    }

    return 0;
}
/*
 * Sorts the five-element array {5, 4, 3, 2, 1} with mgsort and expects a
 * zero return code and a non-decreasing result.
 */
TEST(Sort_Merge, mgsort)
{
    int size = 5;
    int i;
    int result;
    int *data = (int *)malloc(sizeof(int) * size);

    /* NOTE(review): nothing in this test calls rand(); the seeding is kept
     * only because it changes global state other code might rely on. */
    srand(time(NULL));

    /* Report an allocation failure instead of dereferencing NULL. */
    EXPECT_TRUE(data != NULL);
    if (data != NULL) {
        /* Fill in strictly descending order so the sort has work to do. */
        for (i = 0; i < size; ++i)
            data[i] = 5 - i;

#ifdef DEBUG
        print_array(data, size);
#endif

        result = mgsort(data, size, sizeof(int), 0, size - 1, compare);
        EXPECT_EQ(0, result);

        /* Every element must be <= its successor. */
        for (i = 0; i < size - 1; ++i)
            EXPECT_TRUE(data[i] <= data[i + 1]);

#ifdef DEBUG
        print_array(data, size);
#endif

        /* The buffer was previously leaked. */
        free(data);
    }
}
/*
 * Merge sort over the inclusive index range [i, k] of data.
 *
 * size is forwarded unchanged to the recursive calls; esize is the element
 * size in bytes and compare the ordering callback, both forwarded to
 * merge().  Returns 0 on success and -1 when a recursive call or merge()
 * returns a negative value.
 */
int mgsort(void *data, int size, int esize, int i, int k,
           int (*compare)(const void *key1, const void *key2))
{
    int j;

    if (i < k) {
        /*
         * Split point.  Equivalent to (i + k - 1) / 2 for 0 <= i < k, but
         * computed as an offset from i so that i + k cannot overflow int.
         */
        j = i + (k - i - 1) / 2;

        /* Sort the left half, then the right half. */
        if (mgsort(data, size, esize, i, j, compare) < 0)
            return -1;
        if (mgsort(data, size, esize, j + 1, k, compare) < 0)
            return -1;

        /* Combine the two halves. */
        if (merge(data, esize, i, j, k, compare) < 0)
            return -1;
    }

    return 0;
}
int Indekser(char revindexPath[],char iindexPath[],struct revIndexArrayFomat revIndexArray[]) { int i,y; //int mgsort_i,mgsort_k; FILE *REVINDEXFH; unsigned int nrOfHits; unsigned short hit; char recordSeperator[4]; int count; char c; unsigned int DocID; unsigned int lastWordID; char lang[4]; unsigned int nrofDocIDsForWordID[revIndexArraySize]; int forekomstnr; if ((REVINDEXFH = fopen(revindexPath,"rb")) == NULL) { perror(revindexPath); //exit(1); } else { count = 0; while (!feof(REVINDEXFH)) { fread(&DocID,sizeof(DocID),1,REVINDEXFH); fread(lang,sizeof(lang) -1,1,REVINDEXFH); lang[3] = '\0'; //printf("read DocID %u, lang \"%s\"\n",DocID,lang); /* vi kan ha DocID,Lang,recordseperator, uten noen etterfølgende hit. For ådetektere det må vi lese rc så søke ilbake. lite effektift, må */ //så lenge vi ikke går over noen grense. //blir som regel avslutte med break, npr ci nåe record seperator while (count < revIndexArraySize) { //her kan vi enten ha record seperator, eller info om treff fread(recordSeperator,sizeof(recordSeperator) -1,1,REVINDEXFH); if ((recordSeperator[0] == '*') && (recordSeperator[1] == '*') && (recordSeperator[2] == '\n')) { //record seperator. Avslutter denne dokiden break; } else { //nå record revIndexArray[count].DocID = DocID; memcpy(revIndexArray[count].lang,lang,sizeof(lang) -1); //leste jo 3 tegn for å lete etter record seperator. Må nå gå tilbake fseek(REVINDEXFH,-3,SEEK_CUR); fread(&revIndexArray[count].WordID,sizeof(revIndexArray[count].WordID),1,REVINDEXFH); fread(&revIndexArray[count].nrOfHits,sizeof(revIndexArray[count].nrOfHits),1,REVINDEXFH); //printf("\tWordID: %u, %u: ",revIndexArray[count].WordID,revIndexArray[count].nrOfHits); if (revIndexArray[count].nrOfHits > MaxsHitsInIndex) { printf("nrOfHits lager then MaxsHitsInIndex. 
Nr was %i for %s\n",revIndexArray[count].nrOfHits,revindexPath); return 0; } //leser antal hist vi skulle ha fread(&revIndexArray[count].hits,revIndexArray[count].nrOfHits * sizeof(short),1,REVINDEXFH); //debug: hits /* for (i=0;i<revIndexArray[count].nrOfHits;i++) { printf("%hu, ",revIndexArray[count].hits[i]); } printf("\n"); */ ++count; } } //hvis vi når grensen if (count == revIndexArraySize) { printf("revIndexArraySize hit\n"); break; } } --count; fclose(REVINDEXFH); if ((REVINDEXFH = fopen(iindexPath,"wb")) == NULL) { perror(iindexPath); exit(1); } //printf("sort\n"); //sorterer på WordID //qsort(revIndexArray, count , sizeof(struct revIndexArrayFomat), compare_elements); //int mgsort(void *data, int size, int esize, int i, int k, int (*compare) (const void *key1, const void *key2)); //må ha en stabil sorteringsalgoritme //mgsort_i = 0; //mgsort_k = count -1; mgsort(revIndexArray, count , sizeof(struct revIndexArrayFomat),compare_elements); //teller forkomster av DocID's pr WordID lastWordID = 0; forekomstnr = 0; for(i=0;i<count;i++) { if (lastWordID != revIndexArray[i].WordID) { nrofDocIDsForWordID[forekomstnr] = 1; ++forekomstnr; } else { ++nrofDocIDsForWordID[forekomstnr -1]; } lastWordID = revIndexArray[i].WordID; } lastWordID = 0; forekomstnr = 0; for(i=0;i<count;i++) { if (lastWordID != revIndexArray[i].WordID) { fwrite(&revIndexArray[i].WordID,sizeof(revIndexArray[i].WordID),1,REVINDEXFH); fwrite(&nrofDocIDsForWordID[forekomstnr],sizeof(int),1,REVINDEXFH); //printf("WordID %u, nr %u\n",revIndexArray[i].WordID,nrofDocIDsForWordID[forekomstnr]); ++forekomstnr; } lastWordID = revIndexArray[i].WordID; //printf("\tDocID %u, nrOfHits %u\n",revIndexArray[i].DocID,revIndexArray[i].nrOfHits); //skrive DocID og antall hit vi har fwrite(&revIndexArray[i].DocID,sizeof(revIndexArray[i].DocID),1,REVINDEXFH); fwrite(&revIndexArray[i].nrOfHits,sizeof(revIndexArray[i].nrOfHits),1,REVINDEXFH); //skriver alle hittene for(y=0;y<revIndexArray[i].nrOfHits;y++) { 
//printf("\t\thit %hu\n",revIndexArray[i].hits[y]); fwrite(&revIndexArray[i].hits[y],sizeof(short),1,REVINDEXFH); } //printf("DocID %u, WordID: %u, %u\n",revIndexArray[i].DocID,revIndexArray[i].WordID,revIndexArray[i].nrOfHits); } fclose(REVINDEXFH); } //else filsjekk }
int main(int argc, char **argv) { int iarray[10], marray[10], qarray[10], carray[10], rarray[10]; char sarray[10][STRSIZ]; int size = 10; /***************************************************************************** * * * Load the arrays with data to sort. * * * *****************************************************************************/ iarray[0] = 0; iarray[1] = 5; iarray[2] = 1; iarray[3] = 7; iarray[4] = 3; iarray[5] = 2; iarray[6] = 8; iarray[7] = 9; iarray[8] = 4; iarray[9] = 6; memcpy(marray, iarray, size * sizeof(int)); memcpy(qarray, iarray, size * sizeof(int)); memcpy(carray, iarray, size * sizeof(int)); rarray[0] = 11111323; rarray[1] = 99283743; rarray[2] = 98298383; rarray[3] = 99987444; rarray[4] = 43985209; rarray[5] = 99911110; rarray[6] = 11111324; rarray[7] = 39842329; rarray[8] = 97211029; rarray[9] = 99272928; strcpy(sarray[0], "ebcde"); strcpy(sarray[1], "ghidj"); strcpy(sarray[2], "ghiea"); strcpy(sarray[3], "abaae"); strcpy(sarray[4], "abaaa"); strcpy(sarray[5], "abcde"); strcpy(sarray[6], "abbcd"); strcpy(sarray[7], "ddaab"); strcpy(sarray[8], "faeja"); strcpy(sarray[9], "aaaaa"); /***************************************************************************** * * * Perform insertion sort. * * * *****************************************************************************/ fprintf(stdout, "Before issort\n"); print_idata(iarray, size); if (issort(iarray, size, sizeof(int), compare_int) != 0) return 1; fprintf(stdout, "After issort\n"); print_idata(iarray, size); /***************************************************************************** * * * Perform quicksort. 
* * * *****************************************************************************/ fprintf(stdout, "Before qksort\n"); print_idata(qarray, size); if (qksort(qarray, size, sizeof(int), 0, size - 1, compare_int) != 0) return 1; fprintf(stdout, "After qksort\n"); print_idata(qarray, size); /***************************************************************************** * * * Perform merge sort. * * * *****************************************************************************/ fprintf(stdout, "Before mgsort\n"); print_sdata(sarray, size); if (mgsort(sarray, size, STRSIZ, 0, size - 1, compare_str) != 0) return 1; fprintf(stdout, "After mgsort\n"); print_sdata(sarray, size); /***************************************************************************** * * * Perform counting sort. * * * *****************************************************************************/ fprintf(stdout, "Before ctsort\n"); print_idata(carray, size); if (ctsort(carray, size, 10) != 0) return 1; fprintf(stdout, "After ctsort\n"); print_idata(carray, size); /***************************************************************************** * * * Perform radix sort. * * * *****************************************************************************/ fprintf(stdout, "Before rxsort\n"); print_idata(rarray, size); if (rxsort(rarray, size, 8, 10) != 0) return 1; fprintf(stdout, "After rxsort\n"); print_idata(rarray, size); return 0; }
int main (int argc, char *argv[]) { FILE *OLDLINKDBFILE; FILE *NEWLINKDBFILE; struct stat inode; // lager en struktur for fstat å returnere. struct linkdb_block linkdbPost; struct linkdb_block *linkdbArray; int i,y; //printf("block size %i\n",sizeof(struct linkdb_block)); if (argc < 3) { printf("Dette programet tar inn en linkdb fil og sorterer den.\n\n\tsortLinkdb old new\n\n"); exit(0); } printf("sort %s -> %s\n",argv[1],argv[2]); if ((OLDLINKDBFILE = fopen(argv[1],"rb")) == NULL) { printf("Cant read linkdb "); perror(argv[1]); exit(1); } //kontrolerer at vi ikke overskriver en fil if ((NEWLINKDBFILE = fopen(argv[2],"rb")) != NULL) { printf("New file exsist. It shud not!\n"); exit(1); } if ((NEWLINKDBFILE = fopen(argv[2],"wb")) == NULL) { printf("Cant read linkdb "); perror(argv[2]); exit(1); } fstat(fileno(OLDLINKDBFILE),&inode); if ((linkdbArray = malloc(inode.st_size)) == NULL) { perror("malloc"); exit(1); } i =0; while (!feof(OLDLINKDBFILE)) { fread(&linkdbArray[i],sizeof(linkdbPost),1,OLDLINKDBFILE); //printf("%lu -> %lu\n",linkdbPost.DocID_from,linkdbPost.DocID_to); ++i; } //qsort(linkdbArray, i , sizeof(struct linkdb_block), compare_elements); mgsort(linkdbArray, i , sizeof(struct linkdb_block), compare_elements); for(y=0; y<i; y++) { fwrite(&linkdbArray[y],sizeof(linkdbPost),1,NEWLINKDBFILE); } fclose(OLDLINKDBFILE); fclose(NEWLINKDBFILE); }
static int Indekser(char iindexPath[],struct revIndexArrayFomat revIndexArray[],int lotNr,char type[],int part,char subname[], int optAllowDuplicates) { int i,y; int mgsort_i,mgsort_k; FILE *REVINDEXFH; unsigned int nrOfHits; unsigned short hit; char recordSeperator[4]; int count; char c; unsigned int DocID; unsigned int lastWordID; unsigned int lastDocID; //char lang[4]; unsigned int nrofDocIDsForWordID[revIndexArraySize]; int forekomstnr; #ifdef DEBUG //printf("revindexPath \"%s\"\n",revindexPath); #endif //if ((REVINDEXFH = fopen(revindexPath,"rb")) == NULL) { if ((REVINDEXFH = revindexFilesOpenLocalPart(lotNr,type,"rb",subname,part)) == NULL) { perror("revindexFilesOpenLocalPart"); //exit(1); } else { count = 0; while ((!feof(REVINDEXFH)) && (count < revIndexArraySize)) { //så lenge vi ikke går over noen grense. //while (count < revIndexArraySize) { //her kan vi enten ha record seperator, eller info om treff if (fread(&revIndexArray[count].DocID,sizeof(revIndexArray[count].DocID),1,REVINDEXFH) != 1) { #ifdef DEBUG //har kommer vi til eof, det er helt normalt printf("can't read any more data\n"); perror("revindex"); #endif break; } //v3 fread(&revIndexArray[count].langnr,sizeof(char),1,REVINDEXFH); //printf("lang1 %i\n",(int)revIndexArray[count].langnr); fread(&revIndexArray[count].WordID,sizeof(revIndexArray[count].WordID),1,REVINDEXFH); fread(&revIndexArray[count].nrOfHits,sizeof(revIndexArray[count].nrOfHits),1,REVINDEXFH); #ifdef DEBUG printf("%i\n",count); printf("\tDocID %u lang %i\n",revIndexArray[count].DocID,(int)revIndexArray[count].langnr); printf("\tread WordID: %u, nrOfHits %u\n",revIndexArray[count].WordID,revIndexArray[count].nrOfHits); #endif if (revIndexArray[count].nrOfHits > MaxsHitsInIndex) { printf("nrOfHits lager then MaxsHitsInIndex. 
Nr was %i\n",revIndexArray[count].nrOfHits); return 0; } //leser antal hist vi skulle ha fread(&revIndexArray[count].hits,revIndexArray[count].nrOfHits * sizeof(short),1,REVINDEXFH); revIndexArray[count].tombstone = 0; //debug: hits #ifdef DEBUG printf("\tread hits: "); for (i=0;i<revIndexArray[count].nrOfHits;i++) { printf("%hu, ",revIndexArray[count].hits[i]); } printf("\n"); #endif ++count; //} //} ////hvis vi når grensen //if (count == revIndexArraySize) { // printf("revIndexArraySize hit\n"); // break; // //} } #ifdef DEBUG printf("Documents in index: %i\n",count); #endif //runarb: 17 aug 2007: hvorfor har vi med -- her. Ser ut til at vi da mksiter siste dokumentet. haker ut for nå //--count; fclose(REVINDEXFH); //printf("sort\n"); //sorterer på WordID //qsort(revIndexArray, count , sizeof(struct revIndexArrayFomat), compare_elements); //int mgsort(void *data, int size, int esize, int i, int k, int (*compare) (const void *key1, const void *key2)); //må ha en stabil sorteringsalgoritme //mgsort_i = 0; //mgsort_k = count -1; //mgsort(revIndexArray, count , sizeof(struct revIndexArrayFomat),mgsort_i,mgsort_k,compare_elements); mgsort(revIndexArray, count , sizeof(struct revIndexArrayFomat),compare_elements); //int mgsort(void *data, int size, int esize, int (*compare) (const void *key1, const void *key2)); if ((REVINDEXFH = fopen(iindexPath,"wb")) == NULL) { perror(iindexPath); exit(1); } //teller forkomster av DocID's pr WordID lastWordID = 0; forekomstnr = -1; lastDocID = 0; for(i=0;i<count;i++) { #ifdef DEBUG printf("WordID: %u, DocID %u\n",revIndexArray[i].WordID,revIndexArray[i].DocID); #endif if (lastWordID != revIndexArray[i].WordID) { ++forekomstnr; nrofDocIDsForWordID[forekomstnr] = 0; lastDocID = 0; } if ((optAllowDuplicates == 0) && (revIndexArray[i].DocID == lastDocID)) { #ifdef DEBUG printf("DocID %u is same as last\n",revIndexArray[i].DocID); #endif revIndexArray[i -1].tombstone = 1; } else { ++nrofDocIDsForWordID[forekomstnr]; } lastWordID = 
revIndexArray[i].WordID; lastDocID = revIndexArray[i].DocID; } lastWordID = 0; forekomstnr = 0; for(i=0;i<count;i++) { #ifdef DEBUG printf("looking at WordID %u, nr %u\n",revIndexArray[i].WordID,nrofDocIDsForWordID[forekomstnr]); #endif if (lastWordID != revIndexArray[i].WordID) { #ifdef DEBUG printf("write WordID %u, nr %u\n",revIndexArray[i].WordID,nrofDocIDsForWordID[forekomstnr]); #endif fwrite(&revIndexArray[i].WordID,sizeof(revIndexArray[i].WordID),1,REVINDEXFH); fwrite(&nrofDocIDsForWordID[forekomstnr],sizeof(int),1,REVINDEXFH); ++forekomstnr; } lastWordID = revIndexArray[i].WordID; //printf("\tDocID %u, nrOfHits %u\n",revIndexArray[i].DocID,revIndexArray[i].nrOfHits); //sjekker at dette ikke er en slettet DocID if (revIndexArray[i].tombstone) { #ifdef DEBUG printf("DocID %u is tombstoned\n",revIndexArray[i].DocID); #endif continue; } //skrive DocID og antall hit vi har fwrite(&revIndexArray[i].DocID,sizeof(revIndexArray[i].DocID),1,REVINDEXFH); //v3 fwrite(&revIndexArray[i].langnr,sizeof(char),1,REVINDEXFH); fwrite(&revIndexArray[i].nrOfHits,sizeof(revIndexArray[i].nrOfHits),1,REVINDEXFH); //skriver alle hittene for(y=0;y<revIndexArray[i].nrOfHits;y++) { #ifdef DEBUG printf("\t\thit %hu\n",revIndexArray[i].hits[y]); #endif fwrite(&revIndexArray[i].hits[y],sizeof(short),1,REVINDEXFH); } #ifdef DEBUG printf("write: DocID %u, WordID: %u, %u\n",revIndexArray[i].DocID,revIndexArray[i].WordID,revIndexArray[i].nrOfHits); #endif } fclose(REVINDEXFH); } //else filsjekk }