Example #1
/**
 * Insert the record whose contents are pointed to by recPtr into relation relNum.
 *
 * @param   relNum - Relation number
 * @param   recPtr - A pointer to a record-sized byte array whose contents
 *                   will be copied to an empty record slot in the relation.
 * @return  OK or NOTOK
 *
 * @author nithin
 *
 * GLOBAL VARIABLES MODIFIED:
 *      g_Buffer[relNum]
 *      g_CatCache[relNum]
 *
 * ERRORS REPORTED:
 *      NULL_ARGUMENT_RECEIVED
 *      DUPLICATE_TUPLE
 *
 * ALGORITHM:
 *      1. Check whether the record pointed to by recPtr already exists in the relation
 *          (only if the duplicate-check flag is set)
 *      2. Find the first free slot in the relation by linear search
 *      3. Copy the contents into that slot
 *          (byte-by-byte copy, not strcpy, since the record may contain arbitrary bytes)
 *      4. Update the dirty bit and slotmap in Buffer
 *      5. Update the dirty bit, numRecs and numPgs in CatCache
 *
 * IMPLEMENTATION NOTES:
 *      Uses ReadPage()
 *
 *
 */
int InsertRec(const int relNum, char *recPtr) {
    if (recPtr == NULL) {
        return ErrorMsgs(NULL_ARGUMENT_RECEIVED, g_PrintFlag);
    }

    /* Check for duplicates (only when the duplicate-check flag is set) */
    if (g_CheckDuplicateTuples == OK) {
        Rid *fRid, sRid = { 0, 0 };
        char *record;
        while (GetNextRec(relNum, &sRid, &fRid, &record) == OK) {
            if (compareRecords(record, recPtr, g_CatCache[relNum].recLength) == OK) {
                free(fRid);
                return ErrorMsgs(DUPLICATE_TUPLE, g_PrintFlag);
            }
            sRid = *fRid;
            free(fRid);
        }
    }

    Rid startRid = { 1, 0 }, foundRid;
    /* Insert record    */
    getNextFreeSlot(relNum, startRid, &foundRid);
    ReadPage(relNum, foundRid.pid);
    unsigned int recLength = g_CatCache[relNum].recLength;
    unsigned int i, j;
    unsigned int offset = (foundRid.slotnum - 1) * recLength;
    for (i = offset, j = 0; j < recLength; ++i, ++j) {
        g_Buffer[relNum].page.contents[i] = recPtr[j];
    }

    /*  Update dirty bit and mark this slot occupied in the slotmap (slot 1 maps to the most significant bit) */
    g_Buffer[relNum].dirty = TRUE;
    g_Buffer[relNum].page.slotmap = (g_Buffer[relNum].page.slotmap | 1 << (32 - foundRid.slotnum));

    /*  Update numRecs in catCache*/
    g_CatCache[relNum].dirty = TRUE;
    g_CatCache[relNum].numRecs++;
    g_CatCache[relNum].numPgs =
            g_CatCache[relNum].numPgs > foundRid.pid ? g_CatCache[relNum].numPgs : foundRid.pid;

    return OK;
}
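
A minimal caller sketch for InsertRec (hypothetical: the helper name, relation number, and record contents are illustrative assumptions, not part of the original code; stdlib.h/string.h are assumed to be included):

/* Hypothetical usage sketch: relNum is assumed to refer to an already-opened
 * relation whose records are recLength bytes long. */
void insertRecExample(int relNum, unsigned int recLength) {
    char *rec = (char *) malloc(recLength);
    if (rec == NULL) {
        return;
    }
    memset(rec, 0, recLength);   /* fill in the actual field values here */
    if (InsertRec(relNum, rec) != OK) {
        /* NULL_ARGUMENT_RECEIVED or DUPLICATE_TUPLE was reported via ErrorMsgs() */
    }
    free(rec);
}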
Example #2
void MergeJoin (char *infile1, char *infile2, unsigned char field, block_t *buffer, unsigned int nmem_blocks, char *outfile, unsigned int *nres, unsigned int *nios){
    //usable buffer size is nmem_blocks - 2: the last two blocks are reserved, one for reading a block from the big file and one for writing to the output
    int memSize = nmem_blocks - 2;
    //get sizes of files
    int infile1Size = getSize(infile1);
    int infile2Size = getSize(infile2);

    unsigned int noneed1=0, noneed2=0, ios=0;
    *nres = 0;
    *nios = 0;

    FILE *out = fopen(outfile, "ab");

    char outfile1[] = "outfile1.bin";
    char outfile2[] = "outfile2.bin";



    MergeSort(infile1, 1, buffer, nmem_blocks, outfile1, &noneed1, &noneed2, &ios);
    (*nios) += ios;

    MergeSort(infile2, 1, buffer, nmem_blocks, outfile2, &noneed1, &noneed2, &ios);
    (*nios) += ios;

    //if file1 is bigger, swap the files (by swapping the digit at index 7 of "outfileN.bin") and their sizes: from here on we assume file1 is the smaller one
    if(infile1Size > infile2Size){
        char temp0 = outfile1[7];
        outfile1[7] = outfile2[7];
        outfile2[7] = temp0;
        int temp = infile1Size;
        infile1Size = infile2Size;
        infile2Size = temp;
    }
    FILE *input1 = fopen(outfile1, "rb");
    FILE *input2 = fopen(outfile2, "rb");

    block_t *bigFileBlock = buffer + memSize;
    block_t *outputBlock = buffer + memSize + 1;
    (*outputBlock).blockid = 0;
    //offset advanced every time we need the next block from the big file
    int bigFileBlockOffset=0;
    //counts the buffer records that have the same value as a record of bigFileBlock
    int countSameBufferEntries=0;
    //number of big-file blocks left to process; used to terminate the main loop
    int blocks = infile2Size - 1;
    //id of the first block currently held in the buffer
    int firstBlockId = 0;
    int lastBlockId = memSize-1;
    //read the first blocks of the small and the big file so there is something to compare in the first loop iteration
    (*nios) += readBuffer(buffer, input1, 0, memSize);
    (*nios) += readBlock(bigFileBlock, input2, 0);
    recordPos bufferRecPos = getRecordPos(0);
    record_t tempBufferRec;
    int tempBufferBlock=0;
    record_t bigFileBlockRec;
    while(blocks>0){
        /*
        General:
            if(bufferRecPos.block%memSize==firstBlockId%memSize)
                checks whether the read position has wrapped all the way around the
                (circular) buffer and reached its oldest block again.

            firstBlockId%memSize
                gives the in-buffer index of that oldest block, which makes it easy to
                replace that block with another one when necessary via:
                    buffer + firstBlockId%memSize
        */
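        /*
         * Worked example with illustrative numbers (not taken from the original code):
         * if memSize = 4 and firstBlockId = 6, the oldest buffer slot is
         * buffer[6 % 4] = buffer[2]; reading file block lastBlockId + 1 into
         * buffer + firstBlockId % memSize overwrites exactly that slot, after which
         * firstBlockId and lastBlockId are each advanced by one.
         */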

        for(int blockEntry=0; blockEntry<MAX_RECORDS_PER_BLOCK; blockEntry++){

            bigFileBlockRec = (*bigFileBlock).entries[blockEntry];

            if(compareRecords(tempBufferRec, bigFileBlockRec, field)==0){
                //Here we  have to go back to the block and record of tempBufferRec.
                for(int i=0; i<countSameBufferEntries; i++){
                    decr(bufferRecPos);
                }
                int i=0;//i is reused below when the extra (load2) block is read
                int load, load2;
                if(countSameBufferEntries/memSize > memSize){
                    load = memSize;
                    load2=0;
                }else{
                    load = countSameBufferEntries/memSize;
                    load2 = countSameBufferEntries%memSize;
                }
                for( ;i<load; i++){
                    (*nios) += readBlock(buffer + (tempBufferBlock + i) % memSize, input1, tempBufferBlock + i);
                }
                if(load2!=0){//we have to read one more block.
                    (*nios) += readBlock(buffer + (tempBufferBlock + i) % memSize, input1, tempBufferBlock + i);
                }
                //restore firstBlockId and lastBlockId to their previous values
                firstBlockId = tempBufferBlock ;
                lastBlockId = firstBlockId + memSize - 1;
            }

            while(compareRecords(getRecord(buffer, bufferRecPos), bigFileBlockRec, field) < 0){
                //skip over the records in the small file that are smaller than bigFileBlockRec
                incr(bufferRecPos);
                if (bufferRecPos.record == 0) {
                     if(bufferRecPos.block%memSize==firstBlockId%memSize){
                        if (lastBlockId < infile1Size - 1) {
                            (*nios) += readBlock(buffer + firstBlockId%memSize, input1, lastBlockId + 1);
                            firstBlockId += 1;
                            lastBlockId += 1;
                        }else{
                            blocks=0;//no point continuing the merge: all remaining records are greater than the last one in the buffer
                            break;
                        }
                    }
                }
            }

            if(compareRecords(getRecord(buffer, bufferRecPos), bigFileBlockRec, field) > 0){
                    continue;//...
            }


            tempBufferRec = getRecord(buffer, bufferRecPos);
            tempBufferBlock = bufferRecPos.block;
            countSameBufferEntries=0;

            while(compareRecords(getRecord(buffer, bufferRecPos), bigFileBlockRec, field)==0){
                //append the joined pair (big-file record, matching buffer record) to the output block
                (*outputBlock).entries[(*outputBlock).nreserved++] = bigFileBlockRec;
                (*outputBlock).entries[(*outputBlock).nreserved++] = getRecord(buffer, bufferRecPos);
                (*nres)++;
                if ((*outputBlock).nreserved == MAX_RECORDS_PER_BLOCK) {
                    (*nios) += writeBlock(out, outputBlock);
                    emptyBlock(outputBlock);
                    (*outputBlock).blockid += 1;
                }

                countSameBufferEntries++;
                incr(bufferRecPos);

                if(bufferRecPos.record==0){
                    if(bufferRecPos.block%memSize==firstBlockId%memSize){
                        if (lastBlockId < infile1Size - 1) {
                            (*nios) += readBlock(buffer + firstBlockId%memSize, input1, lastBlockId + 1);
                            firstBlockId++;
                            lastBlockId++;
                        }else {//stay on the last buffer record: it is the value to keep comparing against the remaining big-file blocks
                            if (bufferRecPos.block == 0) {
                                bufferRecPos.block = memSize - 1;
                            } else {
                                bufferRecPos.block -= 1;
                            }
                            bufferRecPos.record = MAX_RECORDS_PER_BLOCK - 1;
                            break;
                        }
                    }
                }
                ////printf("fsdfds");
            }


        }
        //all records of bigFileBlock have been processed; read the next one
        bigFileBlockOffset++;
        blocks--;
        (*nios) += readBlock(bigFileBlock, input2, bigFileBlockOffset);
    }
}
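
A hypothetical driver for MergeJoin, sketching how it might be invoked (the file names, buffer size, and join field are illustrative assumptions; block_t and the counters come from the surrounding code, and stdio.h/stdlib.h are assumed):

/* Hypothetical driver: allocate an nmem_blocks-sized buffer and join two
 * relation files on field 1. Names and sizes are illustrative only. */
void mergeJoinExample(void) {
    unsigned int nmem_blocks = 8;    /* assumed buffer size in blocks */
    block_t *buffer = (block_t *) malloc(nmem_blocks * sizeof(block_t));
    unsigned int nres = 0, nios = 0;
    char in1[] = "rel_a.bin", in2[] = "rel_b.bin", outname[] = "join_out.bin";
    MergeJoin(in1, in2, 1, buffer, nmem_blocks, outname, &nres, &nios);
    printf("joined pairs: %u, block I/Os: %u\n", nres, nios);
    free(buffer);
}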
uint mergeElimination(int &input, int &output, block_t *buffer, uint memSize, uint segsToMerge, uint *blocksLeft, uint segmentSize, uint firstSegOffset, unsigned char field, bool lastPass, bool lastMergeOfPass, uint *nunique) {
    uint ios = 0;
    block_t *bufferOut = buffer + memSize;
    uint blocksWritten = 0;
    uint sizeOfLastSeg = 0;
    if (lastMergeOfPass) {
        sizeOfLastSeg = blocksLeft[segsToMerge - 1] + 1;
    }
    // holds the last unique value written to the output
    record_t *lastRecordAdded = NULL;

    recordPtr *nextRecord = (recordPtr*) malloc(segsToMerge * sizeof (recordPtr));
    for (uint i = 0; i < segsToMerge; i++) {
        nextRecord[i].block = i;
        nextRecord[i].record = 0;
    }
    emptyBlock(bufferOut);
    (*bufferOut).blockid = 0;

    uint segsToMergeCopy = segsToMerge;
    while (segsToMergeCopy != 0) {
        uint i;
        for (i = 0; i < segsToMerge; i++) {
            if (buffer[i].valid) {
                break;
            }
        }
        record_t minRec = getRecord(buffer, nextRecord[i]);
        uint minBuffIndex = i;

        for (uint j = i + 1; j < segsToMerge; j++) {
            if (buffer[j].valid && compareRecords(getRecord(buffer, nextRecord[j]), minRec, field) < 0) {
                minRec = getRecord(buffer, nextRecord[j]);
                minBuffIndex = j;
            }
        }

        if (!lastPass) {
            (*bufferOut).entries[(*bufferOut).nreserved++] = minRec;
        } else {
            if (!lastRecordAdded) {
                (*bufferOut).entries[(*bufferOut).nreserved++] = minRec;
                (*nunique) += 1;
                lastRecordAdded = (record_t*) malloc(sizeof (record_t));
                memcpy(lastRecordAdded, &minRec, sizeof (record_t));
            } else {
                if (compareRecords(*lastRecordAdded, minRec, field) != 0) {
                    (*bufferOut).entries[(*bufferOut).nreserved++] = minRec;
                    (*nunique) += 1;
                    memcpy(lastRecordAdded, &minRec, sizeof (record_t));
                }
            }
        }

        if ((*bufferOut).nreserved == MAX_RECORDS_PER_BLOCK) {
            ios += writeBlocks(output, bufferOut, 1);
            (*bufferOut).blockid += 1;
            blocksWritten += 1;
            emptyBlock(bufferOut);
        }

        incr(nextRecord[minBuffIndex]);

        if (nextRecord[minBuffIndex].record == 0) {
            // the cursor wrapped into the next block: stay on this buffer slot and refill it
            nextRecord[minBuffIndex].block -= 1;
            if (blocksLeft[minBuffIndex] > 0) {
                uint blockOffset;
                if (lastMergeOfPass && minBuffIndex == segsToMerge - 1) {
                    blockOffset = firstSegOffset + segmentSize * minBuffIndex + sizeOfLastSeg - blocksLeft[minBuffIndex];
                } else {
                    blockOffset = firstSegOffset + segmentSize * minBuffIndex + segmentSize - blocksLeft[minBuffIndex];
                }
                ios += preadBlocks(input, buffer + minBuffIndex, blockOffset, 1);
                blocksLeft[minBuffIndex] -= 1;
                if (!buffer[minBuffIndex].valid) {
                    segsToMergeCopy -= 1;
                }
            } else {
                buffer[minBuffIndex].valid = false;
                segsToMergeCopy -= 1;
            }
        } else {
            if (!getRecord(buffer, nextRecord[minBuffIndex]).valid) {
                buffer[minBuffIndex].valid = false;
                segsToMergeCopy -= 1;
            }
        }
    }
    free(nextRecord);
    if (lastRecordAdded) {
        free(lastRecordAdded);
    }

    if ((*bufferOut).nreserved != 0) {
        ios += writeBlocks(output, bufferOut, 1);
        (*bufferOut).blockid += 1;
        blocksWritten += 1;
    }

    if (!lastPass && !lastMergeOfPass) {
        for (uint i = 0; i < segmentSize * segsToMerge - blocksWritten; i++) {
            ios += writeBlocks(output, buffer, 1);
        }
    }
    return ios;
}
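
mergeElimination leans on a small record-cursor abstraction (recordPtr, incr, getRecord). A sketch of the semantics the calls above imply, offered as an assumption rather than the project's actual helpers (the sketch names carry a Sketch suffix to make that explicit):

/* Assumed cursor semantics: a recordPtr addresses record `record` of buffer
 * block `block`; incr() advances it and wraps into the next block when the
 * current one is exhausted, which is why the callers above test record == 0
 * right after incrementing. */
typedef struct {
    unsigned int block;
    unsigned int record;
} recordPtrSketch;

static void incrSketch(recordPtrSketch *p) {
    p->record += 1;
    if (p->record == MAX_RECORDS_PER_BLOCK) {  /* block exhausted */
        p->record = 0;
        p->block += 1;
    }
}

static record_t getRecordSketch(block_t *buffer, recordPtrSketch p) {
    return buffer[p.block].entries[p.record];  /* record p.record of block p.block */
}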
/*
 * infile: input filename
 * size: size in blocks of input file
 * outfile: output filename
 * field: which field will be used for sorting
 * buffer: the buffer that is used
 * memSize: number of buffer blocks available for use, without counting the last one, which is for output
 * nunique: number of unique values
 * nios: number of ios
 * 
 * when the input file fits in the buffer with one block left over for output,
 * this hashes each record and writes it to the output, provided that no record
 * with the same value is already present in the corresponding hash bucket.
 */
void hashElimination(char *infile, uint size, char *outfile, unsigned char field, block_t *buffer, uint memSize, uint *nunique, uint *nios) {
    int out = open(outfile, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU);
    block_t *bufferOut = buffer + memSize;
    emptyBlock(bufferOut);
    (*bufferOut).valid = true;
    (*bufferOut).blockid = 0;

    (*nunique) = 0;
    (*nios) += readBlocks(infile, buffer, size);

    // creates a hash index. for each value returned from the hash function,
    // there is a linked list of pointers to the records with that specific hash
    // value
    uint hashSize = size*MAX_RECORDS_PER_BLOCK;
    linkedRecordPtr **hashIndex = (linkedRecordPtr**) malloc(hashSize * sizeof (linkedRecordPtr*));
    for (uint i = 0; i < hashSize; i++) {
        hashIndex[i] = NULL;
    }

    recordPtr start = newPtr(0);
    recordPtr end = newPtr(size * MAX_RECORDS_PER_BLOCK - 1);

    for (; start <= end; incr(start)) {
        if (!buffer[start.block].valid) {
            start.record = MAX_RECORDS_PER_BLOCK - 1;
            continue;
        }
        record_t record = getRecord(buffer, start);
        if (record.valid) {
            // hashes the record being examined
            uint index = hashRecord(infile, record, hashSize, field);
            linkedRecordPtr *element = hashIndex[index];
            // goes through the linked list for the hash value of the record
            // if a record with same value is not found, then a recordPtr is
            // added to the linked list and the record itself is written to
            // the output. otherwise, it is ignored.
            while (element) {
                if (compareRecords(record, getRecord(buffer, element->ptr), field) == 0) {
                    break;
                }
                element = element->next;
            }
            if (!element) {
                element = (linkedRecordPtr*) malloc(sizeof (linkedRecordPtr));
                element->ptr = start;
                element->next = hashIndex[index];
                hashIndex[index] = element;
                (*bufferOut).entries[(*bufferOut).nreserved++] = record;
                (*nunique) += 1;
                if ((*bufferOut).nreserved == MAX_RECORDS_PER_BLOCK) {
                    (*nios) += writeBlocks(out, bufferOut, 1);
                    emptyBlock(bufferOut);
                    (*bufferOut).blockid += 1;
                }
            }
        }
    }
    // writes records left in buffer to the outfile
    if ((*bufferOut).nreserved != 0) {
        (*nios) += writeBlocks(out, bufferOut, 1);
    }
    destroyHashIndex(hashIndex, size);
    close(out);
}
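
The hash index above chains one node per unique record into its bucket. A sketch of the node shape and a matching teardown, as the calls imply (an assumption: the real linkedRecordPtr and destroyHashIndex may differ, and the sketch names are suffixed to make that explicit):

/* Assumed shape of a hash-index node as used above. */
struct linkedRecordPtrSketch {
    recordPtr ptr;                        /* position of a unique record in the buffer */
    struct linkedRecordPtrSketch *next;   /* next node in the same bucket */
};

static void destroyHashIndexSketch(struct linkedRecordPtrSketch **index, uint buckets) {
    for (uint i = 0; i < buckets; i++) {
        /* free every node of this bucket's chain */
        while (index[i]) {
            struct linkedRecordPtrSketch *next = index[i]->next;
            free(index[i]);
            index[i] = next;
        }
    }
    free(index);
}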
/*
 * infile: filename of the input file
 * outfile: filename of the output file
 * field: which field will be used for sorting
 * buffer: the buffer used
 * nmem_blocks: size of buffer
 * nunique: number of unique values
 * nios: number of ios
 * 
 * when the input file size is equal to the buffer size, the whole file is loaded
 * and sorted. then the first block is used as the output area, where only unique
 * values are written
 */
void useFirstBlock(char *infile, char *outfile, unsigned char field, block_t *buffer, uint nmem_blocks, uint *nunique, uint *nios) {
    int out = open(outfile, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU);
    (*nios) += readBlocks(infile, buffer, nmem_blocks);
    if (sortBuffer(buffer, nmem_blocks, field)) {
        // all the unique values of the first block are shifted to the start
        // of it. the rest are marked as invalid
        recordPtr i = newPtr(1);
        recordPtr j = newPtr(1);
        (*nunique) += 1;
        buffer[0].nreserved = 1;
        for (; j.block < 1; incr(j)) {
            record_t record = getRecord(buffer, j);
            if (record.valid && compareRecords(record, getRecord(buffer, i - 1), field) != 0) {
                setRecord(buffer, record, i);
                (*nunique) += 1;
                incr(i);
                buffer[0].nreserved += 1;
            }
        }

        j = newPtr(i, 0);
        for (; j.block < 1; incr(j)) {
            buffer[j.block].entries[j.record].valid = false;
        }

        record_t *lastRecordAdded = (record_t*) malloc(sizeof (record_t));
        record_t lastUnique = getRecord(buffer, i - 1);
        memcpy(lastRecordAdded, &lastUnique, sizeof (record_t));
        // if the first block is full after the shifting (meaning that all its
        // values were actually unique), writes it to the outfile and empties it
        if (buffer[0].nreserved == MAX_RECORDS_PER_BLOCK) {
            i.block -= 1;
            (*nios) += writeBlocks(out, buffer, 1);
            emptyBlock(buffer);
            buffer[0].blockid += 1;
        }

        // write the unique values of the other blocks to the first one. if it
        // becomes full, write it to the outfile and empty it. at the end, if it
        // still holds records not yet written, write them to the outfile as well.
        j = newPtr(MAX_RECORDS_PER_BLOCK);
        while (j.block < nmem_blocks && buffer[j.block].valid) {
            record_t record = getRecord(buffer, j);
            if (!record.valid) {
                break;
            }
            if (compareRecords(record, (*lastRecordAdded), field) != 0) {
                setRecord(buffer, record, i);
                memcpy(lastRecordAdded, &record, sizeof (record_t));
                (*nunique) += 1;
                incr(i);
                buffer[0].nreserved += 1;
            }
            if (buffer[0].nreserved == MAX_RECORDS_PER_BLOCK) {
                i.block -= 1;
                (*nios) += writeBlocks(out, buffer, 1);
                emptyBlock(buffer);
                buffer[0].blockid += 1;
            }
            incr(j);
        }
        if (buffer[0].nreserved != 0) {
            (*nios) += writeBlocks(out, buffer, 1);
        }
        free(lastRecordAdded);
    }
    close(out);
}
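
A hypothetical call site for useFirstBlock, which per the comment above applies when the input file occupies exactly nmem_blocks blocks (file names, the block count, and the sort field are illustrative assumptions; stdio.h/stdlib.h assumed):

/* Hypothetical call site: the input file is assumed to be exactly nmem_blocks
 * blocks long, so the whole file fits the buffer. */
void useFirstBlockExample(void) {
    uint nmem_blocks = 16;   /* assumed buffer size in blocks */
    block_t *buffer = (block_t *) malloc(nmem_blocks * sizeof(block_t));
    uint nunique = 0, nios = 0;
    char in[] = "input.bin", outname[] = "unique.bin";
    useFirstBlock(in, outname, 1, buffer, nmem_blocks, &nunique, &nios);
    printf("unique records: %u, block I/Os: %u\n", nunique, nios);
    free(buffer);
}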