/// Record one occurrence of this term in document `docid`.
/// If `docid` is the document most recently added, its term frequency is
/// bumped; otherwise a fresh (docid, freq=1) pair is appended, growing the
/// buffer first when needed.
/// Returns false when the list is read-only, unallocated (size == 0), or
/// more memory cannot be obtained; true on success.
bool lemur::index::InvDocList::addTerm(lemur::api::DOCID_T docid) {
  // Must be writable and have a backing buffer at all.
  if (READ_ONLY || size == 0)
    return false;

  // Repeat occurrence in the current document: just bump its count.
  if (docid == *lastid) {
    ++(*freq);
    return true;
  }

  // New document: ensure room for one more (docid, freq) pair.
  if ((end - begin + 2) * LOC_Tsize > size && !getMoreMem())
    return false;

  lastid = end;        // slot for the new docid
  *lastid = docid;
  freq = lastid + 1;   // slot for its term frequency
  *freq = 1;
  end = freq + 1;
  df++;                // one more document contains this term
  return true;
}
/*
 * Thread-safe malloc replacement backed by a free-list allocator.
 *
 * size: requested byte count. Rounded up to a multiple of sizeof(int)
 *       so returned regions stay word-aligned.
 * Returns a pointer to the usable region just past the block header,
 * or NULL when size is non-positive or no memory can be obtained.
 */
void *malloc(int size)
{
	t_header *tmp;
	int total_size;

	if (size <= 0)
		return (NULL);
	lock_thread();
	/* Round the request up to word alignment. */
	if ((size % sizeof(int)) != 0)
		size += (sizeof(int) - (size % sizeof(int)));
	if ((tmp = (t_header *)findFreeBlock(size)) == NULL)
		tmp = (t_header *)getMoreMem(size + sizeof(*tmp));
	else
	{
		/* Split the free block when the remainder is large enough
		 * to be reused as a block of its own. */
		if (tmp->size - size >= THRESHOLD)
		{
			total_size = tmp->size + sizeof(*tmp);
			tmp->size = size;
			split_mid(tmp, total_size);
		}
		deleteFromFreeList(tmp);
	}
	unlock_thread();
	if (tmp == NULL)
		return (NULL);
	/* FIX: the original computed the payload address as
	 * (void *)((int)tmp + sizeof(*tmp)), casting the pointer through
	 * `int`. On LP64 platforms that truncates the address to 32 bits
	 * and returns a garbage pointer. Do the offset in char* arithmetic
	 * instead, which is exact for any pointer width. */
	return ((void *)((char *)tmp + sizeof(*tmp)));
}
/**
 * Append a new doclist onto the tail of the existing doclist.
 * (Translated from the original Chinese comment.)
 *
 * Each document occupies winCount+2 LOC_T slots:
 * |docid|lf-win0|...|lf-win(winCount-1)|linkFreq| — so the merge below
 * steps in units of (winCount+2).
 * Returns false if this list is read-only or memory cannot be grown.
 */
bool link::api::InvLinkDocList::append(lemur::index::InvDocList *part_tail) {
  if(this->READ_ONLY) return false;
  // through subclass object access the protected member of the parent-class.
  link::api::InvLinkDocList* tail = (link::api::InvLinkDocList*) part_tail;
  // we only want to append the actual content
  lemur::api::LOC_T* ptr = tail->begin;
  lemur::api::COUNT_T len = tail->length();
  // check for memory
  while ((end-begin+len)*LOC_Tsize > size) {
    if (!getMoreMem()) return false;
  }//while
  // update doc frequency
  df += tail->docFreq();
  // check for overlap (by 1 docid)
  // this method will mainly be used for merging lists from indexing
  // in that case, overlap of docids would only occur by 1
  if (*ptr == *lastid) {
    // add linkfreqs together: the winCount per-window counters plus the
    // trailing linkFreq slot, hence i runs 0..winCount inclusive.
    for(int i=0; i<=winCount; i++){
      *(freq+i) += *(ptr+1+i);
    }
    // doc frequency is actually one less
    df--;
    // advance pointer to next doc: skip docid + winCount counters + linkFreq
    ptr =ptr + (winCount+2);
    len =len - (winCount+2);
  }
  // copy list over
  if (len > 0) {
    memcpy(end, ptr, len*LOC_Tsize);
    end += len;
    lastid = end-(winCount+2);  // docid slot of the (new) last document
    freq = end-(winCount+1);    // first counter slot of the last document
  }
  return true;
}
/// Concatenate the postings of `tail` onto this list.
/// Entries are (docid, tf) pairs; when the first docid of `tail` equals
/// our last docid (the one-document overlap produced by merging partial
/// lists during indexing), the two term frequencies are folded together
/// instead of duplicating the pair.
/// Returns false if read-only or memory cannot be grown.
bool lemur::index::InvDocList::append(InvDocList* tail) {
  if (READ_ONLY)
    return false;

  // Only the actual content of the other list is copied.
  lemur::api::LOC_T* src = tail->begin;
  int remaining = tail->length();

  // Grow our buffer until the combined contents fit.
  while ((end - begin + remaining) * LOC_Tsize > size) {
    if (!getMoreMem())
      return false;
  }

  df += tail->docFreq();

  // Merge the seam document, if any: fold its tf into ours, skip that
  // pair in the copy, and undo the double-counted document frequency.
  if (*src == *lastid) {
    *freq += *(src + 1);
    src += 2;
    remaining -= 2;
    df--;
  }

  if (remaining > 0) {
    memcpy(end, src, remaining * LOC_Tsize);
    end += remaining;
    lastid = end - 2;  // docid slot of the new last pair
    freq = end - 1;    // its tf slot
  }
  return true;
}
/*
 * Allocate a region with an alignment or fail.
 * The alignment is specified as an alignment and an offset.
 * The resultant vaddr will satisfy vaddr mod align = offset.
 * The old style power of two interface is gotten with a power of two
 * align value, and a zero offset value.
 * This routine does a brute force search and is relatively expensive.
 * Note the bounded stack size - this may cause the allocation to fail
 * when it could be done, or cause a larger than necessary block to be
 * fragmented.
 */
SysStatus
PageAllocatorDefault::allocPagesAligned(uval &vaddr, uval size, uval align,
                                        uval offset, uval f, VPNum n)
{
    (void)f; (void)n; // flags and node parms not used here
    size = PAGE_ROUND_UP(size);
    lock.acquire();
#ifdef marcdebug
    marcCheckAvail();
#endif /* #ifdef marcdebug */
 retry:
#define STACK_SIZE 64 // Made up value - should be OK
    // Explicit backtracking stack for the tree walk; bounded, so (as noted
    // above) a deep tree can make us miss a feasible block.
    freePages *stack[STACK_SIZE];
    uval sp = 0;
    freePages* cur = anchor;
    freePages** top = &anchor;       // pointer to the link that holds cur
    freePages *found = 0;            // first (lowest-address) feasible block
    freePages **foundtop = 0;        // link that holds `found` in the tree
    //search for first (smallest address) block which satisfies aligned request
    if (!cur) goto bad;
    while (1) {
        /* this test sees if the aligned request is within this block
         * start by rounding block address up as required
         * vaddr will always be ge cur->start
         */
        vaddr = ((cur->start+align-offset-1)/align)*align+offset;
        if ((vaddr+size) <= (cur->start+cur->size)) {
            found = cur;
            foundtop = top;
        }
        // if block is completely too small or has no successors backtrack
        if ((cur->size < size) || (!(cur->low) && !(cur->high))) {
            if (found) break;
            if (sp) {
                cur = stack[--sp];
                top = &(cur->high);
                cur = cur->high; // every stack entry has a high subtree
            } else {
                goto bad;
            }
        } else if (cur->low) {
            // continue down the tree searching; remember nodes whose high
            // subtree still has to be visited (only if stack has room)
            if (cur->high && sp<STACK_SIZE) stack[sp++] = cur;
            top = &(cur->low);
            cur = cur->low;
        } else {
            top = &(cur->high);
            cur = cur->high;
        }
    }
    // we reach here with found pointing to first feasible block, foundtop
    // to anchor for that block in the tree
    // first remove that block
    // (recompute the rounded-up address for `found`; vaddr currently holds
    // the value for the last node visited, not necessarily `found`)
    vaddr = ((found->start+align-offset-1)/align)*align+offset;
    allocFromBlock(found,foundtop,vaddr,size);
    available -= size;
    tassertMsg((vaddr & PAGE_MASK) == 0, "%lx not page aligned?\n",vaddr);
    sanity(vaddr, size);
#ifdef marcdebug
    marcCheckAvail();
#endif /* #ifdef marcdebug */
#ifdef DEBUG_MEMORY
    {
        leakProof.alloc(vaddr,size);
#if 0
        uval* p=(uval*)vaddr;
        //don't kill pages for now - simulator too slow
        //most uninitialized bugs caught by clobber in alloc.H
        for (;p<(uval*)(vaddr+PAGE_SIZE);*(p++)=(uval)0xBFBFBFBFBFBFBFBFLL);
#endif /* #if 0 */
    }
#endif /* #ifdef DEBUG_MEMORY */
    lock.release();
    return 0;
 bad:
    // call virtual function possibly overridden by subclass to get more space
    if (_SUCCESS(getMoreMem(size))) goto retry;
    lock.release();
    tassertWrn(0, "warning allocator out of space: "
               "size %lx align %lx offset %lx\n", size, align, offset);
    vaddr = 0;
    return _SERROR(1476, 0, ENOMEM);
}
/*
 * Allocate the address range specified by vaddr and size if it is
 * free, otherwise fail.
 */
SysStatus
PageAllocatorDefault::allocPagesAt(uval vaddr, uval size, uval f)
{
    (void)f; // flags parm not used here
    tassert(((vaddr & (~PAGE_MASK)) == vaddr), err_printf("not aligned?\n"));
    lock.acquire();
#ifdef marcdebug
    marcCheckAvail();
#endif /* #ifdef marcdebug */
 retry:
    freePages* cur = anchor;
    freePages** top = &anchor;   // pointer to the link that holds cur
    // Walk the tree (ordered by start address) looking for the free block
    // that contains vaddr.
    while (cur) {
        if ((cur->start <= vaddr) && ((cur->start+cur->size) > vaddr)) {
            // vaddr lies inside this block; it must also hold the whole range
            if ((vaddr+size) <= cur->start+cur->size) {
                allocFromBlock(cur,top,vaddr,size);
                available -= size;
                sanity(vaddr, size);
                lock.release();
#ifdef DEBUG_MEMORY
                {
                    leakProof.alloc(vaddr,size);
#if 0
                    uval* p=(uval*)vaddr;
                    //don't kill pages for now - simulator too slow
                    //most uninitialized bugs caught by clobber in alloc.H
                    for (;p<(uval*)(vaddr+PAGE_SIZE);
                         *(p++)=(uval)0xBFBFBFBFBFBFBFBFLL);
#endif /* #if 0 */
                }
#endif /* #ifdef DEBUG_MEMORY */
                return 0;
            } else goto bad;   // range straddles the end of the free block
        }
        // descend: blocks with larger starts are in `low`, smaller in `high`
        // per the comparison used here
        if (cur->start > vaddr) {
            top = &(cur->low);
            cur = cur->low;
        } else {
            top = &(cur->high);
            cur = cur->high;
        }
    }
    // fell off the tree: vaddr is not inside any free block
 bad:
    // call virtual function possibly overridden by subclass to get more space
    if (_SUCCESS(getMoreMem(size))) goto retry;
#ifdef marcdebug
    marcCheckAvail();
#endif /* #ifdef marcdebug */
    lock.release();
#ifdef marcdebug
    tassertWrn(0,"warning allocator out of space: size %lx addr %lx\n",
               size, vaddr);
#endif /* #ifdef marcdebug */
    vaddr = 0;
    return _SERROR(1475, 0, ENOMEM);
}
/*
 * Allocate the first region of size or return null
 */
SysStatus
PageAllocatorDefault::allocPages(uval &vaddr, uval size, uval f, VPNum n)
{
    (void)f; (void)n; // flags and node parms not used here
    freePages **top; // address of pointer to subtree
    freePages *cur, *next;
    // round up to a multiple of a page
    size = PAGE_ROUND_UP(size);
    lock.acquire();
#ifdef marcdebug
    marcCheckAvail();
#endif /* #ifdef marcdebug */
 retry:
    top = &anchor;
    cur = anchor;
    // top node is (one of) the largest blocks
    if (!cur || cur->size < size) goto nospace;
    // search for lowest address block which is big enough
    while (1) {
        if ((next = cur->low) && (next->size == cur->size)) {
            //To avoid quadratic behavior allocating a number of
            //blocks of the same size, we reroot the subtree at the
            //lower address node of the same size
            // (a rotation: next takes cur's place under *top, cur becomes
            // next's high child; next->size == cur->size >= size, so the
            // descent invariant is preserved)
            *top = next;
            cur->low = next->high;
            next->high = cur;
        } else if (next && (next->size >= size)) {
            top = &(cur->low);
        } else if ((next=cur->high) && (next->size >= size)) {
            top = &(cur->high);
        } else break;
        cur = next;
    }
    // cur now points to the lowest address node which can provide size
    // top points to the pointer to cur in the tree
    vaddr = cur->start;
    tassert(((vaddr & (~PAGE_MASK)) == vaddr), err_printf("not aligned?\n"));
    // carve the allocation off the front of the block
    cur->start += size;
    cur->size -= size;
    next = merge(cur->low,cur->high);
    if (cur->size) {
        // block not exhausted: reinsert the remainder
        next = add(next,cur);
    } else {
        // block fully consumed: recycle the node onto the freeList
        cur->low = freeList;
        freeList = cur;
    }
    *top = next;
    available -= size;
    sanity(vaddr,size);
#ifdef DEBUG_MEMORY
    {
        leakProof.alloc(vaddr,size);
#if 0
        uval* p=(uval*)vaddr;
        //don't kill pages for now - simulator too slow
        //most uninitialized bugs caught by clobber in alloc.H
        for (;p<(uval*)(vaddr+PAGE_SIZE);*(p++)=(uval)0xBFBFBFBFBFBFBFBFLL);
#endif /* #if 0 */
    }
#endif /* #ifdef DEBUG_MEMORY */
#ifdef marcdebug
    marcCheckAvail();
#endif /* #ifdef marcdebug */
    lock.release();
    return 0;
 nospace:
    // call virtual function possibly overridden by subclass to get more space
    if (_SUCCESS(getMoreMem(size))) goto retry;
    tassertWrn(0, "warning allocator out of space: size %lx\n", size);
    lock.release();
    return _SERROR(1474, 0, ENOMEM);
}
/**
 * Add one link occurrence for (docid, dist) to the inverted link doc list.
 * (Translated from the original Chinese comment.)
 *
 * Per-document layout (winCount+2 LOC_T slots each):
 * |docid|lf-win0|lf-win1|,,,|linkFreq|...|docid|lf-win0|lf-win1|,,,|linkFreq|
 *  |     |                              |                                  |
 *  begin lastid                         freq                               end
 *
 * A distance `dist` falling in window i increments lf-win i and all wider
 * windows; linkFreq counts every occurrence regardless of distance.
 */
bool link::api::InvLinkDocList::addFrequence(lemur::api::DOCID_T docid,
                                             link::api::DIST_T dist) {
  if (READ_ONLY) return false;
  // check that we can add at all
  // NOTE(review): InvDocList::addTerm rejects on `size == 0` alone; here the
  // `&&` only rejects when BOTH size==0 AND winCount>MAXNUM_WIN hold, so an
  // unallocated list with a valid winCount passes — possibly meant `||`.
  // Confirm against callers before changing.
  if (this->size == 0&&this->winCount>MAXNUM_WIN) return false;
  // check to see if it's a new document
  if (docid == *lastid) // document already in the list: this link occurs in it more than once
  {
    // (An old hard-coded if/else chain over winSizes[0..4] used to live here;
    // the original author flagged it as unmaintainable when MAXNUM_WIN
    // changes. The generalized loop below replaces it; see VCS history.)
    //
    // NOTE(review): this assumes winSizes holds winCount+1 entries and that
    // winSizes[0] is a lower bound (presumably 0). A dist <= winSizes[0] or
    // > winSizes[winCount] matches no window and bumps only linkFreq —
    // TODO confirm that is intended.
    for(int i=0; i<winCount; i++){
      if(dist>winSizes[i] && dist<=winSizes[i+1]){// |lf-win i|,,,
        // window i matched: count it in window i and every wider window
        for(int j=i;j<winCount;j++){
          (*(freq+j))++;
        }
        break;
      }
    }
    (*(freq+winCount))++;//|linkFreq|
  }
  else // document not yet in the list
  {
    //get more mem if needed
    if ((end-begin+winCount+2)*LOC_Tsize > this->size) {
      if (!getMoreMem()) return false;
    }
    lastid = end;
    *lastid = docid;
    freq = lastid+1;
    // zero all window counters, then the classification loop below raises
    // the matching ones to 1
    for(int i=0;i<winCount;i++){
      *(freq+i) = 0;
    }
    *(freq+winCount) = 1;//|linkFreq|
    // (Same removed hard-coded chain as above; the loop generalizes it.)
    for(int i=0; i<winCount; i++){
      if(dist>winSizes[i] && dist<=winSizes[i+1]){// |lf-win i|,,,
        for(int j=i;j<winCount;j++){
          (*(freq+j))++;
        }
        break;
      }
    }
    end = freq+winCount+1;
    df++;
  }
  return true;
}