/*
 * Insert `value` into the intset.
 *
 * Returns the (possibly reallocated) set; callers must use the returned
 * pointer. When `success` is non-NULL it receives 1 if the value was
 * inserted and 0 if the value was already present.
 */
intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
    uint32_t slot;
    uint8_t required = _intsetValueEncoding(value);

    /* Report success up front; only the duplicate path below reverts it. */
    if (success != NULL) {
        *success = 1;
    }

    /* A value that needs a wider encoding than the set currently uses lies
     * outside the range of every stored element, so it is either appended
     * (if > 0) or prepended (if < 0). The upgrade path always succeeds, so
     * *success needs no further update. */
    if (required > is->encoding) {
        return intsetUpgradeAndAdd(is, value);
    }

    /* On a miss the search stores the insertion position in `slot`. */
    if (intsetSearch(is, value, &slot)) {
        if (success != NULL) {
            *success = 0;
        }
        return is;
    }

    /* Grow by one element, then open a gap at `slot` unless appending. */
    is = intsetResize(is, is->length + 1);
    if (slot < is->length) {
        intsetMoveTail(is, slot, slot + 1);
    }

    _intsetSet(is, slot, value);
    is->length++;
    return is;
}
/*
 * Remove `value` from the intset.
 *
 * When `success` is non-NULL it is set to 1 if the value was removed and
 * to 0 if it was not found. Returns the (possibly reallocated) set.
 *
 * T = O(n)
 */
intset *intsetRemove(intset *is, int64_t value, int *success) {
    uint32_t idx;
    uint32_t count;
    uint8_t required = _intsetValueEncoding(value);

    if (success != NULL) {
        *success = 0;
    }

    /* The value can only be stored if its encoding fits the set's. */
    if (required > intrev32ifbe(is->encoding)) {
        return is;
    }
    /* On a hit the search stores the element's position in `idx`. */
    if (!intsetSearch(is, value, &idx)) {
        return is;
    }

    count = intrev32ifbe(is->length);

    /* We know we can delete. */
    if (success != NULL) {
        *success = 1;
    }

    /* Unless the element is the last one, overwrite it by shifting the
     * tail down one slot. A trailing element simply disappears when the
     * set is shrunk. */
    if (idx < (count - 1)) {
        intsetMoveTail(is, idx + 1, idx);
    }

    /* Shrink the storage and update the element counter. */
    is = intsetResize(is, count - 1);
    is->length = intrev32ifbe(count - 1);
    return is;
}
/*
 * Determine whether `value` belongs to the set.
 *
 * Returns 1 when the value is a member and 0 otherwise.
 *
 * T = O(log N)
 */
uint8_t intsetFind(intset *is, int64_t value) {
    uint8_t required = _intsetValueEncoding(value);

    /* A value whose encoding is wider than the set's current encoding
     * cannot be stored in it, so the search is skipped entirely. */
    if (required > intrev32ifbe(is->encoding)) {
        return 0;
    }

    /* Otherwise the encoding fits and the search decides membership. */
    return intsetSearch(is, value, NULL) ? 1 : 0;
}
/*
 * Delete `value` from the intset.
 *
 * When `success` is non-NULL it is set to 1 if the value was removed and
 * to 0 if it was not present. Returns the (possibly reallocated) set.
 */
intset *intsetRemove(intset *is, int64_t value, int *success) {
    uint32_t at;
    int present;
    uint8_t needed = _intsetValueEncoding(value);

    if (success) {
        *success = 0;
    }

    /* The search only runs when the value's encoding fits the set's; on a
     * hit it stores the element's position in `at`. */
    present = (needed <= is->encoding) && intsetSearch(is, value, &at);
    if (!present) {
        return is;
    }

    /* We know we can delete. */
    if (success) {
        *success = 1;
    }

    /* Overwrite the element with the tail (unless it is the last one),
     * then shrink the storage and update the length. */
    if (at < (is->length - 1)) {
        intsetMoveTail(is, at + 1, at);
    }
    is = intsetResize(is, is->length - 1);
    is->length--;
    return is;
}
/*
 * Delete `value` from the intset.
 *
 * `*success` (when the pointer is non-NULL) starts at 0 and becomes 1 only
 * if the value was found and removed. Returns the (possibly reallocated)
 * set.
 */
intset *intsetRemove(intset *is, int64_t value, int *success) {
    uint8_t value_enc = _intsetValueEncoding(value); /* encoding of value */
    uint32_t where;
    uint32_t old_len;

    if (success) *success = 0; /* assume failure first */

    if (value_enc > intrev32ifbe(is->encoding))
        return is;
    if (!intsetSearch(is, value, &where))
        return is;

    old_len = intrev32ifbe(is->length);

    /* We know we can delete: the value's position is in `where`. */
    if (success) *success = 1;

    /* Overwrite the value with the tail: where <- where+1. */
    if (where < (old_len - 1))
        intsetMoveTail(is, where + 1, where);

    /* Reallocate for old_len-1 elements and record the new length. */
    is = intsetResize(is, old_len - 1);
    is->length = intrev32ifbe(old_len - 1);

    return is;
}
//将值为value的元素从整数集中删除 intset *intsetRemove(intset *is, int64_t value, int *success) { uint8_t valenc = _intsetValueEncoding(value); uint32_t pos; if (success) *success = 0; if (valenc <= intrev32ifbe(is->encoding) && intsetSearch(is,value,&pos)) { uint32_t len = intrev32ifbe(is->length); /* We know we can delete */ if (success) *success = 1; /* Overwrite value with tail and update length */ //pos为元素所在位置,将从后一位到最后一位得元素都往前移动一位 if (pos < (len-1)) intsetMoveTail(is,pos+1,pos); //减少内存 is = intsetResize(is,len-1); is->length = intrev32ifbe(len-1); } return is; }
/*
 * Insert an integer in the intset.
 *
 * Duplicates are not allowed: when `value` is already present the set is
 * returned unchanged and `*success` (if non-NULL) is set to 0. Otherwise
 * `*success` is 1 and the returned pointer refers to the updated set.
 */
intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
    uint32_t insert_at;
    uint8_t value_enc = _intsetValueEncoding(value);

    if (success) {
        *success = 1;
    }

    /* The value needs a wider integer type than the set currently stores,
     * so the set is upgraded. Such a value lies outside the range of the
     * existing elements, hence it is appended (if > 0) or prepended
     * (if < 0). This always succeeds, so *success stays 1. */
    if (value_enc > intrev32ifbe(is->encoding)) {
        return intsetUpgradeAndAdd(is, value);
    }

    /* Abort if the value is already present. On a miss the search leaves
     * the right insertion position in `insert_at`. */
    if (intsetSearch(is, value, &insert_at)) {
        if (success) {
            *success = 0;
        }
        return is;
    }

    /* Reallocate with room for one more element. */
    is = intsetResize(is, intrev32ifbe(is->length) + 1);

    /* Shift the tail to make room for the new element;
     * intsetMoveTail() performs the memory move. */
    if (insert_at < intrev32ifbe(is->length)) {
        intsetMoveTail(is, insert_at, insert_at + 1);
    }

    /* Store the new element in the freed slot and bump the size. */
    _intsetSet(is, insert_at, value);
    is->length = intrev32ifbe(intrev32ifbe(is->length) + 1);
    return is;
}
/*
 * Add `value` to the set.
 *
 * If the element already exists, *success is set to 0;
 * if the element was added, *success is set to 1.
 * (`success` may be NULL, in which case nothing is reported.)
 *
 * T = O(n)
 */
intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
    uint32_t target;
    uint8_t enc_needed = _intsetValueEncoding(value);

    if (success != NULL) *success = 1;

    /* Upgrade the encoding if necessary. A value that forces an upgrade
     * lies outside the range of the existing values, so it is either
     * appended (if > 0) or prepended (if < 0). This always succeeds, so
     * *success does not need to be touched again. */
    if (enc_needed > intrev32ifbe(is->encoding))
        return intsetUpgradeAndAdd(is, value);

    /* If the value already exists, return right away. Otherwise the
     * search sets `target` to the position where the new element goes. */
    if (intsetSearch(is, value, &target)) {
        if (success != NULL) *success = 0;
        return is;
    }

    /* Expand the set to make room for the new element. */
    is = intsetResize(is, intrev32ifbe(is->length) + 1);

    /* If `target` is not past the last existing element, move the
     * existing elements out of the way. */
    if (target < intrev32ifbe(is->length))
        intsetMoveTail(is, target, target + 1);

    /* Write the new element and update the element count. */
    _intsetSet(is, target, value);
    is->length = intrev32ifbe(intrev32ifbe(is->length) + 1);

    return is;
}
/*
 * Delete integer from intset.
 *
 * `*success` (when the pointer is non-NULL) reports the outcome:
 *  - 0 when the deletion failed because the value is not in the set;
 *  - 1 when the value was deleted.
 *
 * Returns the (possibly reallocated) set.
 *
 * T = O(N)
 */
intset *intsetRemove(intset *is, int64_t value, int *success) {
    uint32_t found_at;
    uint32_t current;
    uint32_t remaining;
    /* Encoding required to represent the value. */
    uint8_t needed = _intsetValueEncoding(value);

    /* Default to "deletion failed". */
    if (success) *success = 0;

    /* The value can only exist in the set when its encoding is no wider
     * than the set's current encoding; only then is the search run. */
    if (needed > intrev32ifbe(is->encoding)) return is;
    if (!intsetSearch(is, value, &found_at)) return is;

    /* Number of elements currently stored, and the count after removal. */
    current = intrev32ifbe(is->length);
    remaining = current - 1;

    /* We know we can delete. */
    if (success) *success = 1;

    /* If the value is not the last element, the elements after it must be
     * moved.
     *
     * For example, with this array and b as the deletion target:
     *   | a | b | c | d |
     * intsetMoveTail shifts everything after b forward by one element,
     * overwriting b:
     *   | a | c | d | d |
     * and the subsequent intsetResize drops the extra slot at the end:
     *   | a | c | d |
     */
    if (found_at < remaining) {
        intsetMoveTail(is, found_at + 1, found_at);
    }

    /* Shrink the array, dropping the space of the deleted element. */
    is = intsetResize(is, remaining);

    /* Update the element count. */
    is->length = intrev32ifbe(remaining);
    return is;
}
//将value插入到整数集中 intset *intsetAdd(intset *is, int64_t value, uint8_t *success) { uint8_t valenc = _intsetValueEncoding(value); uint32_t pos; if (success) *success = 1; /* Upgrade encoding if necessary. If we need to upgrade, we know that * this value should be either appended (if > 0) or prepended (if < 0), * because it lies outside the range of existing values. */ if (valenc > intrev32ifbe(is->encoding)) { //如果表示value所需编码比现整数集的编码要大,升级整数集并将元素插入 /* This always succeeds, so we don't need to curry *success. */ return intsetUpgradeAndAdd(is,value); } else { /* Abort if the value is already present in the set. * This call will populate "pos" with the right position to insert * the value when it cannot be found. */ //调用intsetSearch查找元素,如果已经存在则返回0 if (intsetSearch(is,value,&pos)) { if (success) *success = 0; return is; } //扩大内存空间 is = intsetResize(is,intrev32ifbe(is->length)+1); //将原来从pos到末尾的元素都向后移动一位 if (pos < intrev32ifbe(is->length)) intsetMoveTail(is,pos,pos+1); } //将value设到pos位上 _intsetSet(is,pos,value); //更新元素个数 is->length = intrev32ifbe(intrev32ifbe(is->length)+1); return is; }
/*
 * Test driver for the intset implementation.
 *
 * Runs a fixed sequence of assert-based checks: value-encoding boundaries,
 * basic adds, random adds, encoding upgrades, a timed lookup loop and a
 * mixed add/remove loop. `argc` and `argv` are not used.
 *
 * NOTE(review): every section replaces `is` with a new set and no release
 * call is visible here, so earlier sets appear to be leaked -- confirm
 * whether that is acceptable for this test binary.
 */
int main(int argc, char **argv) {
    uint8_t success;
    int i;
    intset *is;
    /* NOTE(review): sranddev() is not standard C (presumably the BSD
     * seeding routine for rand()) -- confirm availability on the target. */
    sranddev();

    /* Boundary values on each side of the int16/int32/int64 limits. */
    printf("Value encodings: "); {
        assert(_intsetValueEncoding(-32768) == INTSET_ENC_INT16);
        assert(_intsetValueEncoding(+32767) == INTSET_ENC_INT16);
        assert(_intsetValueEncoding(-32769) == INTSET_ENC_INT32);
        assert(_intsetValueEncoding(+32768) == INTSET_ENC_INT32);
        assert(_intsetValueEncoding(-2147483648) == INTSET_ENC_INT32);
        assert(_intsetValueEncoding(+2147483647) == INTSET_ENC_INT32);
        assert(_intsetValueEncoding(-2147483649) == INTSET_ENC_INT64);
        assert(_intsetValueEncoding(+2147483648) == INTSET_ENC_INT64);
        assert(_intsetValueEncoding(-9223372036854775808ull) == INTSET_ENC_INT64);
        assert(_intsetValueEncoding(+9223372036854775807ull) == INTSET_ENC_INT64);
        ok();
    }

    /* Three distinct values succeed; re-adding 4 must report failure. */
    printf("Basic adding: "); {
        is = intsetNew();
        is = intsetAdd(is,5,&success); assert(success);
        is = intsetAdd(is,6,&success); assert(success);
        is = intsetAdd(is,4,&success); assert(success);
        is = intsetAdd(is,4,&success); assert(!success);
        ok();
    }

    /* The stored length must equal the number of adds that succeeded. */
    printf("Large number of random adds: "); {
        int inserts = 0;
        is = intsetNew();
        for (i = 0; i < 1024; i++) {
            is = intsetAdd(is,rand()%0x800,&success);
            if (success) inserts++;
        }
        assert(is->length == inserts);
        checkConsistency(is);
        ok();
    }

    /* Upgrade checks: once with a positive and once with a negative value
     * that forces the wider encoding; both members must remain findable. */
    printf("Upgrade from int16 to int32: "); {
        is = intsetNew();
        is = intsetAdd(is,32,NULL);
        assert(is->encoding == INTSET_ENC_INT16);
        is = intsetAdd(is,65535,NULL);
        assert(is->encoding == INTSET_ENC_INT32);
        assert(intsetFind(is,32));
        assert(intsetFind(is,65535));
        checkConsistency(is);

        is = intsetNew();
        is = intsetAdd(is,32,NULL);
        assert(is->encoding == INTSET_ENC_INT16);
        is = intsetAdd(is,-65535,NULL);
        assert(is->encoding == INTSET_ENC_INT32);
        assert(intsetFind(is,32));
        assert(intsetFind(is,-65535));
        checkConsistency(is);
        ok();
    }

    printf("Upgrade from int16 to int64: "); {
        is = intsetNew();
        is = intsetAdd(is,32,NULL);
        assert(is->encoding == INTSET_ENC_INT16);
        is = intsetAdd(is,4294967295,NULL);
        assert(is->encoding == INTSET_ENC_INT64);
        assert(intsetFind(is,32));
        assert(intsetFind(is,4294967295));
        checkConsistency(is);

        is = intsetNew();
        is = intsetAdd(is,32,NULL);
        assert(is->encoding == INTSET_ENC_INT16);
        is = intsetAdd(is,-4294967295,NULL);
        assert(is->encoding == INTSET_ENC_INT64);
        assert(intsetFind(is,32));
        assert(intsetFind(is,-4294967295));
        checkConsistency(is);
        ok();
    }

    printf("Upgrade from int32 to int64: "); {
        is = intsetNew();
        is = intsetAdd(is,65535,NULL);
        assert(is->encoding == INTSET_ENC_INT32);
        is = intsetAdd(is,4294967295,NULL);
        assert(is->encoding == INTSET_ENC_INT64);
        assert(intsetFind(is,65535));
        assert(intsetFind(is,4294967295));
        checkConsistency(is);

        is = intsetNew();
        is = intsetAdd(is,65535,NULL);
        assert(is->encoding == INTSET_ENC_INT32);
        is = intsetAdd(is,-4294967295,NULL);
        assert(is->encoding == INTSET_ENC_INT64);
        assert(intsetFind(is,65535));
        assert(intsetFind(is,-4294967295));
        checkConsistency(is);
        ok();
    }

    /* Times `num` searches for random values; the search results are
     * ignored and only the elapsed time is printed (no ok() here). */
    printf("Stress lookups: "); {
        long num = 100000, size = 10000;
        int i, bits = 20; /* this `i` shadows the outer one */
        long long start;
        is = createSet(bits,size);
        checkConsistency(is);

        start = usec();
        for (i = 0; i < num; i++) intsetSearch(is,rand() % ((1<<bits)-1),NULL);
        printf("%ld lookups, %ld element set, %lldusec\n",num,size,usec()-start);
    }

    /* Interleaves adds and removes of random values: each added value must
     * be findable right after the add, each removed value must not be
     * findable right after the remove. */
    printf("Stress add+delete: "); {
        int i, v1, v2; /* this `i` shadows the outer one */
        is = intsetNew();
        for (i = 0; i < 0xffff; i++) {
            v1 = rand() % 0xfff;
            is = intsetAdd(is,v1,NULL);
            assert(intsetFind(is,v1));

            v2 = rand() % 0xfff;
            is = intsetRemove(is,v2,NULL);
            assert(!intsetFind(is,v2));
        }
        checkConsistency(is);
        ok();
    }
}
/*
 * Determine whether a value belongs to this set.
 *
 * Returns 1 if `value` is a member, 0 otherwise.
 */
uint8_t intsetFind(intset *is, int64_t value) {
    uint8_t needed = _intsetValueEncoding(value);
    uint8_t member = 0;

    /* The search is only attempted when the value's encoding is no wider
     * than the set's. */
    if (needed <= is->encoding) {
        if (intsetSearch(is, value, NULL)) {
            member = 1;
        }
    }
    return member;
}
/*
 * Check whether `value` exists in `is`.
 *
 * Returns 1 if it does, 0 otherwise.
 *
 * T = O(lg N)
 */
uint8_t intsetFind(intset *is, int64_t value) {
    uint8_t value_enc = _intsetValueEncoding(value);

    /* Encoding does not fit the set: the value cannot be present. */
    if (value_enc > intrev32ifbe(is->encoding))
        return 0;

    /* Encoding fits: look the value up. */
    if (intsetSearch(is, value, NULL))
        return 1;

    return 0;
}
/*
 * Insert an integer in the intset.
 *
 * `*success` (when the pointer is non-NULL) reports the outcome:
 *  - 1 when the value was added;
 *  - 0 when the add failed because the value already exists.
 *
 * Returns the (possibly reallocated) set.
 *
 * T = O(N)
 */
intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
    uint32_t where;
    /* Encoding needed to represent the value. */
    uint8_t needed = _intsetValueEncoding(value);

    /* Default to "insertion succeeded". */
    if (success != NULL) {
        *success = 1;
    }

    /* Upgrade encoding if necessary. If the value's encoding is wider than
     * the set's current one, the value lies outside the range of the
     * existing values, so it is either appended (if > 0) or prepended
     * (if < 0), and the set must be upgraded to hold it. This always
     * succeeds, so *success needs no update.
     * T = O(N) */
    if (needed > intrev32ifbe(is->encoding)) {
        return intsetUpgradeAndAdd(is, value);
    }

    /* From here on the set's current encoding is suitable for the value.
     *
     * Look the value up:
     *  - if it exists, set *success to 0 and return the set unchanged;
     *  - if it does not, the search saves the position where it can be
     *    inserted into `where` for use below. */
    if (intsetSearch(is, value, &where)) {
        if (success != NULL) {
            *success = 0;
        }
        return is;
    }

    /* The value is not in the set: allocate space for it. */
    is = intsetResize(is, intrev32ifbe(is->length) + 1);

    /* If the new element does not go at the end of the underlying array,
     * the existing elements must be moved to free the slot at `where`.
     *
     * For example, with the array
     *   | x | y | z | ? |
     *       |<----->|
     * and a new element n whose position is 1, y and z are moved:
     *   | x | y | y | z |
     *           |<----->|
     * so that the new element can then be stored at `where`:
     *   | x | n | y | z |
     *
     * T = O(N) */
    if (where < intrev32ifbe(is->length)) {
        intsetMoveTail(is, where, where + 1);
    }

    /* Store the new value at its position in the underlying array. */
    _intsetSet(is, where, value);

    /* Increment the element counter. */
    is->length = intrev32ifbe(intrev32ifbe(is->length) + 1);

    /* Return the set with the new element added. */
    return is;
}
/*
 * Determine whether a value belongs to this set.
 *
 * Returns 1 if `value` is a member, 0 otherwise.
 */
uint8_t intsetFind(intset *is, int64_t value) {
    uint8_t enc = _intsetValueEncoding(value);
    /* An encoding no wider than the set's current one means the value can
     * be represented by the set; only then is intsetSearch called. */
    int representable = (enc <= intrev32ifbe(is->encoding));

    return (representable && intsetSearch(is, value, NULL)) ? 1 : 0;
}