/*
 * Walk a bit string and hand the datastore block selected by every set bit
 * to XOR_fullblocks together with resultbuffer.
 *
 *   ds                 index into xordatastoretable
 *   bit_string         selector bits; within each byte the most significant
 *                      bit is examined first
 *   bit_string_length  length of bit_string in bytes
 *   resultbuffer       buffer passed as the first argument of XOR_fullblocks
 *                      (presumably the XOR accumulator -- confirm against
 *                      XOR_fullblocks)
 *
 * The k-th bit examined corresponds to the block that starts k * sizeofablock
 * bytes into the datastore.
 */
static void bitstring_xor_worker(int ds, char *bit_string, long bit_string_length, uint64_t *resultbuffer) {
  const int bytes_per_block = xordatastoretable[ds].sizeofablock;
  const int words_per_block = bytes_per_block / sizeof(uint64_t);
  char *store = (char *) xordatastoretable[ds].datastore;
  long long block_offset = 0;
  long byte_idx;
  int mask;

  for (byte_idx = 0; byte_idx < bit_string_length; byte_idx++) {
    // Scan one selector byte, MSB (0x80) down to LSB (0x01).
    for (mask = 0x80; mask != 0; mask >>= 1) {
      // The byte is re-read for every bit, exactly as the original loop did.
      if (bit_string[byte_idx] & mask) {
        XOR_fullblocks(resultbuffer, (uint64_t *) (store + block_offset), words_per_block);
      }
      // Every bit, set or not, advances to the next block.
      block_offset += bytes_per_block;
    }
  }
}
/* Example 2 */
// This function needs to be fast.   It is a good candidate for releasing Python's GIL
//
// Processes `numstrings` equally sized bit strings in one pass over the
// datastore.  `bit_string` holds the strings back to back, each
// bit_string_length / numstrings bytes long (any remainder bytes are ignored).
// For every bit position (MSB of each byte first) and every string i whose bit
// is set, the current datastore block and string i's slice of `resultbuffer`
// (starting at resultbuffer + dwords_per_block * i) are passed to
// XOR_fullblocks.
//
//   ds                 index into xordatastoretable
//   bit_string         concatenated selector bit strings
//   bit_string_length  total length of bit_string in bytes
//   numstrings         number of bit strings; 0 means nothing to do
//   resultbuffer       numstrings consecutive per-string buffers, each
//                      sizeofablock bytes long
static void multi_bitstring_xor_worker(int ds, char *bit_string, long bit_string_length, unsigned int numstrings, __m128i *resultbuffer) {
	// Without this guard the division below would divide by zero, which is
	// undefined behaviour.  With no strings there is no work to do.
	if (numstrings == 0) {
		return;
	}

	long one_bit_string_length = bit_string_length / numstrings; // length of one bit string
	long remaininglength = one_bit_string_length * 8; // convert bytes to bits
	char *current_bit_string_pos = bit_string;
	long long offset = 0;
	int block_size = xordatastoretable[ds].sizeofablock;
	char *datastorebase = (char *) xordatastoretable[ds].datastore;

	// Number of __m128i elements in one block.
	int dwords_per_block = block_size / sizeof(__m128i);

	unsigned char bit = 128;
	unsigned int i;

	while (remaininglength > 0) {

		// Test the same bit position in every string so each datastore
		// block is visited once for all strings.
		for (i = 0; i < numstrings; i++) {
			if (*(current_bit_string_pos + one_bit_string_length * i) & bit) {
				XOR_fullblocks(resultbuffer + dwords_per_block * i, (__m128i *) (datastorebase + offset), dwords_per_block);
			}
		}

		offset += block_size;
		bit /= 2;
		remaininglength -= 1;
		// All 8 bits of the current byte consumed: move to the next byte.
		if (bit == 0) {
			bit = 128;
			current_bit_string_pos++;
		}
	}
}
// This XORs data with the starting data in dest
//
//   dest          buffer that is XORed in place
//   data          bytes to XOR into dest
//   stringlength  number of bytes to process
//
// Inputs of at most sizeof(uint64_t) bytes are delegated to slow_XOR and its
// return value is passed through.  Longer inputs are handled in three parts:
// leading bytes up to the next 8-byte boundary of `data`, whole uint64_t
// words, then trailing bytes.  That only works when dest and data sit at the
// same offset within an 8-byte word; otherwise an error is printed and NULL
// is returned.  On success dest is returned.
static char *fast_XOR(char *dest, const char *data, long stringlength) {
  int leadingmisalignedbytes;
  long fulllengthblocks;
  int remainingbytes;

  // If it's shorter than a block, use char-based XOR
  if (stringlength <= sizeof(uint64_t)) {
    return slow_XOR(dest,data,stringlength);
  }

  // The word-wise pass needs dest and data to reach an 8-byte boundary after
  // the same number of leading bytes, so bail out only when their offsets
  // DIFFER.  (The previous `==` test rejected exactly the supported case and
  // let mismatched buffers through to the uint64_t accesses.)
  if (((long) dest) % sizeof(uint64_t) != ((long) data) % sizeof(uint64_t)) {
    printf("Error, assumed that dest and data are identically DWORD aligned!\n");
    return NULL;
  }


  // Let's XOR any stray bytes at the front...

  // This is the number of bytes that are before we get DWORD aligned
  // To compute this we do (8 - (pos % 8)) % 8)
  leadingmisalignedbytes = (sizeof(uint64_t) - (((long)data) % sizeof(uint64_t))) % sizeof(uint64_t);

  XOR_byteblocks(dest, data, leadingmisalignedbytes);


  // The middle will be done with full sized blocks...
  fulllengthblocks = (stringlength-leadingmisalignedbytes) / sizeof(uint64_t);

  XOR_fullblocks((uint64_t *) (dest+leadingmisalignedbytes), (uint64_t *) (data + leadingmisalignedbytes), fulllengthblocks);



  // XOR anything left over at the end...
  remainingbytes = stringlength - (leadingmisalignedbytes + fulllengthblocks * sizeof(uint64_t));
  XOR_byteblocks(dest+stringlength-remainingbytes, data+stringlength-remainingbytes, remainingbytes);

  return dest;

}