Esempio n. 1
0
/*
 * Penalty method
 *
 * hemdistsign: count the bits that differ between two signatures of the
 * same byte length (Hamming distance).  Raises an ERROR when the lengths
 * differ.
 *
 * Without USE_BUILTIN_POPCOUNT every XOR-ed byte is looked up in
 * number_of_ones[] (presumably the per-byte bit-count table -- defined
 * outside this block).  With USE_BUILTIN_POPCOUNT whole unsigned words go
 * through __builtin_popcount() and only the leftover bytes use the table.
 */
static int
hemdistsign(bytea *a, bytea *b)
{
  unsigned char *lhs = (unsigned char *)VARDATA(a);
  unsigned char *rhs = (unsigned char *)VARDATA(b);
  unsigned pos;
  unsigned ndiff = 0;

  if (SIGLEN(a) != SIGLEN(b))
    elog(ERROR, "All fingerprints should be the same length");

#ifdef USE_BUILTIN_POPCOUNT
  /* word-sized chunks first ... */
  unsigned nwords = SIGLEN(a)/sizeof(unsigned int);
  for (pos = 0; pos < nwords; ++pos) {
    ndiff += __builtin_popcount(((unsigned int *)lhs)[pos] ^ ((unsigned int *)rhs)[pos]);
  }
  /* ... then the bytes that did not fill a whole word */
  for (pos = nwords*sizeof(unsigned); pos < SIGLEN(a); ++pos) {
    ndiff += number_of_ones[lhs[pos] ^ rhs[pos]];
  }
#else
  for (pos = 0; pos < SIGLEN(a); pos++) {
    ndiff += number_of_ones[lhs[pos] ^ rhs[pos]];
  }
#endif

  return ndiff;
}
Esempio n. 2
0
/*
 * soergeldistsign: Soergel distance (1 - |a AND b| / |a OR b|) between two
 * signatures of the same byte length, scaled by 10000 and rounded down to
 * an int.  Raises an ERROR when the lengths differ.
 *
 * Returns 1 when neither signature has any bit set (empty union).
 * NOTE(review): 1 is on the 0..10000 scale here, so two empty signatures
 * are reported as almost identical -- confirm whether 10000 was intended.
 */
static int
soergeldistsign(bytea *a, bytea *b) {
  if (SIGLEN(a) != SIGLEN(b))
    elog(ERROR, "All fingerprints should be the same length");
  unsigned int union_popcount=0,intersect_popcount=0;
  unsigned int i;
  /* byte views are needed in both build variants */
  unsigned char   *as = (unsigned char *)VARDATA(a);
  unsigned char   *bs = (unsigned char *)VARDATA(b);
#ifndef USE_BUILTIN_POPCOUNT
  for (i=0; i<SIGLEN(a); i++) {
    union_popcount += number_of_ones[as[i] | bs[i]];
    intersect_popcount += number_of_ones[as[i] & bs[i]];
  }
#else
  unsigned *aw = (unsigned *)as;
  unsigned *bw = (unsigned *)bs;
  unsigned eidx=SIGLEN(a)/sizeof(unsigned);
  for(i=0;i<eidx;++i){
    union_popcount += __builtin_popcount(aw[i] | bw[i]);
    intersect_popcount += __builtin_popcount(aw[i] & bw[i]);
  }
  /*
   * Leftover bytes: i is a BYTE offset, so it must index the byte
   * pointers.  Indexing the word pointers here would read past the end
   * of the signature and feed word-sized values into number_of_ones[].
   */
  for(i=eidx*sizeof(unsigned);i<SIGLEN(a);++i){
    union_popcount += number_of_ones[as[i] | bs[i]];
    intersect_popcount += number_of_ones[as[i] & bs[i]];
  }
#endif
  if (union_popcount == 0) {
    return 1;
  }
  /* divide in floating point; an integer quotient would only be 0 or 1 */
  return (int)floor(10000*(1.0-(double)intersect_popcount / (double)union_popcount));
}
Esempio n. 3
0
/*
 * soergeldistsign: check that both signatures have the same byte length
 * (ERROR otherwise) and hand their raw bytes to
 * bitstringTanimotoDistance().
 *
 * NOTE(review): the helper's result is returned as int; if the helper
 * yields a fractional value it is truncated here -- confirm against the
 * helper's declaration.
 */
static int
soergeldistsign(bytea *a, bytea *b) {
  uint8 *lhs = (uint8 *)VARDATA(a);
  uint8 *rhs = (uint8 *)VARDATA(b);
  unsigned int nbytes = SIGLEN(a);

  if (SIGLEN(b) != nbytes) {
    elog(ERROR, "All fingerprints should be the same length");
  }

  return bitstringTanimotoDistance(nbytes, lhs, rhs);
}
Esempio n. 4
0
/*
 * hemdist: Hamming distance between two keys, either of which may be the
 * header-only "all true" marker.
 *
 *  - both all true: 0
 *  - exactly one all true: SIGLENBIT of the other key minus its
 *    bitstringWeight()
 *  - neither all true: delegate to hemdistsign()
 */
static int
hemdist(bytea *a, bytea *b) {
  const int a_full = ISALLTRUE(a);
  const int b_full = ISALLTRUE(b);

  if (a_full && b_full) {
    return 0;
  }
  if (a_full) {
    return SIGLENBIT(b) - bitstringWeight(SIGLEN(b), (uint8 *)VARDATA(b));
  }
  if (b_full) {
    return SIGLENBIT(a) - bitstringWeight(SIGLEN(a), (uint8 *)VARDATA(a));
  }

  return hemdistsign(a, b);
}
Esempio n. 5
0
/*
 * Penalty method
 *
 * hemdistsign: verify that both signatures have the same byte length
 * (ERROR otherwise) and return bitstringHemDistance() over their raw
 * bytes.
 */
static int
hemdistsign(bytea *a, bytea *b)
{
  int nbytes = SIGLEN(a);

  if (SIGLEN(b) != nbytes) {
    elog(ERROR, "All fingerprints should be the same length");
  }

  return bitstringHemDistance(nbytes,
                              (uint8 *)VARDATA(a),
                              (uint8 *)VARDATA(b));
}
Esempio n. 6
0
/*
 * gmol_union: GiST union support function.
 *
 * Argument 0 is the entry vector, argument 1 receives the byte size of
 * the returned key.  The result is the bitwise OR of all entry
 * signatures.  As soon as an "all true" entry is met, the result is
 * reported as a header-only (all true) key instead.  Entries whose
 * signature length differs from the first one raise an ERROR.
 */
Datum
gmol_union(PG_FUNCTION_ARGS)
{
  GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
  int *size = (int *) PG_GETARG_POINTER(1);
  bytea *first = GETENTRY(entryvec, 0);
  bytea *result;
  unsigned char *acc;
  int nbytes;
  int32 idx;

  /* first entry already all true: nothing to accumulate */
  if (ISALLTRUE(first)) {
    *size = VARHDRSZ;
    result = palloc(VARHDRSZ);
    SET_VARSIZE(result, VARHDRSZ);

    PG_RETURN_POINTER(result);
  }

  /* start from a copy of the first signature */
  nbytes = SIGLEN(first);
  *size = VARHDRSZ + nbytes;
  result = palloc(VARHDRSZ + nbytes);
  SET_VARSIZE(result, VARHDRSZ + nbytes);
  memcpy( VARDATA(result), VARDATA(first), nbytes );
  acc = (unsigned char *)VARDATA(result);

  for (idx = 1; idx < entryvec->n; idx++) {
    bytea *cur = GETENTRY(entryvec, idx);
    unsigned char *src = (unsigned char *)VARDATA(cur);
    int32 byte;

    if (ISALLTRUE(cur)) {
      /* shrink the already allocated result to the header-only form */
      *size = VARHDRSZ;
      SET_VARSIZE(result, VARHDRSZ);

      PG_RETURN_POINTER(result);
    }

    if (SIGLEN(cur) != nbytes)
      elog(ERROR, "All fingerprints should be the same length");

    for (byte = 0; byte < nbytes; byte++)
      acc[byte] |= src[byte];
  }

  PG_RETURN_POINTER(result);
}
Esempio n. 7
0
static int
soergeldist(bytea *a, bytea *b) {
  double d;
  
  if (ISALLTRUE(a)) {
    if (ISALLTRUE(b))
      return 0;
    else
      // FIXME shouldn't it be double(sizebitvec(b))/SIGLENBIT(b); ?
      return SIGLENBIT(b) - bitstringWeight(SIGLEN(b), (uint8 *)VARDATA(b));
  }
  else if (ISALLTRUE(b)) {
    // FIXME shouldn't it be double(sizebitvec(a))/SIGLENBIT(a); ?
    return SIGLENBIT(a) - bitstringWeight(SIGLEN(a), (uint8 *)VARDATA(a));
  }
  return (int) floor(10000*soergeldistsign(a, b));
}
Esempio n. 8
0
static bool
rdkit_consistent(GISTENTRY *entry, StrategyNumber strategy, bytea *key, bytea *query)
{
  double nCommon, nQuery, nKey = 0.0;

  if (ISALLTRUE(query))
    elog(ERROR, "Query malformed");

  /* 
   * Counts basic numbers, but don't count nKey on inner
   * page (see comments below)  
   */
  nQuery = (double)sizebitvec(query);
  if (ISALLTRUE(key)) {
    if (GIST_LEAF(entry))
      nKey = (double)SIGLENBIT(query);
    nCommon = nQuery;
  } else {
    int i, cnt = 0;
    unsigned char *pk = (unsigned char*)VARDATA(key);
    unsigned char *pq = (unsigned char*)VARDATA(query);

    if (SIGLEN(key) != SIGLEN(query))
      elog(ERROR, "All fingerprints should be the same length");

#ifndef USE_BUILTIN_POPCOUNT
    for(i=0;i<SIGLEN(key);i++)
      cnt += number_of_ones[ pk[i] & pq[i] ];
#else
    unsigned eidx=SIGLEN(key)/sizeof(unsigned int);
    for(i=0;i<eidx;++i){
      cnt += __builtin_popcount(((unsigned int *)pk)[i] & ((unsigned int *)pq)[i]);
    }
    for(i=eidx*sizeof(unsigned);i<SIGLEN(key);++i){
      cnt += number_of_ones[ pk[i] & pq[i] ];
    }
#endif      

    nCommon = (double)cnt;
    if (GIST_LEAF(entry))
      nKey = (double)sizebitvec(key);
  }

  return calcConsistency(GIST_LEAF(entry), strategy, nCommon, nCommon, nKey, nQuery);
}
Esempio n. 9
0
Datum
gmol_union(PG_FUNCTION_ARGS) {
  GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
  int *size = (int *) PG_GETARG_POINTER(1);
  
  int i, signlen;
  bytea *result, *key;
  unsigned char *s, *k;
  
  int numentries = entryvec->n;
  
  for (i = 0; i < numentries; ++i) {
    key = GETENTRY(entryvec, i);
    if (ISALLTRUE(key)) {
      *size = VARHDRSZ;
      result = palloc(VARHDRSZ);
      SET_VARSIZE(result, VARHDRSZ);
      PG_RETURN_POINTER(result);
    }
  }
  
  key = GETENTRY(entryvec, 0);
  signlen = SIGLEN(key);
  *size = VARHDRSZ + signlen;
  result = palloc(*size);
  SET_VARSIZE(result, *size);
  memcpy(VARDATA(result), VARDATA(key), signlen);

  s = (uint8 *)VARDATA(result);
  for (i = 1; i < entryvec->n; ++i) {
    key = GETENTRY(entryvec, i);
    k = (uint8 *)VARDATA(key);
    
    if (SIGLEN(key) != signlen) {
      elog(ERROR, "All fingerprints should be the same length");
    }
    
    bitstringUnion(signlen, s, k);
  }

  PG_RETURN_POINTER(result);
}
Esempio n. 10
0
static int32
sizebitvec(bytea *b)
{
  int32    size = 0,
    i;
  unsigned char *sign = (unsigned char*)VARDATA(b);

  for(i=0; i<SIGLEN(b); i++)
    size += number_of_ones[sign[i]];

  return size;
}
Esempio n. 11
0
static bool
rdkit_consistent(GISTENTRY *entry, StrategyNumber strategy, bytea *key, bytea *query)
{
  double nCommon, nQuery, nKey = 0.0;

  if (ISALLTRUE(query)) {
    elog(ERROR, "Query malformed");
  }
  
  /* 
   * Counts basic numbers, but don't count nKey on inner
   * page (see comments below)  
   */
  int siglen = SIGLEN(query);
  uint8 *q = (uint8 *)VARDATA(query);
  nQuery = (double)bitstringWeight(siglen, q);
  
  if (ISALLTRUE(key)) {
    if (GIST_LEAF(entry)) {
      nKey = (double)SIGLENBIT(query);
    }
    nCommon = nQuery;
  }
  else {
    if (siglen != SIGLEN(key)) {
      elog(ERROR, "All fingerprints should be the same length");
    }
    
    uint8 *k = (uint8 *)VARDATA(key);
    nCommon = bitstringIntersectionWeight(siglen, k, q);
    
    if (GIST_LEAF(entry)) {
      nKey = (double)bitstringWeight(siglen, k);
    }
  }

  return calcConsistency(GIST_LEAF(entry), strategy, nCommon, nCommon, nKey, nQuery);
}
Esempio n. 12
0
Datum
gsfp_consistent(PG_FUNCTION_ARGS)
{
  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
  StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
  bool *recheck = (bool *) PG_GETARG_POINTER(4);
  bytea *key = (bytea*)DatumGetPointer(entry->key);
  bytea *query;
  CSfp data;

  fcinfo->flinfo->fn_extra = searchSfpCache(
					    fcinfo->flinfo->fn_extra,
					    fcinfo->flinfo->fn_mcxt,
					    PG_GETARG_DATUM(1), 
					    NULL, &data, &query);

  *recheck = true; /* we use signature, so it's needed to recheck */

  if (ISALLTRUE(key) && !GIST_LEAF(entry)) {
    PG_RETURN_BOOL(true);
  }

  int sum, overlapSum, overlapN;
  countOverlapValues(
		     (ISALLTRUE(key)) ? NULL : key, data, NUMBITS,
		     &sum, &overlapSum, &overlapN
		     );

  int nKey = (ISALLTRUE(key)) ?
    NUMBITS : bitstringWeight(SIGLEN(key), (uint8 *)VARDATA(key));
  
  PG_RETURN_BOOL(calcConsistency(
				 GIST_LEAF(entry), strategy,
				 overlapSum, /* nCommonUp */
				 overlapN, /* nCommonDown */
				 nKey,
				 sum /* nQuery */
				 ));
}
Esempio n. 13
0
/*
 * Compress/decompress
 *
 * compressAllTrue: if bitstringAllTrue() reports that the entry's
 * signature has every bit set, return a newly allocated GISTENTRY whose
 * key is a header-only varlena (the "all true" marker).  Otherwise the
 * incoming entry is returned unchanged.
 */
static GISTENTRY*
compressAllTrue(GISTENTRY *entry) 
{
  bytea *sig = (bytea*)DatumGetPointer(entry->key);
  GISTENTRY *compressed;
  bytea *marker;

  if (!bitstringAllTrue(SIGLEN(sig), (uint8 *)VARDATA(sig))) {
    return entry;
  }

  compressed = (GISTENTRY *) palloc(sizeof(GISTENTRY));
  marker = palloc(VARHDRSZ);
  SET_VARSIZE(marker, VARHDRSZ);

  gistentryinit(*compressed, PointerGetDatum(marker),
                entry->rel, entry->page,
                entry->offset, false);

  return compressed;
}
Esempio n. 14
0
/*
 * Compress/decompress
 *
 * compressAllTrue: if every byte of the entry's signature is 0xff,
 * return a newly allocated GISTENTRY whose key is a header-only varlena
 * (the "all true" marker).  Otherwise the incoming entry is returned
 * unchanged.
 */
static GISTENTRY*
compressAllTrue(GISTENTRY *entry) 
{
  GISTENTRY  *retval = entry;

  bytea   *b = (bytea*)DatumGetPointer(entry->key);
  unsigned char *sign = (unsigned char*)VARDATA(b);
  int i;

  /* any byte with a cleared bit means the key stays as it is */
  for(i=0; i<SIGLEN(b); i++) {
    if ( sign[i] != 0xff ) {
      return retval;
    }
  }

  retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
  b = palloc(VARHDRSZ);
  SET_VARSIZE(b, VARHDRSZ);

  /*
   * Use the lowercase boolean constant: the uppercase FALSE macro is not
   * provided by newer PostgreSQL headers, while `false` is available in
   * all of them.
   */
  gistentryinit(*retval, PointerGetDatum(b),
                entry->rel, entry->page,
                entry->offset, false);

  return retval;
}
Esempio n. 15
0
Datum
greaction_consistent(PG_FUNCTION_ARGS)
{
  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
  StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
  bool *recheck = (bool *) PG_GETARG_POINTER(4);
  bytea *key = (bytea*)DatumGetPointer(entry->key);
  bytea *query;
  bool  res = true;

  fcinfo->flinfo->fn_extra = searchReactionCache(
						 fcinfo->flinfo->fn_extra,
						 fcinfo->flinfo->fn_mcxt,
						 PG_GETARG_DATUM(1),
						 NULL, NULL,&query);
  /*
  ** RDKitContains, RDKitContained, RDKitEquals require a recheck, but 
  ** it defaults to false, so that RDkitSmaller and RDKitGreater can reuse
  ** the RDKitContains, RDKitContained implementation.
  */
  *recheck = false;

  switch (strategy) {
  case RDKitContains:
    *recheck = true;
    /* fallthrough */
  case RDKitSmaller:
    if (!ISALLTRUE(key)) {
      int siglen = SIGLEN(key);

      if (siglen != SIGLEN(query)) {
            elog(ERROR, "All fingerprints should be the same length");
      }
      
      uint8 *k = (uint8 *)VARDATA(key);
      uint8 *q = (uint8 *)VARDATA(query);

      res = bitstringContains(siglen, k, q);
    }
    break;
  case RDKitContained:
    *recheck = true;
    /* fallthrough */
  case RDKitGreater:
    if (!ISALLTRUE(key)) {
      int siglen = SIGLEN(key);
     
      if (siglen != SIGLEN(query)) {
	elog(ERROR, "All fingerprints should be the same length");
      }
      
      uint8 *k = (uint8 *)VARDATA(key);
      uint8 *q = (uint8 *)VARDATA(query);

      if ( GIST_LEAF(entry) ) {
	res = bitstringContains(siglen, q, k);
      }
      else {
	/*
	 * Due to superimposed key on inner page we could only check
	 * overlapping
	 */
	res = bitstringIntersects(siglen, q, k);
      }
    }
    else if (GIST_LEAF(entry)) {
      res = bitstringAllTrue(SIGLEN(query), (uint8 *)VARDATA(query));
    }
    break;
  case RDKitEquals:
    *recheck = true;
    
    if (!ISALLTRUE(key)) {
      int siglen = SIGLEN(key);
      
      if (siglen != SIGLEN(query)) {
	elog(ERROR, "All fingerprints should be the same length");
      }

      uint8 *k = (uint8 *)VARDATA(key);
      uint8 *q = (uint8 *)VARDATA(query);
      
      res =
	bitstringContains(siglen, k, q)
	/* 
	** the original implementation also required the query to
	** contain the key, but (I think) this is only true on the 
	** leaves (FIXME?)
	*/
	&& bitstringContains(siglen, q, k)
	;
    }
    break;
  default:
    elog(ERROR,"Unknown strategy: %d", strategy);
  }
  
  PG_RETURN_BOOL(res);
}
Esempio n. 16
0
Datum
gmol_consistent(PG_FUNCTION_ARGS)
{
  GISTENTRY               *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
  StrategyNumber  strategy = (StrategyNumber) PG_GETARG_UINT16(2);
  bool                    *recheck = (bool *) PG_GETARG_POINTER(4);
  bytea                   *key = (bytea*)DatumGetPointer(entry->key);
  bytea                   *query;
  bool                    res = true;

  int siglen = SIGLEN(key);
  
  fcinfo->flinfo->fn_extra = searchMolCache(
                                            fcinfo->flinfo->fn_extra,
                                            fcinfo->flinfo->fn_mcxt,
                                            PG_GETARG_DATUM(1), 
                                            NULL, NULL,&query);

  /*
  ** recheck is required for all strategies
  */
  *recheck = true;
  
  switch (strategy) {
  case RDKitContains:
    if (!ISALLTRUE(key)) {
      if (siglen != SIGLEN(query)) {
	elog(ERROR, "All fingerprints should be the same length");
      }
      
      uint8 *k = (uint8 *)VARDATA(key);
      uint8 *q = (uint8 *)VARDATA(query);
      
      res = bitstringContains(siglen, k, q);
    }
    break;
  case RDKitContained:
    if (!ISALLTRUE(key)) {
      if (siglen != SIGLEN(query)) {
	elog(ERROR, "All fingerprints should be the same length");
      }
      
      uint8 *k = (uint8 *)VARDATA(key);
      uint8 *q = (uint8 *)VARDATA(query);
      
      if (GIST_LEAF(entry)) {
	res = bitstringContains(siglen, q, k);
      }
      else {
	/*
	 * Due to superimposed key on inner page we could only check
	 * overlapping
	 */
	res = bitstringIntersects(siglen, q, k);
      }
    } 
    else if (GIST_LEAF(entry)) {
      /* 
       * key is all true, it may be contained in query, iff query is also 
       * all true
       */
      res = bitstringAllTrue(siglen, (uint8 *)VARDATA(query));
    }
    break;
  case RDKitEquals:
    if (!ISALLTRUE(key)) {
      /*
      ** verify the necessary condition that key should contain the query
      ** (on leaf nodes, couldn't it also verify that query contains key?)
      */
      if (siglen != SIGLEN(query)) {
	elog(ERROR, "All fingerprints should be the same length");
      }
      
      uint8 *k = (uint8 *)VARDATA(key);
      uint8 *q = (uint8 *)VARDATA(query);
      
      res = bitstringContains(siglen, k, q);
    }
    break;
  default:
    elog(ERROR,"Unknown strategy: %d", strategy);
  }

  PG_RETURN_BOOL(res);
}
Esempio n. 17
0
Datum
gmol_picksplit(PG_FUNCTION_ARGS)
{
  GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
  GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
  OffsetNumber k, j;
  bytea *datum_l, *datum_r;
  int32 size_alpha, size_beta;
  int32 size_waste, waste = -1;
  int32 nbytes;
  OffsetNumber seed_1 = 0, seed_2 = 0;
  OffsetNumber *left, *right;
  OffsetNumber maxoff;
  int i, signlen = 0;
  SPLITCOST *costvector;

  maxoff = entryvec->n - 1;
  nbytes = (maxoff + 2) * sizeof(OffsetNumber);
  v->spl_left = (OffsetNumber *) palloc(nbytes);
  v->spl_right = (OffsetNumber *) palloc(nbytes);

  for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) {
    if (signlen == 0) {
        signlen = SIGLEN(GETENTRY(entryvec, k));
    }
    for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) {
      size_waste = hemdist(GETENTRY(entryvec, j), GETENTRY(entryvec, k));
      if (size_waste > waste) {
	waste = size_waste;
	seed_1 = k;
	seed_2 = j;
      }
    }
  }

  if (signlen == 0) {
    signlen = SIGLEN(GETENTRY(entryvec, maxoff));
  }

  left = v->spl_left;
  v->spl_nleft = 0;
  right = v->spl_right;
  v->spl_nright = 0;

  if (signlen == 0 || waste == 0) {
    /* all entries a alltrue  or all the same */
    
    for (k = FirstOffsetNumber; k <= maxoff; k = OffsetNumberNext(k)) {
      if (k <= (maxoff - FirstOffsetNumber + 1) / 2) {
	v->spl_left[v->spl_nleft] = k;
	v->spl_nleft++;
      }
      else {
	v->spl_right[v->spl_nright] = k;
	v->spl_nright++;
      }
    }

    signlen = VARSIZE(GETENTRY(entryvec, FirstOffsetNumber));
                
    datum_l = palloc(signlen);
    memcpy(datum_l, GETENTRY(entryvec, FirstOffsetNumber), signlen);
    v->spl_ldatum = PointerGetDatum(datum_l);
    datum_r = palloc(signlen);
    memcpy(datum_r, GETENTRY(entryvec, FirstOffsetNumber), signlen);
    v->spl_rdatum = PointerGetDatum(datum_r);
    
    Assert( v->spl_nleft + v->spl_nright == maxoff );
    PG_RETURN_POINTER(v);
  }

  if (seed_1 == 0 || seed_2 == 0) {
    seed_1 = 1;
    seed_2 = 2;
  }

  /* form initial .. */
  if (ISALLTRUE(GETENTRY(entryvec, seed_1))) {
    datum_l = palloc(VARHDRSZ);
    SET_VARSIZE(datum_l, VARHDRSZ);
  }
  else {
    datum_l = palloc(signlen + VARHDRSZ);
    memcpy(datum_l , GETENTRY(entryvec, seed_1) , signlen + VARHDRSZ);
  }
  
  if (ISALLTRUE(GETENTRY(entryvec, seed_2))) {
    datum_r = palloc(VARHDRSZ);
    SET_VARSIZE(datum_r, VARHDRSZ);
  }
  else {
    datum_r = palloc(signlen + VARHDRSZ);
    memcpy(datum_r , GETENTRY(entryvec, seed_2) , signlen + VARHDRSZ);
  }

  /* sort before ... */
  costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
  for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) {
    costvector[j - 1].pos = j;
    size_alpha = hemdist(datum_l, GETENTRY(entryvec, j));
    size_beta  = hemdist(datum_r, GETENTRY(entryvec, j));
    costvector[j - 1].cost = Abs(size_alpha - size_beta);
  }
  qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);

  for (k = 0; k < maxoff; k++) {
    j = costvector[k].pos;
    if (j == seed_1) {
      *left++ = j;
      v->spl_nleft++;
      continue;
    }
    else if (j == seed_2) {
      *right++ = j;
      v->spl_nright++;
      continue;
    }
    
    size_alpha = hemdist(GETENTRY(entryvec, j), datum_l);
    size_beta =  hemdist(GETENTRY(entryvec, j), datum_r);
    
    if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) {
      if (!ISALLTRUE(datum_l)) {
	if (ISALLTRUE(GETENTRY(entryvec, j))) {
	  datum_l = palloc(VARHDRSZ);
	  SET_VARSIZE(datum_l, VARHDRSZ);
	}
	else {
	  unsigned char
	    *as = (unsigned char *)VARDATA(datum_l),
	    *bs = (unsigned char *)VARDATA(GETENTRY(entryvec, j));
	  
	  for (i=0;i<signlen;i++) {
	    as[i] |= bs[i];
	  }
	}
      }
      *left++ = j;
      v->spl_nleft++;
    }
    else {
      if (!ISALLTRUE(datum_r)) {
	if (ISALLTRUE(GETENTRY(entryvec, j))) {
	  datum_r = palloc(VARHDRSZ);
	  SET_VARSIZE(datum_r, VARHDRSZ);
	}
	else {
	  unsigned char
	    *as = (unsigned char *)VARDATA(datum_r),
	    *bs = (unsigned char *)VARDATA(GETENTRY(entryvec, j));

	  for (i=0;i<signlen;i++) {
	    as[i] |= bs[i];
	  }
	}
      }
      *right++ = j;
      v->spl_nright++;
    }
  }
  
  *right = *left = FirstOffsetNumber;
  v->spl_ldatum = PointerGetDatum(datum_l);
  v->spl_rdatum = PointerGetDatum(datum_r);
  
  Assert( v->spl_nleft + v->spl_nright == maxoff );

  PG_RETURN_POINTER(v);
}
Esempio n. 18
0
Datum
gmol_consistent(PG_FUNCTION_ARGS)
{
  GISTENTRY               *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
  StrategyNumber  strategy = (StrategyNumber) PG_GETARG_UINT16(2);
  bool                    *recheck = (bool *) PG_GETARG_POINTER(4);
  bytea                   *key = (bytea*)DatumGetPointer(entry->key);
  bytea                   *query;
  bool                    res = true;

  fcinfo->flinfo->fn_extra = SearchMolCache(
                                            fcinfo->flinfo->fn_extra,
                                            fcinfo->flinfo->fn_mcxt,
                                            PG_GETARG_DATUM(1), 
                                            NULL, NULL,&query);

  switch(strategy)
    {
    case RDKitContains:
      *recheck = true;

      if (!ISALLTRUE(key))
        {
          int i;
          unsigned char   *k = (unsigned char*)VARDATA(key),
            *q = (unsigned char*)VARDATA(query);

          if (SIGLEN(key) != SIGLEN(query))
            elog(ERROR, "All fingerprints should be the same length");

          for(i=0; res && i<SIGLEN(key); i++)
            if ( (k[i] & q[i]) != q[i])
              res = false;
        }
      break;
    case RDKitContained:
      *recheck = true;

      if (!ISALLTRUE(key))
        {
          int i;
          unsigned char   *k = (unsigned char*)VARDATA(key),
            *q = (unsigned char*)VARDATA(query);

          if (SIGLEN(key) != SIGLEN(query))
            elog(ERROR, "All fingerprints should be the same length");

          if ( GIST_LEAF(entry) )
            {
              for(i=0; res && i<SIGLEN(key); i++)
                if ( (k[i] & q[i]) != k[i])
                  res = false;
            }
          else
            {
              /*
               * Due to superimposed key on inner page we could only check
               * overlapping
               */
              res = false;
              for(i=0; res == false && i<SIGLEN(key); i++)
                if ( k[i] & q[i] )
                  res = true;
            }
        } 
      else if (GIST_LEAF(entry))
        {
          int i;
          unsigned char *q = (unsigned char*)VARDATA(query);

          res = true;
          for(i=0; res && i<SIGLEN(query); i++)
            if ( q[i] != 0xff )
              res = false;
        }
      break;
    case RDKitEquals:
      *recheck = true;

      if (!ISALLTRUE(key))
        {
          int i;
          unsigned char   *k = (unsigned char*)VARDATA(key),
            *q = (unsigned char*)VARDATA(query);

          if (SIGLEN(key) != SIGLEN(query))
            elog(ERROR, "All fingerprints should be the same length");

          for(i=0; res && i<SIGLEN(key); i++){
        	unsigned char temp = k[i] & q[i];
            if ( temp != q[i] || temp != k[i])
              res = false;
          }
        }
      break;
    default:
      elog(ERROR,"Unknown strategy: %d", strategy);
    }

  PG_RETURN_BOOL(res);
}
Esempio n. 19
0
Datum
gbfp_distance(PG_FUNCTION_ARGS)
{
    GISTENTRY      *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
    // bytea          *query = PG_GETARG_DATA_TYPE_P(1);
    StrategyNumber  strategy = (StrategyNumber) PG_GETARG_UINT16(2);
    bytea          *key = (bytea*)DatumGetPointer(entry->key);

    bytea          *query;
    double          nCommon, nCommonUp, nCommonDown, nQuery, distance;
    double          nKey = 0.0;

    fcinfo->flinfo->fn_extra = SearchBitmapFPCache(
                                                   fcinfo->flinfo->fn_extra,
                                                   fcinfo->flinfo->fn_mcxt,
                                                   PG_GETARG_DATUM(1),
                                                   NULL, NULL,&query);

    if (ISALLTRUE(query))
        elog(ERROR, "Query malformed");

    /*
    * Counts basic numbers, but don't count nKey on inner
    * page (see comments below)
    */
    nQuery = (double)sizebitvec(query);
    if (ISALLTRUE(key))
        {

        if (GIST_LEAF(entry)) nKey = (double)SIGLENBIT(query);

        nCommon = nQuery;
        }
    else
        {
        int i, cnt = 0;
        unsigned char *pk = (unsigned char*)VARDATA(key),
            *pq = (unsigned char*)VARDATA(query);

        if (SIGLEN(key) != SIGLEN(query))
            elog(ERROR, "All fingerprints should be the same length");

#ifndef USE_BUILTIN_POPCOUNT
        for(i=0;i<SIGLEN(key);i++)
            cnt += number_of_ones[ pk[i] & pq[i] ];
#else
        unsigned eidx=SIGLEN(key)/sizeof(unsigned int);
        for(i=0;i<SIGLEN(key)/sizeof(unsigned int);++i){
          cnt += __builtin_popcount(((unsigned int *)pk)[i] & ((unsigned int *)pq)[i]);
        }
        for(i=eidx*sizeof(unsigned);i<SIGLEN(key);++i){
          cnt += number_of_ones[ pk[i] & pq[i] ];
        }
#endif        

        nCommon = (double)cnt;
        if (GIST_LEAF(entry))
            nKey = (double)sizebitvec(key);
        }

    nCommonUp = nCommon;
    nCommonDown = nCommon;

    switch(strategy)
    {
        case RDKitOrderByTanimotoStrategy:
        /*
        * Nsame / (Na + Nb - Nsame)
        */
        if (GIST_LEAF(entry))
        {
            distance = nCommonUp / (nKey + nQuery - nCommonUp);
        }

        else
        {
            distance = nCommonUp / nQuery;
        }

        break;

        case RDKitOrderByDiceStrategy:
        /*
        * 2 * Nsame / (Na + Nb)
        */
        if (GIST_LEAF(entry))
        {
            distance = 2.0 * nCommonUp / (nKey + nQuery);
        }

        else
        {
            distance =  2.0 * nCommonUp / (nCommonDown + nQuery);
        }

        break;

        default:
        elog(ERROR,"Unknown strategy: %d", strategy);
    }

    PG_RETURN_FLOAT8(1.0 - distance);
}