/*
 * Estimate the space needed for a hashtable containing the given number
 * of entries of given size.
 * NOTE: this is used to estimate the footprint of hashtables in shared
 * memory; therefore it does not count HTAB which is in local memory.
 * NB: assumes that all hash structure parameters have default values!
 */
long
hash_estimate_size(long num_entries, Size entrysize)
{
    long        size = 0;
    long        nBuckets,
                nSegments,
                nDirEntries,
                nElementAllocs,
                elementSize;

    /* estimate number of buckets wanted */
    nBuckets = 1L << my_log2((num_entries - 1) / DEF_FFACTOR + 1);
    /* # of segments needed for nBuckets */
    nSegments = 1L << my_log2((nBuckets - 1) / DEF_SEGSIZE + 1);
    /* directory entries */
    nDirEntries = DEF_DIRSIZE;
    while (nDirEntries < nSegments)
        nDirEntries <<= 1;      /* dir_alloc doubles dsize at each call */

    /* fixed control info */
    size += MAXALIGN(sizeof(HASHHDR)); /* but not HTAB, per above */
    /* directory */
    size += MAXALIGN(nDirEntries * sizeof(HASHSEGMENT));
    /* segments */
    size += nSegments * MAXALIGN(DEF_SEGSIZE * sizeof(HASHBUCKET));
    /* elements --- allocated in groups of HASHELEMENT_ALLOC_INCR */
    elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);
    nElementAllocs = (num_entries - 1) / HASHELEMENT_ALLOC_INCR + 1;
    size += nElementAllocs * HASHELEMENT_ALLOC_INCR * elementSize;

    return size;
}
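/*
 * The bucket and segment math above relies on my_log2(n) returning the
 * ceiling of log2(n), so that 1L << my_log2(n) is the next power of 2 >= n.
 * A minimal sketch consistent with these callers (PostgreSQL's helper also
 * clamps oversized inputs as shown; the "_sketch" suffix marks this as an
 * illustration, not the canonical source):
 */
#include <limits.h>             /* for LONG_MAX */

static int
my_log2_sketch(long num)
{
    int     i;
    long    limit;

    /* guard against an unbounded loop / shift overflow for huge inputs */
    if (num > LONG_MAX / 2)
        num = LONG_MAX / 2;

    /* smallest i such that 2^i >= num */
    for (i = 0, limit = 1; limit < num; i++, limit <<= 1)
        ;
    return i;
}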
/***********************************************************************
 * Mix two arrays in log space.
 ***********************************************************************/
void mix_log_arrays
  (float     mixing, /* Fraction (0 to 1) of array2 that will be retained. */
   ARRAY_T*  array1,
   ARRAY_T*  array2)
{
  int   i_item;
  int   num_items;
  ATYPE mixed_value;

  check_null_array(array1);
  check_null_array(array2);

  /* Verify that the arrays are of the same length. */
  check_array_dimensions(TRUE, array1, array2);

  /* Verify that we've got a reasonable mixing parameter. */
  if ((mixing > 1.0) || (mixing < 0.0)) {
    die("Invalid mixing parameter (%g).\n", mixing);
  }

  num_items = get_array_length(array1);
  for (i_item = 0; i_item < num_items; i_item++) {
    mixed_value
      = LOG_SUM(my_log2(1.0 - mixing) + get_array_item(i_item, array1),
                my_log2(mixing) + get_array_item(i_item, array2));
    set_array_item(i_item, mixed_value, array2);
  }
}
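/*
 * For mix_log_arrays to be correct, LOG_SUM must add two log2-space values
 * without leaving log space, i.e. compute log2(2^a + 2^b) stably. A hedged
 * sketch of such a helper (the function name is hypothetical; MEME supplies
 * LOG_SUM itself, and its exact definition may differ):
 */
#include <math.h>

static double log_sum2_sketch(double a, double b) {
  if (a < b) {        /* ensure a is the larger term */
    double tmp = a;
    a = b;
    b = tmp;
  }
  /* 2^a + 2^b = 2^a * (1 + 2^(b-a)); since b - a <= 0, exp2() cannot
     overflow, and the sum stays well-conditioned. */
  return a + log2(1.0 + exp2(b - a));
}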
/**************************************************************************
 * get_scaled_lo_prior_dist
 *
 * Takes a scaled distribution of priors and creates a scaled distribution of
 * log odds priors. The parameters for the scaling of the input priors are
 * in the PRIOR_DIST_T data structure. The output distribution of log odds
 * priors is scaled to be in the same range as the PSSM log odds using
 * the input parameters pssm_range, pssm_scale, and pssm_offset.
 *
 * Special handling is required for a uniform distribution of priors.
 * In that case max_prior == min_prior, and the distribution only
 * contains one bin.
 *
 * Returns a new array containing the scaled log odds priors.
 **************************************************************************/
ARRAY_T *get_scaled_lo_prior_dist(
  PRIOR_DIST_T *prior_dist,
  double alpha,
  int pssm_range,
  double pssm_scale,
  double pssm_offset
) {
  assert(prior_dist != NULL);

  // Allocate enough space for elements in [0 ... pssm_range].
  ARRAY_T *scaled_lo_prior_dist = allocate_array(pssm_range + 1);

  if (prior_dist != NULL) {
    ARRAY_T *dist_array = get_prior_dist_array(prior_dist);
    int len_prior_dist = get_array_length(dist_array);
    double max_prior = get_prior_dist_maximum(prior_dist);
    double min_prior = get_prior_dist_minimum(prior_dist);
    double prior_dist_scale = get_prior_dist_scale(prior_dist);
    double prior_dist_offset = get_prior_dist_offset(prior_dist);
    init_array(0.0L, scaled_lo_prior_dist);

    if (max_prior == min_prior) {
      // Special case for uniform priors.
      double value = 1.0;
      double lo_prior = my_log2(alpha * max_prior / (1.0L - (alpha * max_prior)));
      // Convert lo_prior to PSSM scale.
      int scaled_index = raw_to_scaled(lo_prior, 1.0L, pssm_scale, pssm_offset);
      set_array_item(scaled_index, value, scaled_lo_prior_dist);
    }
    else {
      int prior_index = 0;
      for (prior_index = 0; prior_index < len_prior_dist; ++prior_index) {
        double value = get_array_item(prior_index, dist_array);
        // Convert index giving scaled prior to raw prior.
        double scaled_prior = ((double) prior_index) + 0.5L;
        double prior
          = scaled_to_raw(scaled_prior, 1, prior_dist_scale, prior_dist_offset);
        double lo_prior = my_log2(alpha * prior / (1.0L - (alpha * prior)));
        // Scale raw lo_prior using parameters from PSSM.
        int scaled_index = raw_to_scaled(lo_prior, 1.0L, pssm_scale, pssm_offset);
        if (scaled_index < pssm_range) {
          double old_value = get_array_item(scaled_index, scaled_lo_prior_dist);
          set_array_item(scaled_index, value + old_value, scaled_lo_prior_dist);
        }
      }
    }
  }

  return scaled_lo_prior_dist;
}
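/*
 * raw_to_scaled() and scaled_to_raw() above map between raw scores and the
 * integer-binned scale used by the PSSM. A hedged sketch of such an inverse
 * pair, assuming the convention scaled = round((raw - w * offset) * scale);
 * the "_sketch" names are hypothetical and MEME's exact rounding and
 * argument types may differ:
 */
#include <math.h>

static int raw_to_scaled_sketch(double raw, double w, double scale, double offset) {
  /* bin a raw score: shift by the (per-column) offset, then quantize */
  return (int) round((raw - w * offset) * scale);
}

static double scaled_to_raw_sketch(double scaled, double w, double scale, double offset) {
  /* invert the binning: undo the quantization, then restore the offset */
  return (scaled / scale) + w * offset;
}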
/* calculate first power of 2 >= num, bounded to what will fit in an int */
static int
next_pow2_int(long num)
{
    if (num > INT_MAX / 2)
        num = INT_MAX / 2;
    return 1 << my_log2(num);
}
/***********************************************************************
 * Calculates the information content of a position of the motif.
 *
 * Assumes that alph_size does not include ambiguous characters.
 ***********************************************************************/
static inline double position_information_content(
  MOTIF_T *a_motif,
  int position
) {
  int i;
  double H, item;
  ARRAY_T *freqs;

  H = 0;
  freqs = get_matrix_row(position, a_motif->freqs);
  for (i = 0; i < a_motif->alph_size; ++i) {
    item = get_array_item(i, freqs);
    if (item > 0) {  // By convention 0 * log2(0) is 0; skip to avoid NaN.
      H -= item * my_log2(item);
    }
  }
  return my_log2(a_motif->alph_size) - H;
}
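/*
 * Worked example (not from the source): for a DNA position with uniform
 * frequencies {0.25, 0.25, 0.25, 0.25}, the entropy is
 * H = -4 * 0.25 * log2(0.25) = 2 bits, so the information content is
 * log2(4) - 2 = 0 bits. A position fixed on a single letter has H = 0 and
 * the maximum content log2(4) = 2 bits.
 */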
/*
 * Select an appropriate directory size for a hashtable with the given
 * maximum number of entries.
 * This is only needed for hashtables in shared memory, whose directories
 * cannot be expanded dynamically.
 * NB: assumes that all hash structure parameters have default values!
 *
 * XXX this had better agree with the behavior of init_htab()...
 */
long
hash_select_dirsize(long num_entries)
{
    long        nBuckets,
                nSegments,
                nDirEntries;

    /* estimate number of buckets wanted */
    nBuckets = 1L << my_log2((num_entries - 1) / DEF_FFACTOR + 1);
    /* # of segments needed for nBuckets */
    nSegments = 1L << my_log2((nBuckets - 1) / DEF_SEGSIZE + 1);
    /* directory entries */
    nDirEntries = DEF_DIRSIZE;
    while (nDirEntries < nSegments)
        nDirEntries <<= 1;      /* dir_alloc doubles dsize at each call */

    return nDirEntries;
}
/***********************************************************************
 * Convert an array to and from logs (base 2).
 ***********************************************************************/
void log_array
  (ARRAY_T* array)
{
  int i_item;
  int num_items;

  check_null_array(array);

  num_items = get_array_length(array);
  for (i_item = 0; i_item < num_items; i_item++) {
    set_array_item(i_item, my_log2(get_array_item(i_item, array)), array);
  }
}
/*
 * Estimate the space needed for a hashtable containing the given number
 * of entries of given size.
 * NOTE: this is used to estimate the footprint of hashtables in shared
 * memory; therefore it does not count HTAB which is in local memory.
 * NB: assumes that all hash structure parameters have default values!
 */
Size
hash_estimate_size(long num_entries, Size entrysize)
{
    Size        size;
    long        nBuckets,
                nSegments,
                nDirEntries,
                nElementAllocs,
                elementSize,
                elementAllocCnt;

    /* estimate number of buckets wanted */
    nBuckets = 1L << my_log2((num_entries - 1) / DEF_FFACTOR + 1);
    /* # of segments needed for nBuckets */
    nSegments = 1L << my_log2((nBuckets - 1) / DEF_SEGSIZE + 1);
    /* directory entries */
    nDirEntries = DEF_DIRSIZE;
    while (nDirEntries < nSegments)
        nDirEntries <<= 1;      /* dir_alloc doubles dsize at each call */

    /* fixed control info */
    size = MAXALIGN(sizeof(HASHHDR));  /* but not HTAB, per above */
    /* directory */
    size = add_size(size, mul_size(nDirEntries, sizeof(HASHSEGMENT)));
    /* segments */
    size = add_size(size, mul_size(nSegments,
                                   MAXALIGN(DEF_SEGSIZE * sizeof(HASHBUCKET))));
    /* elements --- allocated in groups of choose_nelem_alloc() entries */
    elementAllocCnt = choose_nelem_alloc(entrysize);
    nElementAllocs = (num_entries - 1) / elementAllocCnt + 1;
    elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);
    size = add_size(size,
                    mul_size(nElementAllocs,
                             mul_size(elementAllocCnt, elementSize)));

    return size;
}
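/*
 * Unlike the older variant earlier in this section, which accumulated the
 * estimate with plain arithmetic, this version goes through add_size() and
 * mul_size(), PostgreSQL's overflow-checked Size operations. A hedged sketch
 * of the addition half, modeled on the helper in shmem.c (error reporting
 * simplified for illustration):
 */
static Size
add_size_sketch(Size s1, Size s2)
{
    Size        result = s1 + s2;

    /* unsigned wraparound makes the sum smaller than an operand */
    if (result < s1 || result < s2)
        elog(ERROR, "requested shared memory size overflows size_t");
    return result;
}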
INT32 bdev_open(struct super_block *sb)
{
    BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);

    if (p_bd->opened)
        return (FFS_SUCCESS);

    p_bd->sector_size = bdev_logical_block_size(sb->s_bdev);
    p_bd->sector_size_bits = my_log2(p_bd->sector_size);
    p_bd->sector_size_mask = p_bd->sector_size - 1;
    p_bd->num_sectors = i_size_read(sb->s_bdev->bd_inode) >> p_bd->sector_size_bits;

    p_bd->opened = TRUE;

    return (FFS_SUCCESS);
}
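/*
 * Hedged usage sketch (not from the driver): because sector_size is a power
 * of two, the bits/mask pair set above turns division and modulo into a
 * shift and a mask. The helper name and variable names are illustrative.
 */
static u64 byte_to_sector_sketch(BD_INFO_T *p_bd, u64 byte_offset)
{
    /* offset within the sector would be: byte_offset & p_bd->sector_size_mask */
    return byte_offset >> p_bd->sector_size_bits;  /* divide by sector_size */
}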
static double SECTION
power1(double x, double y)
{
    double z, a, aa, error, t, a1, a2, y1, y2;

    z = my_log2(x, &aa, &error);
    /* Split y and z into high/low halves (Dekker splitting with 2^27 + 1) */
    t = y * 134217729.0;
    y1 = t - (t - y);
    y2 = y - y1;
    t = z * 134217729.0;
    a1 = t - (t - z);
    a2 = z - a1;
    /* Double-double product a + aa ~= y * z, carrying the log's error term */
    a = y * z;
    aa = ((y1 * a1 - a) + y1 * a2 + y2 * a1) + y2 * a2 + aa * y;
    a1 = a + aa;
    a2 = (a - a1) + aa;
    error = error * ABS(y);
    t = __exp1(a1, a2, 1.9e16 * error);
    /* negative t signals the fast path couldn't round correctly */
    return (t >= 0) ? t : __slowpow(x, y, z);
}
/*
 * Take the counts from an ambiguous character and evenly distribute
 * them among the corresponding concrete characters.
 *
 * This function operates in log space.
 */
static void dist_ambig(ALPH_T alph,
                       char ambig,
                       char *concrete_chars,
                       ARRAY_T* freqs)
{
  PROB_T ambig_count, concrete_count;
  int ambig_index, num_concretes, i, concrete_index;

  // Get the count to be distributed.
  ambig_index = alph_index(alph, ambig);
  ambig_count = get_array_item(ambig_index, freqs);

  // Divide it by the number of corresponding concrete characters.
  num_concretes = strlen(concrete_chars);
  ambig_count -= my_log2((PROB_T)num_concretes);

  // Distribute it in equal portions to the given concrete characters.
  for (i = 0; i < num_concretes; i++) {
    concrete_index = alph_index(alph, concrete_chars[i]);
    concrete_count = get_array_item(concrete_index, freqs);
    // Add the ambiguous counts.
    concrete_count = LOG_SUM(concrete_count, ambig_count);
    set_array_item(concrete_index, concrete_count, freqs);
  }

  // Set the ambiguous count to zero.
  set_array_item(ambig_index, LOG_ZERO, freqs);
}
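/*
 * Usage sketch (the alphabet constant is hypothetical): spreading the counts
 * of the DNA wildcard 'N' evenly over A, C, G, and T. In log space, dividing
 * a count by 4 is exactly the subtraction of my_log2(4) performed above, and
 * "zero count" is LOG_ZERO rather than 0.
 *
 *   dist_ambig(DNA_ALPH, 'N', "ACGT", freqs);
 */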
/**
 * Function for finding and opening either all MCA components, or the one
 * that was specifically requested via an MCA parameter.
 */
static int mca_mpool_base_open(mca_base_open_flag_t flags)
{
    /* Open up all available components - and populate the
       ompi_mpool_base_framework.framework_components list */
    if (OMPI_SUCCESS !=
        mca_base_framework_components_open(&ompi_mpool_base_framework, flags)) {
        return OMPI_ERROR;
    }

    /* Initialize the list so that in mca_mpool_base_close(), we can
       iterate over it (even if it's empty, as in the case of ompi_info) */
    OBJ_CONSTRUCT(&mca_mpool_base_modules, opal_list_t);

    /* get the page size for this architecture */
    mca_mpool_base_page_size = sysconf(_SC_PAGESIZE);
    mca_mpool_base_page_size_log = my_log2(mca_mpool_base_page_size);

    /* setup tree for tracking MPI_Alloc_mem */
    mca_mpool_base_tree_init();

    return OMPI_SUCCESS;
}
/**************************************************************************
 * Convert a given array to or from logs.
 **************************************************************************/
void convert_to_from_log_array
  (BOOLEAN_T to_log,
   ARRAY_T*  source_array,
   ARRAY_T*  target_array)
{
  int num_items;
  int i_item;
  ATYPE new_value;

  // If the source is null, just return.
  if (source_array == NULL)
    return;

  num_items = get_array_length(source_array);
  for (i_item = 0; i_item < num_items; i_item++) {
    if (to_log) {
      new_value = my_log2(get_array_item(i_item, source_array));
    } else {
      new_value = EXP2(get_array_item(i_item, source_array));
    }
    set_array_item(i_item, new_value, target_array);
  }
}
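/*
 * Usage sketch (array names hypothetical): my_log2 and EXP2 are inverses, so
 * converting to log space and back is an identity up to rounding. Since each
 * element is read before it is written, source and target may be the same
 * array, giving an in-place conversion.
 *
 *   convert_to_from_log_array(TRUE,  probs, probs);  // probs -> log2(probs)
 *   convert_to_from_log_array(FALSE, probs, probs);  // back to linear space
 */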
/**************************************************************************
 * get_max_lo_prior
 *
 * Returns the maximum log-odds prior from the distribution of priors.
 *
 **************************************************************************/
double get_max_lo_prior(PRIOR_DIST_T *prior_dist, double alpha) {
  double max_prior = get_prior_dist_maximum(prior_dist);
  return my_log2((alpha * max_prior) / (1.0L - alpha * max_prior));
}
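/*
 * Worked example (not from the source): with alpha = 1.0 and a maximum prior
 * of 0.9, the log-odds value is log2(0.9 / 0.1) = log2(9) ~= 3.17, while a
 * prior of 0.5 maps to log2(1) = 0, the point of indifference.
 */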
/// Parse the .lzma header and display information about it.
static bool
lzmainfo(const char *name, FILE *f)
{
    uint8_t buf[13];
    const size_t size = fread(buf, 1, sizeof(buf), f);
    if (size != 13) {
        fprintf(stderr, "%s: %s: %s\n", progname, name,
                ferror(f) ? strerror(errno)
                : _("File is too small to be a .lzma file"));
        return true;
    }

    lzma_filter filter = { .id = LZMA_FILTER_LZMA1 };

    // Parse the first five bytes.
    switch (lzma_properties_decode(&filter, NULL, buf, 5)) {
    case LZMA_OK:
        break;

    case LZMA_OPTIONS_ERROR:
        fprintf(stderr, "%s: %s: %s\n", progname, name,
                _("Not a .lzma file"));
        return true;

    case LZMA_MEM_ERROR:
        fprintf(stderr, "%s: %s\n", progname, strerror(ENOMEM));
        exit(EXIT_FAILURE);

    default:
        fprintf(stderr, "%s: %s\n", progname,
                _("Internal error (bug)"));
        exit(EXIT_FAILURE);
    }

    // Uncompressed size
    uint64_t uncompressed_size = 0;
    for (size_t i = 0; i < 8; ++i)
        uncompressed_size |= (uint64_t)(buf[5 + i]) << (i * 8);

    // Display the results. We don't want to translate these and also
    // will use MB instead of MiB, because someone could be parsing
    // this output and we don't want to break that when people move
    // from LZMA Utils to XZ Utils.
    if (f != stdin)
        printf("%s\n", name);

    printf("Uncompressed size:             ");
    if (uncompressed_size == UINT64_MAX)
        printf("Unknown");
    else
        printf("%" PRIu64 " MB (%" PRIu64 " bytes)",
                (uncompressed_size + 512 * 1024) / (1024 * 1024),
                uncompressed_size);

    lzma_options_lzma *opt = filter.options;

    printf("\nDictionary size:               "
            "%" PRIu32 " MB (2^%" PRIu32 " bytes)\n"
            "Literal context bits (lc):     %" PRIu32 "\n"
            "Literal pos bits (lp):         %" PRIu32 "\n"
            "Number of pos bits (pb):       %" PRIu32 "\n",
            (opt->dict_size + 512 * 1024) / (1024 * 1024),
            my_log2(opt->dict_size),
            opt->lc, opt->lp, opt->pb);

    free(opt);
    return false;
}
static int
init_htab(HTAB *hashp, int nelem)
{
    register SEG_OFFSET *segp;
    register int nbuckets;
    register int nsegs;
    int         l2;
    HHDR       *hctl;

    hctl = hashp->hctl;

    /*
     * Divide number of elements by the fill factor and determine a desired
     * number of buckets.  Allocate space for the next greater power of
     * two number of buckets
     */
    nelem = (nelem - 1) / hctl->ffactor + 1;

    l2 = my_log2(nelem);
    nbuckets = 1 << l2;

    hctl->max_bucket = hctl->low_mask = nbuckets - 1;
    hctl->high_mask = (nbuckets << 1) - 1;

    nsegs = (nbuckets - 1) / hctl->ssize + 1;
    nsegs = 1 << my_log2(nsegs);

    if (nsegs > hctl->dsize) {
        hctl->dsize = nsegs;
    }

    /* Use two low order bits of points ???? */
    /*
     * if ( !(hctl->mem = bit_alloc ( nbuckets )) ) return(-1);
     * if ( !(hctl->mod = bit_alloc ( nbuckets )) ) return(-1);
     */

    /* allocate a directory */
    if (!(hashp->dir)) {
        hashp->dir = (SEG_OFFSET *)
            hashp->alloc(hctl->dsize * sizeof(SEG_OFFSET));
        if (!hashp->dir)
            return (-1);
    }

    /* Allocate initial segments */
    for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++) {
        *segp = seg_alloc(hashp);
        if (*segp == (SEG_OFFSET) 0) {
            hash_destroy(hashp);
            return (0);
        }
    }

#if HASH_DEBUG
    fprintf(stderr,
            "%s\n%s%x\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%x\n%s%x\n%s%d\n%s%d\n",
            "init_htab:",
            "TABLE POINTER   ", hashp,
            "BUCKET SIZE     ", hctl->bsize,
            "BUCKET SHIFT    ", hctl->bshift,
            "DIRECTORY SIZE  ", hctl->dsize,
            "SEGMENT SIZE    ", hctl->ssize,
            "SEGMENT SHIFT   ", hctl->sshift,
            "FILL FACTOR     ", hctl->ffactor,
            "MAX BUCKET      ", hctl->max_bucket,
            "HIGH MASK       ", hctl->high_mask,
            "LOW MASK        ", hctl->low_mask,
            "NSEGS           ", hctl->nsegs,
            "NKEYS           ", hctl->nkeys);
#endif

    return (0);
}
HTAB *
hash_create(int nelem, HASHCTL *info, int flags)
{
    register HHDR *hctl;
    HTAB       *hashp;

    hashp = (HTAB *) MEM_ALLOC((unsigned long) sizeof(HTAB));
    memset(hashp, 0, sizeof(HTAB));

    if (flags & HASH_FUNCTION) {
        hashp->hash = info->hash;
    } else {
        /* default */
        hashp->hash = string_hash;
    }

    if (flags & HASH_SHARED_MEM) {
        /* ctl structure is preallocated for shared memory tables */
        hashp->hctl = (HHDR *) info->hctl;
        hashp->segbase = (char *) info->segbase;
        hashp->alloc = info->alloc;
        hashp->dir = (SEG_OFFSET *) info->dir;

        /* hash table already exists, we're just attaching to it */
        if (flags & HASH_ATTACH) {
            return (hashp);
        }
    } else {
        /* setup hash table defaults */
        hashp->alloc = (dhalloc_ptr) MEM_ALLOC;
        hashp->dir = NULL;
        hashp->segbase = NULL;
    }

    if (!hashp->hctl) {
        hashp->hctl = (HHDR *) hashp->alloc((unsigned long) sizeof(HHDR));
        if (!hashp->hctl) {
            return (0);
        }
    }

    if (!hdefault(hashp))
        return (0);
    hctl = hashp->hctl;
#ifdef HASH_STATISTICS
    hctl->accesses = hctl->collisions = 0;
#endif

    if (flags & HASH_BUCKET) {
        hctl->bsize = info->bsize;
        hctl->bshift = my_log2(info->bsize);
    }
    if (flags & HASH_SEGMENT) {
        hctl->ssize = info->ssize;
        hctl->sshift = my_log2(info->ssize);
    }
    if (flags & HASH_FFACTOR) {
        hctl->ffactor = info->ffactor;
    }

    /*
     * SHM hash tables have fixed maximum size (allocate
     * a maximal sized directory).
     */
    if (flags & HASH_DIRSIZE) {
        hctl->max_dsize = my_log2(info->max_size);
        hctl->dsize = my_log2(info->dsize);
    }

    /*
     * hash table now allocates space for key and data
     * but you have to say how much space to allocate
     */
    if (flags & HASH_ELEM) {
        hctl->keysize = info->keysize;
        hctl->datasize = info->datasize;
    }

    if (flags & HASH_ALLOC) {
        hashp->alloc = info->alloc;
    }

    if (init_htab(hashp, nelem)) {
        hash_destroy(hashp);
        return (0);
    }
    return (hashp);
}
/*
 * hash_create -- create a new dynamic hash table
 *
 *  tabname: a name for the table (for debugging purposes)
 *  nelem: maximum number of elements expected
 *  *info: additional table parameters, as indicated by flags
 *  flags: bitmask indicating which parameters to take from *info
 *
 * Note: for a shared-memory hashtable, nelem needs to be a pretty good
 * estimate, since we can't expand the table on the fly.  But an unshared
 * hashtable can be expanded on-the-fly, so it's better for nelem to be
 * on the small side and let the table grow if it's exceeded.  An overly
 * large nelem will penalize hash_seq_search speed without buying much.
 */
HTAB *
hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
{
    HTAB       *hashp;
    HASHHDR    *hctl;

    /*
     * For shared hash tables, we have a local hash header (HTAB struct) that
     * we allocate in TopMemoryContext; all else is in shared memory.
     *
     * For non-shared hash tables, everything including the hash header is in
     * a memory context created specially for the hash table --- this makes
     * hash_destroy very simple.  The memory context is made a child of
     * either a context specified by the caller, or TopMemoryContext if
     * nothing is specified.
     */
    if (flags & HASH_SHARED_MEM)
    {
        /* Set up to allocate the hash header */
        CurrentDynaHashCxt = TopMemoryContext;
    }
    else
    {
        /* Create the hash table's private memory context */
        if (flags & HASH_CONTEXT)
            CurrentDynaHashCxt = info->hcxt;
        else
            CurrentDynaHashCxt = TopMemoryContext;
        CurrentDynaHashCxt = AllocSetContextCreate(CurrentDynaHashCxt,
                                                   tabname,
                                                   ALLOCSET_DEFAULT_MINSIZE,
                                                   ALLOCSET_DEFAULT_INITSIZE,
                                                   ALLOCSET_DEFAULT_MAXSIZE);
    }

    /* Initialize the hash header, plus a copy of the table name */
    hashp = (HTAB *) DynaHashAlloc(sizeof(HTAB) + strlen(tabname) + 1);
    MemSet(hashp, 0, sizeof(HTAB));

    hashp->tabname = (char *) (hashp + 1);
    strcpy(hashp->tabname, tabname);

    if (flags & HASH_FUNCTION)
        hashp->hash = info->hash;
    else
        hashp->hash = string_hash;  /* default hash function */

    /*
     * If you don't specify a match function, it defaults to string_compare
     * if you used string_hash (either explicitly or by default) and to
     * memcmp otherwise.  (Prior to PostgreSQL 7.4, memcmp was always used.)
     */
    if (flags & HASH_COMPARE)
        hashp->match = info->match;
    else if (hashp->hash == string_hash)
        hashp->match = (HashCompareFunc) string_compare;
    else
        hashp->match = memcmp;

    /*
     * Similarly, the key-copying function defaults to strlcpy or memcpy.
     */
    if (flags & HASH_KEYCOPY)
        hashp->keycopy = info->keycopy;
    else if (hashp->hash == string_hash)
        hashp->keycopy = (HashCopyFunc) strlcpy;
    else
        hashp->keycopy = memcpy;

    if (flags & HASH_ALLOC)
        hashp->alloc = info->alloc;
    else
        hashp->alloc = DynaHashAlloc;

    if (flags & HASH_SHARED_MEM)
    {
        /*
         * ctl structure and directory are preallocated for shared memory
         * tables.  Note that HASH_DIRSIZE and HASH_ALLOC had better be set
         * as well.
         */
        hashp->hctl = info->hctl;
        hashp->dir = (HASHSEGMENT *) (((char *) info->hctl) + sizeof(HASHHDR));
        hashp->hcxt = NULL;
        hashp->isshared = true;

        /* hash table already exists, we're just attaching to it */
        if (flags & HASH_ATTACH)
        {
            /* make local copies of some heavily-used values */
            hctl = hashp->hctl;
            hashp->keysize = hctl->keysize;
            hashp->ssize = hctl->ssize;
            hashp->sshift = hctl->sshift;

            return hashp;
        }
    }
    else
    {
        /* setup hash table defaults */
        hashp->hctl = NULL;
        hashp->dir = NULL;
        hashp->hcxt = CurrentDynaHashCxt;
        hashp->isshared = false;
    }

    if (!hashp->hctl)
    {
        hashp->hctl = (HASHHDR *) hashp->alloc(sizeof(HASHHDR));
        if (!hashp->hctl)
            ereport(ERROR,
                    (errcode(ERRCODE_OUT_OF_MEMORY),
                     errmsg("out of memory")));
    }

    hashp->frozen = false;

    hdefault(hashp);

    hctl = hashp->hctl;

    if (flags & HASH_PARTITION)
    {
        /* Doesn't make sense to partition a local hash table */
        Assert(flags & HASH_SHARED_MEM);

        /*
         * The number of partitions had better be a power of 2. Also, it must
         * be less than INT_MAX (see init_htab()), so call the int version of
         * next_pow2.
         */
        Assert(info->num_partitions == next_pow2_int(info->num_partitions));

        hctl->num_partitions = info->num_partitions;
    }

    if (flags & HASH_SEGMENT)
    {
        hctl->ssize = info->ssize;
        hctl->sshift = my_log2(info->ssize);
        /* ssize had better be a power of 2 */
        Assert(hctl->ssize == (1L << hctl->sshift));
    }
    if (flags & HASH_FFACTOR)
        hctl->ffactor = info->ffactor;

    /*
     * SHM hash tables have fixed directory size passed by the caller.
     */
    if (flags & HASH_DIRSIZE)
    {
        hctl->max_dsize = info->max_dsize;
        hctl->dsize = info->dsize;
    }

    /*
     * hash table now allocates space for key and data but you have to say
     * how much space to allocate
     */
    if (flags & HASH_ELEM)
    {
        Assert(info->entrysize >= info->keysize);
        hctl->keysize = info->keysize;
        hctl->entrysize = info->entrysize;
    }

    /* make local copies of heavily-used constant fields */
    hashp->keysize = hctl->keysize;
    hashp->ssize = hctl->ssize;
    hashp->sshift = hctl->sshift;

    /* Build the hash directory structure */
    if (!init_htab(hashp, nelem))
        elog(ERROR, "failed to initialize hash table \"%s\"", hashp->tabname);

    /*
     * For a shared hash table, preallocate the requested number of elements.
     * This reduces problems with run-time out-of-shared-memory conditions.
     *
     * For a non-shared hash table, preallocate the requested number of
     * elements if it's less than our chosen nelem_alloc.  This avoids
     * wasting space if the caller correctly estimates a small table size.
     */
    if ((flags & HASH_SHARED_MEM) ||
        nelem < hctl->nelem_alloc)
    {
        if (!element_alloc(hashp, (int) nelem))
            ereport(ERROR,
                    (errcode(ERRCODE_OUT_OF_MEMORY),
                     errmsg("out of memory")));
    }

    if (flags & HASH_FIXED_SIZE)
        hashp->isfixed = true;
    return hashp;
}
static bool
init_htab(HTAB *hashp, long nelem)
{
    HASHHDR    *hctl = hashp->hctl;
    HASHSEGMENT *segp;
    int         nbuckets;
    int         nsegs;

    /*
     * Divide number of elements by the fill factor to determine a desired
     * number of buckets.  Allocate space for the next greater power of
     * two number of buckets
     */
    nelem = (nelem - 1) / hctl->ffactor + 1;

    nbuckets = 1 << my_log2(nelem);

    hctl->max_bucket = hctl->low_mask = nbuckets - 1;
    hctl->high_mask = (nbuckets << 1) - 1;

    /*
     * Figure number of directory segments needed, round up to a power of
     * 2
     */
    nsegs = (nbuckets - 1) / hctl->ssize + 1;
    nsegs = 1 << my_log2(nsegs);

    /*
     * Make sure directory is big enough. If pre-allocated directory is
     * too small, choke (caller screwed up).
     */
    if (nsegs > hctl->dsize)
    {
        if (!(hashp->dir))
            hctl->dsize = nsegs;
        else
            return false;
    }

    /* Allocate a directory */
    if (!(hashp->dir))
    {
        CurrentDynaHashCxt = hashp->hcxt;
        hashp->dir = (HASHSEGMENT *)
            hashp->alloc(hctl->dsize * sizeof(HASHSEGMENT));
        if (!hashp->dir)
            return false;
    }

    /* Allocate initial segments */
    for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++)
    {
        *segp = seg_alloc(hashp);
        if (*segp == NULL)
            return false;
    }

#if HASH_DEBUG
    fprintf(stderr, "init_htab:\n%s%p\n%s%ld\n%s%ld\n%s%d\n%s%ld\n%s%u\n%s%x\n%s%x\n%s%ld\n%s%ld\n",
            "TABLE POINTER   ", hashp,
            "DIRECTORY SIZE  ", hctl->dsize,
            "SEGMENT SIZE    ", hctl->ssize,
            "SEGMENT SHIFT   ", hctl->sshift,
            "FILL FACTOR     ", hctl->ffactor,
            "MAX BUCKET      ", hctl->max_bucket,
            "HIGH MASK       ", hctl->high_mask,
            "LOW MASK        ", hctl->low_mask,
            "NSEGS           ", hctl->nsegs,
            "NENTRIES        ", hctl->nentries);
#endif
    return true;
}
/* calculate first power of 2 >= num, bounded to what will fit in a long */
static long
next_pow2_long(long num)
{
    /* my_log2's internal range check is sufficient */
    return 1L << my_log2(num);
}
static base_t my_log2p1(const base_t &i)
{
    return my_log2(i + 1.0);
}
HTAB *
hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
{
    HTAB       *hashp;
    HASHHDR    *hctl;

    /* First time through, create a memory context for hash tables */
    if (!DynaHashCxt)
        DynaHashCxt = AllocSetContextCreate(TopMemoryContext,
                                            "DynaHash",
                                            ALLOCSET_DEFAULT_MINSIZE,
                                            ALLOCSET_DEFAULT_INITSIZE,
                                            ALLOCSET_DEFAULT_MAXSIZE);

    /* Select allocation context for this hash table */
    if (flags & HASH_CONTEXT)
        CurrentDynaHashCxt = info->hcxt;
    else
        CurrentDynaHashCxt = DynaHashCxt;

    /* Initialize the hash header */
    hashp = (HTAB *) MEM_ALLOC(sizeof(HTAB));
    if (!hashp)
        return NULL;
    MemSet(hashp, 0, sizeof(HTAB));

    hashp->tabname = (char *) MEM_ALLOC(strlen(tabname) + 1);
    strcpy(hashp->tabname, tabname);

    if (flags & HASH_FUNCTION)
        hashp->hash = info->hash;
    else
        hashp->hash = string_hash;  /* default hash function */

    /*
     * If you don't specify a match function, it defaults to strncmp() if
     * you used string_hash (either explicitly or by default) and to
     * memcmp() otherwise.  (Prior to PostgreSQL 7.4, memcmp() was always
     * used.)
     */
    if (flags & HASH_COMPARE)
        hashp->match = info->match;
    else if (hashp->hash == string_hash)
        hashp->match = (HashCompareFunc) strncmp;
    else
        hashp->match = memcmp;

    /*
     * Similarly, the key-copying function defaults to strncpy() or memcpy().
     */
    if (hashp->hash == string_hash)
        hashp->keycopy = (HashCopyFunc) strncpy;
    else
        hashp->keycopy = memcpy;

    if (flags & HASH_SHARED_MEM)
    {
        /*
         * ctl structure is preallocated for shared memory tables.  Note
         * that HASH_DIRSIZE had better be set as well.
         */
        hashp->hctl = info->hctl;
        hashp->dir = info->dir;
        hashp->alloc = info->alloc;
        hashp->hcxt = NULL;
        hashp->isshared = true;

        /* hash table already exists, we're just attaching to it */
        if (flags & HASH_ATTACH)
            return hashp;
    }
    else
    {
        /* setup hash table defaults */
        hashp->hctl = NULL;
        hashp->dir = NULL;
        hashp->alloc = MEM_ALLOC;
        hashp->hcxt = CurrentDynaHashCxt;
        hashp->isshared = false;
    }

    if (!hashp->hctl)
    {
        hashp->hctl = (HASHHDR *) hashp->alloc(sizeof(HASHHDR));
        if (!hashp->hctl)
            return NULL;
    }

    if (!hdefault(hashp))
        return NULL;
    hctl = hashp->hctl;
#ifdef HASH_STATISTICS
    hctl->accesses = hctl->collisions = 0;
#endif

    if (flags & HASH_SEGMENT)
    {
        hctl->ssize = info->ssize;
        hctl->sshift = my_log2(info->ssize);
        /* ssize had better be a power of 2 */
        Assert(hctl->ssize == (1L << hctl->sshift));
    }
    if (flags & HASH_FFACTOR)
        hctl->ffactor = info->ffactor;

    /*
     * SHM hash tables have fixed directory size passed by the caller.
     */
    if (flags & HASH_DIRSIZE)
    {
        hctl->max_dsize = info->max_dsize;
        hctl->dsize = info->dsize;
    }

    /*
     * hash table now allocates space for key and data but you have to say
     * how much space to allocate
     */
    if (flags & HASH_ELEM)
    {
        hctl->keysize = info->keysize;
        hctl->entrysize = info->entrysize;
    }

    if (flags & HASH_ALLOC)
        hashp->alloc = info->alloc;
    else
    {
        /* remaining hash table structures live in child of given context */
        hashp->hcxt = AllocSetContextCreate(CurrentDynaHashCxt,
                                            "DynaHashTable",
                                            ALLOCSET_DEFAULT_MINSIZE,
                                            ALLOCSET_DEFAULT_INITSIZE,
                                            ALLOCSET_DEFAULT_MAXSIZE);
        CurrentDynaHashCxt = hashp->hcxt;
    }

    if (!init_htab(hashp, nelem))
    {
        hash_destroy(hashp);
        return NULL;
    }

    return hashp;
}
/* ----------------------------------------------------------------
 *      ExecHashTableCreate
 *
 *      create an empty hashtable data structure for hashjoin.
 * ----------------------------------------------------------------
 */
HashJoinTable
ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
{
    HashJoinTable hashtable;
    Plan       *outerNode;
    int         nbuckets;
    int         nbatch;
    int         num_skew_mcvs;
    int         log2_nbuckets;
    int         nkeys;
    int         i;
    ListCell   *ho;
    MemoryContext oldcxt;

    /*
     * Get information about the size of the relation to be hashed (it's the
     * "outer" subtree of this node, but the inner relation of the hashjoin).
     * Compute the appropriate size of the hash table.
     */
    outerNode = outerPlan(node);

    ExecChooseHashTableSize(outerNode->plan_rows, outerNode->plan_width,
                            OidIsValid(node->skewTable),
                            &nbuckets, &nbatch, &num_skew_mcvs);

#ifdef HJDEBUG
    printf("nbatch = %d, nbuckets = %d\n", nbatch, nbuckets);
#endif

    /* nbuckets must be a power of 2 */
    log2_nbuckets = my_log2(nbuckets);
    Assert(nbuckets == (1 << log2_nbuckets));

    /*
     * Initialize the hash table control block.
     *
     * The hashtable control block is just palloc'd from the executor's
     * per-query memory context.
     */
    hashtable = (HashJoinTable) palloc(sizeof(HashJoinTableData));
    hashtable->nbuckets = nbuckets;
    hashtable->log2_nbuckets = log2_nbuckets;
    hashtable->buckets = NULL;
    hashtable->keepNulls = keepNulls;
    hashtable->skewEnabled = false;
    hashtable->skewBucket = NULL;
    hashtable->skewBucketLen = 0;
    hashtable->nSkewBuckets = 0;
    hashtable->skewBucketNums = NULL;
    hashtable->nbatch = nbatch;
    hashtable->curbatch = 0;
    hashtable->nbatch_original = nbatch;
    hashtable->nbatch_outstart = nbatch;
    hashtable->growEnabled = true;
    hashtable->totalTuples = 0;
    hashtable->innerBatchFile = NULL;
    hashtable->outerBatchFile = NULL;
    hashtable->spaceUsed = 0;
    hashtable->spacePeak = 0;
    hashtable->spaceAllowed = work_mem * 1024L;
    hashtable->spaceUsedSkew = 0;
    hashtable->spaceAllowedSkew =
        hashtable->spaceAllowed * SKEW_WORK_MEM_PERCENT / 100;

    /*
     * Get info about the hash functions to be used for each hash key. Also
     * remember whether the join operators are strict.
     */
    nkeys = list_length(hashOperators);
    hashtable->outer_hashfunctions =
        (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
    hashtable->inner_hashfunctions =
        (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
    hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool));
    i = 0;
    foreach(ho, hashOperators)
    {
        Oid         hashop = lfirst_oid(ho);
        Oid         left_hashfn;
        Oid         right_hashfn;

        if (!get_op_hash_functions(hashop, &left_hashfn, &right_hashfn))
            elog(ERROR, "could not find hash function for hash operator %u",
                 hashop);
        fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]);
        fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]);
        hashtable->hashStrict[i] = op_strict(hashop);
        i++;
    }
/*
 * Compute derived fields of hctl and build the initial directory/segment
 * arrays
 */
static bool
init_htab(HTAB *hashp, long nelem)
{
    HASHHDR    *hctl = hashp->hctl;
    HASHSEGMENT *segp;
    long        lnbuckets;
    int         nbuckets;
    int         nsegs;

    /*
     * initialize mutex if it's a partitioned table
     */
    if (IS_PARTITIONED(hctl))
        SpinLockInit(&hctl->mutex);

    /*
     * Divide number of elements by the fill factor to determine a desired
     * number of buckets.  Allocate space for the next greater power of two
     * number of buckets
     */
    lnbuckets = (nelem - 1) / hctl->ffactor + 1;

    nbuckets = 1 << my_log2(lnbuckets);

    /*
     * In a partitioned table, nbuckets must be at least equal to
     * num_partitions; were it less, keys with apparently different partition
     * numbers would map to the same bucket, breaking partition independence.
     * (Normally nbuckets will be much bigger; this is just a safety check.)
     */
    while (nbuckets < hctl->num_partitions)
        nbuckets <<= 1;

    hctl->max_bucket = hctl->low_mask = nbuckets - 1;
    hctl->high_mask = (nbuckets << 1) - 1;

    /*
     * Figure number of directory segments needed, round up to a power of 2
     */
    nsegs = (nbuckets - 1) / hctl->ssize + 1;
    nsegs = 1 << my_log2(nsegs);

    /*
     * Make sure directory is big enough. If pre-allocated directory is too
     * small, choke (caller screwed up).
     */
    if (nsegs > hctl->dsize)
    {
        if (!(hashp->dir))
            hctl->dsize = nsegs;
        else
            return false;
    }

    /* Allocate a directory */
    if (!(hashp->dir))
    {
        CurrentDynaHashCxt = hashp->hcxt;
        hashp->dir = (HASHSEGMENT *)
            hashp->alloc(hctl->dsize * sizeof(HASHSEGMENT));
        if (!hashp->dir)
            return false;
    }

    /* Allocate initial segments */
    for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++)
    {
        *segp = seg_alloc(hashp);
        if (*segp == NULL)
            return false;
    }

    /* Choose number of entries to allocate at a time */
    hctl->nelem_alloc = choose_nelem_alloc(hctl->entrysize);

#if HASH_DEBUG
    fprintf(stderr, "init_htab:\n%s%p\n%s%ld\n%s%ld\n%s%d\n%s%ld\n%s%u\n%s%x\n%s%x\n%s%ld\n%s%ld\n",
            "TABLE POINTER   ", hashp,
            "DIRECTORY SIZE  ", hctl->dsize,
            "SEGMENT SIZE    ", hctl->ssize,
            "SEGMENT SHIFT   ", hctl->sshift,
            "FILL FACTOR     ", hctl->ffactor,
            "MAX BUCKET      ", hctl->max_bucket,
            "HIGH MASK       ", hctl->high_mask,
            "LOW MASK        ", hctl->low_mask,
            "NSEGS           ", hctl->nsegs,
            "NENTRIES        ", hctl->nentries);
#endif
    return true;
}
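/*
 * The max_bucket / low_mask / high_mask fields initialized above drive the
 * linear-hashing bucket selection: a hash value is masked with high_mask,
 * and if that lands beyond max_bucket (a bucket that does not exist yet) it
 * is folded back with low_mask. A sketch modeled on dynahash's bucket
 * computation (the "_sketch" name marks it as an illustration). Worked
 * example: with nbuckets = 8 (max_bucket = low_mask = 7, high_mask = 15),
 * a hash of 13 masks to 13 > 7 and folds to 13 & 7 = 5.
 */
static uint32
calc_bucket_sketch(HASHHDR *hctl, uint32 hash_val)
{
    uint32      bucket;

    bucket = hash_val & hctl->high_mask;
    if (bucket > hctl->max_bucket)
        bucket = bucket & hctl->low_mask;

    return bucket;
}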
/* ----------------------------------------------------------------
 *      ExecHashTableCreate
 *
 *      create an empty hashtable data structure for hashjoin.
 * ----------------------------------------------------------------
 */
HashJoinTable
ExecHashTableCreate(HashState *hashState, HashJoinState *hjstate,
                    List *hashOperators, uint64 operatorMemKB)
{
    HashJoinTable hashtable;
    Plan       *outerNode;
    int         nbuckets;
    int         nbatch;
    int         log2_nbuckets;
    int         nkeys;
    int         i;
    ListCell   *ho;
    MemoryContext oldcxt;

    START_MEMORY_ACCOUNT(hashState->ps.plan->memoryAccountId);
    {
        Hash       *node = (Hash *) hashState->ps.plan;

        /*
         * Get information about the size of the relation to be hashed (it's
         * the "outer" subtree of this node, but the inner relation of the
         * hashjoin).  Compute the appropriate size of the hash table.
         */
        outerNode = outerPlan(node);

        ExecChooseHashTableSize(outerNode->plan_rows, outerNode->plan_width,
                                &nbuckets, &nbatch, operatorMemKB);

#ifdef HJDEBUG
        elog(LOG, "HJ: nbatch = %d, nbuckets = %d\n", nbatch, nbuckets);
#endif

        /* nbuckets must be a power of 2 */
        log2_nbuckets = my_log2(nbuckets);
        Assert(nbuckets == (1 << log2_nbuckets));

        /*
         * Initialize the hash table control block.
         *
         * The hashtable control block is just palloc'd from the executor's
         * per-query memory context.
         */
        hashtable = (HashJoinTable) palloc0(sizeof(HashJoinTableData));
        hashtable->nbuckets = nbuckets;
        hashtable->log2_nbuckets = log2_nbuckets;
        hashtable->buckets = NULL;
        hashtable->bloom = NULL;
        hashtable->nbatch = nbatch;
        hashtable->curbatch = 0;
        hashtable->nbatch_original = nbatch;
        hashtable->nbatch_outstart = nbatch;
        hashtable->growEnabled = true;
        hashtable->totalTuples = 0;
        hashtable->batches = NULL;
        hashtable->work_set = NULL;
        hashtable->state_file = NULL;
        hashtable->spaceAllowed = operatorMemKB * 1024L;
        hashtable->stats = NULL;
        hashtable->eagerlyReleased = false;
        hashtable->hjstate = hjstate;

        /*
         * Get info about the hash functions to be used for each hash key.
         * Also remember whether the join operators are strict.
         */
        nkeys = list_length(hashOperators);
        hashtable->outer_hashfunctions =
            (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
        hashtable->inner_hashfunctions =
            (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
        hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool));
        i = 0;
        foreach(ho, hashOperators)
        {
            Oid         hashop = lfirst_oid(ho);
            Oid         left_hashfn;
            Oid         right_hashfn;

            if (!get_op_hash_functions(hashop, &left_hashfn, &right_hashfn))
                elog(ERROR, "could not find hash function for hash operator %u",
                     hashop);
            fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]);
            fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]);
            hashtable->hashStrict[i] = op_strict(hashop);
            i++;
        }

        /*
         * Create temporary memory contexts in which to keep the hashtable
         * working storage.  See notes in executor/hashjoin.h.
         */
        hashtable->hashCxt = AllocSetContextCreate(CurrentMemoryContext,
                                                   "HashTableContext",
                                                   ALLOCSET_DEFAULT_MINSIZE,
                                                   ALLOCSET_DEFAULT_INITSIZE,
                                                   ALLOCSET_DEFAULT_MAXSIZE);

        hashtable->batchCxt = AllocSetContextCreate(hashtable->hashCxt,
                                                    "HashBatchContext",
                                                    ALLOCSET_DEFAULT_MINSIZE,
                                                    ALLOCSET_DEFAULT_INITSIZE,
                                                    ALLOCSET_DEFAULT_MAXSIZE);

        /* CDB */
        /* track temp buf file allocations in separate context */
        hashtable->bfCxt = AllocSetContextCreate(CurrentMemoryContext,
                                                 "hbbfcxt",
                                                 ALLOCSET_DEFAULT_MINSIZE,
                                                 ALLOCSET_DEFAULT_INITSIZE,
                                                 ALLOCSET_DEFAULT_MAXSIZE);

        /* Allocate data that will live for the life of the hashjoin */
        oldcxt = MemoryContextSwitchTo(hashtable->hashCxt);

#ifdef HJDEBUG
        {
            /* Memory needed to allocate hashtable->batches, which consists
             * of nbatch pointers */
            int         md_batch_size = (nbatch * sizeof(hashtable->batches[0])) / (1024 * 1024);
            /* Memory needed to allocate hashtable->batches entries, which
             * consist of nbatch HashJoinBatchData structures */
            int         md_batch_data_size = (nbatch * sizeof(HashJoinBatchData)) / (1024 * 1024);
            /* Memory needed to allocate hashtable->buckets, which consists
             * of nbuckets HashJoinTuple structures */
            int         md_buckets_size = (nbuckets * sizeof(HashJoinTuple)) / (1024 * 1024);
            /* Memory needed to allocate hashtable->bloom, which consists of
             * nbuckets int64 values */
            int         md_bloom_size = (nbuckets * sizeof(uint64)) / (1024 * 1024);

            /* Total memory needed for the hashtable metadata */
            int         md_tot = md_batch_size + md_batch_data_size + md_buckets_size + md_bloom_size;

            elog(LOG, "About to allocate HashTable. HT_MEMORY=%dMB Memory needed for metadata: MDBATCH_ARR=%dMB, MDBATCH_DATA=%dMB, MDBUCKETS_ARR=%dMB, MDBLOOM_ARR=%dMB, TOTAL=%dMB",
                 (int) (hashtable->spaceAllowed / (1024 * 1024)),
                 md_batch_size, md_batch_data_size, md_buckets_size,
                 md_bloom_size, md_tot);
            elog(LOG, "sizeof(hashtable->batches[0])=%d, sizeof(HashJoinBatchData)=%d, sizeof(HashJoinTuple)=%d, sizeof(uint64)=%d",
                 (int) sizeof(hashtable->batches[0]), (int) sizeof(HashJoinBatchData),
                 (int) sizeof(HashJoinTuple), (int) sizeof(uint64));
        }
#endif

        /* array of BatchData ptrs */
        hashtable->batches = (HashJoinBatchData **)
            palloc(nbatch * sizeof(hashtable->batches[0]));

        /* one BatchData entry per initial batch */
        for (i = 0; i < nbatch; i++)
            hashtable->batches[i] = (HashJoinBatchData *)
                palloc0(sizeof(HashJoinBatchData));

        /*
         * Prepare context for the first-scan space allocations; allocate the
         * hashbucket array therein, and set each bucket "empty".
         */
        MemoryContextSwitchTo(hashtable->batchCxt);

        hashtable->buckets = (HashJoinTuple *)
            palloc0(nbuckets * sizeof(HashJoinTuple));

        if (gp_hashjoin_bloomfilter != 0)
            hashtable->bloom = (uint64 *) palloc0(nbuckets * sizeof(uint64));

        MemoryContextSwitchTo(oldcxt);
    }