static int call_resize_kernel(CUDAScaleContext *ctx, CUfunction func, CUtexref tex, int channels, uint8_t *src_dptr, int src_width, int src_height, int src_pitch, uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch, int pixel_size) { CUdeviceptr src_devptr = (CUdeviceptr)src_dptr; CUdeviceptr dst_devptr = (CUdeviceptr)dst_dptr; void *args_uchar[] = { &dst_devptr, &dst_width, &dst_height, &dst_pitch, &src_width, &src_height }; CUDA_ARRAY_DESCRIPTOR desc; desc.Width = src_width; desc.Height = src_height; desc.NumChannels = channels; if (pixel_size == 1) { desc.Format = CU_AD_FORMAT_UNSIGNED_INT8; } else { desc.Format = CU_AD_FORMAT_UNSIGNED_INT16; } CHECK_CU(cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch * pixel_size)); CHECK_CU(cuLaunchKernel(func, DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, BLOCKX, BLOCKY, 1, 0, 0, args_uchar, NULL)); return 0; }
static int xfs_quota_set(quota_t *myquota) { fs_disk_quota_t sysquota; int retval; int block_diff= BLOCK_SIZE / 512; memset(&sysquota, 0, sizeof(fs_disk_quota_t)); /* copy our data into the linux dqblk */ sysquota.d_blk_hardlimit = myquota->block_hard * block_diff; sysquota.d_blk_softlimit = myquota->block_soft * block_diff; // XFS really uses blocks, all other formats in this file use bytes sysquota.d_bcount = DIV_UP(myquota->diskspace_used * block_diff, 1024); sysquota.d_ino_hardlimit = myquota->inode_hard; sysquota.d_ino_softlimit = myquota->inode_soft; sysquota.d_icount = myquota->inode_used; /* For XFS, global grace time limits are set by the values set for root */ sysquota.d_btimer = myquota->block_grace; sysquota.d_itimer = myquota->inode_grace; sysquota.d_fieldmask = FS_DQ_LIMIT_MASK; if (myquota->_do_set_global_block_gracetime || myquota->_do_set_global_inode_gracetime) sysquota.d_fieldmask |= FS_DQ_TIMER_MASK; retval = quotactl(QCMD(Q_XSETQLIM,myquota->_id_type), myquota->_qfile, myquota->_id, (caddr_t) &sysquota); if (retval < 0) { output_error ("Failed setting quota (xfs): %s", strerror(errno)); return(0); } /* success */ return 1; }
__init void module_load() { kprintf("module - loading modules", module_count); for(uint32_t i = 0; i < module_count; i++) { kprintf("module - #%u loaded", i + 1); uint32_t num_pages = DIV_UP(modules[i].end - modules[i].start, PAGE_SIZE); void *virt = map_pages(modules[i].start, num_pages); rootramfs_load(virt, modules[i].end - modules[i].start); //TODO unmap pages } uint32_t freed_pages = 0; for(uint32_t i = 0; i < module_count; i++) { uint32_t first_page = DIV_UP(modules[i].start, PAGE_SIZE); uint32_t last_page = DIV_DOWN(modules[i].end, PAGE_SIZE); claim_pages(first_page, last_page - first_page); } kprintf("module - %u pages reclaimed", freed_pages); }
static void plot_string(struct plot_info *pi, struct plot_data *entry, struct membuffer *b, bool has_ndl) { int pressurevalue, mod, ead, end, eadd; const char *depth_unit, *pressure_unit, *temp_unit, *vertical_speed_unit; double depthvalue, tempvalue, speedvalue, sacvalue; int decimals; const char *unit; depthvalue = get_depth_units(entry->depth, NULL, &depth_unit); put_format(b, translate("gettextFromC", "@: %d:%02d\nD: %.1f%s\n"), FRACTION(entry->sec, 60), depthvalue, depth_unit); if (GET_PRESSURE(entry)) { pressurevalue = get_pressure_units(GET_PRESSURE(entry), &pressure_unit); put_format(b, translate("gettextFromC", "P: %d%s\n"), pressurevalue, pressure_unit); } if (entry->temperature) { tempvalue = get_temp_units(entry->temperature, &temp_unit); put_format(b, translate("gettextFromC", "T: %.1f%s\n"), tempvalue, temp_unit); } speedvalue = get_vertical_speed_units(abs(entry->speed), NULL, &vertical_speed_unit); /* Ascending speeds are positive, descending are negative */ if (entry->speed > 0) speedvalue *= -1; put_format(b, translate("gettextFromC", "V: %.1f%s\n"), speedvalue, vertical_speed_unit); sacvalue = get_volume_units(entry->sac, &decimals, &unit); if (entry->sac && prefs.show_sac) put_format(b, translate("gettextFromC", "SAC: %.*f%s/min\n"), decimals, sacvalue, unit); if (entry->cns) put_format(b, translate("gettextFromC", "CNS: %u%%\n"), entry->cns); if (prefs.pp_graphs.po2) put_format(b, translate("gettextFromC", "pO%s: %.2fbar\n"), UTF8_SUBSCRIPT_2, entry->pressures.o2); if (prefs.pp_graphs.pn2) put_format(b, translate("gettextFromC", "pN%s: %.2fbar\n"), UTF8_SUBSCRIPT_2, entry->pressures.n2); if (prefs.pp_graphs.phe) put_format(b, translate("gettextFromC", "pHe: %.2fbar\n"), entry->pressures.he); if (prefs.mod) { mod = (int)get_depth_units(entry->mod, NULL, &depth_unit); put_format(b, translate("gettextFromC", "MOD: %d%s\n"), mod, depth_unit); } eadd = (int)get_depth_units(entry->eadd, NULL, &depth_unit); if (prefs.ead) { switch (pi->dive_type) { case NITROX: ead = (int)get_depth_units(entry->ead, NULL, &depth_unit); put_format(b, translate("gettextFromC", "EAD: %d%s\nEADD: %d%s\n"), ead, depth_unit, eadd, depth_unit); break; case TRIMIX: end = (int)get_depth_units(entry->end, NULL, &depth_unit); put_format(b, translate("gettextFromC", "END: %d%s\nEADD: %d%s\n"), end, depth_unit, eadd, depth_unit); break; case AIR: case FREEDIVING: /* nothing */ break; } } if (entry->stopdepth) { depthvalue = get_depth_units(entry->stopdepth, NULL, &depth_unit); if (entry->ndl) { /* this is a safety stop as we still have ndl */ if (entry->stoptime) put_format(b, translate("gettextFromC", "Safetystop: %umin @ %.0f%s\n"), DIV_UP(entry->stoptime, 60), depthvalue, depth_unit); else put_format(b, translate("gettextFromC", "Safetystop: unkn time @ %.0f%s\n"), depthvalue, depth_unit); } else { /* actual deco stop */ if (entry->stoptime) put_format(b, translate("gettextFromC", "Deco: %umin @ %.0f%s\n"), DIV_UP(entry->stoptime, 60), depthvalue, depth_unit); else put_format(b, translate("gettextFromC", "Deco: unkn time @ %.0f%s\n"), depthvalue, depth_unit); } } else if (entry->in_deco) { put_string(b, translate("gettextFromC", "In deco\n")); } else if (has_ndl) { put_format(b, translate("gettextFromC", "NDL: %umin\n"), DIV_UP(entry->ndl, 60)); } if (entry->tts) put_format(b, translate("gettextFromC", "TTS: %umin\n"), DIV_UP(entry->tts, 60)); if (entry->stopdepth_calc && entry->stoptime_calc) { depthvalue = get_depth_units(entry->stopdepth_calc, NULL, &depth_unit); put_format(b, translate("gettextFromC", "Deco: %umin @ %.0f%s (calc)\n"), DIV_UP(entry->stoptime_calc, 60), depthvalue, depth_unit); } else if (entry->in_deco_calc) { /* This means that we have no NDL left, * and we have no deco stop, * so if we just accend to the surface slowly * (ascent_mm_per_step / ascent_s_per_step) * everything will be ok. */ put_string(b, translate("gettextFromC", "In deco (calc)\n")); } else if (prefs.calcndltts && entry->ndl_calc != 0) { if(entry->ndl_calc < MAX_PROFILE_DECO) put_format(b, translate("gettextFromC", "NDL: %umin (calc)\n"), DIV_UP(entry->ndl_calc, 60)); else put_format(b, "%s", translate("gettextFromC", "NDL: >2h (calc)\n")); } if (entry->tts_calc) { if (entry->tts_calc < MAX_PROFILE_DECO) put_format(b, translate("gettextFromC", "TTS: %umin (calc)\n"), DIV_UP(entry->tts_calc, 60)); else put_format(b, "%s", translate("gettextFromC", "TTS: >2h (calc)\n")); } if (entry->rbt) put_format(b, translate("gettextFromC", "RBT: %umin\n"), DIV_UP(entry->rbt, 60)); if (entry->ceiling) { depthvalue = get_depth_units(entry->ceiling, NULL, &depth_unit); put_format(b, translate("gettextFromC", "Calculated ceiling %.0f%s\n"), depthvalue, depth_unit); if (prefs.calcalltissues) { int k; for (k = 0; k < 16; k++) { if (entry->ceilings[k]) { depthvalue = get_depth_units(entry->ceilings[k], NULL, &depth_unit); put_format(b, translate("gettextFromC", "Tissue %.0fmin: %.1f%s\n"), buehlmann_N2_t_halflife[k], depthvalue, depth_unit); } } } } if (entry->heartbeat && prefs.hrgraph) put_format(b, translate("gettextFromC", "heartbeat: %d\n"), entry->heartbeat); if (entry->bearing) put_format(b, translate("gettextFromC", "bearing: %d\n"), entry->bearing); if (entry->running_sum) { depthvalue = get_depth_units(entry->running_sum / entry->sec, NULL, &depth_unit); put_format(b, translate("gettextFromC", "mean depth to here %.1f%s\n"), depthvalue, depth_unit); } strip_mb(b); }
int main(int argc, char **argv) { // Start logs printf("[%s] - Starting...\n", argv[0]); //'h_' prefix - CPU (host) memory space float //Results calculated by CPU for reference *h_CallResultCPU, *h_PutResultCPU, //CPU copy of GPU results *h_CallResultGPU, *h_PutResultGPU, //CPU instance of input data *h_StockPrice, *h_OptionStrike, *h_OptionYears; //'d_' prefix - GPU (device) memory space CUdeviceptr //Results calculated by GPU d_CallResult, d_PutResult, //GPU instance of input data d_StockPrice, d_OptionStrike, d_OptionYears; double delta, ref, sum_delta, sum_ref, max_delta, L1norm, gpuTime; StopWatchInterface *hTimer = NULL; int i; sdkCreateTimer(&hTimer); printf("Initializing data...\n"); printf("...allocating CPU memory for options.\n"); h_CallResultCPU = (float *)malloc(OPT_SZ); h_PutResultCPU = (float *)malloc(OPT_SZ); h_CallResultGPU = (float *)malloc(OPT_SZ); h_PutResultGPU = (float *)malloc(OPT_SZ); h_StockPrice = (float *)malloc(OPT_SZ); h_OptionStrike = (float *)malloc(OPT_SZ); h_OptionYears = (float *)malloc(OPT_SZ); char *ptx, *kernel_file; size_t ptxSize; kernel_file = sdkFindFilePath("BlackScholes_kernel.cuh", argv[0]); // Set a Compiler Option to have maximum register to be used by each thread. char *compile_options[1]; compile_options[0] = (char *) malloc(sizeof(char)*(strlen("--maxrregcount=16"))); strcpy((char *)compile_options[0],"--maxrregcount=16"); // Compile the kernel BlackScholes_kernel. compileFileToPTX(kernel_file, 1, (const char **)compile_options, &ptx, &ptxSize); CUmodule module = loadPTX(ptx, argc, argv); CUfunction kernel_addr; checkCudaErrors(cuModuleGetFunction(&kernel_addr, module, "BlackScholesGPU")); printf("...allocating GPU memory for options.\n"); checkCudaErrors(cuMemAlloc(&d_CallResult, OPT_SZ)); checkCudaErrors(cuMemAlloc(&d_PutResult, OPT_SZ)); checkCudaErrors(cuMemAlloc(&d_StockPrice, OPT_SZ)); checkCudaErrors(cuMemAlloc(&d_OptionStrike,OPT_SZ)); checkCudaErrors(cuMemAlloc(&d_OptionYears, OPT_SZ)); printf("...generating input data in CPU mem.\n"); srand(5347); //Generate options set for (i = 0; i < OPT_N; i++) { h_CallResultCPU[i] = 0.0f; h_PutResultCPU[i] = -1.0f; h_StockPrice[i] = RandFloat(5.0f, 30.0f); h_OptionStrike[i] = RandFloat(1.0f, 100.0f); h_OptionYears[i] = RandFloat(0.25f, 10.0f); } printf("...copying input data to GPU mem.\n"); //Copy options data to GPU memory for further processing checkCudaErrors(cuMemcpyHtoD(d_StockPrice, h_StockPrice, OPT_SZ)); checkCudaErrors(cuMemcpyHtoD(d_OptionStrike, h_OptionStrike, OPT_SZ)); checkCudaErrors(cuMemcpyHtoD(d_OptionYears, h_OptionYears, OPT_SZ)); printf("Data init done.\n\n"); printf("Executing Black-Scholes GPU kernel (%i iterations)...\n", NUM_ITERATIONS); sdkResetTimer(&hTimer); sdkStartTimer(&hTimer); dim3 cudaBlockSize( 128, 1, 1); dim3 cudaGridSize(DIV_UP(OPT_N/2, 128),1,1); float risk = RISKFREE; float volatility = VOLATILITY; int optval = OPT_N; void *arr[] = { (void *)&d_CallResult, (void *)&d_PutResult, (void *)&d_StockPrice, (void *)&d_OptionStrike, (void *)&d_OptionYears, (void *)&risk, (void *)&volatility, (void *)&optval }; for (i = 0; i < NUM_ITERATIONS; i++) { checkCudaErrors(cuLaunchKernel(kernel_addr, cudaGridSize.x, cudaGridSize.y, cudaGridSize.z, /* grid dim */ cudaBlockSize.x, cudaBlockSize.y, cudaBlockSize.z, /* block dim */ 0,0, /* shared mem, stream */ &arr[0], /* arguments */ 0)); } checkCudaErrors(cuCtxSynchronize()); sdkStopTimer(&hTimer); gpuTime = sdkGetTimerValue(&hTimer) / NUM_ITERATIONS; //Both call and put is calculated printf("Options count : %i \n", 2 * OPT_N); printf("BlackScholesGPU() time : %f msec\n", gpuTime); printf("Effective memory bandwidth: %f GB/s\n", ((double)(5 * OPT_N * sizeof(float)) * 1E-9) / (gpuTime * 1E-3)); printf("Gigaoptions per second : %f \n\n", ((double)(2 * OPT_N) * 1E-9) / (gpuTime * 1E-3)); printf("BlackScholes, Throughput = %.4f GOptions/s, Time = %.5f s, Size = %u options, NumDevsUsed = %u, Workgroup = %u\n", (((double)(2.0 * OPT_N) * 1.0E-9) / (gpuTime * 1.0E-3)), gpuTime*1e-3, (2 * OPT_N), 1, 128); printf("\nReading back GPU results...\n"); //Read back GPU results to compare them to CPU results checkCudaErrors(cuMemcpyDtoH(h_CallResultGPU, d_CallResult, OPT_SZ)); checkCudaErrors(cuMemcpyDtoH(h_PutResultGPU, d_PutResult, OPT_SZ)); printf("Checking the results...\n"); printf("...running CPU calculations.\n\n"); //Calculate options values on CPU BlackScholesCPU( h_CallResultCPU, h_PutResultCPU, h_StockPrice, h_OptionStrike, h_OptionYears, RISKFREE, VOLATILITY, OPT_N ); printf("Comparing the results...\n"); //Calculate max absolute difference and L1 distance //between CPU and GPU results sum_delta = 0; sum_ref = 0; max_delta = 0; for (i = 0; i < OPT_N; i++) { ref = h_CallResultCPU[i]; delta = fabs(h_CallResultCPU[i] - h_CallResultGPU[i]); if (delta > max_delta) { max_delta = delta; } sum_delta += delta; sum_ref += fabs(ref); } L1norm = sum_delta / sum_ref; printf("L1 norm: %E\n", L1norm); printf("Max absolute error: %E\n\n", max_delta); printf("Shutting down...\n"); printf("...releasing GPU memory.\n"); checkCudaErrors(cuMemFree(d_OptionYears)); checkCudaErrors(cuMemFree(d_OptionStrike)); checkCudaErrors(cuMemFree(d_StockPrice)); checkCudaErrors(cuMemFree(d_PutResult)); checkCudaErrors(cuMemFree(d_CallResult)); printf("...releasing CPU memory.\n"); free(h_OptionYears); free(h_OptionStrike); free(h_StockPrice); free(h_PutResultGPU); free(h_CallResultGPU); free(h_PutResultCPU); free(h_CallResultCPU); sdkDeleteTimer(&hTimer); printf("Shutdown done.\n"); printf("\n[%s] - Test Summary\n", argv[0]); cuProfilerStop(); if (L1norm > 1e-6) { printf("Test failed!\n"); exit(EXIT_FAILURE); } printf("Test passed\n"); exit(EXIT_SUCCESS); }
int main (int argc, char **argv) { u_int64_t old_quota; int id; time_t old_grace; argdata_t *argdata; quota_t *quota; char* tmpstr; /* parse commandline and fill argdata */ argdata = parse_commandline (argc, argv); if ( ! argdata ) { exit (ERR_PARSE); } /* initialize the id to use */ if ( ! argdata->id ) { id = 0; } /* numerical uid starting with ':', don't check uid/gid against system users/groups */ else if ( strlen(argdata->id) > 1 && argdata->id[0] == ':' && isdigit(argdata->id[1]) ) { argdata->id++; // skip leading ':' id = strtol(argdata->id, &tmpstr, 10); } else if ( argdata->id_type == QUOTA_USER ) { id = (int) system_getuid (argdata->id); } else { id = (int) system_getgid (argdata->id); } if ( id < 0 ) { exit (ERR_ARG); } /* get the quota info */ quota = quota_new (argdata->id_type, id, argdata->qfile); if ( ! quota ) { exit (ERR_SYS); } if ( ! quota_get(quota) ) { exit (ERR_SYS); } // FIXME: remote debug //output_info("BLOCKS_TO_KB(quota->block_soft): %llu\n", BLOCKS_TO_KB(quota->block_soft)); //output_info("DIV_UP(quota->block_soft, 1024): %llu\n", DIV_UP(quota->block_soft, 1024)); //output_info("DEBUG: quota->block_soft: %llu\n", quota->block_soft); if (argdata->dump_info) { time_t now = time(NULL); u_int64_t display_blocks_used = 0; output_info (""); output_info ("%s Filesystem blocks quota limit grace files quota limit grace", argdata->id_type == QUOTA_USER ? "uid" : "gid"); // quota->diskspace_used is bytes. Display in Kb display_blocks_used = DIV_UP(quota->diskspace_used, 1024); #ifdef HAVE_INTTYPES_H printf("%d %s %" PRIu64 " %" PRIu64 " %" PRIu64 " %lu %" PRIu64 " %" PRIu64 " %" PRIu64 " %lu\n", #else printf("%d %s %llu %llu %llu %lu %llu %llu %llu %lu\n", #endif id, argdata->qfile, display_blocks_used, BLOCKS_TO_KB(quota->block_soft), BLOCKS_TO_KB(quota->block_hard), #if ANY_BSD || PLATFORM_DARWIN (unsigned long) (( (quota->block_soft && (BYTES_TO_BLOCKS(quota->diskspace_used) >= quota->block_soft)) || (quota->block_hard && (BYTES_TO_BLOCKS(quota->diskspace_used) >= quota->block_hard)) ) ? quota->block_time - now : 0), #else (unsigned long) quota->block_time ? quota->block_time - now : 0, #endif /* ANY_BSD */ quota->inode_used, quota->inode_soft, quota->inode_hard, #if ANY_BSD || PLATFORM_DARWIN (unsigned long) (( (quota->inode_soft && (quota->inode_used >= quota->inode_soft)) || (quota->inode_hard && (quota->inode_used >= quota->inode_hard)) ) ? quota->inode_time - now : 0)); #else (unsigned long) quota->inode_time ? quota->inode_time - now : 0); #endif /* ANY_BSD */ exit(0); }
/* * clique_find_all() * * Find all cliques with weight at least min_weight and at most max_weight. * * g - the graph * min_weight - minimum weight of cliques to search for. If min_weight==0, * searches for maximum weight cliques. * max_weight - maximum weight of cliques to search for. If max_weight==0, * no upper limit is used. If min_weight==0, max_weight must * also be 0. * maximal - require cliques to be maximal cliques * opts - time printing and clique storage options * * Returns the number of cliques found. This can be less than the number * of cliques in the graph iff opts->time_function() or opts->user_function() * returns FALSE (request abort). * * The cliques found are stored in opts->clique_list[] and * opts->user_function() is called with them (if non-NULL). The cliques * stored in opts->clique_list[] are newly allocated, and can be freed * by set_free(). * * Note: Automatically uses clique_unweighted_find_all if all vertex * weights are the same. */ int clique_find_all(graph_t *g, int min_weight, int max_weight, boolean maximal, clique_options *opts) { int i,n; int *table; ENTRANCE_SAVE(); entrance_level++; if (opts==NULL) opts=clique_default_options; ASSERT((sizeof(setelement)*8)==ELEMENTSIZE); ASSERT(g!=NULL); ASSERT(min_weight>=0); ASSERT(max_weight>=0); ASSERT((max_weight==0) || (min_weight <= max_weight)); ASSERT(!((min_weight==0) && (max_weight>0))); ASSERT((opts->reorder_function==NULL) || (opts->reorder_map==NULL)); if ((max_weight>0) && (min_weight>max_weight)) { /* state was not changed */ entrance_level--; return 0; } if (clocks_per_sec==0) clocks_per_sec=sysconf(_SC_CLK_TCK); ASSERT(clocks_per_sec>0); if (!graph_weighted(g)) { min_weight=DIV_UP(min_weight,g->weights[0]); if (max_weight) { max_weight=DIV_DOWN(max_weight,g->weights[0]); if (max_weight < min_weight) { /* state was not changed */ entrance_level--; return 0; } } weight_multiplier = g->weights[0]; entrance_level--; i=clique_unweighted_find_all(g,min_weight,max_weight,maximal, opts); ENTRANCE_RESTORE(); return i; } /* Dynamic allocation */ current_clique=set_new(g->n); best_clique=set_new(g->n); clique_size=malloc(g->n * sizeof(int)); memset(clique_size, 0, g->n * sizeof(int)); /* table allocated later */ temp_list=malloc((g->n+2)*sizeof(int *)); temp_count=0; /* "start clock" */ gettimeofday(&realtimer,NULL); times(&cputimer); /* reorder */ if (opts->reorder_function) { table=opts->reorder_function(g,TRUE); } else if (opts->reorder_map) { table=reorder_duplicate(opts->reorder_map,g->n); } else { table=reorder_ident(g->n); } ASSERT(reorder_is_bijection(table,g->n)); /* First phase */ n=weighted_clique_search_single(table,min_weight,INT_MAX,g,opts); if (n==0) { /* Requested clique has not been found. */ goto cleanreturn; } if (min_weight==0) { min_weight=n; max_weight=n; maximal=FALSE; /* They're maximum cliques already. */ } if (max_weight==0) max_weight=INT_MAX; for (i=0; i < g->n; i++) if ((clique_size[table[i]] >= min_weight) || (clique_size[table[i]] == 0)) break; /* Second phase */ n=weighted_clique_search_all(table,i,min_weight,max_weight,maximal, g,opts); cleanreturn: /* Free resources */ for (i=0; i < temp_count; i++) free(temp_list[i]); free(temp_list); free(table); set_free(current_clique); set_free(best_clique); free(clique_size); ENTRANCE_RESTORE(); entrance_level--; return n; }
/* * clique_find_single() * * Returns a clique with weight at least min_weight and at most max_weight. * * g - the graph * min_weight - minimum weight of clique to search for. If min_weight==0, * searches for a maximum weight clique. * max_weight - maximum weight of clique to search for. If max_weight==0, * no upper limit is used. If min_weight==0, max_weight must * also be 0. * maximal - require returned clique to be maximal * opts - time printing options * * Returns the set of vertices forming the clique, or NULL if a clique * of requested weight/maximality does not exist in the graph (or if * opts->time_function() requests abort). * * The returned clique is newly allocated and can be freed by set_free(). * * Note: Does NOT use opts->user_function() or opts->clique_list[]. * Note: Automatically uses clique_unweighted_find_single if all vertex * weights are the same. */ set_t clique_find_single(graph_t *g,int min_weight,int max_weight, boolean maximal, clique_options *opts) { int i; int *table; set_t s; ENTRANCE_SAVE(); entrance_level++; if (opts==NULL) opts=clique_default_options; ASSERT((sizeof(setelement)*8)==ELEMENTSIZE); ASSERT(g!=NULL); ASSERT(min_weight>=0); ASSERT(max_weight>=0); ASSERT((max_weight==0) || (min_weight <= max_weight)); ASSERT(!((min_weight==0) && (max_weight>0))); ASSERT((opts->reorder_function==NULL) || (opts->reorder_map==NULL)); if ((max_weight>0) && (min_weight>max_weight)) { /* state was not changed */ entrance_level--; return NULL; } if (clocks_per_sec==0) clocks_per_sec=sysconf(_SC_CLK_TCK); ASSERT(clocks_per_sec>0); /* Check whether we can use unweighted routines. */ if (!graph_weighted(g)) { min_weight=DIV_UP(min_weight,g->weights[0]); if (max_weight) { max_weight=DIV_DOWN(max_weight,g->weights[0]); if (max_weight < min_weight) { /* state was not changed */ entrance_level--; return NULL; } } weight_multiplier = g->weights[0]; entrance_level--; s=clique_unweighted_find_single(g,min_weight,max_weight, maximal,opts); ENTRANCE_RESTORE(); return s; } /* Dynamic allocation */ current_clique=set_new(g->n); best_clique=set_new(g->n); clique_size=malloc(g->n * sizeof(int)); memset(clique_size, 0, g->n * sizeof(int)); /* table allocated later */ temp_list=malloc((g->n+2)*sizeof(int *)); temp_count=0; clique_list_count=0; /* "start clock" */ gettimeofday(&realtimer,NULL); times(&cputimer); /* reorder */ if (opts->reorder_function) { table=opts->reorder_function(g,TRUE); } else if (opts->reorder_map) { table=reorder_duplicate(opts->reorder_map,g->n); } else { table=reorder_ident(g->n); } ASSERT(reorder_is_bijection(table,g->n)); if (max_weight==0) max_weight=INT_MAX; if (weighted_clique_search_single(table,min_weight,max_weight, g,opts)==0) { /* Requested clique has not been found. */ set_free(best_clique); best_clique=NULL; goto cleanreturn; } if (maximal && (min_weight>0)) { maximalize_clique(best_clique,g); if (graph_subgraph_weight(g,best_clique) > max_weight) { clique_options localopts; localopts.time_function = opts->time_function; localopts.output = opts->output; localopts.user_function = false_function; localopts.clique_list = &best_clique; localopts.clique_list_length = 1; for (i=0; i < g->n-1; i++) if ((clique_size[table[i]] >= min_weight) || (clique_size[table[i]] == 0)) break; if (!weighted_clique_search_all(table,i,min_weight, max_weight,maximal, g,&localopts)) { set_free(best_clique); best_clique=NULL; } } } cleanreturn: s=best_clique; /* Free resources */ for (i=0; i < temp_count; i++) free(temp_list[i]); free(temp_list); temp_list=NULL; temp_count=0; free(table); set_free(current_clique); current_clique=NULL; free(clique_size); clique_size=NULL; ENTRANCE_RESTORE(); entrance_level--; return s; }
static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channels, uint8_t *src_dptr, int src_width, int src_height, int src_pitch, uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch, int pixel_size) { CUDAScaleContext *s = ctx->priv; CudaFunctions *cu = s->hwctx->internal->cuda_dl; CUdeviceptr dst_devptr = (CUdeviceptr)dst_dptr; CUtexObject tex = 0; void *args_uchar[] = { &tex, &dst_devptr, &dst_width, &dst_height, &dst_pitch, &src_width, &src_height }; int ret; CUDA_TEXTURE_DESC tex_desc = { .filterMode = CU_TR_FILTER_MODE_LINEAR, .flags = CU_TRSF_READ_AS_INTEGER, }; CUDA_RESOURCE_DESC res_desc = { .resType = CU_RESOURCE_TYPE_PITCH2D, .res.pitch2D.format = pixel_size == 1 ? CU_AD_FORMAT_UNSIGNED_INT8 : CU_AD_FORMAT_UNSIGNED_INT16, .res.pitch2D.numChannels = channels, .res.pitch2D.width = src_width, .res.pitch2D.height = src_height, .res.pitch2D.pitchInBytes = src_pitch * pixel_size, .res.pitch2D.devPtr = (CUdeviceptr)src_dptr, }; ret = CHECK_CU(cu->cuTexObjectCreate(&tex, &res_desc, &tex_desc, NULL)); if (ret < 0) goto exit; ret = CHECK_CU(cu->cuLaunchKernel(func, DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, BLOCKX, BLOCKY, 1, 0, s->cu_stream, args_uchar, NULL)); exit: if (tex) CHECK_CU(cu->cuTexObjectDestroy(tex)); return ret; } static int scalecuda_resize(AVFilterContext *ctx, AVFrame *out, AVFrame *in) { AVHWFramesContext *in_frames_ctx = (AVHWFramesContext*)in->hw_frames_ctx->data; CUDAScaleContext *s = ctx->priv; switch (in_frames_ctx->sw_format) { case AV_PIX_FMT_YUV420P: call_resize_kernel(ctx, s->cu_func_uchar, 1, in->data[0], in->width, in->height, in->linesize[0], out->data[0], out->width, out->height, out->linesize[0], 1); call_resize_kernel(ctx, s->cu_func_uchar, 1, in->data[1], in->width/2, in->height/2, in->linesize[0]/2, out->data[1], out->width/2, out->height/2, out->linesize[0]/2, 1); call_resize_kernel(ctx, s->cu_func_uchar, 1, in->data[2], in->width/2, in->height/2, in->linesize[0]/2, out->data[2], out->width/2, out->height/2, out->linesize[0]/2, 1); break; case AV_PIX_FMT_YUV444P: call_resize_kernel(ctx, s->cu_func_uchar, 1, in->data[0], in->width, in->height, in->linesize[0], out->data[0], out->width, out->height, out->linesize[0], 1); call_resize_kernel(ctx, s->cu_func_uchar, 1, in->data[1], in->width, in->height, in->linesize[0], out->data[1], out->width, out->height, out->linesize[0], 1); call_resize_kernel(ctx, s->cu_func_uchar, 1, in->data[2], in->width, in->height, in->linesize[0], out->data[2], out->width, out->height, out->linesize[0], 1); break; case AV_PIX_FMT_YUV444P16: call_resize_kernel(ctx, s->cu_func_ushort, 1, in->data[0], in->width, in->height, in->linesize[0] / 2, out->data[0], out->width, out->height, out->linesize[0] / 2, 2); call_resize_kernel(ctx, s->cu_func_ushort, 1, in->data[1], in->width, in->height, in->linesize[1] / 2, out->data[1], out->width, out->height, out->linesize[1] / 2, 2); call_resize_kernel(ctx, s->cu_func_ushort, 1, in->data[2], in->width, in->height, in->linesize[2] / 2, out->data[2], out->width, out->height, out->linesize[2] / 2, 2); break; case AV_PIX_FMT_NV12: call_resize_kernel(ctx, s->cu_func_uchar, 1, in->data[0], in->width, in->height, in->linesize[0], out->data[0], out->width, out->height, out->linesize[0], 1); call_resize_kernel(ctx, s->cu_func_uchar2, 2, in->data[1], in->width/2, in->height/2, in->linesize[1], out->data[1], out->width/2, out->height/2, out->linesize[1]/2, 1); break; case AV_PIX_FMT_P010LE: call_resize_kernel(ctx, s->cu_func_ushort, 1, in->data[0], in->width, in->height, in->linesize[0]/2, out->data[0], out->width, out->height, out->linesize[0]/2, 2); call_resize_kernel(ctx, s->cu_func_ushort2, 2, in->data[1], in->width / 2, in->height / 2, in->linesize[1]/2, out->data[1], out->width / 2, out->height / 2, out->linesize[1] / 4, 2); break; case AV_PIX_FMT_P016LE: call_resize_kernel(ctx, s->cu_func_ushort, 1, in->data[0], in->width, in->height, in->linesize[0] / 2, out->data[0], out->width, out->height, out->linesize[0] / 2, 2); call_resize_kernel(ctx, s->cu_func_ushort2, 2, in->data[1], in->width / 2, in->height / 2, in->linesize[1] / 2, out->data[1], out->width / 2, out->height / 2, out->linesize[1] / 4, 2); break; default: return AVERROR_BUG; } return 0; }