Exemplo n.º 1
0
/*
 * Bind one source plane to the texture reference `tex` and launch the resize
 * kernel `func` over the destination plane.
 *
 * The texture is described as src_width x src_height texels with `channels`
 * components each; the component format is 8-bit unsigned when pixel_size is
 * 1 and 16-bit unsigned for any other pixel_size.  The pitch handed to
 * cuTexRefSetAddress2D_v3() is src_pitch * pixel_size.
 *
 * The launch grid is DIV_UP(dst_width, BLOCKX) x DIV_UP(dst_height, BLOCKY)
 * blocks of BLOCKX x BLOCKY threads, with no shared memory, on stream 0.
 *
 * Returns 0 once both driver calls have been issued; what happens when a
 * call fails is decided by the CHECK_CU macro.
 */
static int call_resize_kernel(CUDAScaleContext *ctx, CUfunction func, CUtexref tex, int channels,
                              uint8_t *src_dptr, int src_width, int src_height, int src_pitch,
                              uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch,
                              int pixel_size)
{
    CUDA_ARRAY_DESCRIPTOR tex_layout;
    CUdeviceptr dst_devptr = (CUdeviceptr)dst_dptr;
    CUdeviceptr src_devptr = (CUdeviceptr)src_dptr;
    /* Kernel parameters, in the order the kernel declares them. */
    void *kernel_params[] = { &dst_devptr, &dst_width, &dst_height, &dst_pitch, &src_width, &src_height };

    tex_layout.Format      = (pixel_size == 1) ? CU_AD_FORMAT_UNSIGNED_INT8
                                               : CU_AD_FORMAT_UNSIGNED_INT16;
    tex_layout.NumChannels = channels;
    tex_layout.Height      = src_height;
    tex_layout.Width       = src_width;

    CHECK_CU(cuTexRefSetAddress2D_v3(tex, &tex_layout, src_devptr, src_pitch * pixel_size));
    CHECK_CU(cuLaunchKernel(func,
                            DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1,
                            BLOCKX, BLOCKY, 1,
                            0, 0, kernel_params, NULL));

    return 0;
}
Exemplo n.º 2
0
/*
 * Write the limits stored in *myquota to the kernel through the XFS quota
 * interface (quotactl with Q_XSETQLIM).
 *
 * Returns 1 on success.  When quotactl() fails the error text is reported
 * through output_error() and 0 is returned.
 */
static int xfs_quota_set(quota_t *myquota) {
   fs_disk_quota_t xfs_limits;
   int scale = BLOCK_SIZE / 512;

   memset(&xfs_limits, 0, sizeof(xfs_limits));

   /* Limits are always written; timers only when a global grace time was requested. */
   xfs_limits.d_fieldmask = FS_DQ_LIMIT_MASK;
   if (myquota->_do_set_global_block_gracetime || myquota->_do_set_global_inode_gracetime)
      xfs_limits.d_fieldmask |= FS_DQ_TIMER_MASK;

   /* Block accounting: our values are rescaled by BLOCK_SIZE / 512. */
   xfs_limits.d_blk_hardlimit = myquota->block_hard * scale;
   xfs_limits.d_blk_softlimit = myquota->block_soft * scale;
   /* XFS really uses blocks, all other formats in this file use bytes */
   xfs_limits.d_bcount        = DIV_UP(myquota->diskspace_used * scale, 1024);

   /* Inode accounting is copied through unchanged. */
   xfs_limits.d_ino_hardlimit = myquota->inode_hard;
   xfs_limits.d_ino_softlimit = myquota->inode_soft;
   xfs_limits.d_icount        = myquota->inode_used;

   /* For XFS, global grace time limits are set by the values set for root */
   xfs_limits.d_btimer        = myquota->block_grace;
   xfs_limits.d_itimer        = myquota->inode_grace;

   if (quotactl(QCMD(Q_XSETQLIM,myquota->_id_type), myquota->_qfile,
                myquota->_id, (caddr_t) &xfs_limits) < 0) {
      output_error ("Failed setting quota (xfs): %s", strerror(errno));
      return 0;
   }

   /* success */
   return 1;
}
Exemplo n.º 3
0
/*
 * Load every boot module into the root ramfs, then pass the page frames that
 * lie completely inside each module image to claim_pages().
 *
 * First pass:  each module's [start, end) range is mapped with map_pages()
 *              and the mapping is handed to rootramfs_load() together with
 *              the module size in bytes.
 * Second pass: for each module the first page boundary at or after `start`
 *              and the last page boundary at or before `end` are computed;
 *              only that whole-page span is claimed.  A module that does not
 *              contain a single whole page is skipped, because the unsigned
 *              subtraction would otherwise wrap to a huge page count.
 *
 * The number of pages passed to claim_pages() is accumulated and logged.
 */
__init void module_load() {
    kprintf("module - loading %u modules", (uint32_t) module_count);

    for(uint32_t i = 0; i < module_count; i++) {
        kprintf("module - #%u loaded", i + 1);
        uint32_t num_pages = DIV_UP(modules[i].end - modules[i].start, PAGE_SIZE);
        void *virt = map_pages(modules[i].start, num_pages);
        rootramfs_load(virt, modules[i].end - modules[i].start);
        //TODO unmap pages
    }

    uint32_t freed_pages = 0;

    for(uint32_t i = 0; i < module_count; i++) {
        uint32_t first_page = DIV_UP(modules[i].start, PAGE_SIZE);
        uint32_t last_page = DIV_DOWN(modules[i].end, PAGE_SIZE);

        // No whole page inside this module: nothing to claim, and
        // last_page - first_page would underflow.
        if(last_page <= first_page) {
            continue;
        }

        uint32_t page_count = last_page - first_page;
        claim_pages(first_page, page_count);
        freed_pages += page_count;
    }

    kprintf("module - %u pages reclaimed", freed_pages);
}
Exemplo n.º 4
0
/*
 * Write the textual description of one profile sample into membuffer b.
 *
 * The text is produced field by field through put_format()/put_string(),
 * each field ending in "\n".  Most fields are only emitted when the
 * corresponding entry member is non-zero and/or the matching preference in
 * `prefs` is enabled.  The order of the calls below is the order of the
 * fields in the output.
 *
 *   pi      - only pi->dive_type is read, to pick between the EAD (NITROX)
 *             and END (TRIMIX) line
 *   entry   - the sample whose values are printed
 *   b       - destination buffer
 *   has_ndl - enables the plain "NDL" line when the sample has neither a
 *             stop depth nor the in_deco flag
 *
 * strip_mb(b) is called last (presumably trims trailing whitespace left by
 * the final "\n" -- confirm against its definition).
 */
static void plot_string(struct plot_info *pi, struct plot_data *entry, struct membuffer *b, bool has_ndl)
{
	int pressurevalue, mod, ead, end, eadd;
	const char *depth_unit, *pressure_unit, *temp_unit, *vertical_speed_unit;
	double depthvalue, tempvalue, speedvalue, sacvalue;
	int decimals;
	const char *unit;

	/* Time and depth are always printed. */
	depthvalue = get_depth_units(entry->depth, NULL, &depth_unit);
	put_format(b, translate("gettextFromC", "@: %d:%02d\nD: %.1f%s\n"), FRACTION(entry->sec, 60), depthvalue, depth_unit);
	if (GET_PRESSURE(entry)) {
		pressurevalue = get_pressure_units(GET_PRESSURE(entry), &pressure_unit);
		put_format(b, translate("gettextFromC", "P: %d%s\n"), pressurevalue, pressure_unit);
	}
	if (entry->temperature) {
		tempvalue = get_temp_units(entry->temperature, &temp_unit);
		put_format(b, translate("gettextFromC", "T: %.1f%s\n"), tempvalue, temp_unit);
	}
	/* Convert the magnitude, then re-apply the sign for display. */
	speedvalue = get_vertical_speed_units(abs(entry->speed), NULL, &vertical_speed_unit);
	/* Ascending speeds are positive, descending are negative */
	if (entry->speed > 0)
		speedvalue *= -1;
	put_format(b, translate("gettextFromC", "V: %.1f%s\n"), speedvalue, vertical_speed_unit);
	/* The SAC conversion runs unconditionally; the line is only printed below. */
	sacvalue = get_volume_units(entry->sac, &decimals, &unit);
	if (entry->sac && prefs.show_sac)
		put_format(b, translate("gettextFromC", "SAC: %.*f%s/min\n"), decimals, sacvalue, unit);
	if (entry->cns)
		put_format(b, translate("gettextFromC", "CNS: %u%%\n"), entry->cns);
	if (prefs.pp_graphs.po2)
		put_format(b, translate("gettextFromC", "pO%s: %.2fbar\n"), UTF8_SUBSCRIPT_2, entry->pressures.o2);
	if (prefs.pp_graphs.pn2)
		put_format(b, translate("gettextFromC", "pN%s: %.2fbar\n"), UTF8_SUBSCRIPT_2, entry->pressures.n2);
	if (prefs.pp_graphs.phe)
		put_format(b, translate("gettextFromC", "pHe: %.2fbar\n"), entry->pressures.he);
	if (prefs.mod) {
		mod = (int)get_depth_units(entry->mod, NULL, &depth_unit);
		put_format(b, translate("gettextFromC", "MOD: %d%s\n"), mod, depth_unit);
	}
	/* eadd is converted up front because both the NITROX and TRIMIX cases print it. */
	eadd = (int)get_depth_units(entry->eadd, NULL, &depth_unit);
	if (prefs.ead) {
		switch (pi->dive_type) {
		case NITROX:
			ead = (int)get_depth_units(entry->ead, NULL, &depth_unit);
			put_format(b, translate("gettextFromC", "EAD: %d%s\nEADD: %d%s\n"), ead, depth_unit, eadd, depth_unit);
			break;
		case TRIMIX:
			end = (int)get_depth_units(entry->end, NULL, &depth_unit);
			put_format(b, translate("gettextFromC", "END: %d%s\nEADD: %d%s\n"), end, depth_unit, eadd, depth_unit);
			break;
		case AIR:
		case FREEDIVING:
			/* nothing */
			break;
		}
	}
	/* Stop / deco / NDL line taken from the entry's stopdepth, stoptime, ndl and in_deco fields. */
	if (entry->stopdepth) {
		depthvalue = get_depth_units(entry->stopdepth, NULL, &depth_unit);
		if (entry->ndl) {
			/* this is a safety stop as we still have ndl */
			if (entry->stoptime)
				put_format(b, translate("gettextFromC", "Safetystop: %umin @ %.0f%s\n"), DIV_UP(entry->stoptime, 60),
					   depthvalue, depth_unit);
			else
				put_format(b, translate("gettextFromC", "Safetystop: unkn time @ %.0f%s\n"),
					   depthvalue, depth_unit);
		} else {
			/* actual deco stop */
			if (entry->stoptime)
				put_format(b, translate("gettextFromC", "Deco: %umin @ %.0f%s\n"), DIV_UP(entry->stoptime, 60),
					   depthvalue, depth_unit);
			else
				put_format(b, translate("gettextFromC", "Deco: unkn time @ %.0f%s\n"),
					   depthvalue, depth_unit);
		}
	} else if (entry->in_deco) {
		put_string(b, translate("gettextFromC", "In deco\n"));
	} else if (has_ndl) {
		put_format(b, translate("gettextFromC", "NDL: %umin\n"), DIV_UP(entry->ndl, 60));
	}
	if (entry->tts)
		put_format(b, translate("gettextFromC", "TTS: %umin\n"), DIV_UP(entry->tts, 60));
	/* Same information again, this time from the *_calc fields; marked "(calc)". */
	if (entry->stopdepth_calc && entry->stoptime_calc) {
		depthvalue = get_depth_units(entry->stopdepth_calc, NULL, &depth_unit);
		put_format(b, translate("gettextFromC", "Deco: %umin @ %.0f%s (calc)\n"), DIV_UP(entry->stoptime_calc, 60),
			   depthvalue, depth_unit);
	} else if (entry->in_deco_calc) {
		/* This means that we have no NDL left,
		 * and we have no deco stop,
		 * so if we just ascend to the surface slowly
		 * (ascent_mm_per_step / ascent_s_per_step)
		 * everything will be ok. */
		put_string(b, translate("gettextFromC", "In deco (calc)\n"));
	} else if (prefs.calcndltts && entry->ndl_calc != 0) {
		/* Values at or above MAX_PROFILE_DECO are shown as ">2h" instead of a number. */
		if(entry->ndl_calc < MAX_PROFILE_DECO)
			put_format(b, translate("gettextFromC", "NDL: %umin (calc)\n"), DIV_UP(entry->ndl_calc, 60));
		else
			put_format(b, "%s", translate("gettextFromC", "NDL: >2h (calc)\n"));
	}
	if (entry->tts_calc) {
		if (entry->tts_calc < MAX_PROFILE_DECO)
			put_format(b, translate("gettextFromC", "TTS: %umin (calc)\n"), DIV_UP(entry->tts_calc, 60));
		else
			put_format(b, "%s", translate("gettextFromC", "TTS: >2h (calc)\n"));
	}
	if (entry->rbt)
		put_format(b, translate("gettextFromC", "RBT: %umin\n"), DIV_UP(entry->rbt, 60));
	if (entry->ceiling) {
		depthvalue = get_depth_units(entry->ceiling, NULL, &depth_unit);
		put_format(b, translate("gettextFromC", "Calculated ceiling %.0f%s\n"), depthvalue, depth_unit);
		if (prefs.calcalltissues) {
			int k;
			/* One line per non-zero entry in entry->ceilings[0..15], labelled with buehlmann_N2_t_halflife[k]. */
			for (k = 0; k < 16; k++) {
				if (entry->ceilings[k]) {
					depthvalue = get_depth_units(entry->ceilings[k], NULL, &depth_unit);
					put_format(b, translate("gettextFromC", "Tissue %.0fmin: %.1f%s\n"), buehlmann_N2_t_halflife[k], depthvalue, depth_unit);
				}
			}
		}
	}
	if (entry->heartbeat && prefs.hrgraph)
		put_format(b, translate("gettextFromC", "heartbeat: %d\n"), entry->heartbeat);
	if (entry->bearing)
		put_format(b, translate("gettextFromC", "bearing: %d\n"), entry->bearing);
	if (entry->running_sum) {
		/* NOTE(review): divides by entry->sec; assumes sec is non-zero whenever running_sum is -- confirm. */
		depthvalue = get_depth_units(entry->running_sum / entry->sec, NULL, &depth_unit);
		put_format(b, translate("gettextFromC", "mean depth to here %.1f%s\n"), depthvalue, depth_unit);
	}

	strip_mb(b);
}
// Black-Scholes sample driver.
//
// Steps performed, in order:
//   1. allocate host buffers of OPT_SZ bytes for inputs and results
//   2. compile BlackScholes_kernel.cuh to PTX with --maxrregcount=16, load it
//      and look up the "BlackScholesGPU" entry point
//   3. allocate device buffers, fill the host inputs with pseudo-random data
//      (fixed seed 5347) and copy them to the device
//   4. launch the kernel NUM_ITERATIONS times, timing the whole batch
//   5. copy the results back, recompute them with BlackScholesCPU() and
//      compare the call prices (L1 norm and max absolute error)
//   6. release device and host memory
//
// The process exits with EXIT_SUCCESS when the L1 norm is at most 1e-6 and
// with EXIT_FAILURE otherwise, or when a host allocation fails.
int main(int argc, char **argv)
{
    // Start logs
    printf("[%s] - Starting...\n", argv[0]);

    //'h_' prefix - CPU (host) memory space
    float
    //Results calculated by CPU for reference
    *h_CallResultCPU,
    *h_PutResultCPU,
    //CPU copy of GPU results
    *h_CallResultGPU,
    *h_PutResultGPU,
    //CPU instance of input data
    *h_StockPrice,
    *h_OptionStrike,
    *h_OptionYears;

    //'d_' prefix - GPU (device) memory space
    CUdeviceptr
    //Results calculated by GPU
    d_CallResult,
    d_PutResult,

    //GPU instance of input data
    d_StockPrice,
    d_OptionStrike,
    d_OptionYears;

    double
    delta, ref, sum_delta, sum_ref, max_delta, L1norm, gpuTime;

    StopWatchInterface *hTimer = NULL;
    int i;

    sdkCreateTimer(&hTimer);

    printf("Initializing data...\n");
    printf("...allocating CPU memory for options.\n");

    h_CallResultCPU = (float *)malloc(OPT_SZ);
    h_PutResultCPU  = (float *)malloc(OPT_SZ);
    h_CallResultGPU = (float *)malloc(OPT_SZ);
    h_PutResultGPU  = (float *)malloc(OPT_SZ);
    h_StockPrice    = (float *)malloc(OPT_SZ);
    h_OptionStrike  = (float *)malloc(OPT_SZ);
    h_OptionYears   = (float *)malloc(OPT_SZ);

    // Every buffer is written or read below; stop here rather than
    // dereference a NULL pointer later.
    if (!h_CallResultCPU || !h_PutResultCPU || !h_CallResultGPU || !h_PutResultGPU ||
        !h_StockPrice || !h_OptionStrike || !h_OptionYears)
    {
        printf("Host memory allocation failed.\n");
        exit(EXIT_FAILURE);
    }

    char *ptx, *kernel_file;
    size_t ptxSize;
    kernel_file = sdkFindFilePath("BlackScholes_kernel.cuh", argv[0]);

    // Set a Compiler Option to have maximum register to be used by each thread.
    // The option lives in an automatic array sized by its initializer, so the
    // terminating NUL always fits and there is nothing to free.
    char maxreg_option[] = "--maxrregcount=16";
    char *compile_options[1] = { maxreg_option };

    // Compile the kernel BlackScholes_kernel.
    compileFileToPTX(kernel_file, 1, (const char **)compile_options, &ptx, &ptxSize);
    CUmodule module = loadPTX(ptx, argc, argv);

    CUfunction kernel_addr;
    checkCudaErrors(cuModuleGetFunction(&kernel_addr, module, "BlackScholesGPU"));

    printf("...allocating GPU memory for options.\n");
    checkCudaErrors(cuMemAlloc(&d_CallResult, OPT_SZ));
    checkCudaErrors(cuMemAlloc(&d_PutResult, OPT_SZ));
    checkCudaErrors(cuMemAlloc(&d_StockPrice, OPT_SZ));
    checkCudaErrors(cuMemAlloc(&d_OptionStrike,OPT_SZ));
    checkCudaErrors(cuMemAlloc(&d_OptionYears, OPT_SZ));

    printf("...generating input data in CPU mem.\n");
    srand(5347);

    //Generate options set
    for (i = 0; i < OPT_N; i++)
    {
        h_CallResultCPU[i] = 0.0f;
        h_PutResultCPU[i]  = -1.0f;
        h_StockPrice[i]    = RandFloat(5.0f, 30.0f);
        h_OptionStrike[i]  = RandFloat(1.0f, 100.0f);
        h_OptionYears[i]   = RandFloat(0.25f, 10.0f);
    }

    printf("...copying input data to GPU mem.\n");
    //Copy options data to GPU memory for further processing
    checkCudaErrors(cuMemcpyHtoD(d_StockPrice, h_StockPrice, OPT_SZ));
    checkCudaErrors(cuMemcpyHtoD(d_OptionStrike, h_OptionStrike, OPT_SZ));
    checkCudaErrors(cuMemcpyHtoD(d_OptionYears, h_OptionYears, OPT_SZ));

    printf("Data init done.\n\n");
    printf("Executing Black-Scholes GPU kernel (%i iterations)...\n", NUM_ITERATIONS);

    sdkResetTimer(&hTimer);
    sdkStartTimer(&hTimer);

    // 128 threads per block; the grid covers OPT_N/2 work items.
    dim3 cudaBlockSize( 128, 1, 1);
    dim3 cudaGridSize(DIV_UP(OPT_N/2, 128),1,1);

    float risk = RISKFREE;
    float volatility = VOLATILITY;
    int optval = OPT_N;

    // Kernel parameters are passed as an array of pointers to the values.
    void *arr[] = { (void *)&d_CallResult, (void *)&d_PutResult, (void *)&d_StockPrice,
        (void *)&d_OptionStrike, (void *)&d_OptionYears, (void *)&risk, (void *)&volatility, (void *)&optval };

    for (i = 0; i < NUM_ITERATIONS; i++)
    {

        checkCudaErrors(cuLaunchKernel(kernel_addr,
                                            cudaGridSize.x, cudaGridSize.y, cudaGridSize.z, /* grid dim */
                                            cudaBlockSize.x, cudaBlockSize.y, cudaBlockSize.z, /* block dim */
                                            0,0, /* shared mem, stream */
                                            &arr[0], /* arguments */
                                            0));

    }

    // Wait for all launches before stopping the timer.
    checkCudaErrors(cuCtxSynchronize());

    sdkStopTimer(&hTimer);
    gpuTime = sdkGetTimerValue(&hTimer) / NUM_ITERATIONS;

    //Both call and put is calculated
    printf("Options count             : %i     \n", 2 * OPT_N);
    printf("BlackScholesGPU() time    : %f msec\n", gpuTime);
    printf("Effective memory bandwidth: %f GB/s\n", ((double)(5 * OPT_N * sizeof(float)) * 1E-9) / (gpuTime * 1E-3));
    printf("Gigaoptions per second    : %f     \n\n", ((double)(2 * OPT_N) * 1E-9) / (gpuTime * 1E-3));
    printf("BlackScholes, Throughput = %.4f GOptions/s, Time = %.5f s, Size = %u options, NumDevsUsed = %u, Workgroup = %u\n",
           (((double)(2.0 * OPT_N) * 1.0E-9) / (gpuTime * 1.0E-3)), gpuTime*1e-3, (2 * OPT_N), 1, 128);

    printf("\nReading back GPU results...\n");

    //Read back GPU results to compare them to CPU results
    checkCudaErrors(cuMemcpyDtoH(h_CallResultGPU, d_CallResult, OPT_SZ));
    checkCudaErrors(cuMemcpyDtoH(h_PutResultGPU, d_PutResult, OPT_SZ));

    printf("Checking the results...\n");
    printf("...running CPU calculations.\n\n");

    //Calculate options values on CPU
    BlackScholesCPU(
        h_CallResultCPU,
        h_PutResultCPU,
        h_StockPrice,
        h_OptionStrike,
        h_OptionYears,
        RISKFREE,
        VOLATILITY,
        OPT_N
    );

    printf("Comparing the results...\n");
    //Calculate max absolute difference and L1 distance
    //between CPU and GPU results
    sum_delta = 0;
    sum_ref   = 0;
    max_delta = 0;

    for (i = 0; i < OPT_N; i++)
    {
        ref   = h_CallResultCPU[i];
        delta = fabs(h_CallResultCPU[i] - h_CallResultGPU[i]);

        if (delta > max_delta)
        {
            max_delta = delta;
        }

        sum_delta += delta;
        sum_ref   += fabs(ref);
    }

    L1norm = sum_delta / sum_ref;
    printf("L1 norm: %E\n", L1norm);
    printf("Max absolute error: %E\n\n", max_delta);

    printf("Shutting down...\n");
    printf("...releasing GPU memory.\n");

    checkCudaErrors(cuMemFree(d_OptionYears));
    checkCudaErrors(cuMemFree(d_OptionStrike));
    checkCudaErrors(cuMemFree(d_StockPrice));
    checkCudaErrors(cuMemFree(d_PutResult));
    checkCudaErrors(cuMemFree(d_CallResult));

    printf("...releasing CPU memory.\n");

    free(h_OptionYears);
    free(h_OptionStrike);
    free(h_StockPrice);
    free(h_PutResultGPU);
    free(h_CallResultGPU);
    free(h_PutResultCPU);
    free(h_CallResultCPU);

    sdkDeleteTimer(&hTimer);
    printf("Shutdown done.\n");

    printf("\n[%s] - Test Summary\n", argv[0]);

    cuProfilerStop();

    if (L1norm > 1e-6)
    {
        printf("Test failed!\n");
        exit(EXIT_FAILURE);
    }

    printf("Test passed\n");
    exit(EXIT_SUCCESS);
}
Exemplo n.º 6
0
int main (int argc, char **argv) {
    u_int64_t old_quota;
    int id;
    time_t old_grace;
    argdata_t *argdata;
    quota_t *quota;
    char* tmpstr;



    /* parse commandline and fill argdata */
    argdata = parse_commandline (argc, argv);
    if ( ! argdata ) {
        exit (ERR_PARSE);
    }


    /* initialize the id to use */
    if ( ! argdata->id ) {
        id = 0;
    }
    /* numerical uid starting with ':', don't check uid/gid against system users/groups */
    else if ( strlen(argdata->id) > 1 && argdata->id[0] == ':' && isdigit(argdata->id[1]) ) {
        argdata->id++; // skip leading ':'
        id = strtol(argdata->id, &tmpstr, 10);
    }
    else if ( argdata->id_type == QUOTA_USER ) {
        id = (int) system_getuid (argdata->id);
    }
    else {
        id = (int) system_getgid (argdata->id);
    }
    if ( id < 0 ) {
        exit (ERR_ARG);
    }


    /* get the quota info */
    quota = quota_new (argdata->id_type, id, argdata->qfile);
    if ( ! quota ) {
        exit (ERR_SYS);
    }

    if ( ! quota_get(quota) ) {
        exit (ERR_SYS);
    }

// FIXME: remote debug
//output_info("BLOCKS_TO_KB(quota->block_soft): %llu\n", BLOCKS_TO_KB(quota->block_soft));
//output_info("DIV_UP(quota->block_soft, 1024): %llu\n", DIV_UP(quota->block_soft, 1024));
//output_info("DEBUG: quota->block_soft: %llu\n", quota->block_soft);

    if (argdata->dump_info) {
        time_t now = time(NULL);
        u_int64_t display_blocks_used = 0;

        output_info ("");
        output_info ("%s Filesystem blocks quota limit grace files quota limit grace",
                     argdata->id_type == QUOTA_USER ? "uid" : "gid");

        // quota->diskspace_used is bytes. Display in Kb
        display_blocks_used = DIV_UP(quota->diskspace_used, 1024);

#ifdef HAVE_INTTYPES_H
        printf("%d %s %" PRIu64 " %" PRIu64 " %" PRIu64 " %lu %" PRIu64 " %" PRIu64 " %" PRIu64 " %lu\n",
#else
        printf("%d %s %llu %llu %llu %lu %llu %llu %llu %lu\n",
#endif
               id,
               argdata->qfile,
               display_blocks_used,
               BLOCKS_TO_KB(quota->block_soft),
               BLOCKS_TO_KB(quota->block_hard),
#if ANY_BSD || PLATFORM_DARWIN
               (unsigned long)
               ((
                    (quota->block_soft && (BYTES_TO_BLOCKS(quota->diskspace_used) >= quota->block_soft))
                    ||
                    (quota->block_hard && (BYTES_TO_BLOCKS(quota->diskspace_used) >= quota->block_hard))
                ) ? quota->block_time - now : 0),
#else
               (unsigned long) quota->block_time ? quota->block_time - now : 0,
#endif /* ANY_BSD */
               quota->inode_used,
               quota->inode_soft,
               quota->inode_hard,
#if ANY_BSD || PLATFORM_DARWIN
               (unsigned long)
               ((
                    (quota->inode_soft && (quota->inode_used >= quota->inode_soft))
                    ||
                    (quota->inode_hard && (quota->inode_used >= quota->inode_hard))
                ) ? quota->inode_time - now : 0));

#else
               (unsigned long) quota->inode_time ? quota->inode_time - now : 0);
#endif /* ANY_BSD */
        exit(0);
    }
Exemplo n.º 7
0
/*
 * clique_find_all()
 *
 * Searches g for every clique whose weight lies between min_weight and
 * max_weight (both inclusive).
 *
 *   g          - graph to search
 *   min_weight - lower weight bound.  0 means "find the maximum weight
 *                cliques".
 *   max_weight - upper weight bound.  0 means "no upper bound".  Must be 0
 *                whenever min_weight is 0.
 *   maximal    - accept only maximal cliques
 *   opts       - time printing and clique storage options; NULL selects
 *                clique_default_options
 *
 * Returns the number of cliques found.  The count is lower than the number
 * of matching cliques in the graph iff opts->time_function() or
 * opts->user_function() returned FALSE to request an abort.
 *
 * Every clique found is stored in opts->clique_list[] and passed to
 * opts->user_function() when that is non-NULL.  The stored sets are newly
 * allocated; release them with set_free().
 *
 * Note: when all vertex weights are equal the bounds are rescaled and the
 *       work is delegated to clique_unweighted_find_all().
 */
int clique_find_all(graph_t *g, int min_weight, int max_weight,
		    boolean maximal, clique_options *opts) {
	int pos,count;
	int *table;

	ENTRANCE_SAVE();
	entrance_level++;

	if (!opts)
		opts=clique_default_options;

	ASSERT((sizeof(setelement)*8)==ELEMENTSIZE);
	ASSERT(g!=NULL);
	ASSERT(min_weight>=0);
	ASSERT(max_weight>=0);
	ASSERT((max_weight==0) || (min_weight <= max_weight));
	ASSERT(!((min_weight==0) && (max_weight>0)));
	ASSERT((opts->reorder_function==NULL) || (opts->reorder_map==NULL));

	/* Empty weight range: only the level counter was touched so far. */
	if ((max_weight>0) && (max_weight<min_weight)) {
		entrance_level--;
		return 0;
	}

	if (clocks_per_sec==0)
		clocks_per_sec=sysconf(_SC_CLK_TCK);
	ASSERT(clocks_per_sec>0);

	/* Equal vertex weights: rescale the bounds by that weight and
	 * hand over to the unweighted search. */
	if (!graph_weighted(g)) {
		min_weight=DIV_UP(min_weight,g->weights[0]);
		if (max_weight!=0) {
			max_weight=DIV_DOWN(max_weight,g->weights[0]);
			if (min_weight > max_weight) {
				/* still nothing to undo except the level */
				entrance_level--;
				return 0;
			}
		}

		weight_multiplier = g->weights[0];
		entrance_level--;
		count=clique_unweighted_find_all(g,min_weight,max_weight,
						 maximal,opts);
		ENTRANCE_RESTORE();
		return count;
	}

	/* Working storage for the weighted search (table comes later). */
	temp_count=0;
	temp_list=malloc((g->n+2)*sizeof(int *));
	clique_size=malloc(g->n * sizeof(int));
	memset(clique_size, 0, g->n * sizeof(int));
	current_clique=set_new(g->n);
	best_clique=set_new(g->n);

	/* "start clock" */
	gettimeofday(&realtimer,NULL);
	times(&cputimer);

	/* Vertex order: user function, user map, or identity. */
	if (opts->reorder_function)
		table=opts->reorder_function(g,TRUE);
	else if (opts->reorder_map)
		table=reorder_duplicate(opts->reorder_map,g->n);
	else
		table=reorder_ident(g->n);
	ASSERT(reorder_is_bijection(table,g->n));

	/* First phase: a single search without an upper bound. */
	count=weighted_clique_search_single(table,min_weight,INT_MAX,g,opts);
	if (count!=0) {
		if (min_weight==0) {
			/* The weight just found is the maximum, so cliques
			 * of exactly that weight are maximal already. */
			min_weight=count;
			max_weight=count;
			maximal=FALSE;
		}
		if (max_weight==0)
			max_weight=INT_MAX;

		/* Skip leading table entries whose recorded clique_size
		 * is non-zero but below min_weight. */
		pos=0;
		while ((pos < g->n) &&
		       (clique_size[table[pos]] < min_weight) &&
		       (clique_size[table[pos]] != 0))
			pos++;

		/* Second phase */
		count=weighted_clique_search_all(table,pos,min_weight,
						 max_weight,maximal,g,opts);
	}
	/* count==0 here means the requested clique was not found. */

	/* Free resources */
	for (pos=0; pos < temp_count; pos++)
		free(temp_list[pos]);
	free(temp_list);
	free(table);
	set_free(current_clique);
	set_free(best_clique);
	free(clique_size);

	ENTRANCE_RESTORE();
	entrance_level--;

	return count;
}
Exemplo n.º 8
0
/*
 * clique_find_single()
 *
 * Finds one clique whose weight lies between min_weight and max_weight
 * (both inclusive).
 *
 *   g          - graph to search
 *   min_weight - lower weight bound.  0 means "find a maximum weight
 *                clique".
 *   max_weight - upper weight bound.  0 means "no upper bound".  Must be 0
 *                whenever min_weight is 0.
 *   maximal    - the returned clique must be maximal
 *   opts       - time printing options; NULL selects clique_default_options
 *
 * Returns the vertex set of the clique, or NULL when no clique of the
 * requested weight/maximality exists (or opts->time_function() asked for an
 * abort).  The returned set is newly allocated; release it with set_free().
 *
 * Note: opts->user_function() and opts->clique_list[] are NOT used.
 * Note: when all vertex weights are equal the bounds are rescaled and the
 *       work is delegated to clique_unweighted_find_single().
 */
set_t clique_find_single(graph_t *g,int min_weight,int max_weight,
			 boolean maximal, clique_options *opts) {
	int pos;
	int *table;
	set_t result;

	ENTRANCE_SAVE();
	entrance_level++;

	if (!opts)
		opts=clique_default_options;

	ASSERT((sizeof(setelement)*8)==ELEMENTSIZE);
	ASSERT(g!=NULL);
	ASSERT(min_weight>=0);
	ASSERT(max_weight>=0);
	ASSERT((max_weight==0) || (min_weight <= max_weight));
	ASSERT(!((min_weight==0) && (max_weight>0)));
	ASSERT((opts->reorder_function==NULL) || (opts->reorder_map==NULL));

	/* Empty weight range: only the level counter was touched so far. */
	if ((max_weight>0) && (max_weight<min_weight)) {
		entrance_level--;
		return NULL;
	}

	if (clocks_per_sec==0)
		clocks_per_sec=sysconf(_SC_CLK_TCK);
	ASSERT(clocks_per_sec>0);

	/* Equal vertex weights: rescale the bounds by that weight and
	 * hand over to the unweighted search. */
	if (!graph_weighted(g)) {
		min_weight=DIV_UP(min_weight,g->weights[0]);
		if (max_weight!=0) {
			max_weight=DIV_DOWN(max_weight,g->weights[0]);
			if (min_weight > max_weight) {
				/* still nothing to undo except the level */
				entrance_level--;
				return NULL;
			}
		}

		weight_multiplier = g->weights[0];
		entrance_level--;
		result=clique_unweighted_find_single(g,min_weight,max_weight,
						     maximal,opts);
		ENTRANCE_RESTORE();
		return result;
	}

	/* Working storage for the weighted search (table comes later). */
	temp_count=0;
	temp_list=malloc((g->n+2)*sizeof(int *));
	clique_size=malloc(g->n * sizeof(int));
	memset(clique_size, 0, g->n * sizeof(int));
	current_clique=set_new(g->n);
	best_clique=set_new(g->n);

	clique_list_count=0;

	/* "start clock" */
	gettimeofday(&realtimer,NULL);
	times(&cputimer);

	/* Vertex order: user function, user map, or identity. */
	if (opts->reorder_function)
		table=opts->reorder_function(g,TRUE);
	else if (opts->reorder_map)
		table=reorder_duplicate(opts->reorder_map,g->n);
	else
		table=reorder_ident(g->n);
	ASSERT(reorder_is_bijection(table,g->n));

	if (max_weight==0)
		max_weight=INT_MAX;

	if (weighted_clique_search_single(table,min_weight,max_weight,
					  g,opts)==0) {
		/* Requested clique has not been found. */
		set_free(best_clique);
		best_clique=NULL;
	} else if (maximal && (min_weight>0)) {
		maximalize_clique(best_clique,g);
		if (graph_subgraph_weight(g,best_clique) > max_weight) {
			/* Growing the clique pushed it over max_weight:
			 * run the all-cliques search with private options
			 * that store a single result in best_clique. */
			clique_options localopts;

			localopts.clique_list = &best_clique;
			localopts.clique_list_length = 1;
			localopts.user_function = false_function;
			localopts.time_function = opts->time_function;
			localopts.output = opts->output;

			/* Skip leading table entries whose recorded
			 * clique_size is non-zero but below min_weight;
			 * never move past the last entry. */
			pos=0;
			while ((pos < g->n-1) &&
			       (clique_size[table[pos]] < min_weight) &&
			       (clique_size[table[pos]] != 0))
				pos++;

			if (!weighted_clique_search_all(table,pos,min_weight,
							max_weight,maximal,
							g,&localopts)) {
				set_free(best_clique);
				best_clique=NULL;
			}
		}
	}

	result=best_clique;

	/* Free resources */
	for (pos=0; pos < temp_count; pos++)
		free(temp_list[pos]);
	free(temp_list);
	temp_list=NULL;
	temp_count=0;
	free(table);
	set_free(current_clique);
	current_clique=NULL;
	free(clique_size);
	clique_size=NULL;

	ENTRANCE_RESTORE();
	entrance_level--;

	return result;
}
Exemplo n.º 9
0
/*
 * Resize one plane: wrap the source plane in a temporary texture object,
 * launch the kernel `func` over the destination plane on s->cu_stream, and
 * destroy the texture object again.
 *
 * The texture is a pitch-2D resource of src_width x src_height texels with
 * `channels` components each, 8-bit unsigned when pixel_size is 1 and 16-bit
 * unsigned otherwise; its pitch in bytes is src_pitch * pixel_size.  Linear
 * filtering is requested and texels are read as integers.
 *
 * Returns the result of the texture creation if that failed (negative),
 * otherwise the result of the kernel launch.  The result of destroying the
 * texture object is not reflected in the return value.
 */
static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channels,
                              uint8_t *src_dptr, int src_width, int src_height, int src_pitch,
                              uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch,
                              int pixel_size)
{
    CUDAScaleContext *s = ctx->priv;
    CudaFunctions *cu = s->hwctx->internal->cuda_dl;
    CUdeviceptr dst_devptr = (CUdeviceptr)dst_dptr;
    CUtexObject tex = 0;
    /* Kernel parameters; &tex is filled in by cuTexObjectCreate() below. */
    void *kernel_args[] = { &tex, &dst_devptr, &dst_width, &dst_height, &dst_pitch, &src_width, &src_height };

    CUDA_RESOURCE_DESC res_desc = {
        .resType = CU_RESOURCE_TYPE_PITCH2D,
        .res.pitch2D.devPtr = (CUdeviceptr)src_dptr,
        .res.pitch2D.pitchInBytes = src_pitch * pixel_size,
        .res.pitch2D.width = src_width,
        .res.pitch2D.height = src_height,
        .res.pitch2D.numChannels = channels,
        .res.pitch2D.format = CU_AD_FORMAT_UNSIGNED_INT16,
    };

    CUDA_TEXTURE_DESC tex_desc = {
        .flags = CU_TRSF_READ_AS_INTEGER,
        .filterMode = CU_TR_FILTER_MODE_LINEAR,
    };

    int ret;

    /* Single-byte components use the 8-bit format instead. */
    if (pixel_size == 1)
        res_desc.res.pitch2D.format = CU_AD_FORMAT_UNSIGNED_INT8;

    ret = CHECK_CU(cu->cuTexObjectCreate(&tex, &res_desc, &tex_desc, NULL));
    if (ret >= 0)
        ret = CHECK_CU(cu->cuLaunchKernel(func,
                                          DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1,
                                          BLOCKX, BLOCKY, 1, 0, s->cu_stream, kernel_args, NULL));

    /* tex stays 0 unless the creation call stored a handle in it. */
    if (tex)
        CHECK_CU(cu->cuTexObjectDestroy(tex));

    return ret;
}

/*
 * Resize every plane of `in` into `out` by calling call_resize_kernel() once
 * per plane, choosing kernel, channel count, plane dimensions and pitches
 * from the software pixel format of the input frames context.
 *
 * Source pitches are passed in units of pixel_size bytes (call_resize_kernel
 * multiplies them by pixel_size); the arguments for the two-channel chroma
 * planes of NV12/P010LE/P016LE are halved once more on the destination side.
 *
 * Returns 0 when all planes were processed, the first negative value
 * returned by call_resize_kernel() if a plane fails (remaining planes are
 * then skipped), or AVERROR_BUG for a pixel format that is not handled here.
 */
static int scalecuda_resize(AVFilterContext *ctx,
                            AVFrame *out, AVFrame *in)
{
    AVHWFramesContext *in_frames_ctx = (AVHWFramesContext*)in->hw_frames_ctx->data;
    CUDAScaleContext *s = ctx->priv;
    int ret;

    switch (in_frames_ctx->sw_format) {
    case AV_PIX_FMT_YUV420P:
        ret = call_resize_kernel(ctx, s->cu_func_uchar, 1,
                                 in->data[0], in->width, in->height, in->linesize[0],
                                 out->data[0], out->width, out->height, out->linesize[0],
                                 1);
        if (ret < 0)
            return ret;
        ret = call_resize_kernel(ctx, s->cu_func_uchar, 1,
                                 in->data[1], in->width/2, in->height/2, in->linesize[0]/2,
                                 out->data[1], out->width/2, out->height/2, out->linesize[0]/2,
                                 1);
        if (ret < 0)
            return ret;
        ret = call_resize_kernel(ctx, s->cu_func_uchar, 1,
                                 in->data[2], in->width/2, in->height/2, in->linesize[0]/2,
                                 out->data[2], out->width/2, out->height/2, out->linesize[0]/2,
                                 1);
        if (ret < 0)
            return ret;
        break;
    case AV_PIX_FMT_YUV444P:
        ret = call_resize_kernel(ctx, s->cu_func_uchar, 1,
                                 in->data[0], in->width, in->height, in->linesize[0],
                                 out->data[0], out->width, out->height, out->linesize[0],
                                 1);
        if (ret < 0)
            return ret;
        ret = call_resize_kernel(ctx, s->cu_func_uchar, 1,
                                 in->data[1], in->width, in->height, in->linesize[0],
                                 out->data[1], out->width, out->height, out->linesize[0],
                                 1);
        if (ret < 0)
            return ret;
        ret = call_resize_kernel(ctx, s->cu_func_uchar, 1,
                                 in->data[2], in->width, in->height, in->linesize[0],
                                 out->data[2], out->width, out->height, out->linesize[0],
                                 1);
        if (ret < 0)
            return ret;
        break;
    case AV_PIX_FMT_YUV444P16:
        ret = call_resize_kernel(ctx, s->cu_func_ushort, 1,
                                 in->data[0], in->width, in->height, in->linesize[0] / 2,
                                 out->data[0], out->width, out->height, out->linesize[0] / 2,
                                 2);
        if (ret < 0)
            return ret;
        ret = call_resize_kernel(ctx, s->cu_func_ushort, 1,
                                 in->data[1], in->width, in->height, in->linesize[1] / 2,
                                 out->data[1], out->width, out->height, out->linesize[1] / 2,
                                 2);
        if (ret < 0)
            return ret;
        ret = call_resize_kernel(ctx, s->cu_func_ushort, 1,
                                 in->data[2], in->width, in->height, in->linesize[2] / 2,
                                 out->data[2], out->width, out->height, out->linesize[2] / 2,
                                 2);
        if (ret < 0)
            return ret;
        break;
    case AV_PIX_FMT_NV12:
        ret = call_resize_kernel(ctx, s->cu_func_uchar, 1,
                                 in->data[0], in->width, in->height, in->linesize[0],
                                 out->data[0], out->width, out->height, out->linesize[0],
                                 1);
        if (ret < 0)
            return ret;
        /* Plane 1 is handled as two-channel texels. */
        ret = call_resize_kernel(ctx, s->cu_func_uchar2, 2,
                                 in->data[1], in->width/2, in->height/2, in->linesize[1],
                                 out->data[1], out->width/2, out->height/2, out->linesize[1]/2,
                                 1);
        if (ret < 0)
            return ret;
        break;
    case AV_PIX_FMT_P010LE:
        ret = call_resize_kernel(ctx, s->cu_func_ushort, 1,
                                 in->data[0], in->width, in->height, in->linesize[0]/2,
                                 out->data[0], out->width, out->height, out->linesize[0]/2,
                                 2);
        if (ret < 0)
            return ret;
        ret = call_resize_kernel(ctx, s->cu_func_ushort2, 2,
                                 in->data[1], in->width / 2, in->height / 2, in->linesize[1]/2,
                                 out->data[1], out->width / 2, out->height / 2, out->linesize[1] / 4,
                                 2);
        if (ret < 0)
            return ret;
        break;
    case AV_PIX_FMT_P016LE:
        ret = call_resize_kernel(ctx, s->cu_func_ushort, 1,
                                 in->data[0], in->width, in->height, in->linesize[0] / 2,
                                 out->data[0], out->width, out->height, out->linesize[0] / 2,
                                 2);
        if (ret < 0)
            return ret;
        ret = call_resize_kernel(ctx, s->cu_func_ushort2, 2,
                                 in->data[1], in->width / 2, in->height / 2, in->linesize[1] / 2,
                                 out->data[1], out->width / 2, out->height / 2, out->linesize[1] / 4,
                                 2);
        if (ret < 0)
            return ret;
        break;
    default:
        return AVERROR_BUG;
    }

    return 0;
}