cpuCloverField::~cpuCloverField()
{
  if (create != QUDA_REFERENCE_FIELD_CREATE) {
    if (clover) host_free(clover);
    if (norm) host_free(norm);
    if (cloverInv) host_free(cloverInv);
    if (invNorm) host_free(invNorm);
  }
}
static int getNumaAffinity(int my_gpu, int *cpu_cores, int *ncores)
{
  FILE *nvidia_info, *pci_bus_info;
  size_t nbytes = 255;
  char *my_line;
  char nvidia_info_path[255], pci_bus_info_path[255];
  char bus_info[255];

  // the nvidia driver populates this path for each gpu
  sprintf(nvidia_info_path, "/proc/driver/nvidia/gpus/%d/information", my_gpu);
  nvidia_info = fopen(nvidia_info_path, "r");
  if (nvidia_info == NULL) {
    return -1;
  }

  my_line = (char *)safe_malloc(nbytes + 1);

  while (!feof(nvidia_info)) {
    if (-1 == getline(&my_line, &nbytes, nvidia_info)) {
      break;
    } else {
      // the first 7 chars of the Bus Location lead to the corresponding path
      // under /sys/class/pci_bus/, whose cpulistaffinity file lists the cores
      // attached to that bus
      if (1 == sscanf(my_line, "Bus Location: %s", bus_info)) {
        sprintf(pci_bus_info_path, "/sys/class/pci_bus/%.7s/cpulistaffinity", bus_info);
      }
    }
  }

  // open the cpulistaffinity file on the pci_bus for "my_gpu"
  pci_bus_info = fopen(pci_bus_info_path, "r");
  if (pci_bus_info == NULL) {
    //printfQuda("Warning: opening file %s failed\n", pci_bus_info_path);
    host_free(my_line);
    fclose(nvidia_info);
    return -1;
  }

  while (!feof(pci_bus_info)) {
    if (-1 == getline(&my_line, &nbytes, pci_bus_info)) {
      break;
    } else {
      int rc = process_core_string_list(my_line, cpu_cores, ncores);
      if (rc < 0) {
        warningQuda("Failed to process the line \"%s\"", my_line);
        host_free(my_line);
        fclose(nvidia_info);
        fclose(pci_bus_info);
        return -1;
      }
    }
  }

  host_free(my_line);
  fclose(nvidia_info);
  fclose(pci_bus_info);
  return 0;
}
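/*
 * Hypothetical usage sketch (not part of the original source): bind the
 * calling process to the NUMA-local cores that getNumaAffinity() reports for
 * a given GPU. Assumes Linux with _GNU_SOURCE so that cpu_set_t, CPU_SET and
 * sched_setaffinity() are available, and that cores[] is large enough for the
 * list written by process_core_string_list().
 */
#include <sched.h>

static int setNumaAffinity(int my_gpu)
{
  int cores[128];
  int ncores = 0;

  if (getNumaAffinity(my_gpu, cores, &ncores) < 0 || ncores <= 0) return -1;

  cpu_set_t mask;
  CPU_ZERO(&mask);
  for (int i = 0; i < ncores; i++) CPU_SET(cores[i], &mask);

  /* pid 0 binds the calling thread/process */
  return sched_setaffinity(0, sizeof(mask), &mask);
}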
void cpuColorSpinorField::freeGhostBuffer(void)
{
  if (!initGhostFaceBuffer) return;

  for (int i=0; i < 4; i++) {
    host_free(fwdGhostFaceBuffer[i]);      fwdGhostFaceBuffer[i] = NULL;
    host_free(backGhostFaceBuffer[i]);     backGhostFaceBuffer[i] = NULL;
    host_free(fwdGhostFaceSendBuffer[i]);  fwdGhostFaceSendBuffer[i] = NULL;
    host_free(backGhostFaceSendBuffer[i]); backGhostFaceSendBuffer[i] = NULL;
  }
  initGhostFaceBuffer = 0;
}
void cpuColorSpinorField::destroy()
{
  if (init) {
    if (fieldOrder == QUDA_QOP_DOMAIN_WALL_FIELD_ORDER)
      for (int i=0; i<x[nDim-1]; i++) host_free(((void**)v)[i]);
    host_free(v);
    init = false;
  }

  if (siteSubset == QUDA_FULL_SITE_SUBSET) {
    if (even) delete even;
    if (odd) delete odd;
  }
}
static void loadParityHw(ParityHw ret, void *hw, QudaPrecision cpu_prec)
{
  if (ret.precision == QUDA_DOUBLE_PRECISION && cpu_prec != QUDA_DOUBLE_PRECISION) {
    errorQuda("CUDA double precision requires CPU double precision");
  }

  if (ret.precision != QUDA_HALF_PRECISION) {
    void *packedHw1 = pinned_malloc(ret.bytes);

    if (ret.precision == QUDA_DOUBLE_PRECISION) {
      packParityHw((double2*)packedHw1, (double*)hw, ret.volume);
    } else {
      if (cpu_prec == QUDA_DOUBLE_PRECISION) {
        packParityHw((float2*)packedHw1, (double*)hw, ret.volume);
      } else {
        packParityHw((float2*)packedHw1, (float*)hw, ret.volume);
      }
    }
    cudaMemcpy(ret.data, packedHw1, ret.bytes, cudaMemcpyHostToDevice);
    host_free(packedHw1);
  } else {
    // half precision
    /*
    ParityHw tmp = allocateParityHw(ret.X, QUDA_SINGLE_PRECISION);
    loadParityHw(tmp, hw, cpu_prec, dirac_order);
    copyCuda(ret, tmp);
    freeParityHw(tmp);
    */
  }
}
static void retrieveParityHw(void *res, ParityHw hw, QudaPrecision cpu_prec)
{
  if (hw.precision != QUDA_HALF_PRECISION) {
    void *packedHw1 = pinned_malloc(hw.bytes);
    cudaMemcpy(packedHw1, hw.data, hw.bytes, cudaMemcpyDeviceToHost);

    if (hw.precision == QUDA_DOUBLE_PRECISION) {
      unpackParityHw((double*)res, (double2*)packedHw1, hw.volume);
    } else {
      if (cpu_prec == QUDA_DOUBLE_PRECISION) {
        unpackParityHw((double*)res, (float2*)packedHw1, hw.volume);
      } else {
        unpackParityHw((float*)res, (float2*)packedHw1, hw.volume);
      }
    }
    host_free(packedHw1);
  } else {
    // half precision
    /*
    ParityHw tmp = allocateParityHw(hw.X, QUDA_SINGLE_PRECISION);
    copyCuda(tmp, hw);
    retrieveParityHw(res, tmp, cpu_prec, dirac_order);
    freeParityHw(tmp);
    */
  }
}
void cpuColorSpinorField::destroy()
{
  if (precision == QUDA_DOUBLE_PRECISION) {
    delete order_double;
  } else if (precision == QUDA_SINGLE_PRECISION) {
    delete order_single;
  } else {
    errorQuda("Precision %d not supported", precision);
  }

  if (init) {
    if (fieldOrder == QUDA_QOP_DOMAIN_WALL_FIELD_ORDER)
      for (int i=0; i<x[nDim-1]; i++) host_free(((void**)v)[i]);
    host_free(v);
    init = false;
  }
}
void comm_init(void)
{
  int i;
  static int firsttime = 1;
  if (!firsttime) return;
  firsttime = 0;

  gethostname(hostname, 128);
  hostname[127] = '\0';

  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  int gpus_per_node = getGpuCount();

  comm_partition();

  back_nbr = (rank - 1 + size) % size;
  fwd_nbr = (rank + 1) % size;
  num_nodes = size / getGpuCount();
  if (num_nodes == 0) {
    num_nodes = 1;
  }

  // determine which gpu this MPI process is going to use
  char *hostname_recv_buf = (char *)safe_malloc(128*size);
  int rc = MPI_Allgather(hostname, 128, MPI_CHAR, hostname_recv_buf, 128, MPI_CHAR, MPI_COMM_WORLD);
  if (rc != MPI_SUCCESS) {
    printf("ERROR: MPI_Allgather failed for hostname\n");
    comm_exit(1);
  }

  which_gpu = 0;
  for (i=0; i < size; i++) {
    if (i == rank) {
      break;
    }
    if (strncmp(hostname, hostname_recv_buf + 128*i, 128) == 0) {
      which_gpu++;
    }
  }

  if (which_gpu >= gpus_per_node) {
    printf("ERROR: invalid gpu(%d) to use in rank=%d mpi process\n", which_gpu, rank);
    comm_exit(1);
  }

  srand(rank*999);

  host_free(hostname_recv_buf);
  return;
}
cpuGaugeField::~cpuGaugeField()
{
  if (create == QUDA_NULL_FIELD_CREATE || create == QUDA_ZERO_FIELD_CREATE) {
    if (order == QUDA_QDP_GAUGE_ORDER) {
      for (int d=0; d<nDim; d++) {
        if (gauge[d]) host_free(gauge[d]);
      }
      if (gauge) host_free(gauge);
    } else {
      if (gauge) host_free(gauge);
    }
  } else { // QUDA_REFERENCE_FIELD_CREATE
    if (order == QUDA_QDP_GAUGE_ORDER) {
      if (gauge) host_free(gauge);
    }
  }

  if (link_type != QUDA_ASQTAD_MOM_LINKS) {
    for (int i=0; i<nDim; i++) {
      if (ghost[i]) host_free(ghost[i]);
    }
  }
}
/* Deallocate all data and structures for given data instance. */
LOCAL void delete_NIDDB IFN1(
  int, record_id   /* (I ) Record ID (ie virtualising byte value) */
)
{
  int i;
  IHP *instance_ptr;

  /* Ensure instance is active (for terminate callback). */
  swap_NIDDB(record_id);

  instance_ptr = vrecs[record_id].vr_pinst_tbl;

  /* Deallocate all data areas in instance_ptr table. */
  for (i = 0; i < MAX_INSTANCES; i++, instance_ptr++) {
    if ( *instance_ptr != (IHP)0 ) {
      /* Action any terminate callback */
      if ( terminate_callback[i] != (NIDDB_TM_CALLBACK)0 ) {
        (terminate_callback[i])();
      }
      /* free up memory */
      host_free(*instance_ptr);
    }
  }

  /* Deallocate the instance_ptr table itself */
  host_free((IHP)vrecs[record_id].vr_pinst_tbl);

  /* Initialise virtual record entry */
  vrecs[record_id].vr_pinst_tbl = (IHP *)0;
  vrecs[record_id].vr_inst_handle = (IU32)0;
}
void comm_init(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data)
{
  int initialized;
  MPI_CHECK( MPI_Initialized(&initialized) );

  if (!initialized) {
    errorQuda("MPI has not been initialized");
  }

  MPI_CHECK( MPI_Comm_rank(MPI_COMM_WORLD, &rank) );
  MPI_CHECK( MPI_Comm_size(MPI_COMM_WORLD, &size) );

  int grid_size = 1;
  for (int i = 0; i < ndim; i++) {
    grid_size *= dims[i];
  }
  if (grid_size != size) {
    errorQuda("Communication grid size declared via initCommsGridQuda() does not match"
              " total number of MPI ranks (%d != %d)", grid_size, size);
  }

  Topology *topo = comm_create_topology(ndim, dims, rank_from_coords, map_data);
  comm_set_default_topology(topo);

  // determine which GPU this MPI rank will use
  char *hostname = comm_hostname();
  char *hostname_recv_buf = (char *)safe_malloc(128*size);

  MPI_CHECK( MPI_Allgather(hostname, 128, MPI_CHAR, hostname_recv_buf, 128, MPI_CHAR, MPI_COMM_WORLD) );

  gpuid = 0;
  for (int i = 0; i < rank; i++) {
    if (!strncmp(hostname, &hostname_recv_buf[128*i], 128)) {
      gpuid++;
    }
  }
  host_free(hostname_recv_buf);

  int device_count;
  cudaGetDeviceCount(&device_count);
  if (device_count == 0) {
    errorQuda("No CUDA devices found");
  }
  if (gpuid >= device_count) {
    errorQuda("Too few GPUs available on %s", hostname);
  }
}
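/*
 * Usage sketch (an assumption, not taken from this file): in QUDA-like code
 * the gpuid computed above is what a caller would typically bind the rank to,
 * e.g. via cudaSetDevice(). A minimal illustration:
 */
static void bind_rank_to_device(int gpuid)
{
  cudaError_t err = cudaSetDevice(gpuid);   // one device per MPI rank on this host
  if (err != cudaSuccess) {
    errorQuda("cudaSetDevice(%d) failed: %s", gpuid, cudaGetErrorString(err));
  }
}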
// This does the exchange of the gauge field ghost zone and places it
// into the ghost array.
void cpuGaugeField::exchangeGhost()
{
  if (ghostExchange) return;

  void *send[QUDA_MAX_DIM];
  for (int d=0; d<nDim; d++) send[d] = safe_malloc(nFace*surface[d]*reconstruct*precision);

  // get the links into contiguous buffers
  extractGaugeGhost(*this, send);

  // communicate between nodes
  FaceBuffer faceBuf(x, nDim, reconstruct, nFace, precision);
  faceBuf.exchangeLink(ghost, send, QUDA_CPU_FIELD_LOCATION);

  for (int d=0; d<nDim; d++) host_free(send[d]);

  ghostExchange = true;
}
void host_load(void)
{
  FILE *f;
  int ip;
  char name[80];
  char *p = name;

  if (!(f = fopen("nethost.cache", "r")))
    return;
  if (hostcache_list)
    host_free();
  while (fscanf(f, "%x %s\n", &ip, p) == 2)
    host_add(ip, name);
  fclose(f);
}
status_t CameraAcc::acc_read_fw(const char* filename, fw_info &fw)
{
  LOG1("@%s", __FUNCTION__);
  FILE* file;

  if (!filename)
    return UNKNOWN_ERROR;

  LOG1("filename=%s", filename);

  fw.size = 0;
  fw.data = NULL;

  file = fopen(filename, "rb");
  if (!file)
    return UNKNOWN_ERROR;

  fseek(file, 0, SEEK_END);
  fw.size = ftell(file);
  fseek(file, 0, SEEK_SET);

  if (fw.size == 0) {
    fclose(file);
    return UNKNOWN_ERROR;
  }

  fw.data = host_alloc(fw.size);
  if (fw.data == NULL) {
    fclose(file);
    return UNKNOWN_ERROR;
  }

  if (fread(fw.data, 1, fw.size, file) != fw.size) {
    fclose(file);
    host_free(fw.data);
    return UNKNOWN_ERROR;
  }

  fclose(file);
  return NO_ERROR;
}
void cpuColorSpinorField::exchangeGhost(QudaParity parity, int nFace, int dagger,
                                        const MemoryLocation *dummy1, const MemoryLocation *dummy2,
                                        bool dummy3, bool dummy4) const
{
  // allocate ghost buffer if not yet allocated
  allocateGhostBuffer(nFace);

  void **sendbuf = static_cast<void**>(safe_malloc(nDimComms * 2 * sizeof(void*)));

  for (int i=0; i<nDimComms; i++) {
    sendbuf[2*i + 0] = backGhostFaceSendBuffer[i];
    sendbuf[2*i + 1] = fwdGhostFaceSendBuffer[i];
    ghost_buf[2*i + 0] = backGhostFaceBuffer[i];
    ghost_buf[2*i + 1] = fwdGhostFaceBuffer[i];
  }

  packGhost(sendbuf, parity, nFace, dagger);
  exchange(ghost_buf, sendbuf, nFace);

  host_free(sendbuf);
}
/* Deallocate per Virtual Machine data area for Device Driver. */
GLOBAL void NIDDB_Deallocate_Instance_Data IFN1(
  IHP *, handle   /* Handle to data area */
)
{
  int i;

  if ( !allocation_allowed ) {
    /* We are still managing instances for Windows, or at least we think we
       are. Has the user escaped from Windows without our VxD being informed,
       or is the Insignia Device Driver giving us a bum steer? */
    always_trace0("NIDDB: Unexpected call to NIDDB_Deallocate_Instance_Data.");

    /* We might give the Insignia Device Driver the benefit of the doubt and
       act like a System_Exit message. - Then again we might just ignore em. */
    return;
  }

  /* Find index to master_ptrs, etc. */
  i = handle - &master_ptrs[0];

  if ( i < 0 || i >= MAX_INSTANCES ) {
    always_trace0("NIDDB: Bad handle passed to NIDDB_Deallocate_Instance_Data.");
    return;
  }

  /* Free data area */
  host_free(master_ptrs[i]);

  /* Initialise entry */
  master_ptrs[i] = snapshot_ptrs[i] = (IHP)0;
  instance_size[i] = (IU32)0;
  create_callback[i] = (NIDDB_CR_CALLBACK)0;
  terminate_callback[i] = (NIDDB_TM_CALLBACK)0;

  return;
}
/*
 * UMA backend page allocator for the jumbo frame zones.
 *
 * Allocates kernel virtual memory that is backed by contiguous physical
 * pages.
 */
static void *
mbuf_jumbo_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
  void *p;                           /* Returned page */
  struct vm_page_list *page_list;
  vm_page_t pages;
  unsigned long size, page_num, pfn;
  int i;

  size = round_page(bytes);
  page_num = size >> PAGE_SHIFT;

  /* Inform UMA that this allocator uses kernel_map/object. */
  *flags = UMA_SLAB_FREEBSD_KERNEL;

  p = (void *)host_malloc(size, PAGE_SIZE);   /* kmem_malloc(kmem_map, bytes, wait); */
  pfn = ((unsigned long)p) >> PAGE_SHIFT;
  if (p != NULL) {
    pages = (vm_page_t)host_malloc(sizeof(struct vm_page) * page_num, -1);
    if (pages != NULL) {
      for (i = 0; i < page_num; i++, pfn++) {
        page_list = &page_slab_hash[pfn % MAX_UPTCP_PAGENUM];
        pages[i].page_addr = (uint8_t*)(pfn << PAGE_SHIFT);
        pages[i].flags = 0;
        pages[i].object = NULL;
        SLIST_INSERT_HEAD(page_list, &pages[i], page_link);
      }
    } else {
      host_free(p);
      return NULL;
    }
  }

  return (p);
}
static void gauge_force_test(void)
{
  int max_length = 6;

  initQuda(device);
  setVerbosityQuda(QUDA_VERBOSE, "", stdout);

  qudaGaugeParam = newQudaGaugeParam();

  qudaGaugeParam.X[0] = xdim;
  qudaGaugeParam.X[1] = ydim;
  qudaGaugeParam.X[2] = zdim;
  qudaGaugeParam.X[3] = tdim;

  setDims(qudaGaugeParam.X);

  qudaGaugeParam.anisotropy = 1.0;
  qudaGaugeParam.cpu_prec = link_prec;
  qudaGaugeParam.cuda_prec = link_prec;
  qudaGaugeParam.cuda_prec_sloppy = link_prec;
  qudaGaugeParam.reconstruct = link_recon;
  qudaGaugeParam.reconstruct_sloppy = link_recon;
  qudaGaugeParam.type = QUDA_SU3_LINKS; // in this context, just means these are site links
  qudaGaugeParam.gauge_order = gauge_order;
  qudaGaugeParam.t_boundary = QUDA_PERIODIC_T;
  qudaGaugeParam.gauge_fix = QUDA_GAUGE_FIXED_NO;
  qudaGaugeParam.ga_pad = 0;
  qudaGaugeParam.mom_ga_pad = 0;

  size_t gSize = qudaGaugeParam.cpu_prec;

  void* sitelink;
  void* sitelink_1d;
#ifdef GPU_DIRECT
  sitelink_1d = pinned_malloc(4*V*gaugeSiteSize*gSize);
#else
  sitelink_1d = safe_malloc(4*V*gaugeSiteSize*gSize);
#endif

  // this is a hack to have site link generated in 2d
  // then copied to 1d array in "MILC" format
  void* sitelink_2d[4];
#ifdef GPU_DIRECT
  for (int i=0; i<4; i++) sitelink_2d[i] = pinned_malloc(V*gaugeSiteSize*qudaGaugeParam.cpu_prec);
#else
  for (int i=0; i<4; i++) sitelink_2d[i] = safe_malloc(V*gaugeSiteSize*qudaGaugeParam.cpu_prec);
#endif

  // fills the gauge field with random numbers
  createSiteLinkCPU(sitelink_2d, qudaGaugeParam.cpu_prec, 0);

  // copy the 2d sitelink to 1d milc format
  for (int dir = 0; dir < 4; dir++) {
    for (int i=0; i < V; i++) {
      char* src = ((char*)sitelink_2d[dir]) + i*gaugeSiteSize*qudaGaugeParam.cpu_prec;
      char* dst = ((char*)sitelink_1d) + (4*i+dir)*gaugeSiteSize*qudaGaugeParam.cpu_prec;
      memcpy(dst, src, gaugeSiteSize*qudaGaugeParam.cpu_prec);
    }
  }

  if (qudaGaugeParam.gauge_order == QUDA_MILC_GAUGE_ORDER) {
    sitelink = sitelink_1d;
  } else if (qudaGaugeParam.gauge_order == QUDA_QDP_GAUGE_ORDER) {
    sitelink = (void**)sitelink_2d;
  } else {
    errorQuda("Unsupported gauge order %d", qudaGaugeParam.gauge_order);
  }

#ifdef MULTI_GPU
  void* sitelink_ex_2d[4];
  void* sitelink_ex_1d;

  sitelink_ex_1d = pinned_malloc(4*V_ex*gaugeSiteSize*gSize);
  for (int i=0; i < 4; i++) sitelink_ex_2d[i] = pinned_malloc(V_ex*gaugeSiteSize*gSize);

  int X1 = Z[0];
  int X2 = Z[1];
  int X3 = Z[2];
  int X4 = Z[3];

  for (int i=0; i < V_ex; i++) {
    int sid = i;
    int oddBit = 0;
    if (i >= Vh_ex) {
      sid = i - Vh_ex;
      oddBit = 1;
    }

    int za = sid/E1h;
    int x1h = sid - za*E1h;
    int zb = za/E2;
    int x2 = za - zb*E2;
    int x4 = zb/E3;
    int x3 = zb - x4*E3;
    int x1odd = (x2 + x3 + x4 + oddBit) & 1;
    int x1 = 2*x1h + x1odd;

    if (x1 < 2 || x1 >= X1+2
        || x2 < 2 || x2 >= X2+2
        || x3 < 2 || x3 >= X3+2
        || x4 < 2 || x4 >= X4+2) {
      continue;
    }

    x1 = (x1 - 2 + X1) % X1;
    x2 = (x2 - 2 + X2) % X2;
    x3 = (x3 - 2 + X3) % X3;
    x4 = (x4 - 2 + X4) % X4;

    int idx = (x4*X3*X2*X1 + x3*X2*X1 + x2*X1 + x1) >> 1;
    if (oddBit) {
      idx += Vh;
    }
    for (int dir = 0; dir < 4; dir++) {
      char* src = (char*)sitelink_2d[dir];
      char* dst = (char*)sitelink_ex_2d[dir];
      memcpy(dst + i*gaugeSiteSize*gSize, src + idx*gaugeSiteSize*gSize, gaugeSiteSize*gSize);
    } // dir
  } // i

  for (int dir = 0; dir < 4; dir++) {
    for (int i=0; i < V_ex; i++) {
      char* src = ((char*)sitelink_ex_2d[dir]) + i*gaugeSiteSize*qudaGaugeParam.cpu_prec;
      char* dst = ((char*)sitelink_ex_1d) + (4*i+dir)*gaugeSiteSize*qudaGaugeParam.cpu_prec;
      memcpy(dst, src, gaugeSiteSize*qudaGaugeParam.cpu_prec);
    }
  }
#endif

  void* mom = safe_malloc(4*V*momSiteSize*gSize);
  void* refmom = safe_malloc(4*V*momSiteSize*gSize);

  memset(mom, 0, 4*V*momSiteSize*gSize);

  // initialize some data in cpuMom
  createMomCPU(mom, qudaGaugeParam.cpu_prec);
  memcpy(refmom, mom, 4*V*momSiteSize*gSize);

  double loop_coeff_d[sizeof(loop_coeff_f)/sizeof(float)];
  for (unsigned int i=0; i < sizeof(loop_coeff_f)/sizeof(float); i++) {
    loop_coeff_d[i] = loop_coeff_f[i];
  }

  void* loop_coeff;
  if (qudaGaugeParam.cuda_prec == QUDA_SINGLE_PRECISION) {
    loop_coeff = (void*)&loop_coeff_f[0];
  } else {
    loop_coeff = loop_coeff_d;
  }
  double eb3 = 0.3;
  int num_paths = sizeof(path_dir_x)/sizeof(path_dir_x[0]);

  int** input_path_buf[4];
  for (int dir = 0; dir < 4; dir++) {
    input_path_buf[dir] = (int**)safe_malloc(num_paths*sizeof(int*));
    for (int i=0; i < num_paths; i++) {
      input_path_buf[dir][i] = (int*)safe_malloc(length[i]*sizeof(int));
      if (dir == 0)      memcpy(input_path_buf[dir][i], path_dir_x[i], length[i]*sizeof(int));
      else if (dir == 1) memcpy(input_path_buf[dir][i], path_dir_y[i], length[i]*sizeof(int));
      else if (dir == 2) memcpy(input_path_buf[dir][i], path_dir_z[i], length[i]*sizeof(int));
      else if (dir == 3) memcpy(input_path_buf[dir][i], path_dir_t[i], length[i]*sizeof(int));
    }
  }

  if (tune) {
    printfQuda("Tuning...\n");
    setTuning(QUDA_TUNE_YES);
  }

  struct timeval t0, t1;
  double timeinfo[3];
  /* Run multiple times to exclude the warmup cost of the first run */
  for (int i=0; i < attempts; i++) {
    gettimeofday(&t0, NULL);
    computeGaugeForceQuda(mom, sitelink, input_path_buf, length, loop_coeff_d,
                          num_paths, max_length, eb3, &qudaGaugeParam, timeinfo);
    gettimeofday(&t1, NULL);
  }
  double total_time = t1.tv_sec - t0.tv_sec + 0.000001*(t1.tv_usec - t0.tv_usec);

  // this number comes from the CPU implementation in MILC, gauge_force_imp.c
  int flops = 153004;

  if (verify_results) {
    for (int i=0; i < attempts; i++) {
#ifdef MULTI_GPU
      // last arg=0 means no optimization for communication, i.e. exchange data
      // in all directions even if they are not partitioned
      int R[4] = {2, 2, 2, 2};
      exchange_cpu_sitelink_ex(qudaGaugeParam.X, R, (void**)sitelink_ex_2d,
                               QUDA_QDP_GAUGE_ORDER, qudaGaugeParam.cpu_prec, 0, 4);
      gauge_force_reference(refmom, eb3, sitelink_2d, sitelink_ex_2d, qudaGaugeParam.cpu_prec,
                            input_path_buf, length, loop_coeff, num_paths);
#else
      gauge_force_reference(refmom, eb3, sitelink_2d, NULL, qudaGaugeParam.cpu_prec,
                            input_path_buf, length, loop_coeff, num_paths);
#endif
    }

    int res;
    res = compare_floats(mom, refmom, 4*V*momSiteSize, 1e-3, qudaGaugeParam.cpu_prec);

    strong_check_mom(mom, refmom, 4*V, qudaGaugeParam.cpu_prec);

    printf("Test %s\n", (1 == res) ? "PASSED" : "FAILED");
  }

  double perf = 1.0*flops*V/(total_time*1e+9);
  double kernel_perf = 1.0*flops*V/(timeinfo[1]*1e+9);
  printf("init and cpu->gpu time: %.2f ms, kernel time: %.2f ms, gpu->cpu and cleanup time: %.2f ms, total time = %.2f ms\n",
         timeinfo[0]*1e+3, timeinfo[1]*1e+3, timeinfo[2]*1e+3, total_time*1e+3);
  printf("kernel performance: %.2f GFLOPS, overall performance: %.2f GFLOPS\n", kernel_perf, perf);

  for (int dir = 0; dir < 4; dir++) {
    for (int i=0; i < num_paths; i++) host_free(input_path_buf[dir][i]);
    host_free(input_path_buf[dir]);
  }

  host_free(sitelink_1d);
  for (int dir=0; dir < 4; dir++) host_free(sitelink_2d[dir]);

#ifdef MULTI_GPU
  host_free(sitelink_ex_1d);
  for (int dir=0; dir < 4; dir++) host_free(sitelink_ex_2d[dir]);
#endif

  host_free(mom);
  host_free(refmom);

  endQuda();
}
void comm_free(void *handle) { host_free((MPI_Request*)handle); }
/* Allocate data structures required for new data instance. */
LOCAL IBOOL allocate_NIDDB IFN2(
  IU32, inst_handle,   /* (I ) Windows handle for Virtual Machine */
  int *, record_id     /* ( O) Record ID (ie virtualising byte value) */
)
{
  int v;
  int i;
  IHP *p;
  IHP *instance_ptr;

  /* Search for empty virtual record */
  for (v = 0; v < MAX_VMS; v++) {
    if ( vrecs[v].vr_pinst_tbl == (IHP *)0 )
      break;   /* found empty slot */
  }

  /* Ensure we found empty slot */
  if ( v == MAX_VMS ) {
    /* No free slot! */
    always_trace0("NIDDB: Too many Virtual Machines being requested.");
    return FALSE;
  }

  /* Allocate new instance table - ensure it is zero */
  if ( (instance_ptr = (IHP *)host_calloc(1, sizeof(master_ptrs))) == (IHP *)0 ) {
    /* No room at the inn */
    return FALSE;
  }

  /* Allocate new data areas */
  for (i = 0, p = instance_ptr; i < MAX_INSTANCES; i++, p++) {
    /* Use master pointer as the 'creation template' */
    if ( master_ptrs[i] != (IHP)0 ) {
      if ( (*p = (IHP)host_malloc(instance_size[i])) == (IHP)0 ) {
        /* No room at the inn */
        /* Clean up any blocks which may have been allocated */
        for (i = 0, p = instance_ptr; i < MAX_INSTANCES; i++, p++) {
          if ( *p != (IHP)0 )
            host_free(*p);
        }
        /* Also release the instance table itself */
        host_free((IHP)instance_ptr);
        return FALSE;
      }
    }
  }

  /* Finally fill in virtual record */
  vrecs[v].vr_inst_handle = inst_handle;
  vrecs[v].vr_pinst_tbl = instance_ptr;

  *record_id = v;
  return TRUE;
}
void loadLinkToGPU(cudaGaugeField* cudaGauge, cpuGaugeField* cpuGauge, QudaGaugeParam* param)
{
  if (cudaGauge->Precision() != cpuGauge->Precision()) {
    errorQuda("Mismatch between CPU precision and CUDA precision");
  }
  QudaPrecision prec = cudaGauge->Precision();

#ifdef MULTI_GPU
  const int* Z = cudaGauge->X();
#endif
  int pad = cudaGauge->Pad();
  int Vsh_x = param->X[1]*param->X[2]*param->X[3]/2;
  int Vsh_y = param->X[0]*param->X[2]*param->X[3]/2;
  int Vsh_z = param->X[0]*param->X[1]*param->X[3]/2;
  int Vsh_t = param->X[0]*param->X[1]*param->X[2]/2;

  static void* ghost_cpuGauge[4];
  static void* ghost_cpuGauge_diag[16];

#ifdef MULTI_GPU
  static int allocated = 0;
  int Vs[4] = {2*Vsh_x, 2*Vsh_y, 2*Vsh_z, 2*Vsh_t};

  if (!allocated) {
    for (int i=0; i < 4; i++) {
      size_t ghost_bytes = 8*Vs[i]*gaugeSiteSize*prec;
#ifdef GPU_DIRECT
      ghost_cpuGauge[i] = pinned_malloc(ghost_bytes);
#else
      ghost_cpuGauge[i] = safe_malloc(ghost_bytes);
#endif
    }

    /*
     *  nu |     |
     *     |_____|
     *        mu
     */
    for (int nu=0; nu < 4; nu++) {
      for (int mu=0; mu < 4; mu++) {
        if (nu == mu) {
          ghost_cpuGauge_diag[nu*4+mu] = NULL;
        } else {
          // the other directions
          int dir1, dir2;
          for (dir1=0; dir1 < 4; dir1++) {
            if (dir1 != nu && dir1 != mu) {
              break;
            }
          }
          for (dir2=0; dir2 < 4; dir2++) {
            if (dir2 != nu && dir2 != mu && dir2 != dir1) {
              break;
            }
          }
          //int rc = posix_memalign((void**)&ghost_cpuGauge_diag[nu*4+mu], ALIGNMENT, Z[dir1]*Z[dir2]*gaugeSiteSize*prec);
          size_t nbytes = Z[dir1]*Z[dir2]*gaugeSiteSize*prec;
#ifdef GPU_DIRECT
          ghost_cpuGauge_diag[nu*4+mu] = pinned_malloc(nbytes);
#else
          ghost_cpuGauge_diag[nu*4+mu] = safe_malloc(nbytes);
#endif
          memset(ghost_cpuGauge_diag[nu*4+mu], 0, nbytes);
        }
      }
    }
    allocated = 1;
  }

  int optflag = 1;
  // driver for packing all links for the exchange
  exchange_cpu_sitelink(param->X, (void**)cpuGauge->Gauge_p(), ghost_cpuGauge,
                        ghost_cpuGauge_diag, prec, param, optflag);
#endif

  do_loadLinkToGPU(param->X, cudaGauge->Even_p(), cudaGauge->Odd_p(), (void**)cpuGauge->Gauge_p(),
                   ghost_cpuGauge, ghost_cpuGauge_diag, cudaGauge->Reconstruct(), cudaGauge->Bytes(),
                   cudaGauge->VolumeCB(), pad, Vsh_x, Vsh_y, Vsh_z, Vsh_t, prec, cpuGauge->Order());

#ifdef MULTI_GPU
  if (!(param->preserve_gauge & QUDA_FAT_PRESERVE_COMM_MEM)) {
    for (int i=0; i < 4; i++) {
      host_free(ghost_cpuGauge[i]);
    }
    for (int i=0; i < 4; i++) {
      for (int j=0; j < 4; j++) {
        if (i != j) host_free(ghost_cpuGauge_diag[i*4+j]);
      }
    }
    allocated = 0;
  }
#endif
}
CloverField::~CloverField() { host_free(trlog); }
void host_shutdown(void) { host_save(); host_free(); }
static int mb_config_add_host (oconfig_item_t *ci) /* {{{ */
{
  mb_host_t *host;
  int status;
  int i;

  host = malloc (sizeof (*host));
  if (host == NULL)
    return (ENOMEM);
  memset (host, 0, sizeof (*host));
  host->slaves = NULL;

  status = cf_util_get_string_buffer (ci, host->host, sizeof (host->host));
  if (status != 0)
    return (status);
  if (host->host[0] == 0)
    return (EINVAL);

  for (i = 0; i < ci->children_num; i++)
  {
    oconfig_item_t *child = ci->children + i;
    status = 0;

    if (strcasecmp ("Address", child->key) == 0)
    {
      char buffer[NI_MAXHOST];
      status = cf_util_get_string_buffer (child, buffer, sizeof (buffer));
      if (status == 0)
        status = mb_config_set_host_address (host, buffer);
    }
    else if (strcasecmp ("Port", child->key) == 0)
    {
      host->port = cf_util_get_port_number (child);
      if (host->port <= 0)
        status = -1;
    }
    else if (strcasecmp ("Interval", child->key) == 0)
      status = cf_util_get_cdtime (child, &host->interval);
    else if (strcasecmp ("Slave", child->key) == 0)
      /* Don't set status: Gracefully continue if a slave fails. */
      mb_config_add_slave (host, child);
    else
    {
      ERROR ("Modbus plugin: Unknown configuration option: %s", child->key);
      status = -1;
    }

    if (status != 0)
      break;
  } /* for (i = 0; i < ci->children_num; i++) */

  assert (host->host[0] != 0);
  if (host->host[0] == 0)
  {
    ERROR ("Modbus plugin: Data block \"%s\": No type has been specified.",
        host->host);
    status = -1;
  }

  if (status == 0)
  {
    user_data_t ud;
    char name[1024];
    struct timespec interval = { 0, 0 };

    ud.data = host;
    ud.free_func = host_free;

    ssnprintf (name, sizeof (name), "modbus-%s", host->host);

    CDTIME_T_TO_TIMESPEC (host->interval, &interval);

    plugin_register_complex_read (/* group = */ NULL, name,
        /* callback = */ mb_read,
        /* interval = */ (host->interval > 0) ? &interval : NULL,
        &ud);
  }
  else
  {
    host_free (host);
  }

  return (status);
} /* }}} int mb_config_add_host */
void comm_free(MsgHandle *mh) { host_free(mh); }
void comm_free(MsgHandle *mh)
{
  QMP_free_msghandle(mh->handle);
  QMP_free_msgmem(mh->mem);
  host_free(mh);
}
void comm_destroy_topology(Topology *topo)
{
  host_free(topo->ranks);
  host_free(topo->coords);
  host_free(topo);
}