/*
 * Benchmark driver for matmul_optimize().
 *
 * Allocates four 64-byte-aligned buffers of WIDTH*HEIGHT doubles, fills the
 * two operands and the reference product through
 * read_matrix(TEST_FILENAME, ...), times one matmul_optimize() call with
 * timestamp_us(), and prints the elapsed value, followed by " incorrect"
 * when compare_matrix() returns non-zero.
 *
 * Returns 0 after a completed run and after a test-file error (unchanged
 * from the previous behaviour); returns 1 only when a buffer cannot be
 * allocated.  All buffers are released on every path.
 */
int main(int argc, char *argv[])
{
    const size_t bytes = (size_t)(WIDTH) * (size_t)(HEIGHT) * sizeof(double);
    double *matA = (double *)_mm_malloc(bytes, 64);
    double *matB = (double *)_mm_malloc(bytes, 64);
    double *prod = (double *)_mm_malloc(bytes, 64);
    double *prod_ref = (double *)_mm_malloc(bytes, 64);
    int status = 0;

    (void)argc;
    (void)argv;

    if (!matA || !matB || !prod || !prod_ref) {
        /* Never hand a NULL buffer to the reader or the kernel. */
        printf("Cannot allocate matrices\n");
        status = 1;
    } else {
        int read_flag = read_matrix(TEST_FILENAME, prod_ref, matA, matB);

        if (read_flag == 1)
            printf("Cannot open test file\n");
        else if (read_flag == 2)
            printf("Error while reading data from test file");
        else if (read_flag == 3)
            printf("Error while closing the test file");

        if (!read_flag) {
            uint64_t start = timestamp_us();
            matmul_optimize(prod, matA, matB); /* run the optimization functions. */
            uint64_t elapsed = timestamp_us() - start;

            /* uint64_t is not necessarily unsigned long; print through a
             * type whose conversion specifier is fixed. */
            if (compare_matrix(prod, prod_ref)) {
                printf("%llu incorrect\n", (unsigned long long)elapsed);
            } else {
                printf("%llu\n", (unsigned long long)elapsed);
            }
        }
    }

    /* Single release point, reached from the error paths as well. */
    _mm_free(prod_ref);
    _mm_free(prod);
    _mm_free(matB);
    _mm_free(matA);
    return status;
}
/*
 * Timing harness: fills `a` with 1.0, calls fun_def(a, b, N) NREPS times and
 * prints the mean wall-clock time per call as measured by hpctimer_getwtime().
 *
 * `b` is not initialised here; presumably fun_def() writes it -- confirm
 * against fun_def's definition.
 *
 * Returns 0 on success, 1 when either vector cannot be allocated.
 */
int main(int argc, char **argv)
{
    int i;
    float *a, *b;
    double t;

    (void)argc;
    (void)argv;

    a = (float *)_mm_malloc(sizeof(float) * N, 16);
    b = (float *)_mm_malloc(sizeof(float) * N, 16);
    if (!a || !b) {
        /* The fill loop below would otherwise write through a NULL pointer. */
        printf("Cannot allocate vectors\n");
        _mm_free(a);
        _mm_free(b);
        return 1;
    }

    for (i = 0; i < N; i++) {
        a[i] = 1.0;
    }

    t = hpctimer_getwtime();
    for (i = 0; i < NREPS; i++) {
        fun_def(a, b, N);
        // fun_sse(a, b, N);
    }
    t = hpctimer_getwtime() - t;
    t = t / NREPS;

    //print_vec(b, N);
    printf("Elapsed time: %.6f sec.\n", t);

    _mm_free(a);
    _mm_free(b);
    return 0;
}
/*
 * Release the loader's working buffers.
 *
 * Each pointer is reset after it is freed so that a second cleanup, or
 * another cleanup routine that releases the same `paraptr` / `poslookup`
 * variables, does not free stale addresses.  The reset is redundant but
 * harmless if _mm_free already clears its argument.
 */
static void STX_Cleanup(void)
{
	_mm_free(stxbuf);
	stxbuf = NULL;

	_mm_free(paraptr);
	paraptr = NULL;

	_mm_free(poslookup);
	poslookup = NULL;

	_mm_free(mh);
	mh = NULL;
}
// Releases the four test buffers with _mm_free and resets each caller-side
// pointer to 0.  Pointers that are already 0 are left untouched.
static void ult_nn_lrn_fp_both_dealloc(
    int16_t* &input,
    int16_t* &output,
    int16_t* &input_ref,
    int16_t* &output_ref)
{
    // Same release order as the parameter list.
    int16_t** const slots[] = { &input, &output, &input_ref, &output_ref };
    const unsigned slot_count = sizeof(slots) / sizeof(slots[0]);

    for (unsigned k = 0; k < slot_count; ++k)
    {
        int16_t* &buffer = *slots[k];
        if (buffer == 0)
            continue;

        _mm_free(buffer);
        buffer = 0;
    }
}
/*
 * Release the loader's working buffers.
 *
 * Each pointer is reset after it is freed so that a second cleanup, or
 * another cleanup routine that releases the same `paraptr` / `poslookup`
 * variables, does not free stale addresses.  The reset is redundant but
 * harmless if _mm_free already clears its argument.
 */
void S3M_Cleanup(void)
{
	_mm_free(s3mbuf);
	s3mbuf = NULL;

	_mm_free(paraptr);
	paraptr = NULL;

	_mm_free(poslookup);
	poslookup = NULL;

	_mm_free(mh);
	mh = NULL;

	_mm_free(origpositions);
	origpositions = NULL;
}
// Destructor: releases the three _mm_free-managed arrays, then deletes every
// entry of `bxdfs` and finally the `bxdfs` table itself.
BluePaintBSDF::~BluePaintBSDF()
{
    _mm_free(xy);
    _mm_free(z);
    _mm_free(e);

    int index = 0;
    while (index < numBxDFs)
    {
        delete bxdfs[index];
        ++index;
    }

    delete[] bxdfs;
}
/*
 * Driver shutdown: sends the EXIT command, shuts down the virtual channel
 * mixer and releases the IPC block together with its sample buffer.
 *
 * `ipc` is NULL when NDS_SW_Init() failed to allocate it, so it must not be
 * dereferenced unconditionally.
 */
static void NDS_SW_Exit(void)
{
	MikMod9_SendCommand(NDS_SW_CMD_EXIT << 28);
	VC_Exit();

	if (ipc != NULL) {
		_mm_free(ipc->buffer);
		ipc->buffer = NULL;

		_mm_free(ipc);
		ipc = NULL;
	}
}
/*
 * Release a matrix: every row in m->data[0 .. n_rows-1], then the row-pointer
 * table, then the matrix_t itself.
 *
 * Passing NULL is a no-op, matching free().  A matrix whose row table was
 * never allocated (m->data == NULL) is released without touching the rows.
 */
void free_matrix(struct matrix_t* m)
{
    msize_t n;

    if (m == NULL) {
        return;
    }

    if (m->data != NULL) {
        for (n = 0; n < m->n_rows; n++) {
            _mm_free(m->data[n]);
        }
        _mm_free((void *)m->data);
    }

    _mm_free(m);
}
// Runs the 5x5 Sobel pipeline on a w x h 8-bit image.
//
// A column pass (detail::convolve_cols_5x5) writes two 16-bit intermediate
// images, which two row passes reduce into out_v and out_h.
//
// in     source image
// out_v  destination of the 1-2-0-2-1 row pass
// out_h  destination of the 1-4-6-4-1 row pass
// w, h   image dimensions in pixels
//
// If either scratch buffer cannot be allocated the function returns without
// writing out_v / out_h (there is no error channel in this signature).
void sobel5x5( const uint8_t* in, uint8_t* out_v, uint8_t* out_h, int w, int h )
{
    // Widen before multiplying: w*h evaluated in int overflows for large images.
    const size_t bytes = static_cast<size_t>( w ) * static_cast<size_t>( h ) * sizeof( int16_t );

    int16_t* temp_h = static_cast<int16_t*>( _mm_malloc( bytes, 16 ) );
    int16_t* temp_v = static_cast<int16_t*>( _mm_malloc( bytes, 16 ) );

    if( temp_h != 0 && temp_v != 0 )
    {
        detail::convolve_cols_5x5( in, temp_v, temp_h, w, h );
        detail::convolve_12021_row_5x5_16bit( temp_v, out_v, w, h );
        detail::convolve_14641_row_5x5_16bit( temp_h, out_h, w, h );
    }

    _mm_free( temp_h );
    _mm_free( temp_v );
}
// Copies the computed output planes into _outputPlanes[0] and then releases
// the working buffers.
//
// Every buffer pointer is reset after it is freed; without that the
// `!= NULL` guards cannot protect against a second call (or a later
// destructor) freeing the same addresses again.
void copyOutResults(std::vector<cv::Mat> &_outputPlanes)
{
    copyToCVMatF(outputPlanes, _outputPlanes[0], ioHeight, ioWidth, ioWidth);

    if (weights != NULL)
    {
        _mm_free(weights);
        weights = NULL;
    }
    if (packed_weights != NULL)
    {
        _mm_free(packed_weights);
        packed_weights = NULL;
    }
    if (inputPlanes != NULL)
    {
        _mm_free(inputPlanes);
        inputPlanes = NULL;
    }
    if (outputPlanes != NULL)
    {
        _mm_free(outputPlanes);
        outputPlanes = NULL;
    }
    if (biases != NULL)
    {
        _mm_free(biases);
        biases = NULL;
    }
}
// Destructor: releases the bitmap storage and the colour table, if present.
ieImageMem::~ieImageMem()
{
    if (pbBitmap != nullptr)
    {
        // pbBitmap sits nBitmapOffs past the address that is handed to
        // _mm_free, so step back before releasing.
        _mm_free(pbBitmap - nBitmapOffs);
        pbBitmap = nullptr;
    }

    if (pCLUT != nullptr)
    {
        _mm_free(pCLUT);
        pCLUT = nullptr;
    }
}
/** * main application * * @param argc number of cli arguments * @param argv values of cli arguments */ int main(int argc, char* argv[]) { if (argc != 6) { std::cout << "cg_max_iterations" << std::endl; std::cout << "cg_eps" << std::endl; std::cout << "mpiGridX" << std::endl; std::cout << "mpiGridY" << std::endl; std::cout << "gridwidth" << std::endl; std::cout << std::endl; std::cout << "example:" << std::endl; std::cout << "./app 10 1e-5 2 5 128" << std::endl; return -1; } // input parameters size_t cg_max_iterations = atoi(argv[1]); double cg_eps = atof(argv[2]); const int mpiGridX = atoi(argv[3]); const int mpiGridY = atoi(argv[4]); grid_points_1d = adaptMeshSize(mpiGridX, mpiGridY, atoi(argv[5])); std::printf("max_iter: %d, eps: %f, grid: (%d, %d), n: %d \n", static_cast<int>(cg_max_iterations), cg_eps, mpiGridX, mpiGridY, static_cast<int>(grid_points_1d)); double* gridS = (double*)_mm_malloc(grid_points_1d*grid_points_1d*sizeof(double), 64); double* bS = (double*)_mm_malloc(grid_points_1d*grid_points_1d*sizeof(double), 64); // TEST single // initialize the gird and rights hand side init_grid(gridS); init_b(bS); // solve Poisson equation using CG method Timer tS; tS.start(); single::solve(gridS, bS, cg_max_iterations, cg_eps); double timeS = tS.stop(); std::cout << std::endl << "Needed time single: " << timeS << " s" << std::endl << std::endl; _mm_free(gridS); _mm_free(bS); return 0; }
// Destructor: releases the neighbor-list and binning arrays.
// numneigh/neighbors go through _mm_free when ALIGNMALLOC is defined and
// through free() otherwise; bincount/bins always use free().
Neighbor::~Neighbor()
{
  if(numneigh) {
#ifdef ALIGNMALLOC
    _mm_free(numneigh);
#else
    free(numneigh);
#endif
  }

  if(neighbors) {
#ifdef ALIGNMALLOC
    _mm_free(neighbors);
#else
    free(neighbors);
#endif
  }

  if(bincount) {
    free(bincount);
  }

  if(bins) {
    free(bins);
  }
}
/*
 * Entry point: set up the operands, run the multiplication, then release the
 * four global buffers.  Command-line arguments are not used.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    /* Initialize the matrices with some "random" data. */
    init();

    run_multiply();

    /* Release the matrix, both vectors and the reference vector. */
    _mm_free(mat_a);
    _mm_free(vec_b);
    _mm_free(vec_c);
    _mm_free(vec_ref);

    return 0;
}
// Releases the eight test buffers with _mm_free and resets each caller-side
// pointer to 0.  Pointers that are already 0 are left untouched.  The release
// order follows the parameter list.
static void ult_nn_convolution_fixedpoint_comp_both_dealloc(
    int16_t* &input,
    int16_t* &output,
    int32_t* &biases,
    int16_t* &kernel,
    int16_t* &input_ref,
    int16_t* &output_ref,
    int32_t* &biases_ref,
    int16_t* &kernel_ref)
{
    const auto release16 = [](int16_t* &buffer)
    {
        if (buffer != 0)
        {
            _mm_free(buffer);
            buffer = 0;
        }
    };
    const auto release32 = [](int32_t* &buffer)
    {
        if (buffer != 0)
        {
            _mm_free(buffer);
            buffer = 0;
        }
    };

    release16(input);
    release16(output);
    release32(biases);
    release16(kernel);

    release16(input_ref);
    release16(output_ref);
    release32(biases_ref);
    release16(kernel_ref);
}
// Releases the eight test buffers with _mm_free and resets each caller-side
// pointer to 0.  Pointers that are already 0 are left untouched.  The release
// order follows the parameter list.
static void ult_nn_fc_both_dealloc(
    int16_t* &input,
    T_output_type* &output,
    int32_t* &biases,
    int16_t* &kernel,
    int16_t* &input_ref,
    T_output_type* &output_ref,
    int32_t* &biases_ref,
    int16_t* &kernel_ref)
{
    const auto drop_i16 = [](int16_t* &p)
    {
        if (p == 0) return;
        _mm_free(p);
        p = 0;
    };
    const auto drop_i32 = [](int32_t* &p)
    {
        if (p == 0) return;
        _mm_free(p);
        p = 0;
    };
    const auto drop_out = [](T_output_type* &p)
    {
        if (p == 0) return;
        _mm_free(p);
        p = 0;
    };

    drop_i16(input);
    drop_out(output);
    drop_i32(biases);
    drop_i16(kernel);

    drop_i16(input_ref);
    drop_out(output_ref);
    drop_i32(biases_ref);
    drop_i16(kernel_ref);
}
// Assignment from another matrix.
//
// Takes the dimensions and state of M.  When this matrix is of type MXT_MEM
// the current buffer is released and a fresh one of nX*nY elements is
// allocated and filled from M; otherwise this matrix simply adopts M's data
// pointer.
//
// Self-assignment is a no-op: without the guard the MXT_MEM path would free
// the buffer and then copy from it.
//
// NOTE(review): the allocation result is not checked before memcpy; there is
// no error channel visible in this signature.
inline UMatrix2D<T>& UMatrix2D<T>::operator = (UMatrix2D<T>& M)
{
#ifdef _SAFE_ACCESS_
    CheckLocker cl1(GetLocker());
    CheckLocker cl2(M.GetLocker());
#endif //_SAFE_ACCESS_

    if(this == &M)
        return *this;

    nX = M.GetX();
    nY = M.GetY();

    if(mt == MXT_MEM)
    {
        if(Ptr != NULL)
        {
#ifdef __ICC
            _mm_free(Ptr);
#else
            free(Ptr);
#endif //__ICC
        }

#ifdef __ICC
        Ptr = (T*)(_mm_malloc(sizeof(T)*nX*nY,_ALIGN));
#else
        Ptr = (T*)(malloc(sizeof(T)*nX*nY));
#endif //__ICC
        memcpy(Ptr,M.GetMatrixPtr(),sizeof(T)*nX*nY);
    }
    else
    {
        Ptr = M.GetMatrixPtr();
    }

    ms = M.GetMatrixState();
    return *this;
}
/* * Does a single fwd+bwd fft of size n and checks it against python. * To test fftw replace fft_mkl with fft_fftw and set flag to MKL_ALIGN. */ void test_mkl(int n, enum mkl_align_flag flag){ double *space = (double *)_mm_malloc((4*n+2)*sizeof(double), 64); double *v; switch(flag){ case MKL_ALIGN: v = space; break; case MKL_NOALIGN: v = space+1; break; } double *w = v + 2*n; for(int i=0; i < n; i++){ w[2*i] = v[2*i] = rand()*1.0/RAND_MAX - 0.5; w[2*i+1] = v[2*i+1] = rand()*1.0/RAND_MAX - 0.5; } verify_dir("DBG/"); array_out(v, 2, n, "DBG/v.dat"); fft_mkl fft(n); fft.fwd(v); array_out(v, 2, n, "DBG/vf.dat"); system("test_fft.py DBG/v.dat DBG/vf.dat"); fft.bwd(v); array_diff(v, w, 2*n); double rerror = array_max(v, 2*n)/array_max(w, 2*n); std::cout<<"\n\tfwd+bwd error in complex mkl 1D fft"<<std::endl; std::cout<<"\tn = "<<n<<std::endl; std::cout<<"\trel error = "<<rerror<<std::endl; _mm_free(space); }
/* xvm_free:
 *   Release a vector allocated by xvm_new. Builds with XVM_ANSI defined, or
 *   without __SSE2__, use plain free(); every other build uses _mm_free().
 */
void xvm_free(double x[]) {
#if !defined(__SSE2__) || defined(XVM_ANSI)
	free(x);
#else
	_mm_free(x);
#endif
}
/*
 * Return a pointer to the data of sector n, reading it through fs->read and
 * adding it to the head of the fs->sectors list if it is not cached yet.
 *
 * Returns NULL when no cache entry can be allocated (one retry after
 * libfat_flush()) or when the read does not deliver LIBFAT_SECTOR_SIZE bytes.
 */
void *libfat_get_sector(struct libfat_filesystem *fs, libfat_sector_t n)
{
    struct libfat_sector *ls = fs->sectors;

    /* Cache lookup */
    while (ls != NULL) {
        if (ls->n == n)
            return ls->data;
        ls = ls->next;
    }

    /* Cache miss: allocate header and payload in one block */
    ls = _mm_malloc(sizeof(struct libfat_sector) + LIBFAT_SECTOR_SIZE, 16);
    if (ls == NULL) {
        /* Flush and try once more */
        libfat_flush(fs);
        ls = _mm_malloc(sizeof(struct libfat_sector) + LIBFAT_SECTOR_SIZE, 16);
    }
    if (ls == NULL)
        return NULL;            /* Can't allocate memory */

    if (fs->read(fs->readptr, ls->data, LIBFAT_SECTOR_SIZE, n)
        != LIBFAT_SECTOR_SIZE) {
        _mm_free(ls);
        return NULL;            /* I/O error */
    }

    /* Link the new entry in at the head of the list */
    ls->n = n;
    ls->next = fs->sectors;
    fs->sectors = ls;

    return ls->data;
}
/*
 * deinit_pcl_dgemm: release the three pcl_*_mic matrix buffers.
 *
 * The whole body is compiled only when __INTEL_OFFLOAD is defined; otherwise
 * the function is empty. With offload support it returns immediately when
 * `usemic` is false. Otherwise it issues one `#pragma offload target(mic : 0)`
 * whose in() clauses name pcl_a_mic, pcl_b_mic and pcl_c_mic, each with
 * length max_pcl_matrix_size*max_pcl_matrix_size, the FREE modifier and
 * align(64), and then calls _mm_free() on each of the three pointers.
 *
 * NOTE(review): FREE is presumably a macro for the alloc_if/free_if modifiers
 * that drop the coprocessor-side copies -- confirm against its definition.
 * NOTE(review): the pragma directly precedes the first _mm_free() statement;
 * confirm which statement the offload is meant to apply to.
 */
void deinit_pcl_dgemm (void) { #ifdef __INTEL_OFFLOAD if (!usemic) return; #pragma offload target(mic : 0) \ in(pcl_a_mic: length(max_pcl_matrix_size*max_pcl_matrix_size) FREE align(64)) \ in(pcl_b_mic: length(max_pcl_matrix_size*max_pcl_matrix_size) FREE align(64)) \ in(pcl_c_mic: length(max_pcl_matrix_size*max_pcl_matrix_size) FREE align(64)) _mm_free (pcl_a_mic); _mm_free (pcl_b_mic); _mm_free (pcl_c_mic); #endif }
/*
 * Issue ATA IDENTIFY DEVICE through each pass-through method in pt[] until
 * one succeeds, and log whether the device reports SMART support.
 *
 * Returns FALSE only when the 512-byte result buffer cannot be allocated;
 * TRUE otherwise, including when every method failed.
 */
BOOL Identify(HANDLE hPhysical)
{
	ATA_PASSTHROUGH_CMD Command = {0};
	IDENTIFY_DEVICE_DATA* idd;
	BOOL found = FALSE;
	int i, r;

	Command.AtaCmd = ATA_IDENTIFY_DEVICE;

	// You'll get an error here if your compiler does not properly pack the IDENTIFY struct
	COMPILE_TIME_ASSERT(sizeof(IDENTIFY_DEVICE_DATA) == 512);

	idd = (IDENTIFY_DEVICE_DATA*)_mm_malloc(sizeof(IDENTIFY_DEVICE_DATA), 0x10);
	if (idd == NULL)
		return FALSE;

	// Stop at the first method that reports success
	for (i = 0; !found && i < ARRAYSIZE(pt); i++) {
		r = pt[i].fn(hPhysical, &Command, idd, sizeof(IDENTIFY_DEVICE_DATA), SPT_TIMEOUT_VALUE);
		if (r != SPT_SUCCESS) {
			uprintf("No joy with: %s (%s)\n", pt[i].type, SptStrerr(r));
			continue;
		}

		uprintf("Success using %s\n", pt[i].type);
		if (!idd->CommandSetSupport.SmartCommands) {
			uprintf("No SMART support\n");
		} else {
			DumpBufferHex(idd, sizeof(IDENTIFY_DEVICE_DATA));
			uprintf("SMART support detected!\n");
		}
		found = TRUE;
	}

	if (!found)
		uprintf("NO ATA FOR YOU!\n");

	_mm_free(idd);
	return TRUE;
}
/*
 * leibniz3: offload demo that keeps the coprocessor copy of `v` alive across
 * several `#pragma offload target(mic:0)` regions.
 *
 * Steps, in source order:
 *   - mic_init(nmic), then assrt(nmic > 0).
 *   - n = 800,000,000 and nbytes = n*8; prints nbytes, allocates nbytes with
 *     64-byte alignment, calls leibniz_init(v, n) and prints the host pointer.
 *   - offload 1: in(v) with alloc_if(1) free_if(0) on an empty block.
 *   - offload 2: nocopy(v) with alloc_if(0) free_if(0), applied to
 *     hostmic_scale(v, n).
 *   - offload 3: out(v) with alloc_if(0) free_if(0), applied to
 *     hostmic_scale(v, n).
 *   - one more hostmic_scale(v, n) with no pragma in front of it.
 *   - offload 4: in(v) with alloc_if(0) free_if(1), applied to
 *     sum = hostmic_sum(v, n).
 *   - prints sum, frees v, calls mic_exit().
 *
 * NOTE(review): the literal 8 in nbytes is presumably sizeof(double) -- confirm.
 * NOTE(review): the _mm_malloc result is not checked before leibniz_init().
 */
void leibniz3(){ int nmic; mic_init(nmic); assrt(nmic > 0); long n = 1l*1000*1000*800; long nbytes = n*8; printf(" nbytes = %ld\n",nbytes); double* v = (double *)_mm_malloc(nbytes, 64); leibniz_init(v, n); printf(" host pointer v = %p \n", v); #pragma offload target(mic:0) \ in(v:length(n) align(64) alloc_if(1) free_if(0)) {} #pragma offload target(mic:0) nocopy(v:length(n) alloc_if(0) free_if(0)) hostmic_scale(v, n); #pragma offload target(mic:0) \ out(v:length(n) align(64) alloc_if(0) free_if(0)) hostmic_scale(v, n); hostmic_scale(v, n); double sum; #pragma offload target(mic:0) \ in(v:length(n) align(64) alloc_if(0) free_if(1)) sum = hostmic_sum(v, n); printf(" sum = %f\n", sum); _mm_free(v); mic_exit(); }
/*
 * leibniz2: offload demo using a signal/wait pair.
 *
 * Steps, in source order:
 *   - mic_init(nmic).
 *   - n = 800,000,000 and nbytes = n*8; allocates nbytes with 64-byte
 *     alignment and calls leibniz_init(v, n).
 *   - sum starts at -1.
 *   - `#pragma offload target(mic:0) in(v:length(n) align(64)) signal(v)`
 *     applied to a block that calls hostmic_scale(v, n) three times and then
 *     assigns sum = hostmic_sum(v, n).
 *   - `#pragma offload_wait target(mic:0) wait(v)`, then prints sum, frees v
 *     and calls mic_exit().
 *
 * NOTE(review): sum appears in no in/out clause; confirm how the value
 * assigned inside the offloaded block reaches the host before the printf.
 * NOTE(review): the literal 8 in nbytes is presumably sizeof(double), and the
 * _mm_malloc result is not checked -- confirm both.
 */
void leibniz2(){ int nmic; mic_init(nmic); long n = 1l*1000*1000*800; long nbytes = n*8; double* v = (double *)_mm_malloc(nbytes, 64); leibniz_init(v, n); double sum=-1; #pragma offload target(mic:0) \ in(v:length(n) align(64)) \ signal(v) { hostmic_scale(v, n); hostmic_scale(v, n); hostmic_scale(v, n); sum = hostmic_sum(v, n); } #pragma offload_wait target(mic:0) wait(v) printf(" leibniz2: sum = %f\n", sum); _mm_free(v); mic_exit(); }
/*
 * Shut down the pipe driver: stop the virtual channel mixer, release the
 * audio buffer, delete the file writer and close the pipe.
 *
 * On unix / Mac OS X the pipe is closed with fclose() and the child `pid` is
 * reaped with waitpid(), retrying while the call is interrupted (EINTR).
 * Elsewhere the pipe is closed with pclose() (_pclose() under Watcom), and
 * EMX additionally switches stdout back to text mode.
 */
static void pipe_Exit(void)
{
#if defined unix || (defined __APPLE__ && defined __MACH__)
	int pstat;
	pid_t pid2;
#endif

	VC_Exit();
	_mm_free(audiobuffer);

	if (pipeout != NULL) {
		_mm_delete_file_writer(pipeout);
		pipeout = NULL;
	}

	if (!pipefile)
		return;

#if defined unix || (defined __APPLE__ && defined __MACH__)
	fclose(pipefile);
	for (;;) {
		pid2 = waitpid(pid, &pstat, 0);
		if (pid2 != -1 || errno != EINTR)
			break;
	}
#else
#ifdef __WATCOMC__
	_pclose(pipefile);
#else
	pclose(pipefile);
#endif
#ifdef __EMX__
	_fsetmode(stdout,"t");
#endif
#endif
	pipefile = NULL;
}
void time_chain() { /* * 10^9 random entries */ long int *list; long int n = 1000*1000*1000; list = (long int*)_mm_malloc(n*sizeof(long int), 64); for(long int i=0; i < n; i++) list[i] = rand(); printf("\t\t\t chained access of array of size 10^9\n"); printf("\t\t\t each entry is in [0,RAND_MAX]\n"); printf("\t\t\t10^9/RAND_MAX = %f\n",1.0*n/RAND_MAX); int count = 6000; printf("\t\t\t number of accesses = %d\n", count); TimeStamp clk; clk.tic(); double xx = chain_walk(list, n, count); double cycles = clk.toc(); printf("\tcycles per access = %f\n", cycles/count); int repeats = countrepeats(list, n, count); printf("\tnumber of repeats = %d\n", repeats); double prob = probNoR(n, count); printf("\ttheor prob of 0 repeats = %f\n\n", prob); _mm_free(list); }
/*
 * Release the engine's private SSE context: its result128 and ssresult
 * buffers, then the context block itself.
 *
 * Does nothing when no context is attached, and clears eng->priv afterwards
 * so a second release (or any later access) cannot reach the freed block.
 */
static void release(struct EngineThread *eng)
{
	SSE_CTX *ctx = eng->priv;

	if (ctx == NULL)
		return;

	result128_free(ctx->res);
	ssresult_free(ctx->sres);
	_mm_free(ctx);
	eng->priv = NULL;
}
static BOOL NDS_SW_Init(void) { md_mode|=DMODE_SOFT_MUSIC|DMODE_SOFT_SNDFX; md_mode &= ~DMODE_STEREO; ipc = (NDS_SW_IPC*)_mm_malloc(sizeof(NDS_SW_IPC)); if (ipc == NULL) { MikMod_errno = MMERR_OUT_OF_MEMORY; return 1; } ipc->buffer = (SBYTE*)_mm_malloc(BUFFERSIZE); if (ipc->buffer == NULL) { _mm_free(ipc); ipc = NULL; MikMod_errno = MMERR_OUT_OF_MEMORY; return 1; } if (VC_Init()) { return 1; } ipc->bufferSize = BUFFERSIZE; ipc->sampleRate = md_mixfreq; ipc->format = (md_mode & DMODE_16BITS) ? 16 : 8; MikMod9_SendCommand(NDS_SW_CMD_INIT << 28 | (u32)ipc); return 0; }
void HostSpace::deallocate( void * const arg_alloc_ptr , const size_t #if defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS ) arg_alloc_size #endif ) const { if ( arg_alloc_ptr ) { if ( m_alloc_mech == STD_MALLOC ) { void * alloc_ptr = *(reinterpret_cast<void **>(arg_alloc_ptr) -1); free( alloc_ptr ); } #if defined( KOKKOS_ENABLE_INTEL_MM_ALLOC ) else if ( m_alloc_mech == INTEL_MM_ALLOC ) { _mm_free( arg_alloc_ptr ); } #endif #if defined( KOKKOS_ENABLE_POSIX_MEMALIGN ) else if ( m_alloc_mech == POSIX_MEMALIGN ) { free( arg_alloc_ptr ); } #endif #if defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS ) else if ( m_alloc_mech == POSIX_MMAP ) { munmap( arg_alloc_ptr , arg_alloc_size ); } #endif } }
void FreeMemory ( void * ptr, size_t capacity ) { if( !m_memory_init ) InitPool (); else if (IsPoolFull()) { _mm_free(ptr); } // find the best place (insertion sort) FreeMemoryHolder* it(m_free_memory); FreeMemoryHolder * const it_end(&(m_free_memory[HMM_MAX_FREE_OBJECTS])); do { if (it->m_ptr == nullptr || it->m_capacity > capacity) { break; } } while (++it != it_end); // move other containers up by 1 index FreeMemoryHolder* it2(it_end); FreeMemoryHolder* it3(it2 - 2); while (--it2 != it) { it2->Copy(it3); it3->Zero(); --it3; } it->m_ptr = ptr; it->m_capacity = capacity; }