/**
 * Constructs a tileset from an image that is already in memory.
 *
 * The image is copied into the tileset, the texture is created from that
 * copy, and computeGridSize() is then called to refresh the grid size
 * (which starts out as (0, 0)).
 *
 * @param image    Image to copy into the tileset.
 * @param tileSize Value stored as the tile size.
 * @param margin   Value stored as the margin.
 * @param spacing  Value stored as the spacing.
 *
 * @throws std::ios_base::failure if the texture cannot be loaded from the image.
 */
Tileset::Tileset(const sf::Image& image,
                 const Dimension& tileSize,
                 const Insets& margin,
                 const Insets& spacing)
    : m_image(image),
      m_texture(),
      m_tileSize(tileSize),
      m_margin(margin),
      m_spacing(spacing),
      m_gridSize(0, 0)
{
    // The texture is built from the tileset's own copy of the image.
    const bool textureReady = m_texture.loadFromImage(m_image);
    if (!textureReady)
    {
        throw std::ios_base::failure("Failed to load image");
    }

    computeGridSize();
}
/**
 * Constructs a tileset by loading its image from a file.
 *
 * The image is read from `filename`, the texture is created from the loaded
 * image, and computeGridSize() is then called to refresh the grid size
 * (which starts out as (0, 0)).
 *
 * @param filename Path handed to the image loader.
 * @param tileSize Value stored as the tile size.
 * @param margin   Value stored as the margin.
 * @param spacing  Value stored as the spacing.
 *
 * @throws std::ios_base::failure if either the image file or the texture
 *         fails to load; the message is the same in both cases.
 */
Tileset::Tileset(const std::string& filename,
                 const Dimension& tileSize,
                 const Insets& margin,
                 const Insets& spacing)
    : m_image(),
      m_texture(),
      m_tileSize(tileSize),
      m_margin(margin),
      m_spacing(spacing),
      m_gridSize(0, 0)
{
    // Short-circuit evaluation: the texture is only attempted once the image
    // has been read successfully. Both failures report the same message.
    if (!m_image.loadFromFile(filename) || !m_texture.loadFromImage(m_image))
    {
        throw std::ios_base::failure("Failed to load resource " + filename);
    }

    computeGridSize();
}
bool SubdivPatch1Base::updateEdgeLevels(const float edge_level[4], const int subdiv[4], const SubdivMesh *const mesh, const int simd_width) { /* calculate edge levels */ float new_level[4]; computeEdgeLevels(edge_level,subdiv,new_level); /* calculate if tessellation pattern changed */ bool grid_changed = false; for (size_t i=0; i<4; i++) { grid_changed |= (int)new_level[i] != (int)level[i]; level[i] = new_level[i]; } /* compute grid resolution */ Vec2i res = computeGridSize(level); grid_u_res = res.x; grid_v_res = res.y; grid_size_simd_blocks = ((grid_u_res*grid_v_res+simd_width-1)&(-simd_width)) / simd_width; #if defined(__MIC__) grid_bvh_size_64b_blocks = getSubTreeSize64bBlocks( 0 ); const size_t grid_size_xyzuv = (grid_size_simd_blocks * simd_width) * 4; grid_subtree_size_64b_blocks = grid_bvh_size_64b_blocks + ((grid_size_xyzuv+15) / 16); #endif /* need stiching? */ flags &= ~TRANSITION_PATCH; const int int_edge_points0 = (int)level[0] + 1; const int int_edge_points1 = (int)level[1] + 1; const int int_edge_points2 = (int)level[2] + 1; const int int_edge_points3 = (int)level[3] + 1; if (int_edge_points0 < (int)grid_u_res || int_edge_points2 < (int)grid_u_res || int_edge_points1 < (int)grid_v_res || int_edge_points3 < (int)grid_v_res) { flags |= TRANSITION_PATCH; } return grid_changed; }
/**
 * Benchmark driver for the exclusive-scan implementation.
 *
 * Fills a host array with values in [0, 9], copies it to the device twice,
 * runs exclusiveScan_wrapper2() on one copy and exclusiveScan_thrust() on the
 * other, times both with CUDA events, copies both results back and reports
 * whether they agree.
 *
 * @return 0 after a completed run (regardless of whether the results match),
 *         1 if the host buffers could not be allocated.
 */
int main(int argc, char *argv[])
{
    QCoreApplication a(argc, argv);

    int size = 20000000;
    const size_t bytes = size * sizeof(int);

    // Host buffers: `vector` holds the input and later our result,
    // `vectorCheck` receives the reference result.
    int* vector = (int *) malloc(bytes);
    int* vectorCheck = (int *) malloc(bytes);
    if (!vector || !vectorCheck) {
        // Two 80 MB allocations can legitimately fail; bail out instead of
        // dereferencing a null pointer in the fill loop below.
        fprintf(stderr, "Failed to allocate %lu bytes of host memory\n", (unsigned long) bytes);
        free(vector);
        free(vectorCheck);
        return 1;
    }

    for (int i = 0; i < size; i++)
        vector[i] = rand() % 10;

    pickCudaDevice();
    checkCudaError();

    // Device buffers: input/output for our scan and for the reference scan.
    int* d_vector;
    cudaMalloc((void **) &d_vector, bytes);
    checkCudaError();
    int* d_result;
    cudaMalloc((void **) &d_result, bytes);
    checkCudaError();
    int* d_vectorCheck;
    cudaMalloc((void **) &d_vectorCheck, bytes);
    checkCudaError();
    int* d_resultCheck;
    cudaMalloc((void **) &d_resultCheck, bytes);
    checkCudaError();

    // Both scans start from the same input data.
    // checkCudaError() is the file's existing error hook (presumably it
    // inspects the last CUDA runtime error -- confirm); it is called after
    // each copy so a failed transfer is not silently ignored.
    cudaMemcpy(d_vector, vector, bytes, cudaMemcpyHostToDevice);
    checkCudaError();
    cudaMemcpy(d_vectorCheck, vector, bytes, cudaMemcpyHostToDevice);
    checkCudaError();

    uint numThreads, numBlocks;
    computeGridSize(iDivUp(size,VT),NTHREADS,numBlocks,numThreads);

    printf("Start kernel\n");

    // Time our scan with CUDA events.
    cudaEvent_t gstart_exScan, gstop_exScan;
    cudaEventCreate(&gstart_exScan);
    cudaEventCreate(&gstop_exScan);
    cudaEventRecord(gstart_exScan, 0);

    exclusiveScan_wrapper2(numBlocks, numThreads, d_result, d_vector, size, VT);

    cudaEventRecord(gstop_exScan, 0);
    cudaEventSynchronize(gstop_exScan);
    float gpu_time_exScan;
    cudaEventElapsedTime(&gpu_time_exScan, gstart_exScan, gstop_exScan);
    printf("Our GPU version has finished, it took %f ms\n",gpu_time_exScan );
    cudaEventDestroy(gstart_exScan);
    cudaEventDestroy(gstop_exScan);
    checkCudaError();

    // Time the Thrust reference scan the same way.
    cudaEvent_t gstart, gstop;
    cudaEventCreate(&gstart);
    cudaEventCreate(&gstop);
    cudaEventRecord(gstart, 0);

    exclusiveScan_thrust(d_vectorCheck, d_vectorCheck + size, d_resultCheck, 0);

    cudaEventRecord(gstop, 0);
    cudaEventSynchronize(gstop);
    float gpu_time;
    cudaEventElapsedTime(&gpu_time, gstart, gstop);
    printf("Thrust version has finished, it took %f ms\n",gpu_time );
    cudaEventDestroy(gstart);
    cudaEventDestroy(gstop);
    checkCudaError();

    printf("End kernel\n");

    // Bring both results back to the host for comparison.
    cudaMemcpy(vector, d_result, bytes, cudaMemcpyDeviceToHost);
    checkCudaError();
    cudaMemcpy(vectorCheck, d_resultCheck, bytes, cudaMemcpyDeviceToHost);
    checkCudaError();

    printf("Difference %d\n", vectorsDifference(vector,vectorCheck,size));

    // NOTE(review): a return value of 0 from areVectorsEqual() is treated as
    // "equal", exactly as before -- confirm against the helper's definition.
    if (areVectorsEqual(vector,vectorCheck,size) == 0)
        printf("Vectors are equal!!\n");
    else
        printf("Vectors are NOT equal :( \n");

    checkCudaError();

    cudaFree(d_vector);
    cudaFree(d_result);
    cudaFree(d_resultCheck);
    cudaFree(d_vectorCheck);
    free(vector);
    free(vectorCheck);

    return 0;
}