/** @deprecated : use createGPUmodulefromfile */ CUmodule * CudaCompiler::compileFromFile(const String fileName, GPU * gpu) { if(fileName == NULL) { return 0; } QFileInfo cu( fileName ); if (! cu.isReadable()) { return 0; } else { /* Get filename */ int result; String res; compileCUtoPTX( &fileName, result, &res ); if(result != NVCC_OK ) { qDebug() << "Error during compilation from file " << fileName << " [" << result <<"]"; return 0; } else { qDebug()<< " cu -> ptx ok"; } String ptxfile = cu.baseName() + ".ptx"; QFile file(ptxfile); QFileInfo ptx(ptxfile); if (ptx.isReadable()) { ByteArray * bar = ptxDump(&file); return compilePTX((uchar*) bar->data(), gpu); delete bar; } } return NULL; }
Pte * walkpgmap(Pml4e *pgmap, void *va, int alloc) { Pdpe *pdirpt; Pde *pgdir; Pte *pgtab; pdirpt = pgmapget(pgmap, pmx(va), alloc); if (pdirpt == nil) return nil; pgdir = pgmapget(pdirpt, pdpx(va), alloc); if (pgdir == nil) return nil; pgtab = pgmapget(pgdir, pdx(va), alloc); if (pgtab == nil) return nil; return &pgtab[ptx(va)]; }
/************************************************************************* Testing Nearest Neighbor Search on uniformly distributed hypercube NormType: 0, 1, 2 D: space dimension N: points count *************************************************************************/ static void testkdtuniform(const ap::real_2d_array& xy, const int& n, const int& nx, const int& ny, const int& normtype, bool& kdterrors) { double errtol; ap::integer_1d_array tags; ap::real_1d_array ptx; ap::real_1d_array tmpx; ap::boolean_1d_array tmpb; kdtree treex; kdtree treexy; kdtree treext; ap::real_2d_array qx; ap::real_2d_array qxy; ap::integer_1d_array qtags; ap::real_1d_array qr; int kx; int kxy; int kt; int kr; double eps; int i; int j; int k; int task; bool isequal; double r; int q; int qcount; qcount = 10; // // Tol - roundoff error tolerance (for '>=' comparisons) // errtol = 100000*ap::machineepsilon; // // fill tags // tags.setlength(n); for(i = 0; i <= n-1; i++) { tags(i) = i; } // // build trees // kdtreebuild(xy, n, nx, 0, normtype, treex); kdtreebuild(xy, n, nx, ny, normtype, treexy); kdtreebuildtagged(xy, tags, n, nx, 0, normtype, treext); // // allocate arrays // tmpx.setlength(nx); tmpb.setlength(n); qx.setlength(n, nx); qxy.setlength(n, nx+ny); qtags.setlength(n); qr.setlength(n); ptx.setlength(nx); // // test general K-NN queries (with self-matches): // * compare results from different trees (must be equal) and // check that correct (value,tag) pairs are returned // * test results from XT tree - let R be radius of query result. // then all points not in result must be not closer than R. // for(q = 1; q <= qcount; q++) { // // Select K: 1..N // if( ap::fp_greater(ap::randomreal(),0.5) ) { k = 1+ap::randominteger(n); } else { k = 1; } // // Select point (either one of the points, or random) // if( ap::fp_greater(ap::randomreal(),0.5) ) { i = ap::randominteger(n); ap::vmove(&ptx(0), 1, &xy(i, 0), 1, ap::vlen(0,nx-1)); } else { for(i = 0; i <= nx-1; i++) { ptx(i) = 2*ap::randomreal()-1; } } // // Test: // * consistency of results from different queries // * points in query are IN the R-sphere (or at the boundary), // and points not in query are outside of the R-sphere (or at the boundary) // * distances are correct and are ordered // kx = kdtreequeryknn(treex, ptx, k, true); kxy = kdtreequeryknn(treexy, ptx, k, true); kt = kdtreequeryknn(treext, ptx, k, true); if( kx!=k||kxy!=k||kt!=k ) { kdterrors = true; return; } kx = 0; kxy = 0; kt = 0; kdtreequeryresultsx(treex, qx, kx); kdtreequeryresultsxy(treexy, qxy, kxy); kdtreequeryresultstags(treext, qtags, kt); kdtreequeryresultsdistances(treext, qr, kr); if( kx!=k||kxy!=k||kt!=k||kr!=k ) { kdterrors = true; return; } kdterrors = kdterrors||kdtresultsdifferent(xy, n, qx, qxy, qtags, k, nx, ny); for(i = 0; i <= n-1; i++) { tmpb(i) = true; } r = 0; for(i = 0; i <= k-1; i++) { tmpb(qtags(i)) = false; ap::vmove(&tmpx(0), 1, &ptx(0), 1, ap::vlen(0,nx-1)); ap::vsub(&tmpx(0), 1, &qx(i, 0), 1, ap::vlen(0,nx-1)); r = ap::maxreal(r, vnorm(tmpx, nx, normtype)); } for(i = 0; i <= n-1; i++) { if( tmpb(i) ) { ap::vmove(&tmpx(0), 1, &ptx(0), 1, ap::vlen(0,nx-1)); ap::vsub(&tmpx(0), 1, &xy(i, 0), 1, ap::vlen(0,nx-1)); kdterrors = kdterrors||ap::fp_less(vnorm(tmpx, nx, normtype),r*(1-errtol)); } } for(i = 0; i <= k-2; i++) { kdterrors = kdterrors||ap::fp_greater(qr(i),qr(i+1)); } for(i = 0; i <= k-1; i++) { ap::vmove(&tmpx(0), 1, &ptx(0), 1, ap::vlen(0,nx-1)); ap::vsub(&tmpx(0), 1, &xy(qtags(i), 0), 1, ap::vlen(0,nx-1)); kdterrors = kdterrors||ap::fp_greater(fabs(vnorm(tmpx, nx, normtype)-qr(i)),errtol); } } // // test general approximate K-NN queries (with self-matches): // * compare results from different trees (must be equal) and // check that correct (value,tag) pairs are returned // * test results from XT tree - let R be radius of query result. // then all points not in result must be not closer than R/(1+Eps). // for(q = 1; q <= qcount; q++) { // // Select K: 1..N // if( ap::fp_greater(ap::randomreal(),0.5) ) { k = 1+ap::randominteger(n); } else { k = 1; } // // Select Eps // eps = 0.5+ap::randomreal(); // // Select point (either one of the points, or random) // if( ap::fp_greater(ap::randomreal(),0.5) ) { i = ap::randominteger(n); ap::vmove(&ptx(0), 1, &xy(i, 0), 1, ap::vlen(0,nx-1)); } else { for(i = 0; i <= nx-1; i++) { ptx(i) = 2*ap::randomreal()-1; } } // // Test: // * consistency of results from different queries // * points in query are IN the R-sphere (or at the boundary), // and points not in query are outside of the R-sphere (or at the boundary) // * distances are correct and are ordered // kx = kdtreequeryaknn(treex, ptx, k, true, eps); kxy = kdtreequeryaknn(treexy, ptx, k, true, eps); kt = kdtreequeryaknn(treext, ptx, k, true, eps); if( kx!=k||kxy!=k||kt!=k ) { kdterrors = true; return; } kx = 0; kxy = 0; kt = 0; kdtreequeryresultsx(treex, qx, kx); kdtreequeryresultsxy(treexy, qxy, kxy); kdtreequeryresultstags(treext, qtags, kt); kdtreequeryresultsdistances(treext, qr, kr); if( kx!=k||kxy!=k||kt!=k||kr!=k ) { kdterrors = true; return; } kdterrors = kdterrors||kdtresultsdifferent(xy, n, qx, qxy, qtags, k, nx, ny); for(i = 0; i <= n-1; i++) { tmpb(i) = true; } r = 0; for(i = 0; i <= k-1; i++) { tmpb(qtags(i)) = false; ap::vmove(&tmpx(0), 1, &ptx(0), 1, ap::vlen(0,nx-1)); ap::vsub(&tmpx(0), 1, &qx(i, 0), 1, ap::vlen(0,nx-1)); r = ap::maxreal(r, vnorm(tmpx, nx, normtype)); } for(i = 0; i <= n-1; i++) { if( tmpb(i) ) { ap::vmove(&tmpx(0), 1, &ptx(0), 1, ap::vlen(0,nx-1)); ap::vsub(&tmpx(0), 1, &xy(i, 0), 1, ap::vlen(0,nx-1)); kdterrors = kdterrors||ap::fp_less(vnorm(tmpx, nx, normtype),r*(1-errtol)/(1+eps)); } } for(i = 0; i <= k-2; i++) { kdterrors = kdterrors||ap::fp_greater(qr(i),qr(i+1)); } for(i = 0; i <= k-1; i++) { ap::vmove(&tmpx(0), 1, &ptx(0), 1, ap::vlen(0,nx-1)); ap::vsub(&tmpx(0), 1, &xy(qtags(i), 0), 1, ap::vlen(0,nx-1)); kdterrors = kdterrors||ap::fp_greater(fabs(vnorm(tmpx, nx, normtype)-qr(i)),errtol); } } // // test general R-NN queries (with self-matches): // * compare results from different trees (must be equal) and // check that correct (value,tag) pairs are returned // * test results from XT tree - let R be radius of query result. // then all points not in result must be not closer than R. // for(q = 1; q <= qcount; q++) { // // Select R // if( ap::fp_greater(ap::randomreal(),0.3) ) { r = ap::maxreal(ap::randomreal(), ap::machineepsilon); } else { r = ap::machineepsilon; } // // Select point (either one of the points, or random) // if( ap::fp_greater(ap::randomreal(),0.5) ) { i = ap::randominteger(n); ap::vmove(&ptx(0), 1, &xy(i, 0), 1, ap::vlen(0,nx-1)); } else { for(i = 0; i <= nx-1; i++) { ptx(i) = 2*ap::randomreal()-1; } } // // Test: // * consistency of results from different queries // * points in query are IN the R-sphere (or at the boundary), // and points not in query are outside of the R-sphere (or at the boundary) // * distances are correct and are ordered // kx = kdtreequeryrnn(treex, ptx, r, true); kxy = kdtreequeryrnn(treexy, ptx, r, true); kt = kdtreequeryrnn(treext, ptx, r, true); if( kxy!=kx||kt!=kx ) { kdterrors = true; return; } kx = 0; kxy = 0; kt = 0; kdtreequeryresultsx(treex, qx, kx); kdtreequeryresultsxy(treexy, qxy, kxy); kdtreequeryresultstags(treext, qtags, kt); kdtreequeryresultsdistances(treext, qr, kr); if( kxy!=kx||kt!=kx||kr!=kx ) { kdterrors = true; return; } kdterrors = kdterrors||kdtresultsdifferent(xy, n, qx, qxy, qtags, kx, nx, ny); for(i = 0; i <= n-1; i++) { tmpb(i) = true; } for(i = 0; i <= kx-1; i++) { tmpb(qtags(i)) = false; } for(i = 0; i <= n-1; i++) { ap::vmove(&tmpx(0), 1, &ptx(0), 1, ap::vlen(0,nx-1)); ap::vsub(&tmpx(0), 1, &xy(i, 0), 1, ap::vlen(0,nx-1)); if( tmpb(i) ) { kdterrors = kdterrors||ap::fp_less(vnorm(tmpx, nx, normtype),r*(1-errtol)); } else { kdterrors = kdterrors||ap::fp_greater(vnorm(tmpx, nx, normtype),r*(1+errtol)); } } for(i = 0; i <= kx-2; i++) { kdterrors = kdterrors||ap::fp_greater(qr(i),qr(i+1)); } } // // Test self-matching: // * self-match - nearest neighbor of each point in XY is the point itself // * no self-match - nearest neighbor is NOT the point itself // if( n>1 ) { // // test for N=1 have non-general form, but it is not really needed // for(task = 0; task <= 1; task++) { for(i = 0; i <= n-1; i++) { ap::vmove(&ptx(0), 1, &xy(i, 0), 1, ap::vlen(0,nx-1)); kx = kdtreequeryknn(treex, ptx, 1, task==0); kdtreequeryresultsx(treex, qx, kx); if( kx!=1 ) { kdterrors = true; return; } isequal = true; for(j = 0; j <= nx-1; j++) { isequal = isequal&&ap::fp_eq(qx(0,j),ptx(j)); } if( task==0 ) { kdterrors = kdterrors||!isequal; } else { kdterrors = kdterrors||isequal; } } } } }
GPUModule * CudaCompiler::createGPUModuleFromFile(const String fileName, GPU * gpu) { qDebug() << "createGPUModuleFromFile " << fileName; if(fileName == NULL) { return 0; } FileInfo kcu = FileInfo( fileName ); if (! kcu.isReadable()){ qDebug() << "can't read "<< fileName; return 0; } { int result; String res; compileCUtoPTX( &fileName, result, &res ); if(result != NVCC_OK ) { qDebug() << "Error during module compilation of " << fileName << " \n [" << result <<"]"; return 0; } // Cuda Kernel compiled. // qDebug()<< " cu -> ptx ok"; // find the kernel main function // the first __global__ one is considered as the entry point QFile file(fileName); if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) { qDebug() << "could not open "<< fileName; return NULL; } // extract the main kernel function QStringList list; QStringList signature; String content(file.readAll()); // to strip the comments out : //QRegExp lcomments("//.*\n"); //QRegExp ncomments("/\\*.*\\*/"); //lcomments.setMinimal(true); //ncomments.setMinimal(true); //content.remove(lcomments); //content.remove(ncomments); list = content.split(QRegExp("(\\s+,?\\s*)|(,\\s+)|\\s*\\)\\s*|\\s*\\(\\s*|\n+")); // ensure the kernel is declared as [extern "C"] int i=0; i=list.indexOf("extern"); if(i==-1 || list.at(i+1)!="\"C\"") { qDebug() << fileName << "\nThere is no \'extern \"C\"\' entry point in this kernel." << "\nthey are mandatory to prevent gcc name mangling (ie keep the function name in PTX)"; return NULL; } QString entryPoint; i=list.indexOf("__global__"); // return type entryPoint=list.at(i+2); // except if returning an unsigned variable //qDebug() << "=== " << entryPoint; i++; QString tok; while(list.at(i)!="{" && i<list.size()-1) { tok=list.at(i); if(tok=="unsigned") { tok="u"+list.at(i+1); i++; } if(list.at(i+1).startsWith("*")) tok+="*"; signature << tok; i+=1; } qDebug() << entryPoint << ":" << signature; file.close(); String ptxfilename = kcu.baseName() + ".ptx"; QFile ptxfile(ptxfilename); QFileInfo ptx(ptxfilename); if (ptx.isReadable()) { ByteArray * bar = ptxDump(&ptxfile); CUmodule * module = compilePTX((uchar*) bar->data(), gpu); delete bar; CUfunction * func = new CUfunction(); CUDCHK( cuModuleGetFunction( func , *module, entryPoint.toStdString().c_str()) ); GPUModule * gm = new GPUModule(entryPoint, signature, new FileInfo(kcu), func, module, content); return gm; } else { qDebug() << "generated ptx file is not readable : "<< fileName; return NULL; } } }