// Fills `info` with this key's full-information record and copies the key's
// class string into `keycls`.
//
//   info   - record to populate; every field written below is overwritten.
//   keycls - receives a copy of the member `cls`.
//
// LastWriteTime and TitleIndex are always reported as zero here.
void regkey_t::query( KEY_FULL_INFORMATION& info, UNICODE_STRING& keycls )
{
    trace("full information\n");

    // Constant header fields.
    info.TitleIndex = 0;
    info.LastWriteTime.QuadPart = 0LL;

    // Class string description: its length comes from `cls`, its offset is
    // the position of the Class member inside the structure.
    info.ClassLength = cls.Length;
    info.ClassOffset = FIELD_OFFSET( KEY_FULL_INFORMATION, Class );

    // Counts are returned by the helpers; the Max* fields are passed in as
    // arguments (presumably filled in by the helpers as out-parameters --
    // confirm against their declarations).
    info.SubKeys = num_subkeys( info.MaxNameLen, info.MaxClassLen );
    info.Values = num_values( info.MaxValueNameLen, info.MaxValueDataLen );

    keycls = cls;
    trace("class = %pus\n", &cls );
}
void CsrMatrixGpu::createStructure(const Triangle1* const elements, const size_t num_elem) { const size_t max_rowlength(20); size_t* num_nonzeros = new size_t[_numrows]; for (size_t i(0); i < _numrows; ++i) num_nonzeros[i] = 0; size_t* colind = new size_t[max_rowlength*_numrows]; for (size_t i(0); i < num_elem; ++i) { size_t nodes[3]; nodes[0] = elements[i].nodeA; nodes[1] = elements[i].nodeB; nodes[2] = elements[i].nodeC; for (size_t node1(0); node1 < 3; ++node1) { for (size_t node2(0); node2 < 3; ++node2) { size_t a(nodes[node1]); size_t b(nodes[node2]); size_t j(0); while (j < num_nonzeros[a] && colind[a*max_rowlength + j] != b) ++j; if (num_nonzeros[a] == j) { ++(num_nonzeros[a]); assert(num_nonzeros[a] <= max_rowlength); colind[a*max_rowlength + j] = b; } } } } for (size_t i(0); i < _numrows; ++i) for (size_t a(num_nonzeros[i]-1); a > 0; --a) for (size_t b(0); b < a; ++b) if (colind[i*max_rowlength + b] > colind[i*max_rowlength + b+1]) { size_t tmp(colind[i*max_rowlength + b]); colind[i*max_rowlength + b] = colind[i*max_rowlength + b+1]; colind[i*max_rowlength + b+1] = tmp; } size_t* h_rowptr = new size_t[_numrows+1]; size_t num_values(0); for (size_t i(0); i < _numrows; ++i) { h_rowptr[i] = num_values; num_values += num_nonzeros[i]; } h_rowptr[_numrows] = num_values; free_cuda(_colind); malloc_cuda(&_colind, num_values*sizeof(size_t)); size_t* h_colind = new size_t[num_values]; size_t current_pos(0); for (size_t row(0); row < _numrows; ++row) for (size_t col(0); col < num_nonzeros[row]; ++col) h_colind[current_pos++] = colind[row*max_rowlength + col]; free_cuda(_values); malloc_cuda(&_values, num_values*sizeof(float)); float* h_values = new float[num_values]; for (size_t i(0); i < num_values; ++i) h_values[i] = 0.0; memcpy_cuda(_colind, h_colind, num_values*sizeof(size_t), h2d); memcpy_cuda(_rowptr, h_rowptr, (_numrows+1)*sizeof(size_t), h2d); memcpy_cuda(_values, h_values, num_values*sizeof(float), h2d); delete[] num_nonzeros; delete[] colind; delete[] 
h_rowptr; delete[] h_colind; delete[] h_values; //cudaDeviceSynchronize(); // needed? }