/*
 * Program entry point.
 *
 * Converts argv[1] and argv[2] into two matrix_t values with str2mat()
 * (passing ":" and "," as its two separator arguments), combines them with
 * pmat(), then prints both operands followed by the result under the
 * headings "A=", "B=" and "A*B=".
 *
 * Returns 0 on success, 1 when fewer than two matrix arguments are supplied
 * or when pmat() hands back a null pointer.
 */
int main(int argc, char *argv[])
{
    matrix_t mat[2]; /* A, B */
    matrix_t *C;
    int i;
    int m;

    /* Both operands are mandatory: argv[1] and argv[2] are read below. */
    if (argc < 3) {
        fprintf(stderr, "usage: %s <matrix A> <matrix B>\n",
                (argc > 0 && argv[0]) ? argv[0] : "program");
        return 1;
    }

    for (i = 0; i < 2; i++) {
        str2mat(argv[i + 1], &(mat[i]), ":", ",");
    }

    C = pmat(&(mat[0]), &(mat[1]));

    /* Echo the operands, one row per pdarray() call. */
    for (i = 0; i < 2; i++) {
        fputs(i == 0 ? "A=\n" : "B=\n", stdout);
        for (m = 0; m < mat[i].row; m++) {
            pdarray((mat[i].value)[m], mat[i].column);
        }
    }

    /* NOTE(review): assumes pmat() signals failure with a null pointer - confirm. */
    if (!C) {
        fprintf(stderr, "error: could not compute A*B\n");
        return 1;
    }

    printf("A*B=\n");
    for (m = 0; m < C->row; m++) {
        pdarray((C->value)[m], C->column);
    }

    return 0;
}
void Ssao::runMain(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx) { CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer; cmdb->bindShaderProgram(m_main.m_grProg); // Bind resources cmdb->bindSampler(0, 0, m_r->getSamplers().m_trilinearClamp); cmdb->bindSampler(0, 1, m_r->getSamplers().m_trilinearRepeat); rgraphCtx.bindTexture(0, 2, m_r->getDepthDownscale().getHiZRt(), HIZ_HALF_DEPTH); cmdb->bindTexture(0, 3, m_main.m_noiseTex->getGrTextureView(), TextureUsageBit::SAMPLED_FRAGMENT); if(m_useNormal) { rgraphCtx.bindColorTexture(0, 4, m_r->getGBuffer().getColorRt(2)); } struct Unis { Vec4 m_unprojectionParams; Vec4 m_projectionMat; Mat3x4 m_viewRotMat; } unis; const Mat4& pmat = ctx.m_renderQueue->m_projectionMatrix; unis.m_unprojectionParams = ctx.m_unprojParams; unis.m_projectionMat = Vec4(pmat(0, 0), pmat(1, 1), pmat(2, 2), pmat(2, 3)); unis.m_viewRotMat = Mat3x4(ctx.m_renderQueue->m_viewMatrix.getRotationPart()); cmdb->setPushConstants(&unis, sizeof(unis)); if(m_useCompute) { rgraphCtx.bindImage(0, 5, m_runCtx.m_rts[0], TextureSubresourceInfo()); const U sizeX = (m_width + m_workgroupSize[0] - 1) / m_workgroupSize[0]; const U sizeY = (m_height + m_workgroupSize[1] - 1) / m_workgroupSize[1]; cmdb->dispatchCompute(sizeX, sizeY, 1); } else { cmdb->setViewport(0, 0, m_width, m_height); drawQuad(cmdb); } }
void f() { int i,j; double A[M*N]; Assignment result; for( i=0; i<M*N; i++) A[i] = fabs( ((int)rand())/1024./1024./1024. ); //printf("*************\n"); //pmat( A, M,N ); result = match( A, M, N ); printf("----\n"); pmat( A, M,N ); for( i=0; i<result.n; i++ ) printf( "%d --> %g\n", i, result.assignment[i] ); printf("Cost: %g\n", result.cost); }
//#define BUG 0 void fdjac2(void (*fcn)(int, int, double[], double[], int *, void *), int m, int n, double x[], double fvec[], double fjac[], int ldfjac, int *iflag, double epsfcn, double wa[], void *data) { int i; int j; int ij; double eps; double h; double temp; //static double zero = 0.0; //dkb - changed to equal TvdB solver code, use constant delta = 1e-5 // temp = dmax1(epsfcn,MACHEP); // eps = sqrt(temp); JACOBIAN = 1; eps = 1e-5; #ifdef BUG printf( "fdjac2\n" ); #endif ij = 0; for (j=0; j<n; j++) { temp = x[j]; // h = eps * fabs(temp); // if (h == zero) // h = eps; h = eps; // added dkb x[j] = temp + h; fcn(m,n,x,wa,iflag, data); x[j] = temp; if (*iflag < 0) return; for (i=0; i<m; i++) { fjac[ij] = (wa[i] - fvec[i])/h; ij += 1; /* fjac[i+m*j] */ } } #ifdef BUG printf("jacobian:\n"); pmat( m, n, fjac ); for (i = 0; i < m*n; i++) printf("%6e ", fjac[i]); #endif JACOBIAN = 0; }
void GSRenderer::VSync(int field) { GSPerfMonAutoTimer pmat(&m_perfmon); m_perfmon.Put(GSPerfMon::Frame); Flush(); if(s_dump && s_n >= s_saven) { m_regs->Dump(root_sw + format("%05d_f%lld_gs_reg.txt", s_n, m_perfmon.GetFrame())); } if(!m_dev->IsLost(true)) { if(!Merge(field ? 1 : 0)) { return; } } else { ResetDevice(); } m_dev->AgePool(); // osd if((m_perfmon.GetFrame() & 0x1f) == 0) { m_perfmon.Update(); double fps = 1000.0f / m_perfmon.Get(GSPerfMon::Frame); string s; #ifdef GSTITLEINFO_API_FORCE_VERBOSE if(1)//force verbose reply #else if(m_wnd->IsManaged()) #endif { //GSdx owns the window's title, be verbose. string s2 = m_regs->SMODE2.INT ? (string("Interlaced ") + (m_regs->SMODE2.FFMD ? "(frame)" : "(field)")) : "Progressive"; s = format( "%lld | %d x %d | %.2f fps (%d%%) | %s - %s | %s | %d S/%d P/%d D | %d%% CPU | %.2f | %.2f", m_perfmon.GetFrame(), GetInternalResolution().x, GetInternalResolution().y, fps, (int)(100.0 * fps / GetTvRefreshRate()), s2.c_str(), theApp.m_gs_interlace[m_interlace].name.c_str(), theApp.m_gs_aspectratio[m_aspectratio].name.c_str(), (int)m_perfmon.Get(GSPerfMon::SyncPoint), (int)m_perfmon.Get(GSPerfMon::Prim), (int)m_perfmon.Get(GSPerfMon::Draw), m_perfmon.CPU(), m_perfmon.Get(GSPerfMon::Swizzle) / 1024, m_perfmon.Get(GSPerfMon::Unswizzle) / 1024 ); double fillrate = m_perfmon.Get(GSPerfMon::Fillrate); if(fillrate > 0) { s += format(" | %.2f mpps", fps * fillrate / (1024 * 1024)); int sum = 0; for(int i = 0; i < 16; i++) { sum += m_perfmon.CPU(GSPerfMon::WorkerDraw0 + i); } s += format(" | %d%% CPU", sum); } } else { // Satisfy PCSX2's request for title info: minimal verbosity due to more external title text s = format("%dx%d | %s", GetInternalResolution().x, GetInternalResolution().y, theApp.m_gs_interlace[m_interlace].name.c_str()); } if(m_capture.IsCapturing()) { s += " | Recording..."; } if(m_wnd->IsManaged()) { m_wnd->SetWindowText(s.c_str()); } else { // note: do not use TryEnterCriticalSection. 
It is unnecessary code complication in // an area that absolutely does not matter (even if it were 100 times slower, it wouldn't // be noticeable). Besides, these locks are extremely short -- overhead of conditional // is way more expensive than just waiting for the CriticalSection in 1 of 10,000,000 tries. --air std::lock_guard<std::mutex> lock(m_pGSsetTitle_Crit); strncpy(m_GStitleInfoBuffer, s.c_str(), countof(m_GStitleInfoBuffer) - 1); m_GStitleInfoBuffer[sizeof(m_GStitleInfoBuffer) - 1] = 0; // make sure null terminated even if text overflows } } else { // [TODO] // We don't have window title rights, or the window has no title, // so let's use actual OSD! } if(m_frameskip) { return; } // present #if 0 // This will scale the OSD to the PS2's output resolution. // Will be affected by 2x, 4x, etc scaling. m_dev->m_osd.m_real_size = m_real_size #elif 0 // This will scale the OSD to the window's size. // Will maintiain the font size no matter what size the window is. GSVector4i window_size = m_wnd->GetClientRect(); m_dev->m_osd.m_real_size.x = window_size.v[2]; m_dev->m_osd.m_real_size.y = window_size.v[3]; #else // This will scale the OSD to the native resolution. // Will size font relative to the window's size. 
// TODO this should probably be done with native calls m_dev->m_osd.m_real_size.x = 1024; m_dev->m_osd.m_real_size.y = 768; #endif m_dev->Present(m_wnd->GetClientRect().fit(m_aspectratio), m_shader); // snapshot if(!m_snapshot.empty()) { if(!m_dump && m_shift_key) { GSFreezeData fd = {0, nullptr}; Freeze(&fd, true); fd.data = new uint8[fd.size]; Freeze(&fd, false); #ifdef LZMA_SUPPORTED if (m_control_key) m_dump = std::unique_ptr<GSDumpBase>(new GSDump(m_snapshot, m_crc, fd, m_regs)); else m_dump = std::unique_ptr<GSDumpBase>(new GSDumpXz(m_snapshot, m_crc, fd, m_regs)); #else m_dump = std::unique_ptr<GSDumpBase>(new GSDump(m_snapshot, m_crc, fd, m_regs)); #endif delete [] fd.data; } if(GSTexture* t = m_dev->GetCurrent()) { t->Save(m_snapshot + ".bmp"); } m_snapshot.clear(); } else if(m_dump) { if(m_dump->VSync(field, !m_control_key, m_regs)) m_dump.reset(); } // capture if(m_capture.IsCapturing()) { if(GSTexture* current = m_dev->GetCurrent()) { GSVector2i size = m_capture.GetSize(); if(GSTexture* offscreen = m_dev->CopyOffscreen(current, GSVector4(0, 0, 1, 1), size.x, size.y)) { GSTexture::GSMap m; if(offscreen->Map(m)) { m_capture.DeliverFrame(m.bits, m.pitch, !m_dev->IsRBSwapped()); offscreen->Unmap(); } m_dev->Recycle(offscreen); } } } }
void GSRenderer::VSync(int field) { GSPerfMonAutoTimer pmat(&m_perfmon); m_perfmon.Put(GSPerfMon::Frame); Flush(); if(!m_dev->IsLost(true)) { if(!Merge(field ? 1 : 0)) { return; } } else { ResetDevice(); } m_dev->AgePool(); // osd if((m_perfmon.GetFrame() & 0x1f) == 0) { m_perfmon.Update(); double fps = 1000.0f / m_perfmon.Get(GSPerfMon::Frame); string s; #ifdef GSTITLEINFO_API_FORCE_VERBOSE if(1)//force verbose reply #else if(m_wnd->IsManaged()) #endif { //GSdx owns the window's title, be verbose. string s2 = m_regs->SMODE2.INT ? (string("Interlaced ") + (m_regs->SMODE2.FFMD ? "(frame)" : "(field)")) : "Progressive"; s = format( "%lld | %d x %d | %.2f fps (%d%%) | %s - %s | %s | %d S/%d P/%d D | %d%% CPU | %.2f | %.2f", m_perfmon.GetFrame(), GetInternalResolution().x, GetInternalResolution().y, fps, (int)(100.0 * fps / GetTvRefreshRate()), s2.c_str(), theApp.m_gs_interlace[m_interlace].name.c_str(), theApp.m_gs_aspectratio[m_aspectratio].name.c_str(), (int)m_perfmon.Get(GSPerfMon::SyncPoint), (int)m_perfmon.Get(GSPerfMon::Prim), (int)m_perfmon.Get(GSPerfMon::Draw), m_perfmon.CPU(), m_perfmon.Get(GSPerfMon::Swizzle) / 1024, m_perfmon.Get(GSPerfMon::Unswizzle) / 1024 ); double fillrate = m_perfmon.Get(GSPerfMon::Fillrate); if(fillrate > 0) { s += format(" | %.2f mpps", fps * fillrate / (1024 * 1024)); int sum = 0; for(int i = 0; i < 16; i++) { sum += m_perfmon.CPU(GSPerfMon::WorkerDraw0 + i); } s += format(" | %d%% CPU", sum); } } else { // Satisfy PCSX2's request for title info: minimal verbosity due to more external title text s = format("%dx%d | %s", GetInternalResolution().x, GetInternalResolution().y, theApp.m_gs_interlace[m_interlace].name.c_str()); } if(m_capture.IsCapturing()) { s += " | Recording..."; } if(m_wnd->IsManaged()) { m_wnd->SetWindowText(s.c_str()); } else { // note: do not use TryEnterCriticalSection. 
It is unnecessary code complication in // an area that absolutely does not matter (even if it were 100 times slower, it wouldn't // be noticeable). Besides, these locks are extremely short -- overhead of conditional // is way more expensive than just waiting for the CriticalSection in 1 of 10,000,000 tries. --air std::lock_guard<std::mutex> lock(m_pGSsetTitle_Crit); strncpy(m_GStitleInfoBuffer, s.c_str(), countof(m_GStitleInfoBuffer) - 1); m_GStitleInfoBuffer[sizeof(m_GStitleInfoBuffer) - 1] = 0; // make sure null terminated even if text overflows } } else { // [TODO] // We don't have window title rights, or the window has no title, // so let's use actual OSD! } if(m_frameskip) { return; } // present m_dev->Present(m_wnd->GetClientRect().fit(m_aspectratio), m_shader); // snapshot if(!m_snapshot.empty()) { bool shift = false; #ifdef _WIN32 shift = !!(::GetAsyncKeyState(VK_SHIFT) & 0x8000); #else shift = m_shift_key; #endif if(!m_dump && shift) { GSFreezeData fd; fd.size = 0; fd.data = NULL; Freeze(&fd, true); fd.data = new uint8[fd.size]; Freeze(&fd, false); m_dump.Open(m_snapshot, m_crc, fd, m_regs); delete [] fd.data; } if(GSTexture* t = m_dev->GetCurrent()) { t->Save(m_snapshot + ".bmp"); } m_snapshot.clear(); } else { if(m_dump) { bool control = false; #ifdef _WIN32 control = !!(::GetAsyncKeyState(VK_CONTROL) & 0x8000); #else control = m_control_key; #endif m_dump.VSync(field, !control, m_regs); } } // capture if(m_capture.IsCapturing()) { if(GSTexture* current = m_dev->GetCurrent()) { GSVector2i size = m_capture.GetSize(); if(GSTexture* offscreen = m_dev->CopyOffscreen(current, GSVector4(0, 0, 1, 1), size.x, size.y)) { GSTexture::GSMap m; if(offscreen->Map(m)) { m_capture.DeliverFrame(m.bits, m.pitch, !m_dev->IsRBSwapped()); offscreen->Unmap(); } m_dev->Recycle(offscreen); } } } }
void DisplayDeviceOpenGL::render(const Renderable* r) const { if(!r->isEnabled()) { // Renderable item not enabled then early return. return; } StencilScopePtr stencil_scope; if(r->hasClipSettings()) { ModelManager2D mm(r->getPosition().x, r->getPosition().y); auto clip_shape = r->getStencilMask(); bool cam_set = false; if(clip_shape->getCamera() == nullptr && r->getCamera() != nullptr) { cam_set = true; clip_shape->setCamera(r->getCamera()); } stencil_scope.reset(new StencilScopeOGL(r->getStencilSettings())); glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); glDepthMask(GL_FALSE); glClear(GL_STENCIL_BUFFER_BIT); render(clip_shape.get()); stencil_scope->applyNewSettings(keep_stencil_settings); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glDepthMask(GL_TRUE); if(cam_set) { clip_shape->setCamera(nullptr); } } auto shader = r->getShader(); shader->makeActive(); BlendEquationScopeOGL be_scope(*r); BlendModeScopeOGL bm_scope(*r); // apply lighting/depth check/depth write here. bool use_lighting = r->isLightingStateSet() ? r->useLighting() : false; // Set the depth enable. if(r->isDepthEnableStateSet()) { if(get_current_depth_enable() != r->isDepthEnabled()) { if(r->isDepthEnabled()) { glEnable(GL_DEPTH_TEST); } else { glDisable(GL_DEPTH_TEST); } get_current_depth_enable() = r->isDepthEnabled(); } } else { // We assume that depth is disabled if not specified. if(get_current_depth_enable() == true) { glDisable(GL_DEPTH_TEST); get_current_depth_enable() = false; } } glm::mat4 pmat(1.0f); glm::mat4 vmat(1.0f); if(r->getCamera()) { // set camera here. pmat = r->getCamera()->getProjectionMat(); vmat = r->getCamera()->getViewMat(); } else if(get_default_camera() != nullptr) { pmat = get_default_camera()->getProjectionMat(); vmat = get_default_camera()->getViewMat(); } if(use_lighting) { for(auto lp : r->getLights()) { /// xxx need to set lights here. 
} } if(r->getRenderTarget()) { r->getRenderTarget()->apply(); } if(shader->getPUniform() != ShaderProgram::INVALID_UNIFORM) { shader->setUniformValue(shader->getPUniform(), glm::value_ptr(pmat)); } if(shader->getMvUniform() != ShaderProgram::INVALID_UNIFORM) { glm::mat4 mvmat = vmat; if(is_global_model_matrix_valid() && !r->ignoreGlobalModelMatrix()) { mvmat *= get_global_model_matrix() * r->getModelMatrix(); } else { mvmat *= r->getModelMatrix(); } shader->setUniformValue(shader->getMvUniform(), glm::value_ptr(mvmat)); } if(shader->getMvpUniform() != ShaderProgram::INVALID_UNIFORM) { glm::mat4 pvmat(1.0f); if(is_global_model_matrix_valid() && !r->ignoreGlobalModelMatrix()) { pvmat = pmat * vmat * get_global_model_matrix() * r->getModelMatrix(); } else { pvmat = pmat * vmat * r->getModelMatrix(); } shader->setUniformValue(shader->getMvpUniform(), glm::value_ptr(pvmat)); } if(shader->getColorUniform() != ShaderProgram::INVALID_UNIFORM) { if(r->isColorSet()) { shader->setUniformValue(shader->getColorUniform(), r->getColor().asFloatVector()); } else { shader->setUniformValue(shader->getColorUniform(), ColorScope::getCurrentColor().asFloatVector()); } } shader->setUniformsForTexture(r->getTexture()); // XXX we should make this either or with setting the mvp/color uniforms above. auto uniform_draw_fn = shader->getUniformDrawFunction(); if(uniform_draw_fn) { uniform_draw_fn(shader); } // Loop through uniform render variables and set them. /*for(auto& urv : r->UniformRenderVariables()) { for(auto& rvd : urv->VariableDescritionList()) { auto rvdd = std::dynamic_pointer_cast<RenderVariableDeviceData>(rvd->GetDisplayData()); ASSERT_LOG(rvdd != nullptr, "Unable to cast DeviceData to RenderVariableDeviceData."); shader->SetUniformValue(rvdd->GetActiveMapIterator(), urv->Value()); } }*/ // Need to figure the interaction with shaders. /// XXX Need to create a mapping between attributes and the index value below. 
for(auto as : r->getAttributeSet()) { //ASSERT_LOG(as->getCount() > 0, "No (or negative) number of vertices in attribute set. " << as->getCount()); if((!as->isMultiDrawEnabled() && as->getCount() <= 0) || (as->isMultiDrawEnabled() && as->getMultiDrawCount() <= 0)) { //LOG_WARN("No (or negative) number of vertices in attribute set. " << as->getCount()); continue; } GLenum draw_mode = convert_drawing_mode(as->getDrawMode()); // apply blend, if any, from attribute set. BlendEquationScopeOGL be_scope(*as); BlendModeScopeOGL bm_scope(*as); if(shader->getColorUniform() != ShaderProgram::INVALID_UNIFORM && as->isColorSet()) { shader->setUniformValue(shader->getColorUniform(), as->getColor().asFloatVector()); } for(auto& attr : as->getAttributes()) { if(attr->isEnabled()) { shader->applyAttribute(attr); } } if(as->isInstanced()) { if(as->isIndexed()) { as->bindIndex(); // XXX as->GetIndexArray() should be as->GetIndexArray()+as->GetOffset() glDrawElementsInstanced(draw_mode, static_cast<GLsizei>(as->getCount()), convert_index_type(as->getIndexType()), as->getIndexArray(), as->getInstanceCount()); as->unbindIndex(); } else { glDrawArraysInstanced(draw_mode, static_cast<GLint>(as->getOffset()), static_cast<GLsizei>(as->getCount()), as->getInstanceCount()); } } else { if(as->isIndexed()) { as->bindIndex(); // XXX as->GetIndexArray() should be as->GetIndexArray()+as->GetOffset() glDrawElements(draw_mode, static_cast<GLsizei>(as->getCount()), convert_index_type(as->getIndexType()), as->getIndexArray()); as->unbindIndex(); } else { if(as->isMultiDrawEnabled()) { glMultiDrawArrays(draw_mode, as->getMultiOffsetArray().data(), as->getMultiCountArray().data(), as->getMultiDrawCount()); } else { glDrawArrays(draw_mode, static_cast<GLint>(as->getOffset()), static_cast<GLsizei>(as->getCount())); } } } shader->cleanUpAfterDraw(); glBindBuffer(GL_ARRAY_BUFFER, 0); } if(r->getRenderTarget()) { r->getRenderTarget()->unapply(); } }