/*
 * Update the right half of a panel after its left n1 columns were factored.
 *
 * Works on the panel that starts at column `column` of A (column-major,
 * leading dimension LDA).  Called by every one of the thcnt cooperating
 * threads with its own thidx.
 *
 *  1. The n2 right-hand columns are divided between the threads with psplit();
 *     each thread applies the row interchanges recorded in IPIV for rows
 *     [column, column + n1) to its columns and then solves with the unit
 *     lower triangular n1 x n1 block (cblas_ctrsm).
 *  2. After a barrier, the M rows are divided between the threads with
 *     psplit(); thread 0 starts at row column + n1 instead of the start of its
 *     share.  Each thread then subtracts (its rows of the left columns) times
 *     (the n1 x n2 block just solved) from its rows of the right columns
 *     (cblas_cgemm).
 *
 * NOTE(review): psplit() is assumed to return a contiguous (offset, count)
 * share of the given extent for thread thidx - confirm against its definition.
 */
static inline void
CORE_cgetrf_reclap_update(const int M, const int column, const int n1, const int n2,
                          PLASMA_Complex32_t *A, const int LDA, int *IPIV,
                          const int thidx, const int thcnt)
{
    static PLASMA_Complex32_t posone =  1.0;
    static PLASMA_Complex32_t negone = -1.0;

    PLASMA_Complex32_t *left  = A + column*LDA;  /* first column of the panel      */
    PLASMA_Complex32_t *right = left + n1 *LDA;  /* first of the n2 right columns  */
    int col_first, col_count;                    /* this thread's share of columns */
    int row_first, row_count;                    /* this thread's share of rows    */

    CORE_cbarrier_thread( thidx, thcnt );

    psplit( n2, thidx, thcnt, &col_first, &col_count );

    if (col_count > 0) {
        PLASMA_Complex32_t *my_cols = right + col_first*LDA;

        /* swap to the right */
        CORE_claswap1( col_count, my_cols, LDA, column, n1 + column, IPIV );

        cblas_ctrsm( CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit,
                     n1, col_count, CBLAS_SADDR(posone),
                     left + column, LDA,
                     my_cols + column, LDA );
    }

    /* __sync_synchronize(); */ /* hopefully we will not need memory fences */

    /* need to wait for pivoting and triangular solve to finish */
    CORE_cbarrier_thread( thidx, thcnt );

    psplit( M, thidx, thcnt, &row_first, &row_count );
    if (thidx == 0) {
        /* thread 0 skips the rows above the trailing block */
        row_first  = column + n1;
        row_count -= column + n1;
    }

    cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
                 row_count, n2, n1,
                 CBLAS_SADDR(negone), left + row_first, LDA,
                                      right + column, LDA,
                 CBLAS_SADDR(posone), right + row_first, LDA );
}
// Substitute known defines in a line.
//
// The line is cut into pieces with psplit() using m_expr_sep. Every piece for
// which get_define() finds an entry is replaced by that entry's m_replace
// text; every other piece is copied unchanged. The pieces are concatenated
// back in their original order and returned.
//
// NOTE(review): nothing is inserted between the pieces here, so psplit() is
// presumably returning the separators as pieces of their own - confirm.
pstring ppreprocessor::replace_macros(const pstring &line)
{
    std::vector<pstring> pieces(psplit(line, m_expr_sep));
    pstring expanded("");

    for (auto &piece : pieces)
    {
        define_t *macro = get_define(piece);
        if (macro == nullptr)
            expanded += piece;
        else
            expanded += macro->m_replace;
    }

    return expanded;
}
// Substitute known defines in a line.
//
// The line is cut into pieces with psplit() using m_expr_sep. Every piece for
// which get_define() finds an entry contributes that entry's m_replace text;
// every other piece is appended unchanged. The result is accumulated in a
// pstringbuffer and returned as a pstring.
//
// NOTE(review): nothing is inserted between the pieces here, so psplit() is
// presumably returning the separators as pieces of their own - confirm.
pstring ppreprocessor::replace_macros(const pstring &line)
{
    std::vector<pstring> pieces(psplit(line, m_expr_sep));
    pstringbuffer expanded = "";

    for (auto &piece : pieces)
    {
        define_t *macro = get_define(piece);
        if (macro == nullptr)
        {
            expanded.cat(piece);
        }
        else
        {
            expanded.cat(macro->m_replace);
        }
    }

    return expanded;
}
/*
 * Recursive step of the parallel LU panel factorization.
 *
 * Factors N columns of the panel starting at column `column` of A
 * (column-major, leading dimension LDA, M rows).  Called by every one of the
 * thcnt cooperating threads with its own thidx.
 *
 *  N > 1 : factor the left N/2 columns recursively, update the right columns
 *          with CORE_cgetrf_reclap_update(), factor the right columns
 *          recursively, then apply the interchanges recorded for rows
 *          [column + n1, column + N) to the left columns.
 *  else  : single column - each thread searches its share of the rows with
 *          cblas_icamax, the candidates are combined by CORE_camax1_thread(),
 *          and the column is divided by the selected pivot.
 *
 * On a zero pivot, *info is set to column + 1 and the function returns; the
 * recursion also stops as soon as *info is non-zero.
 *
 * Assumption: N = min( M, N );
 *
 * NOTE(review): psplit() is assumed to return a contiguous (offset, count)
 * share for thread thidx, and CORE_camax1_thread() is assumed to return the
 * owning thread and the pivot value and to record the pivot row through its
 * last two arguments - confirm against their definitions.
 */
static void
CORE_cgetrf_reclap_rec(const int M, const int N,
                       PLASMA_Complex32_t *A, const int LDA,
                       int *IPIV, int *info,
                       const int thidx, const int thcnt, const int column)
{
    PLASMA_Complex32_t *panel = A + column*LDA;

    if (N > 1) {
        int left_cols  = N / 2;
        int right_cols = N - left_cols;
        int col_first, col_count;

        CORE_cgetrf_reclap_rec( M, left_cols, A, LDA, IPIV, info,
                                thidx, thcnt, column );
        if ( *info != 0 )
            return;

        CORE_cgetrf_reclap_update( M, column, left_cols, right_cols,
                                   A, LDA, IPIV, thidx, thcnt );

        CORE_cgetrf_reclap_rec( M, right_cols, A, LDA, IPIV, info,
                                thidx, thcnt, column + left_cols );
        if ( *info != 0 )
            return;

        psplit( left_cols, thidx, thcnt, &col_first, &col_count );
        if (col_count > 0) {
            /* swap to the left */
            CORE_claswap1( col_count, panel + col_first*LDA, LDA,
                           left_cols + column, N + column, IPIV );
        }
    } else {
        PLASMA_Complex32_t local_best, old_diag, pivot;
        int row_first, row_count;   /* this thread's share of the rows       */
        int local_idx;              /* index of the local maximum in that share */
        int owner;                  /* thread that holds the selected pivot   */
        int skip;                   /* 1 for thread 0 (skips the pivot slot)  */

        CORE_cbarrier_thread( thidx, thcnt );

        psplit( M, thidx, thcnt, &row_first, &row_count );
        if (thidx == 0) {
            row_first  = column;
            row_count -= column;
        }

        /* all threads read the pivot element in case they need it */
        old_diag = panel[column];

        local_idx  = cblas_icamax( row_count, panel + row_first, 1 );
        local_best = panel[row_first + local_idx];

        CORE_camax1_thread( local_best, thidx, thcnt, &owner, &pivot,
                            row_first + local_idx + 1, IPIV + column );

        /* all threads set the pivot element: no need for synchronization */
        panel[column] = pivot;

        if ( pivot == 0.0 ) {
            *info = column + 1;
            return;
        }

        skip = (thidx == 0) ? 1 : 0;

        if ( cabsf(pivot) >= sfmin ) {
            /* pivot is large enough: multiply by its reciprocal */
            PLASMA_Complex32_t inv = (PLASMA_Complex32_t)1.0 / pivot;
            cblas_cscal( row_count - skip, CBLAS_SADDR(inv),
                         panel + row_first + skip, 1 );
        } else {
            /* pivot magnitude below sfmin: divide element by element */
            PLASMA_Complex32_t *elem = panel + row_first + skip;
            int i;
            for (i = 0; i < row_count - skip; i++)
                elem[i] = elem[i] / pivot;
        }

        /* the thread that owns the best pivot stores the scaled old diagonal
           in the pivot's former position, if an exchange is needed */
        if (owner == thidx && row_first + local_idx != column)
            panel[row_first + local_idx] = old_diag / pivot;

        CORE_cbarrier_thread( thidx, thcnt );
    }
}
// Process one physical input line and return the text to emit for it.
//
// - A trailing backslash marks a continuation: the line (without the
//   backslash) is accumulated in m_line, m_state becomes LINE_CONTINUATION and
//   "" is returned. Otherwise the accumulated text is processed and m_state
//   becomes PROCESS.
// - Comments are removed via process_comments(), tabs become spaces and the
//   line is trimmed.
// - Lines starting with '#' are directives. m_ifflag holds one bit per
//   conditional nesting level (m_level); output and #define/#pragma handling
//   are active only while m_ifflag == 0.
//     #if EXPR        evaluates EXPR after macro replacement; 0 disables the level
//     #ifdef  NAME    disables the level when NAME is not defined
//     #ifndef NAME    disables the level when NAME is defined
//     #else           toggles the bit of the current level
//     #endif          clears the bit of the current level and leaves the level
//     #include        ignored
//     #pragma NETLIST warning TEXT...   reports TEXT through error()
//     #define NAME VALUE                records a simple define
//   Any other directive is reported through error() when output is active.
// - All other lines get macro replacement and are returned when output is
//   active; otherwise the result is empty.
//
// Fixes over the previous revision: "#ifdef"/"#ifndef" without a macro name
// and "#define" with fewer than three tokens no longer index past the end of
// the token vector.
pstring ppreprocessor::process_line(pstring line)
{
    const bool line_cont = plib::right(line, 1) == "\\";
    if (line_cont)
        line = plib::left(line, line.size() - 1);

    if (m_state == LINE_CONTINUATION)
        m_line += line;
    else
        m_line = line;

    if (line_cont)
    {
        m_state = LINE_CONTINUATION;
        return "";
    }
    m_state = PROCESS;

    line = process_comments(m_line);

    pstring lt = plib::trim(plib::replace_all(line, pstring("\t"), pstring(" ")));
    pstring ret;

    // FIXME ... revise and extend macro handling
    if (plib::startsWith(lt, "#"))
    {
        std::vector<pstring> lti(psplit(lt, " ", true));
        if (lti[0] == "#if")
        {
            m_level++;
            std::size_t start = 0;
            lt = replace_macros(lt);
            // drop the leading "#if" and all blanks before tokenizing the expression
            std::vector<pstring> t(psplit(replace_all(lt.substr(3), pstring(" "), pstring("")), m_expr_sep));
            int val = static_cast<int>(expr(t, start, 255));
            if (val == 0)
                m_ifflag |= (1 << m_level);
        }
        else if (lti[0] == "#ifdef" || lti[0] == "#ifndef")
        {
            const bool want_defined = (lti[0] == "#ifdef");
            const bool has_name = lti.size() > 1;

            // Report a missing macro name only while output is active,
            // like the other diagnostics in this function.
            if (!has_name && m_ifflag == 0)
                error("PREPRO: " + lti[0] + " without macro name: " + line);

            m_level++;
            // A missing name is treated as "not defined" instead of reading lti[1].
            const bool is_defined = has_name && get_define(lti[1]) != nullptr;
            if (is_defined != want_defined)
                m_ifflag |= (1 << m_level);
        }
        else if (lti[0] == "#else")
        {
            m_ifflag ^= (1 << m_level);
        }
        else if (lti[0] == "#endif")
        {
            m_ifflag &= ~(1 << m_level);
            m_level--;
        }
        else if (lti[0] == "#include")
        {
            // ignore
        }
        else if (lti[0] == "#pragma")
        {
            if (m_ifflag == 0 && lti.size() > 3 && lti[1] == "NETLIST")
            {
                if (lti[2] == "warning")
                    error("NETLIST: " + catremainder(lti, 3, " "));
            }
        }
        else if (lti[0] == "#define")
        {
            if (m_ifflag == 0)
            {
                if (lti.size() != 3)
                    error("PREPRO: only simple defines allowed: " + line);
                // error() is not visible here; should it return instead of
                // throwing, only touch lti[1]/lti[2] when they exist.
                if (lti.size() >= 3)
                    m_defines.insert({lti[1], define_t(lti[1], lti[2])});
            }
        }
        else
        {
            if (m_ifflag == 0)
                error(pfmt("unknown directive on line {1}: {2}")(m_lineno)(replace_macros(line)));
        }
    }
    else
    {
        lt = replace_macros(lt);
        if (m_ifflag == 0)
            ret += lt;
    }
    return ret;
}
// Process one input line and return the text to emit for it.
//
// Tabs are replaced by spaces, the line is trimmed and m_lineno is advanced.
// Lines starting with '#' are directives. m_ifflag holds one bit per
// conditional nesting level (m_level); output and #define/#pragma handling are
// active only while m_ifflag == 0.
//     #if EXPR        evaluates EXPR after macro replacement; 0 disables the level
//     #ifdef  NAME    disables the level when NAME is not defined
//     #ifndef NAME    disables the level when NAME is defined
//     #else           toggles the bit of the current level
//     #endif          clears the bit of the current level and leaves the level
//     #include        ignored
//     #pragma NETLIST warning TEXT...   reports TEXT through error()
//     #define NAME VALUE                records a simple define
// Any other directive is reported through error().
// All other lines get macro replacement and are returned when output is
// active; otherwise the result is empty.
//
// Fixes over the previous revision: "#ifdef"/"#ifndef" without a macro name
// and "#define" with fewer than three tokens no longer index past the end of
// the token vector.
pstring ppreprocessor::process_line(const pstring &line)
{
    pstring lt = line.replace("\t"," ").trim();
    pstringbuffer ret;
    m_lineno++;

    // FIXME ... revise and extend macro handling
    if (lt.startsWith("#"))
    {
        std::vector<pstring> lti(psplit(lt, " ", true));
        if (lti[0].equals("#if"))
        {
            m_level++;
            std::size_t start = 0;
            lt = replace_macros(lt);
            // drop the leading "#if" and all blanks before tokenizing the expression
            std::vector<pstring> t(psplit(lt.substr(3).replace(" ",""), m_expr_sep));
            int val = static_cast<int>(expr(t, start, 0));
            if (val == 0)
                m_ifflag |= (1 << m_level);
        }
        else if (lti[0].equals("#ifdef") || lti[0].equals("#ifndef"))
        {
            const bool want_defined = lti[0].equals("#ifdef");
            const bool has_name = lti.size() > 1;

            // Report a missing macro name only while output is active.
            if (!has_name && m_ifflag == 0)
                error("PREPRO: conditional without macro name: " + line);

            m_level++;
            // A missing name is treated as "not defined" instead of reading lti[1].
            const bool is_defined = has_name && get_define(lti[1]) != nullptr;
            if (is_defined != want_defined)
                m_ifflag |= (1 << m_level);
        }
        else if (lti[0].equals("#else"))
        {
            m_ifflag ^= (1 << m_level);
        }
        else if (lti[0].equals("#endif"))
        {
            m_ifflag &= ~(1 << m_level);
            m_level--;
        }
        else if (lti[0].equals("#include"))
        {
            // ignore
        }
        else if (lti[0].equals("#pragma"))
        {
            if (m_ifflag == 0 && lti.size() > 3 && lti[1].equals("NETLIST"))
            {
                if (lti[2].equals("warning"))
                    error("NETLIST: " + catremainder(lti, 3, " "));
            }
        }
        else if (lti[0].equals("#define"))
        {
            if (m_ifflag == 0)
            {
                if (lti.size() != 3)
                    error("PREPRO: only simple defines allowed: " + line);
                // error() is not visible here; should it return instead of
                // throwing, only touch lti[1]/lti[2] when they exist.
                if (lti.size() >= 3)
                    m_defines.insert({lti[1], define_t(lti[1], lti[2])});
            }
        }
        else
            error(pfmt("unknown directive on line {1}: {2}")(m_lineno)(line));
    }
    else
    {
        lt = replace_macros(lt);
        if (m_ifflag == 0)
        {
            ret.cat(lt);
        }
    }
    return ret;
}