/*
 * Update step executed by every thread between the two recursive halves of
 * the panel factorization.
 *
 * The n2 columns that start n1 columns to the right of `column` are divided
 * among the threads with psplit(); each thread applies CORE_claswap1 to its
 * share and then runs a unit-lower triangular solve (cblas_ctrsm) against the
 * n1 x n1 block located at (column, column).  After a barrier, the rows of
 * the panel are divided among the threads and each thread performs its slice
 * of the cblas_cgemm update  C := C - A*B  on the right-hand n2 columns.
 *
 *   M, LDA        row count handed to psplit() and leading dimension of A
 *   column        index of the first column (and diagonal row) being processed
 *   n1, n2        widths of the left (already factored) and right column groups
 *   IPIV          pivot array forwarded to CORE_claswap1
 *   thidx, thcnt  index of the calling thread and total thread count
 *
 * NOTE(review): psplit() is assumed to return an (offset, count) partition of
 * its first argument for thread thidx -- confirm against its definition.
 */
static inline void CORE_cgetrf_reclap_update(const int M, const int column, const int n1, const int n2,
                                             PLASMA_Complex32_t *A, const int LDA, int *IPIV, 
                                             const int thidx, const int thcnt)
{
    static PLASMA_Complex32_t posone =  1.0;
    static PLASMA_Complex32_t negone = -1.0;
    PLASMA_Complex32_t *left_cols  = A + column*LDA;      /* first column of the left group  */
    PLASMA_Complex32_t *right_cols = left_cols + n1*LDA;  /* first column of the right group */
    int col_first, col_count;   /* this thread's share of the n2 right-hand columns */
    int row_first, row_count;   /* this thread's share of the rows                  */

    CORE_cbarrier_thread( thidx, thcnt );

    psplit( n2, thidx, thcnt, &col_first, &col_count );

    if (col_count > 0) {
        PLASMA_Complex32_t *my_cols = right_cols + col_first*LDA;

        /* swap to the right */
        CORE_claswap1( col_count, my_cols, LDA, column, n1 + column, IPIV );

        cblas_ctrsm( CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit,
                     n1, col_count, CBLAS_SADDR(posone),
                     left_cols + column, LDA,
                     my_cols + column, LDA );
    }

    /* __sync_synchronize(); */ /* hopefully we will not need memory fences */

    /* every thread must be done pivoting and solving before the gemm starts */
    CORE_cbarrier_thread( thidx, thcnt );

    psplit( M, thidx, thcnt, &row_first, &row_count );
    if (thidx == 0) {
        /* thread 0 begins below the rows handled by the triangular solve */
        row_first  = column + n1;
        row_count -= column + n1;
    }

    cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, row_count, n2, n1,
                 CBLAS_SADDR(negone),
                 left_cols + row_first, LDA,
                 right_cols + column, LDA,
                 CBLAS_SADDR(posone),
                 right_cols + row_first, LDA );
}
示例#2
0
// Splits `line` with psplit() using m_expr_sep and rebuilds it, substituting
// the replacement text of every piece for which get_define() finds a define.
// Pieces with no matching define are copied through unchanged.
pstring ppreprocessor::replace_macros(const pstring &line)
{
	std::vector<pstring> pieces(psplit(line, m_expr_sep));
	pstring ret("");
	for (auto & piece : pieces)
	{
		define_t *found = get_define(piece);
		if (found == nullptr)
		{
			// not a known define: keep the piece as written
			ret += piece;
			continue;
		}
		ret += found->m_replace;
	}
	return ret;
}
示例#3
0
// Splits `line` with psplit() using m_expr_sep and concatenates the pieces
// into a pstringbuffer, substituting the replacement text of every piece for
// which get_define() finds a define.
pstring ppreprocessor::replace_macros(const pstring &line)
{
	std::vector<pstring> pieces(psplit(line, m_expr_sep));
	pstringbuffer ret = "";
	for (auto & piece : pieces)
	{
		define_t *found = get_define(piece);
		if (found == nullptr)
		{
			// not a known define: keep the piece as written
			ret.cat(piece);
			continue;
		}
		ret.cat(found->m_replace);
	}
	return ret;
}
/*
 * Recursive, multi-threaded panel factorization step.  Every participating
 * thread calls this function with the same M, N and column arguments.
 *
 * N > 1:  the N columns starting at `column` are split into n1 = N/2 and
 *         n2 = N - n1 columns.  The left part is factored recursively,
 *         CORE_cgetrf_reclap_update() is applied to the right part, the right
 *         part is factored recursively, and finally each thread applies
 *         CORE_claswap1 to its share of the left n1 columns.
 * else:   a single column is processed: the threads search their row ranges
 *         for the entry of largest magnitude (cblas_icamax), agree on a pivot
 *         through CORE_camax1_thread, and scale the column by the pivot.
 *
 *   M, LDA        row count handed to psplit() and leading dimension of A
 *   N             number of columns to process in this call
 *   IPIV          pivot array; this call passes IPIV + column to
 *                 CORE_camax1_thread and IPIV to CORE_claswap1
 *   info          set to column + 1 when the selected pivot equals zero;
 *                 never reset here (presumably initialized to 0 by the
 *                 caller -- TODO confirm)
 *   thidx, thcnt  index of the calling thread and total thread count
 *   column        index of the first column handled by this call
 *
 * `sfmin` is not declared in this function; it comes from the enclosing file.
 */
static void 
CORE_cgetrf_reclap_rec(const int M, const int N, 
                       PLASMA_Complex32_t *A, const int LDA, 
                       int *IPIV, int *info, 
                       const int thidx, const int thcnt, const int column)
{
    int jp, n1, n2, lm, loff;
    PLASMA_Complex32_t tmp1, tmp2, tmp3;
    PLASMA_Complex32_t *Atop = A + column*LDA; /* start of column `column` */
    
    /* Assumption: N = min( M, N ); */
    if (N > 1) {
        int coff, ccnt;
        
        /* split the column range into a left and a right half */
        n1 = N / 2;
        n2 = N - n1;
        
        /* factor the left n1 columns */
        CORE_cgetrf_reclap_rec( M, n1, A, LDA, IPIV, info, 
                                thidx, thcnt, column );
        if ( *info != 0 )
            return;
        
        /* bring the right n2 columns up to date with the left factorization */
        CORE_cgetrf_reclap_update(M, column, n1, n2,
                                  A, LDA, IPIV, 
                                  thidx, thcnt);
        
        /* factor the right n2 columns */
        CORE_cgetrf_reclap_rec( M, n2, A, LDA, IPIV, info, 
                                thidx, thcnt, column + n1 );
        if ( *info != 0 )
            return;
        
        /* divide the left n1 columns among the threads */
        psplit( n1, thidx, thcnt, &coff, &ccnt );
        
        if (ccnt > 0) {
            CORE_claswap1( ccnt, Atop+coff*LDA, LDA, n1 + column, N + column, IPIV ); /* swap to the left */
        }
        
    } else {
        int thrd; /* filled in by CORE_camax1_thread; compared against thidx below */
        
        CORE_cbarrier_thread( thidx, thcnt );
        
        /* divide the M rows among the threads */
        psplit( M, thidx, thcnt, &loff, &lm );
        
        if (thidx == 0) {
            /* thread 0 starts at the diagonal row instead of row 0 */
            loff = column;
            lm -= column;
        }
        
        tmp2 = Atop[column]; /* all threads read the pivot element in case they need it */
        
        /* local candidate: index (relative to loff) and value */
        jp = cblas_icamax( lm, Atop + loff, 1 );
        tmp1 = Atop[loff + jp];
        
        /* NOTE(review): presumably a reduction across threads that returns the
         * owning thread in thrd and the chosen pivot value in tmp3, and records
         * the 1-based row index through the IPIV + column argument -- confirm
         * against the definition of CORE_camax1_thread. */
        CORE_camax1_thread( tmp1, thidx, thcnt, &thrd, 
                            &tmp3, loff + jp + 1, IPIV + column );
        
        Atop[column] = tmp3; /* all threads set the pivot element: no need for synchronization */
        
        if ( tmp3 != 0.0 ) {
            if ( cabsf(tmp3) >= sfmin ) {
                /* pivot magnitude is at least sfmin: scale by the reciprocal */
                PLASMA_Complex32_t tmp = (PLASMA_Complex32_t)1.0 / tmp3;
                n1 = (thidx == 0) ? 1 : 0; /* thread 0 skips the diagonal entry itself */
                cblas_cscal( lm - n1, CBLAS_SADDR(tmp), Atop + loff + n1, 1 );
            } else {
                /* pivot magnitude below sfmin: divide element by element instead */
                int i;
                PLASMA_Complex32_t *Atop2;
                n1 = (thidx == 0) ? 1 : 0; /* thread 0 skips the diagonal entry itself */
                Atop2 = Atop + loff + n1;

                for( i=0; i < lm-n1; i++, Atop2++)
                    *Atop2 = *Atop2 / tmp3;
            }

            if (thrd == thidx) { /* the thread that owns the best pivot */
              if (loff + jp != column) /* if there is a need to exchange the pivot */
                /* store the scaled former diagonal value (tmp2) where the pivot was found */
                Atop[loff + jp] = tmp2 / tmp3;
            }
        
        } else {
            /* zero pivot: report the 1-based column index; note that this path
             * returns without reaching the barrier below */
            *info = column + 1;
            return;
        }

        CORE_cbarrier_thread( thidx, thcnt );
    }
}
示例#5
0
// Processes one input line and returns the text to emit for it.
//
// A trailing backslash marks a continuation: the line (minus the backslash)
// is accumulated in m_line, m_state becomes LINE_CONTINUATION and an empty
// string is returned.  Once a line without a trailing backslash arrives, the
// accumulated text is passed through process_comments() and then either
// interpreted as a directive (first non-blank character is '#') or macro-
// expanded via replace_macros().
//
// Conditional state: m_level is the current #if nesting depth and bit m_level
// of m_ifflag is set while that level is disabled.  Text, #define, #pragma and
// the unknown-directive error are only acted on while m_ifflag == 0.
//
// Directives that need an argument (#ifdef, #ifndef, #define) are checked for
// its presence before the token vector is indexed, so a malformed directive is
// reported through error() instead of reading past the end of the vector.
pstring  ppreprocessor::process_line(pstring line)
{
	bool line_cont = plib::right(line, 1) == "\\";
	if (line_cont)
		line = plib::left(line, line.size() - 1);

	if (m_state == LINE_CONTINUATION)
		m_line += line;
	else
		m_line = line;

	if (line_cont)
	{
		m_state = LINE_CONTINUATION;
		return "";
	}
	else
		m_state = PROCESS;

	line = process_comments(m_line);

	// normalize tabs to blanks and trim before tokenizing
	pstring lt = plib::trim(plib::replace_all(line, pstring("\t"), pstring(" ")));
	pstring ret;
	// FIXME ... revise and extend macro handling
	if (plib::startsWith(lt, "#"))
	{
		std::vector<pstring> lti(psplit(lt, " ", true));
		if (lti[0] == "#if")
		{
			m_level++;
			std::size_t start = 0;
			lt = replace_macros(lt);
			// drop the leading "#if", strip blanks and tokenize the expression
			std::vector<pstring> t(psplit(replace_all(lt.substr(3), pstring(" "), pstring("")), m_expr_sep));
			int val = static_cast<int>(expr(t, start, 255));
			if (val == 0)
				m_ifflag |= (1 << m_level);
		}
		else if (lti[0] == "#ifdef")
		{
			m_level++;
			// the level is still opened so that a following #endif stays balanced
			if (lti.size() < 2)
				error("PREPRO: #ifdef requires an identifier: " + line);
			else if (get_define(lti[1]) == nullptr)
				m_ifflag |= (1 << m_level);
		}
		else if (lti[0] == "#ifndef")
		{
			m_level++;
			// the level is still opened so that a following #endif stays balanced
			if (lti.size() < 2)
				error("PREPRO: #ifndef requires an identifier: " + line);
			else if (get_define(lti[1]) != nullptr)
				m_ifflag |= (1 << m_level);
		}
		else if (lti[0] == "#else")
		{
			// flip the enabled/disabled state of the current level
			m_ifflag ^= (1 << m_level);
		}
		else if (lti[0] == "#endif")
		{
			m_ifflag &= ~(1 << m_level);
			m_level--;
		}
		else if (lti[0] == "#include")
		{
			// ignore
		}
		else if (lti[0] == "#pragma")
		{
			if (m_ifflag == 0 && lti.size() > 3 && lti[1] == "NETLIST")
			{
				if (lti[2] == "warning")
					error("NETLIST: " + catremainder(lti, 3, " "));
			}
		}
		else if (lti[0] == "#define")
		{
			if (m_ifflag == 0)
			{
				if (lti.size() != 3)
					error("PREPRO: only simple defines allowed: " + line);
				// only index name and value when both tokens exist; this
				// matters when error() returns instead of throwing
				if (lti.size() >= 3)
					m_defines.insert({lti[1], define_t(lti[1], lti[2])});
			}
		}
		else
		{
			if (m_ifflag == 0)
				error(pfmt("unknown directive on line {1}: {2}")(m_lineno)(replace_macros(line)));
		}
	}
	else
	{
		lt = replace_macros(lt);
		if (m_ifflag == 0)
			ret += lt;
	}
	return ret;
}
示例#6
0
// Processes one input line and returns the text to emit for it.
//
// Tabs are replaced by blanks and the line is trimmed; m_lineno is advanced
// on every call.  A line starting with '#' is interpreted as a directive,
// anything else is macro-expanded via replace_macros().
//
// Conditional state: m_level is the current #if nesting depth and bit m_level
// of m_ifflag is set while that level is disabled.  Text, #define and #pragma
// are only acted on while m_ifflag == 0.
//
// Directives that need an argument (#ifdef, #ifndef, #define) are checked for
// its presence before the token vector is indexed, so a malformed directive is
// reported through error() instead of reading past the end of the vector.
pstring  ppreprocessor::process_line(const pstring &line)
{
	pstring lt = line.replace("\t"," ").trim();
	pstringbuffer ret;
	m_lineno++;
	// FIXME ... revise and extend macro handling
	if (lt.startsWith("#"))
	{
		std::vector<pstring> lti(psplit(lt, " ", true));
		if (lti[0].equals("#if"))
		{
			m_level++;
			std::size_t start = 0;
			lt = replace_macros(lt);
			// drop the leading "#if", strip blanks and tokenize the expression
			std::vector<pstring> t(psplit(lt.substr(3).replace(" ",""), m_expr_sep));
			int val = static_cast<int>(expr(t, start, 0));
			if (val == 0)
				m_ifflag |= (1 << m_level);
		}
		else if (lti[0].equals("#ifdef"))
		{
			m_level++;
			// the level is still opened so that a following #endif stays balanced
			if (lti.size() < 2)
				error("PREPRO: #ifdef requires an identifier: " + line);
			else if (get_define(lti[1]) == nullptr)
				m_ifflag |= (1 << m_level);
		}
		else if (lti[0].equals("#ifndef"))
		{
			m_level++;
			// the level is still opened so that a following #endif stays balanced
			if (lti.size() < 2)
				error("PREPRO: #ifndef requires an identifier: " + line);
			else if (get_define(lti[1]) != nullptr)
				m_ifflag |= (1 << m_level);
		}
		else if (lti[0].equals("#else"))
		{
			// flip the enabled/disabled state of the current level
			m_ifflag ^= (1 << m_level);
		}
		else if (lti[0].equals("#endif"))
		{
			m_ifflag &= ~(1 << m_level);
			m_level--;
		}
		else if (lti[0].equals("#include"))
		{
			// ignore
		}
		else if (lti[0].equals("#pragma"))
		{
			if (m_ifflag == 0 && lti.size() > 3 && lti[1].equals("NETLIST"))
			{
				if (lti[2].equals("warning"))
					error("NETLIST: " + catremainder(lti, 3, " "));
			}
		}
		else if (lti[0].equals("#define"))
		{
			if (m_ifflag == 0)
			{
				if (lti.size() != 3)
					error("PREPRO: only simple defines allowed: " + line);
				// only index name and value when both tokens exist; this
				// matters when error() returns instead of throwing
				if (lti.size() >= 3)
					m_defines.insert({lti[1], define_t(lti[1], lti[2])});
			}
		}
		else
			error(pfmt("unknown directive on line {1}: {2}")(m_lineno)(line));
	}
	else
	{
		lt = replace_macros(lt);
		if (m_ifflag == 0)
		{
			ret.cat(lt);
		}
	}
	return ret;
}