/*
 * Apply a two-operand operator to the top two entries of env->stack.
 *
 * The right-hand operand is popped first, then the left-hand one, so
 * bin_op receives them as (left, right).  Both popped links are passed
 * to link_free() once bin_op has returned, and the link returned by
 * bin_op is pushed back onto the same stack.
 */
static inline void binary_op(CallEnv env, Link (*bin_op)(Link, Link)){
    Link rhs = stack_pop(env->stack);
    Link lhs = stack_pop(env->stack);

    Link result = bin_op(lhs, rhs);

    /* operands are released only after the operator has consumed them */
    link_free(lhs);
    link_free(rhs);

    stack_push(env->stack, result);
}
void test_3D_sort(unsigned int n) { typedef Kokkos::View<KeyType*[3],ExecutionSpace > KeyViewType; KeyViewType keys("Keys",n*n*n); Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931); Kokkos::fill_random(keys,g,100.0); double sum_before = 0.0; double sum_after = 0.0; unsigned int sort_fails = 0; Kokkos::parallel_reduce(keys.dimension_0(),sum3D<ExecutionSpace, KeyType>(keys),sum_before); int bin_1d = 1; while( bin_1d*bin_1d*bin_1d*4< (int) keys.dimension_0() ) bin_1d*=2; int bin_max[3] = {bin_1d,bin_1d,bin_1d}; typename KeyViewType::value_type min[3] = {0,0,0}; typename KeyViewType::value_type max[3] = {100,100,100}; typedef Kokkos::BinOp3D< KeyViewType > BinOp; BinOp bin_op(bin_max,min,max); Kokkos::BinSort< KeyViewType , BinOp > Sorter(keys,bin_op,false); Sorter.create_permute_vector(); Sorter.template sort< KeyViewType >(keys); Kokkos::parallel_reduce(keys.dimension_0(),sum3D<ExecutionSpace, KeyType>(keys),sum_after); Kokkos::parallel_reduce(keys.dimension_0()-1,bin3d_is_sorted_struct<ExecutionSpace, KeyType>(keys,bin_1d,min[0],max[0]),sort_fails); double ratio = sum_before/sum_after; double epsilon = 1e-10; unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 1 : 0; if ( sort_fails ) printf("3D Sort Sum: %f %f Fails: %u\n",sum_before,sum_after,sort_fails); ASSERT_EQ(sort_fails,0); ASSERT_EQ(equal_sum,1); }
// Compute C = A (binary_op) B for CSR matrices that are in the // canonical CSR format. Matrix dimensions of A and B should be the // same. C will be in canonical format as well. void csr_binop_csr_canonical( const CSRMatrix &A, const CSRMatrix &B, CSRMatrix &C, RCP<const Basic>(&bin_op)(const RCP<const Basic> &, const RCP<const Basic> &)) { SYMENGINE_ASSERT(A.row_ == B.row_ and A.col_ == B.col_ and C.row_ == A.row_ and C.col_ == A.col_); // Method that works for canonical CSR matrices C.p_[0] = 0; unsigned nnz = 0; unsigned A_pos, B_pos, A_end, B_end; for (unsigned i = 0; i < A.row_; i++) { A_pos = A.p_[i]; B_pos = B.p_[i]; A_end = A.p_[i + 1]; B_end = B.p_[i + 1]; // while not finished with either row while (A_pos < A_end and B_pos < B_end) { unsigned A_j = A.j_[A_pos]; unsigned B_j = B.j_[B_pos]; if (A_j == B_j) { RCP<const Basic> result = bin_op(A.x_[A_pos], B.x_[B_pos]); if (neq(*result, *zero)) { C.j_.push_back(A_j); C.x_.push_back(result); nnz++; } A_pos++; B_pos++; } else if (A_j < B_j) { RCP<const Basic> result = bin_op(A.x_[A_pos], zero); if (neq(*result, *zero)) { C.j_.push_back(A_j); C.x_.push_back(result); nnz++; } A_pos++; } else { // B_j < A_j RCP<const Basic> result = bin_op(zero, B.x_[B_pos]); if (neq(*result, *zero)) { C.j_.push_back(B_j); C.x_.push_back(result); nnz++; } B_pos++; } } // tail while (A_pos < A_end) { RCP<const Basic> result = bin_op(A.x_[A_pos], zero); if (neq(*result, *zero)) { C.j_.push_back(A.j_[A_pos]); C.x_.push_back(result); nnz++; } A_pos++; } while (B_pos < B_end) { RCP<const Basic> result = bin_op(zero, B.x_[B_pos]); if (neq(*result, *zero)) { C.j_.push_back(B.j_[B_pos]); C.x_.push_back(result); nnz++; } B_pos++; } C.p_[i + 1] = nnz; } // It's enough to check for duplicates as the column indices // remain sorted after the above operations if (CSRMatrix::csr_has_duplicates(C.p_, C.j_, A.row_)) CSRMatrix::csr_sum_duplicates(C.p_, C.j_, C.x_, A.row_); }
/*
 * Walk an expression tree, filling in operand values from the
 * pmResult at the leaf nodes and propagating the computed values
 * towards the root node of the tree.
 *
 * The walk is post-order: the left subtree is evaluated first, then
 * the right subtree, and only then the node itself.  A negative status
 * from either subtree is returned immediately without touching np.
 *
 * Parameters
 *   np     node to evaluate (must not be NULL); results are stored in
 *          np->info->ivlist[] and np->info->numval
 *   rp     the fetched result that supplies values for L_NAME leaves and
 *          the timestamp recorded for L_DELTA, L_RATE and L_INSTANT
 *   level  recursion depth; in this function it is only passed on as
 *          level+1 to the recursive calls
 *
 * Returns
 *   np->info->numval for the node (1 for L_NUMBER, 0 for L_ANON), or a
 *   negative code: PM_ERR_CONV for an unexpected operand type,
 *   PM_ERR_TYPE for an unsupported metric type in L_NAME, PM_ERR_PMID
 *   when the metric of an L_NAME node is not present in rp, or whatever
 *   negative status a subtree returned.
 */
static int
eval_expr(node_t *np, pmResult *rp, int level)
{
    int sts;
    int i;
    int j;
    int k;
    size_t need;

    assert(np != NULL);
    /* evaluate the operands first; any error aborts the whole walk */
    if (np->left != NULL) {
        sts = eval_expr(np->left, rp, level+1);
        if (sts < 0) return sts;
    }
    if (np->right != NULL) {
        sts = eval_expr(np->right, rp, level+1);
        if (sts < 0) return sts;
    }

    /* mostly, np->left is not NULL ... */
    assert (np->type == L_NUMBER || np->type == L_NAME || np->left != NULL);

    switch (np->type) {

    case L_NUMBER:
        /* constant: converted from np->value only once (numval == 0) */
        if (np->info->numval == 0) {
            /* initialize ivlist[] for singular instance first time through */
            np->info->numval = 1;
            if ((np->info->ivlist = (val_t *)malloc(sizeof(val_t))) == NULL) {
                __pmNoMem("eval_expr: number ivlist", sizeof(val_t), PM_FATAL_ERR);
                /*NOTREACHED*/
            }
            np->info->ivlist[0].inst = PM_INDOM_NULL;
            /* don't need error checking, done in the lexical scanner */
            np->info->ivlist[0].value.l = atoi(np->value);
        }
        return 1;
        break;

    case L_DELTA:
    case L_RATE:
        /*
         * this and the last values are in the left expr
         */
        np->info->last_stamp = np->info->stamp;
        np->info->stamp = rp->timestamp;
        free_ivlist(np);
        /*
         * at most min(current, previous) instances can be paired up;
         * this is also the size of the ivlist[] allocated below
         */
        np->info->numval = np->left->info->numval <= np->left->info->last_numval ? np->left->info->numval : np->left->info->last_numval;
        if (np->info->numval <= 0)
            return np->info->numval;
        if ((np->info->ivlist = (val_t *)malloc(np->info->numval*sizeof(val_t))) == NULL) {
            __pmNoMem("eval_expr: delta()/rate() ivlist", np->info->numval*sizeof(val_t), PM_FATAL_ERR);
            /*NOTREACHED*/
        }
        /*
         * delta()
         *      ivlist[k] = left->ivlist[i] - left->last_ivlist[j]
         * rate()
         *      ivlist[k] = (left->ivlist[i] - left->last_ivlist[j]) /
         *                  (timestamp - left->last_stamp)
         *
         * i walks the current values, j is the matching previous value
         * and k counts the results actually produced.  Note that the
         * code takes both timestamps for rate() from np->info.
         */
        for (i = k = 0; i < np->left->info->numval; i++) {
            /* first guess: same position in the previous values */
            j = i;
            if (j >= np->left->info->last_numval)
                j = 0;
            if (np->left->info->ivlist[i].inst != np->left->info->last_ivlist[j].inst) {
                /* current ith inst != last jth inst ... search in last */
#ifdef PCP_DEBUG
                if ((pmDebug & DBG_TRACE_DERIVE) && (pmDebug & DBG_TRACE_APPL2)) {
                    fprintf(stderr, "eval_expr: inst[%d] mismatch left [%d]=%d last [%d]=%d\n", k, i, np->left->info->ivlist[i].inst, j, np->left->info->last_ivlist[j].inst);
                }
#endif
                for (j = 0; j < np->left->info->last_numval; j++) {
                    if (np->left->info->ivlist[i].inst == np->left->info->last_ivlist[j].inst)
                        break;
                }
                if (j == np->left->info->last_numval) {
                    /* no match, skip this instance from this result */
                    continue;
                }
#ifdef PCP_DEBUG
                else {
                    if ((pmDebug & DBG_TRACE_DERIVE) && (pmDebug & DBG_TRACE_APPL2)) {
                        fprintf(stderr, "eval_expr: recover @ last [%d]=%d\n", j, np->left->info->last_ivlist[j].inst);
                    }
                }
#endif
            }
            np->info->ivlist[k].inst = np->left->info->ivlist[i].inst;
            if (np->type == L_DELTA) {
                /* for delta() result type == operand type */
                switch (np->left->desc.type) {
                    case PM_TYPE_32:
                        np->info->ivlist[k].value.l = np->left->info->ivlist[i].value.l - np->left->info->last_ivlist[j].value.l;
                        break;
                    case PM_TYPE_U32:
                        np->info->ivlist[k].value.ul = np->left->info->ivlist[i].value.ul - np->left->info->last_ivlist[j].value.ul;
                        break;
                    case PM_TYPE_64:
                        np->info->ivlist[k].value.ll = np->left->info->ivlist[i].value.ll - np->left->info->last_ivlist[j].value.ll;
                        break;
                    case PM_TYPE_U64:
                        np->info->ivlist[k].value.ull = np->left->info->ivlist[i].value.ull - np->left->info->last_ivlist[j].value.ull;
                        break;
                    case PM_TYPE_FLOAT:
                        np->info->ivlist[k].value.f = np->left->info->ivlist[i].value.f - np->left->info->last_ivlist[j].value.f;
                        break;
                    case PM_TYPE_DOUBLE:
                        np->info->ivlist[k].value.d = np->left->info->ivlist[i].value.d - np->left->info->last_ivlist[j].value.d;
                        break;
                    default:
                        /*
                         * Nothing should end up here as check_expr() checks
                         * for numeric data type at bind time
                         */
                        return PM_ERR_CONV;
                }
            }
            else {
                /* rate() conversion, type will be DOUBLE */
                struct timeval stampdiff;
                /*
                 * stampdiff starts as the current stamp and is then
                 * adjusted by last_stamp (presumably a subtraction,
                 * going by the name __pmtimevalDec -- confirm)
                 */
                stampdiff = np->info->stamp;
                __pmtimevalDec(&stampdiff, &np->info->last_stamp);
                /* the difference is taken in the operand type, then widened */
                switch (np->left->desc.type) {
                    case PM_TYPE_32:
                        np->info->ivlist[k].value.d = (double)(np->left->info->ivlist[i].value.l - np->left->info->last_ivlist[j].value.l);
                        break;
                    case PM_TYPE_U32:
                        np->info->ivlist[k].value.d = (double)(np->left->info->ivlist[i].value.ul - np->left->info->last_ivlist[j].value.ul);
                        break;
                    case PM_TYPE_64:
                        np->info->ivlist[k].value.d = (double)(np->left->info->ivlist[i].value.ll - np->left->info->last_ivlist[j].value.ll);
                        break;
                    case PM_TYPE_U64:
                        np->info->ivlist[k].value.d = (double)(np->left->info->ivlist[i].value.ull - np->left->info->last_ivlist[j].value.ull);
                        break;
                    case PM_TYPE_FLOAT:
                        np->info->ivlist[k].value.d = (double)(np->left->info->ivlist[i].value.f - np->left->info->last_ivlist[j].value.f);
                        break;
                    case PM_TYPE_DOUBLE:
                        np->info->ivlist[k].value.d = np->left->info->ivlist[i].value.d - np->left->info->last_ivlist[j].value.d;
                        break;
                    default:
                        /*
                         * Nothing should end up here as check_expr() checks
                         * for numeric data type at bind time
                         */
                        return PM_ERR_CONV;
                }
                /*
                 * NOTE(review): there is no guard here for a zero
                 * interval between stamp and last_stamp -- confirm the
                 * callers never evaluate twice with the same timestamp
                 */
                np->info->ivlist[k].value.d /= __pmtimevalToReal(&stampdiff);
                /*
                 * check_expr() ensures dimTime is 0 or 1 at bind time
                 */
                if (np->left->desc.units.dimTime == 1) {
                    /* scale rate(time counter) -> time utilization */
                    if (np->info->time_scale < 0) {
                        /*
                         * one trip initialization for time utilization
                         * scaling factor (to scale metric from counter
                         * units into seconds); a negative time_scale is
                         * what marks "not computed yet"
                         */
                        /* this i shadows the outer loop index on purpose-or-not; the outer i is unaffected */
                        int i;
                        np->info->time_scale = 1;
                        if (np->left->desc.units.scaleTime > PM_TIME_SEC) {
                            /* one factor of 60 per scale step above seconds */
                            for (i = PM_TIME_SEC; i < np->left->desc.units.scaleTime; i++)
                                np->info->time_scale *= 60;
                        }
                        else {
                            /* one factor of 1/1000 per scale step below seconds */
                            for (i = np->left->desc.units.scaleTime; i < PM_TIME_SEC; i++)
                                np->info->time_scale /= 1000;
                        }
                    }
                    np->info->ivlist[k].value.d *= np->info->time_scale;
                }
            }
            k++;
        }
        /* only the instances that were paired up are reported */
        np->info->numval = k;
        return np->info->numval;
        break;

    case L_INSTANT:
        /*
         * values are in the left expr
         */
        np->info->last_stamp = np->info->stamp;
        np->info->stamp = rp->timestamp;
        np->info->numval = np->left->info->numval;
        /* the left operand's ivlist[] is shared by pointer, not copied */
        if (np->info->numval > 0)
            np->info->ivlist = np->left->info->ivlist;
        return np->info->numval;
        break;

    case L_AVG:
    case L_COUNT:
    case L_SUM:
    case L_MAX:
    case L_MIN:
        /* aggregates always produce one value, stored in ivlist[0] */
        if (np->info->ivlist == NULL) {
            /* initialize ivlist[] for singular instance first time through */
            if ((np->info->ivlist = (val_t *)malloc(sizeof(val_t))) == NULL) {
                __pmNoMem("eval_expr: aggr ivlist", sizeof(val_t), PM_FATAL_ERR);
                /*NOTREACHED*/
            }
            np->info->ivlist[0].inst = PM_IN_NULL;
        }
        /*
         * values are in the left expr
         */
        if (np->type == L_COUNT) {
            /* count() is simply the number of values in the operand */
            np->info->numval = 1;
            np->info->ivlist[0].value.l = np->left->info->numval;
        }
        else {
            np->info->numval = 1;
            /*
             * avg() and sum() start from zero; max() and min() are
             * seeded from the first operand value (i == 0) in the loop.
             * NOTE(review): when the operand has no values the loop body
             * never runs, so max()/min() still report numval 1 with
             * ivlist[0].value not assigned on this call -- confirm that
             * this is intended
             */
            if (np->type == L_AVG)
                np->info->ivlist[0].value.f = 0;
            else if (np->type == L_SUM) {
                switch (np->desc.type) {
                    case PM_TYPE_32:
                        np->info->ivlist[0].value.l = 0;
                        break;
                    case PM_TYPE_U32:
                        np->info->ivlist[0].value.ul = 0;
                        break;
                    case PM_TYPE_64:
                        np->info->ivlist[0].value.ll = 0;
                        break;
                    case PM_TYPE_U64:
                        np->info->ivlist[0].value.ull = 0;
                        break;
                    case PM_TYPE_FLOAT:
                        np->info->ivlist[0].value.f = 0;
                        break;
                    case PM_TYPE_DOUBLE:
                        np->info->ivlist[0].value.d = 0;
                        break;
                }
            }
            for (i = 0; i < np->left->info->numval; i++) {
                switch (np->type) {

                    case L_AVG:
                        /*
                         * the result is accumulated as a float; each
                         * operand value is converted to float and divided
                         * by the value count before being added
                         */
                        switch (np->left->desc.type) {
                            case PM_TYPE_32:
                                np->info->ivlist[0].value.f += (float)np->left->info->ivlist[i].value.l / np->left->info->numval;
                                break;
                            case PM_TYPE_U32:
                                np->info->ivlist[0].value.f += (float)np->left->info->ivlist[i].value.ul / np->left->info->numval;
                                break;
                            case PM_TYPE_64:
                                np->info->ivlist[0].value.f += (float)np->left->info->ivlist[i].value.ll / np->left->info->numval;
                                break;
                            case PM_TYPE_U64:
                                np->info->ivlist[0].value.f += (float)np->left->info->ivlist[i].value.ull / np->left->info->numval;
                                break;
                            case PM_TYPE_FLOAT:
                                np->info->ivlist[0].value.f += (float)np->left->info->ivlist[i].value.f / np->left->info->numval;
                                break;
                            case PM_TYPE_DOUBLE:
                                np->info->ivlist[0].value.f += (float)np->left->info->ivlist[i].value.d / np->left->info->numval;
                                break;
                            default:
                                /*
                                 * check_expr() checks for numeric data
                                 * type at bind time ... if here, botch!
                                 */
                                return PM_ERR_CONV;
                        }
                        break;

                    case L_MAX:
                        /* take the first value, then any larger one */
                        switch (np->desc.type) {
                            case PM_TYPE_32:
                                if (i == 0 || np->info->ivlist[0].value.l < np->left->info->ivlist[i].value.l)
                                    np->info->ivlist[0].value.l = np->left->info->ivlist[i].value.l;
                                break;
                            case PM_TYPE_U32:
                                if (i == 0 || np->info->ivlist[0].value.ul < np->left->info->ivlist[i].value.ul)
                                    np->info->ivlist[0].value.ul = np->left->info->ivlist[i].value.ul;
                                break;
                            case PM_TYPE_64:
                                if (i == 0 || np->info->ivlist[0].value.ll < np->left->info->ivlist[i].value.ll)
                                    np->info->ivlist[0].value.ll = np->left->info->ivlist[i].value.ll;
                                break;
                            case PM_TYPE_U64:
                                if (i == 0 || np->info->ivlist[0].value.ull < np->left->info->ivlist[i].value.ull)
                                    np->info->ivlist[0].value.ull = np->left->info->ivlist[i].value.ull;
                                break;
                            case PM_TYPE_FLOAT:
                                if (i == 0 || np->info->ivlist[0].value.f < np->left->info->ivlist[i].value.f)
                                    np->info->ivlist[0].value.f = np->left->info->ivlist[i].value.f;
                                break;
                            case PM_TYPE_DOUBLE:
                                if (i == 0 || np->info->ivlist[0].value.d < np->left->info->ivlist[i].value.d)
                                    np->info->ivlist[0].value.d = np->left->info->ivlist[i].value.d;
                                break;
                            default:
                                /*
                                 * check_expr() checks for numeric data
                                 * type at bind time ... if here, botch!
                                 */
                                return PM_ERR_CONV;
                        }
                        break;

                    case L_MIN:
                        /* take the first value, then any smaller one */
                        switch (np->desc.type) {
                            case PM_TYPE_32:
                                if (i == 0 || np->info->ivlist[0].value.l > np->left->info->ivlist[i].value.l)
                                    np->info->ivlist[0].value.l = np->left->info->ivlist[i].value.l;
                                break;
                            case PM_TYPE_U32:
                                if (i == 0 || np->info->ivlist[0].value.ul > np->left->info->ivlist[i].value.ul)
                                    np->info->ivlist[0].value.ul = np->left->info->ivlist[i].value.ul;
                                break;
                            case PM_TYPE_64:
                                if (i == 0 || np->info->ivlist[0].value.ll > np->left->info->ivlist[i].value.ll)
                                    np->info->ivlist[0].value.ll = np->left->info->ivlist[i].value.ll;
                                break;
                            case PM_TYPE_U64:
                                if (i == 0 || np->info->ivlist[0].value.ull > np->left->info->ivlist[i].value.ull)
                                    np->info->ivlist[0].value.ull = np->left->info->ivlist[i].value.ull;
                                break;
                            case PM_TYPE_FLOAT:
                                if (i == 0 || np->info->ivlist[0].value.f > np->left->info->ivlist[i].value.f)
                                    np->info->ivlist[0].value.f = np->left->info->ivlist[i].value.f;
                                break;
                            case PM_TYPE_DOUBLE:
                                if (i == 0 || np->info->ivlist[0].value.d > np->left->info->ivlist[i].value.d)
                                    np->info->ivlist[0].value.d = np->left->info->ivlist[i].value.d;
                                break;
                            default:
                                /*
                                 * check_expr() checks for numeric data
                                 * type at bind time ... if here, botch!
                                 */
                                return PM_ERR_CONV;
                        }
                        break;

                    case L_SUM:
                        /* accumulate in the result type (np->desc.type) */
                        switch (np->desc.type) {
                            case PM_TYPE_32:
                                np->info->ivlist[0].value.l += np->left->info->ivlist[i].value.l;
                                break;
                            case PM_TYPE_U32:
                                np->info->ivlist[0].value.ul += np->left->info->ivlist[i].value.ul;
                                break;
                            case PM_TYPE_64:
                                np->info->ivlist[0].value.ll += np->left->info->ivlist[i].value.ll;
                                break;
                            case PM_TYPE_U64:
                                np->info->ivlist[0].value.ull += np->left->info->ivlist[i].value.ull;
                                break;
                            case PM_TYPE_FLOAT:
                                np->info->ivlist[0].value.f += np->left->info->ivlist[i].value.f;
                                break;
                            case PM_TYPE_DOUBLE:
                                np->info->ivlist[0].value.d += np->left->info->ivlist[i].value.d;
                                break;
                            default:
                                /*
                                 * check_expr() checks for numeric data
                                 * type at bind time ... if here, botch!
                                 */
                                return PM_ERR_CONV;
                        }
                        break;

                }
            }
        }
        return np->info->numval;
        break;

    case L_NAME:
        /*
         * Extract instance-values from pmResult and store them in
         * ivlist[] as <int, pmAtomValue> pairs
         */
        for (j = 0; j < rp->numpmid; j++) {
            /* only the value set whose pmid matches this node is used */
            if (np->info->pmid == rp->vset[j]->pmid) {
                free_ivlist(np);
                np->info->numval = rp->vset[j]->numval;
                /* zero or negative counts are passed straight back */
                if (np->info->numval <= 0)
                    return np->info->numval;
                if ((np->info->ivlist = (val_t *)malloc(np->info->numval*sizeof(val_t))) == NULL) {
                    __pmNoMem("eval_expr: metric ivlist", np->info->numval*sizeof(val_t), PM_FATAL_ERR);
                    /*NOTREACHED*/
                }
                for (i = 0; i < np->info->numval; i++) {
                    np->info->ivlist[i].inst = rp->vset[j]->vlist[i].inst;
                    /* how the value is copied depends on the metric type */
                    switch (np->desc.type) {
                        case PM_TYPE_32:
                        case PM_TYPE_U32:
                            /* 32-bit values are read directly from lval */
                            np->info->ivlist[i].value.l = rp->vset[j]->vlist[i].value.lval;
                            break;

                        case PM_TYPE_64:
                        case PM_TYPE_U64:
                            /* 64-bit values are copied out of the value block buffer */
                            memcpy((void *)&np->info->ivlist[i].value.ll, (void *)rp->vset[j]->vlist[i].value.pval->vbuf, sizeof(__int64_t));
                            break;

                        case PM_TYPE_FLOAT:
                            if (rp->vset[j]->valfmt == PM_VAL_INSITU) {
                                /* old style insitu float */
                                np->info->ivlist[i].value.l = rp->vset[j]->vlist[i].value.lval;
                            }
                            else {
                                assert(rp->vset[j]->vlist[i].value.pval->vtype == PM_TYPE_FLOAT);
                                memcpy((void *)&np->info->ivlist[i].value.f, (void *)rp->vset[j]->vlist[i].value.pval->vbuf, sizeof(float));
                            }
                            break;

                        case PM_TYPE_DOUBLE:
                            memcpy((void *)&np->info->ivlist[i].value.d, (void *)rp->vset[j]->vlist[i].value.pval->vbuf, sizeof(double));
                            break;

                        case PM_TYPE_STRING:
                            /*
                             * copy the payload only: vlen less the header
                             * size.  NOTE(review): whether these bytes
                             * include a terminating NUL depends on how
                             * vlen was set by the producer -- confirm
                             * before treating value.cp as a C string
                             */
                            need = rp->vset[j]->vlist[i].value.pval->vlen-PM_VAL_HDR_SIZE;
                            if ((np->info->ivlist[i].value.cp = (char *)malloc(need)) == NULL) {
                                __pmNoMem("eval_expr: string value", rp->vset[j]->vlist[i].value.pval->vlen, PM_FATAL_ERR);
                                /*NOTREACHED*/
                            }
                            memcpy((void *)np->info->ivlist[i].value.cp, (void *)rp->vset[j]->vlist[i].value.pval->vbuf, need);
                            np->info->ivlist[i].vlen = need;
                            break;

                        case PM_TYPE_AGGREGATE:
                        case PM_TYPE_AGGREGATE_STATIC:
                        case PM_TYPE_EVENT:
                        case PM_TYPE_HIGHRES_EVENT:
                            /* the whole value block (vlen bytes from pval) is duplicated */
                            if ((np->info->ivlist[i].value.vbp = (pmValueBlock *)malloc(rp->vset[j]->vlist[i].value.pval->vlen)) == NULL) {
                                __pmNoMem("eval_expr: aggregate value", rp->vset[j]->vlist[i].value.pval->vlen, PM_FATAL_ERR);
                                /*NOTREACHED*/
                            }
                            memcpy(np->info->ivlist[i].value.vbp, (void *)rp->vset[j]->vlist[i].value.pval, rp->vset[j]->vlist[i].value.pval->vlen);
                            np->info->ivlist[i].vlen = rp->vset[j]->vlist[i].value.pval->vlen;
                            break;

                        default:
                            /*
                             * really only PM_TYPE_NOSUPPORT should
                             * end up here
                             */
                            return PM_ERR_TYPE;
                    }
                }
                return np->info->numval;
            }
        }
        /* falling out of the loop means no value set carried this pmid */
#ifdef PCP_DEBUG
        if (pmDebug & DBG_TRACE_DERIVE) {
            char strbuf[20];
            fprintf(stderr, "eval_expr: botch: operand %s not in the extended pmResult\n", pmIDStr_r(np->info->pmid, strbuf, sizeof(strbuf)));
            __pmDumpResult(stderr, rp);
        }
#endif
        return PM_ERR_PMID;

    case L_ANON:
        /* no values available for anonymous metrics */
        return 0;

    default:
        /*
         * binary operator cases ... always have a left and right
         * operand and no errors (these are caught earlier when the
         * recursive call on each of the operands may have
         * returned an error)
         */
        assert(np->left != NULL);
        assert(np->right != NULL);

        free_ivlist(np);
        /*
         * empty result cases first
         */
        if (np->left->info->numval == 0) {
            np->info->numval = 0;
            return np->info->numval;
        }
        if (np->right->info->numval == 0) {
            np->info->numval = 0;
            return np->info->numval;
        }
        /*
         * really got some work to do ...
         *
         * an operand with indom PM_INDOM_NULL is applied to every
         * instance of the other operand, so the other operand sets
         * the result size
         */
        if (np->left->desc.indom == PM_INDOM_NULL)
            np->info->numval = np->right->info->numval;
        else if (np->right->desc.indom == PM_INDOM_NULL)
            np->info->numval = np->left->info->numval;
        else {
            /*
             * Generally have the same number of instances because
             * both operands are over the same instance domain,
             * fetched with the same profile.  When not the case,
             * the result can contain no more instances than in
             * the smaller of the operands.
             */
            if (np->left->info->numval <= np->right->info->numval)
                np->info->numval = np->left->info->numval;
            else
                np->info->numval = np->right->info->numval;
        }
        if ((np->info->ivlist = (val_t *)malloc(np->info->numval*sizeof(val_t))) == NULL) {
            __pmNoMem("eval_expr: expr ivlist", np->info->numval*sizeof(val_t), PM_FATAL_ERR);
            /*NOTREACHED*/
        }
        /*
         * ivlist[k] = left-ivlist[i] <op> right-ivlist[j]
         *
         * k is advanced only when a result is stored; i and j are
         * advanced inside the loop body
         */
        for (i = j = k = 0; k < np->info->numval; ) {
            if (i >= np->left->info->numval || j >= np->right->info->numval) {
                /* run out of operand instances, quit */
                np->info->numval = k;
                break;
            }
            /* instances only need pairing up when both operands have an indom */
            if (np->left->desc.indom != PM_INDOM_NULL &&
                np->right->desc.indom != PM_INDOM_NULL) {
                if (np->left->info->ivlist[i].inst != np->right->info->ivlist[j].inst) {
                    /* left ith inst != right jth inst ... search in right */
#ifdef PCP_DEBUG
                    if ((pmDebug & DBG_TRACE_DERIVE) && (pmDebug & DBG_TRACE_APPL2)) {
                        fprintf(stderr, "eval_expr: inst[%d] mismatch left [%d]=%d right [%d]=%d\n", k, i, np->left->info->ivlist[i].inst, j, np->right->info->ivlist[j].inst);
                    }
#endif
                    for (j = 0; j < np->right->info->numval; j++) {
                        if (np->left->info->ivlist[i].inst == np->right->info->ivlist[j].inst)
                            break;
                    }
                    if (j == np->right->info->numval) {
                        /*
                         * no match, so next instance on left operand,
                         * and reset to start from first instance of
                         * right operand
                         */
                        i++;
                        j = 0;
                        continue;
                    }
#ifdef PCP_DEBUG
                    else {
                        if ((pmDebug & DBG_TRACE_DERIVE) && (pmDebug & DBG_TRACE_APPL2)) {
                            fprintf(stderr, "eval_expr: recover @ right [%d]=%d\n", j, np->right->info->ivlist[j].inst);
                        }
                    }
#endif
                }
            }
            /*
             * bin_op() receives the result type, the operator, and for
             * each operand its value, type and the two scale factors
             */
            np->info->ivlist[k].value =
                bin_op(np->desc.type, np->type,
                       np->left->info->ivlist[i].value, np->left->desc.type, np->left->info->mul_scale, np->left->info->div_scale,
                       np->right->info->ivlist[j].value, np->right->desc.type, np->right->info->mul_scale, np->right->info->div_scale);
            /* the result takes its instance id from the left operand, unless the left has no indom */
            if (np->left->desc.indom != PM_INDOM_NULL)
                np->info->ivlist[k].inst = np->left->info->ivlist[i].inst;
            else
                np->info->ivlist[k].inst = np->right->info->ivlist[j].inst;
            k++;
            /* an operand without an indom keeps its index fixed */
            if (np->left->desc.indom != PM_INDOM_NULL) {
                i++;
                if (np->right->desc.indom != PM_INDOM_NULL) {
                    j++;
                    if (j >= np->right->info->numval) {
                        /* rescan if need be */
                        j = 0;
                    }
                }
            }
            else if (np->right->desc.indom != PM_INDOM_NULL) {
                j++;
            }
        }
        return np->info->numval;
    }
    /*NOTREACHED*/
}