/** 1D simplex noise with derivative. * If the last argument is not null, the analytic derivative * is also calculated. */ float sdnoise1( float x, float *dnoise_dx) { int i0 = FASTFLOOR(x); int i1 = i0 + 1; float x0 = x - i0; float x1 = x0 - 1.0f; float gx0, gx1; float n0, n1; float t20, t40, t21, t41; float x20 = x0*x0; float t0 = 1.0f - x20; // if(t0 < 0.0f) t0 = 0.0f; // Never happens for 1D: x0<=1 always t20 = t0 * t0; t40 = t20 * t20; grad1(perm[i0 & 0xff], &gx0); n0 = t40 * gx0 * x0; float x21 = x1*x1; float t1 = 1.0f - x21; // if(t1 < 0.0f) t1 = 0.0f; // Never happens for 1D: |x1|<=1 always t21 = t1 * t1; t41 = t21 * t21; grad1(perm[i1 & 0xff], &gx1); n1 = t41 * gx1 * x1; /* Compute derivative according to: * *dnoise_dx = -8.0f * t20 * t0 * x0 * (gx0 * x0) + t40 * gx0; * *dnoise_dx += -8.0f * t21 * t1 * x1 * (gx1 * x1) + t41 * gx1; */ *dnoise_dx = t20 * t0 * gx0 * x20; *dnoise_dx += t21 * t1 * gx1 * x21; *dnoise_dx *= -8.0f; *dnoise_dx += t40 * gx0 + t41 * gx1; *dnoise_dx *= 0.25f; /* Scale derivative to match the noise scaling */ // The maximum value of this noise is 8*(3/4)^4 = 2.53125 // A factor of 0.395 would scale to fit exactly within [-1,1], but // to better match classic Perlin noise, we scale it down some more. return 0.25f * (n0 + n1); }
// compute gradient magnitude and orientation at each location (uses sse) void gradMag( float *I, float *M, float *O, int h, int w, int d, bool full ) { int x, y, y1, c, h4, s; float *Gx, *Gy, *M2; __m128 *_Gx, *_Gy, *_M2, _m; float *acost = acosTable(), acMult = 10000.0f; // allocate memory for storing one column of output (padded so h4%4==0) h4 = (h % 4 == 0) ? h : h - (h % 4) + 4; s = d * h4 * sizeof(float); M2 = (float*) alMalloc(s, 16); _M2 = (__m128*) M2; Gx = (float*) alMalloc(s, 16); _Gx = (__m128*) Gx; Gy = (float*) alMalloc(s, 16); _Gy = (__m128*) Gy; // compute gradient magnitude and orientation for each column for ( x = 0; x < w; x++ ) { // compute gradients (Gx, Gy) with maximum squared magnitude (M2) for (c = 0; c < d; c++) { grad1( I + x * h + c * w * h, Gx + c * h4, Gy + c * h4, h, w, x ); for ( y = 0; y < h4 / 4; y++ ) { y1 = h4 / 4 * c + y; _M2[y1] = ADD(MUL(_Gx[y1], _Gx[y1]), MUL(_Gy[y1], _Gy[y1])); if ( c == 0 ) { continue; } _m = CMPGT( _M2[y1], _M2[y] ); _M2[y] = OR( AND(_m, _M2[y1]), ANDNOT(_m, _M2[y]) ); _Gx[y] = OR( AND(_m, _Gx[y1]), ANDNOT(_m, _Gx[y]) ); _Gy[y] = OR( AND(_m, _Gy[y1]), ANDNOT(_m, _Gy[y]) ); } } // compute gradient mangitude (M) and normalize Gx for ( y = 0; y < h4 / 4; y++ ) { _m = MINsse( RCPSQRT(_M2[y]), SET(1e10f) ); _M2[y] = RCP(_m); if (O) { _Gx[y] = MUL( MUL(_Gx[y], _m), SET(acMult) ); } if (O) { _Gx[y] = XOR( _Gx[y], AND(_Gy[y], SET(-0.f)) ); } }; memcpy( M + x * h, M2, h * sizeof(float) ); // compute and store gradient orientation (O) via table lookup if ( O != 0 ) for ( y = 0; y < h; y++ ) { O[x * h + y] = acost[(int)Gx[y]]; } if ( O != 0 && full ) { y1 = ((~size_t(O + x * h) + 1) & 15) / 4; y = 0; for ( ; y < y1; y++ ) { O[y + x * h] += (Gy[y] < 0) * PI; } for ( ; y < h - 4; y += 4 ) STRu( O[y + x * h], ADD( LDu(O[y + x * h]), AND(CMPLT(LDu(Gy[y]), SET(0.f)), SET(PI)) ) ); for ( ; y < h; y++ ) { O[y + x * h] += (Gy[y] < 0) * PI; } } } alFree(Gx); alFree(Gy); alFree(M2); }
// Paints the scene in this order: optional saucer pixmap, ships, shots
// (brush and pen chosen by shot level), the gun, and finally a translucent
// white overlay when no lives are left.
void GameWidget::paintEvent(QPaintEvent *e)
{
    QPainter painter(this);

    if (m_bShowSoser)
        painter.drawPixmap(m_soserX, 0, m_soser);

    for (TShips::const_iterator ship = m_ships.constBegin();
         ship != m_ships.constEnd(); ++ship)
    {
        painter.drawPixmap(ship->x, ship->y, *ship->pix);
    }

    // Level-1 shots: white-to-black radial gradient with a black outline.
    QRadialGradient levelOneFill(0, 0, 7);
    levelOneFill.setColorAt(1, QColor(0,0,0));
    levelOneFill.setColorAt(0, QColor(255,255,255));
    QPen levelOnePen(QColor(0,0,0));

    // Any other level: pale-red-to-red radial gradient with a red outline.
    QRadialGradient otherFill(0, 0, 7);
    otherFill.setColorAt(1, QColor(255,0,0));
    otherFill.setColorAt(0, QColor(255,200,200));
    QPen otherPen(QColor(255,0,0));

    painter.setPen(levelOnePen);
    painter.setBrush(QBrush(levelOneFill));

    // Pen and brush are switched only when the level differs from the
    // previously drawn shot.
    int currentLevel = 1;
    for (TShots::const_iterator shot = m_shots.constBegin();
         shot != m_shots.constEnd(); ++shot)
    {
        const int cx = shot->x;
        const int cy = shot->y;
        const int level = shot->level;

        if (level != currentLevel)
        {
            if (level == 1)
            {
                painter.setPen(levelOnePen);
                painter.setBrush(levelOneFill);
            }
            else
            {
                painter.setPen(otherPen);
                painter.setBrush(otherFill);
            }
            currentLevel = level;
        }

        painter.setBrushOrigin(cx, cy);
        painter.drawEllipse(cx - 7, cy - 7, 14, 14);
    }

    painter.drawPixmap(m_gunX-32, m_gunY, PicBucket::instance().getPic(1, 2).pixmap);
    m_shotspreg = QRegion();

    if (m_lives != 0)
        return;

    // No lives left: wash the widget out, then let QLabel paint on top.
    painter.setPen(QPen(Qt::NoPen));
    painter.setBrush(QBrush(QColor(255,255,255,188)));
    painter.drawRect(0, 0, sizeX, sizeY);
    QLabel::paintEvent(e);
}
/* 1D simplex noise: sum of the contributions of the two lattice points
 * surrounding x, scaled by 0.25.
 */
GLfloat
_slang_library_noise1 (GLfloat x)
{
   int left = FASTFLOOR(x);
   int right = left + 1;
   float dleft = x - left;          /* offset from the left lattice point */
   float dright = dleft - 1.0f;     /* offset from the right lattice point */
   float fall_right = 1.0f - dright*dright;
   float contrib_left, contrib_right;
   float fall_left = 1.0f - dleft*dleft;

   /* if(fall_left < 0.0f) fall_left = 0.0f;  -- this never happens for the 1D case */
   fall_left *= fall_left;
   contrib_left = fall_left * fall_left * grad1(perm[left & 0xff], dleft);

   /* if(fall_right < 0.0f) fall_right = 0.0f;  -- this never happens for the 1D case */
   fall_right *= fall_right;
   contrib_right = fall_right * fall_right * grad1(perm[right & 0xff], dright);

   /* The maximum value of this noise is 8*(3/4)^4 = 2.53125 */
   /* A factor of 0.395 would scale to fit exactly within [-1,1], but */
   /* we want to match PRMan's 1D noise, so we scale it down some more. */
   return 0.25f * (contrib_left + contrib_right);
}
// 1D simplex noise float snoise1(float x) { int i0 = FASTFLOOR(x); int i1 = i0 + 1; float x0 = x - i0; float x1 = x0 - 1.0f; float n0, n1; float t0 = 1.0f - x0*x0; // if(t0 < 0.0f) t0 = 0.0f; // this never happens for the 1D case t0 *= t0; n0 = t0 * t0 * grad1(perm[i0 & 0xff], x0); float t1 = 1.0f - x1*x1; // if(t1 < 0.0f) t1 = 0.0f; // this never happens for the 1D case t1 *= t1; n1 = t1 * t1 * grad1(perm[i1 & 0xff], x1); // The maximum value of this noise is 8*(3/4)^4 = 2.53125 // A factor of 0.395 would scale to fit exactly within [-1,1], but // The algorithm isn't perfect, as it is assymetric. The correction will normalize the result to the interval [-1,1], but the average will be off by 3%. return (n0 + n1 + 0.076368899f) / 2.45488110001f; }
// Fills the widget with two stacked vertical gradients derived from the
// controller's main colour: one painted over the whole widget, then one
// painted over the lower half.
void MusicControl::paintEvent(QPaintEvent *)
{
    QPainter painter(this);
    const QColor base = m_c->mainColor();

    // Whole widget: lighter(110) at the top edge to darker(130) at mid-height.
    QLinearGradient upper(0, 0, 0, height()/2);
    upper.setColorAt(0, base.lighter(110));
    upper.setColorAt(1, base.darker(130));
    painter.fillRect(0, 0, width(), height(), upper);

    // Lower half: lighter(60) at mid-height to the plain colour at the bottom.
    QLinearGradient lower(0, height()/2, 0, height());
    lower.setColorAt(0, base.lighter(60));
    lower.setColorAt(1, base);
    painter.fillRect(0, height()/2, width(), height()/2, lower);
}
void RadialGradient::doPainting() { QPainter painter(this); int h = height(); int w = width(); QRadialGradient grad1(w/2, h/2, 80); grad1.setColorAt(0, QColor("#032E91")); grad1.setColorAt(0.3, Qt::white); grad1.setColorAt(1, QColor("#032E91")); painter.fillRect(0, 0, w, h, grad1); }
// 1D simplex noise float snoise1(snoise_permtable* permtable, float x) { int i0 = FASTFLOOR(x); int i1 = i0 + 1; float x0 = x - i0; float x1 = x0 - 1.0f; float n0, n1; float t0 = 1.0f - x0*x0; // if(t0 < 0.0f) t0 = 0.0f; t0 *= t0; n0 = t0 * t0 * grad1(permtable->perm[i0 & 0xff], x0); float t1 = 1.0f - x1*x1; // if(t1 < 0.0f) t1 = 0.0f; t1 *= t1; n1 = t1 * t1 * grad1(permtable->perm[i1 & 0xff], x1); // The maximum value of this noise is 8*(3/4)^4 = 2.53125 // A factor of 0.395 would scale to fit exactly within [-1,1], but // we want to match PRMan's 1D noise, so we scale it down some more. return 0.25f * (n0 + n1); }
void Bouncer::updateItem() { const Vector& center = m_rect->position(); const Vector dir = m_rect->position2() - m_rect->position1(); const float a = m_rect->width(); const float b = dir.length() / 2.0f; m_item->setPos(center.x, center.y); m_item->setRect(-b, -a, 2.0f * b, 2.0f * a); m_item->setTransform(QTransform()); m_item->rotate(dir.angle() / M_PI * 180.0f); m_item2->setPos(center.x, center.y); m_item2->setRect(-b, -a, 2.0f * b, 2.0f * a); m_item2->setTransform(QTransform()); m_item2->rotate(dir.angle() / M_PI * 180.0f); m_itemCap1->setPos(m_rect->position1().x, m_rect->position1().y); m_itemCap1->setRect(-a, -a, 2.0f * a, 2.0f * a); m_itemCap2->setPos(m_rect->position2().x, m_rect->position2().y); m_itemCap2->setRect(-a, -a, 2.0f * a, 2.0f * a); QLinearGradient grad(-b, -a, -b, a); grad.setColorAt(0.0f, QColor(255, 116, 0, 0)); grad.setColorAt(0.4f, QColor(255, 116, 0, 255)); grad.setColorAt(0.6f, QColor(255, 116, 0, 255)); grad.setColorAt(1.0f, QColor(255, 116, 0, 0)); m_item->setBrush(grad); QLinearGradient gradRect2(-b, -a, -b, a); gradRect2.setColorAt(0.0f, QColor(255, 116, 0, 0)); gradRect2.setColorAt(0.4f, QColor(255, 116, 0, 0)); gradRect2.setColorAt(0.5f, QColor(255, 255, 255, 255)); gradRect2.setColorAt(0.6f, QColor(255, 116, 0, 0)); gradRect2.setColorAt(1.0f, QColor(255, 116, 0, 0)); m_item2->setBrush(gradRect2); QRadialGradient grad1(0,0,a,0,0); grad1.setColorAt(0.0f, QColor(255, 255, 255, 255)); grad1.setColorAt(0.8f, QColor(255, 116, 0, 255)); grad1.setColorAt(1.0f, QColor(255, 116, 0, 0)); m_itemCap1->setBrush(grad1); m_itemCap2->setBrush(grad1); }
/**
   Render a soft outline around the edge of the TableZone.

   One gradient object (bounding-box coordinates, GRADIENT_COLOR at stop 0
   and GRADIENT_COLORLESS at stop 1) is re-aimed for each of the four edge
   strips in turn.

   @param painter QPainter object
 */
void TableZone::paintZoneOutline(QPainter *painter)
{
    QLinearGradient edgeFade(0, 0, 0, 1);
    edgeFade.setCoordinateMode(QGradient::ObjectBoundingMode);
    edgeFade.setColorAt(0, GRADIENT_COLOR);
    edgeFade.setColorAt(1, GRADIENT_COLORLESS);

    // Top strip: gradient runs from (0,0) to (0,1).
    const QRectF topStrip(0, 0, width, BOX_LINE_WIDTH);
    painter->fillRect(topStrip, QBrush(edgeFade));

    // Left strip: gradient runs from (0,0) to (1,0).
    edgeFade.setStart(0, 0);
    edgeFade.setFinalStop(1, 0);
    const QRectF leftStrip(0, 0, BOX_LINE_WIDTH, height);
    painter->fillRect(leftStrip, QBrush(edgeFade));

    // Bottom strip: gradient runs from (0,1) to (0,0).
    edgeFade.setStart(0, 1);
    edgeFade.setFinalStop(0, 0);
    const QRectF bottomStrip(0, height - BOX_LINE_WIDTH, width, BOX_LINE_WIDTH);
    painter->fillRect(bottomStrip, QBrush(edgeFade));

    // Right strip: gradient runs from (1,0) to (0,0).
    edgeFade.setStart(1, 0);
    edgeFade.setFinalStop(0, 0);
    const QRectF rightStrip(width - BOX_LINE_WIDTH, 0, BOX_LINE_WIDTH, height);
    painter->fillRect(rightStrip, QBrush(edgeFade));
}
// Draws the gauge needle as two shaded triangles sharing the long axis,
// outlines them in black, and finishes with a small hub ellipse at the
// origin.
void caCircularGauge::drawNeedle(QPainter *p)
{
    // Needle direction in radians, derived from the current value's
    // position between the minimum and maximum.
    double angle = (m_startAngle-(m_value-m_minValue)/(m_maxValue-m_minValue)*m_arcLength)*3.1415927/180.0;
    const double cosA = cos(angle);
    const double sinA = sin(angle);

    const QPointF tip(m_outerRadius*cosA, -m_outerRadius*sinA);
    const QPointF tail(-2*cosA, 2*sinA);
    const QPointF flankA(-2*sinA, -2*cosA);
    const QPointF flankB = -flankA;

    QPolygonF halfA, halfB;
    halfA << tip << flankA << tail;
    halfB << tip << flankB << tail;

    // Each half gets its own radial gradient with the focal point pulled
    // towards that half's flank.
    QRadialGradient shadeA(QPointF(0,0), m_outerRadius, flankA*.5);
    shadeA.setColorAt(0.0, palette().color(QPalette::Mid));
    shadeA.setColorAt(1.0, palette().color(QPalette::Dark));

    QRadialGradient shadeB(QPointF(0,0), m_outerRadius, flankB*.5);
    shadeB.setColorAt(0.0, palette().color(QPalette::Midlight));
    shadeB.setColorAt(1.0, palette().color(QPalette::Dark));

    p->setPen(Qt::NoPen);
    p->setBrush(shadeA);
    p->drawPolygon(halfA);
    p->setBrush(shadeB);
    p->drawPolygon(halfB);

    // Black outline around the needle.
    QPen outline(Qt::black);
    outline.setJoinStyle(Qt::RoundJoin);
    p->setPen(outline);
    p->drawLine(tip, flankA);
    p->drawLine(flankA, tail);
    p->drawLine(tail, flankB);
    p->drawLine(flankB, tip);

    // Hub.
    p->setBrush(palette().color(QPalette::Dark));
    p->drawEllipse(QRectF(-.5,-.5,1,1));
}
// compute x and y gradients at each location (uses sse) void grad2( float *I, float *Gx, float *Gy, int h, int w, int d ) { int o, x, c, a=w*h; for(c=0; c<d; c++) for(x=0; x<w; x++) { o=c*a+x*h; grad1( I+o, Gx+o, Gy+o, h, w, x ); } }
// Processes the batches [range.start, range.end). Batch si covers the
// sample rows [si*dcount0, min((si+1)*dcount0, count)). For each batch it
// runs a forward pass, accumulates the weighted squared error into *pE and
// adds the weight gradients into dEdw; both shared accumulators are
// updated under ann->mtx.
void operator()( const Range& range ) const
{
    double inv_count = 1./inputs.rows;
    int ivcount = ann->layer_sizes.front();
    int ovcount = ann->layer_sizes.back();
    int itype = inputs.type(), otype = outputs.type();
    int count = inputs.rows;
    int i, j, k, l_count = ann->layer_count();
    vector<vector<double> > x(l_count);
    vector<vector<double> > df(l_count);
    // two ping-pong gradient buffers, each large enough for one full batch
    vector<double> _buf(ann->max_lsize*dcount0*2);
    double* buf[] = { &_buf[0], &_buf[ann->max_lsize*dcount0] };
    double E = 0;

    for( i = 0; i < l_count; i++ )
    {
        x[i].resize(ann->layer_sizes[i]*dcount0);
        df[i].resize(ann->layer_sizes[i]*dcount0);
    }

    for( int si = range.start; si < range.end; si++ )
    {
        // i0 is the first sample row of this batch, dcount its size
        int i0 = si*dcount0, i1 = std::min((si + 1)*dcount0, count);
        int dcount = i1 - i0;
        const double* w = ann->weights[0].ptr<double>();

        // grab and preprocess input data
        for( i = 0; i < dcount; i++ )
        {
            const uchar* x0data_p = inputs.ptr(i0 + i);
            const float* x0data_f = (const float*)x0data_p;
            const double* x0data_d = (const double*)x0data_p;
            double* xdata = &x[0][i*ivcount];
            for( j = 0; j < ivcount; j++ )
                xdata[j] = (itype == CV_32F ? (double)x0data_f[j] : x0data_d[j])*w[j*2] + w[j*2+1];
        }
        Mat x1(dcount, ivcount, CV_64F, &x[0][0]);

        // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
        for( i = 1; i < l_count; i++ )
        {
            Mat x2( dcount, ann->layer_sizes[i], CV_64F, &x[i][0] );
            Mat _w = ann->weights[i].rowRange(0, x1.cols);
            gemm( x1, _w, 1, noArray(), 0, x2 );
            Mat _df( x2.size(), CV_64F, &df[i][0] );
            ann->calc_activ_func_deriv( x2, _df, ann->weights[i] );
            x1 = x2;
        }

        Mat grad1(dcount, ovcount, CV_64F, buf[l_count & 1]);
        w = ann->weights[l_count+1].ptr<double>();

        // calculate error
        for( i = 0; i < dcount; i++ )
        {
            const uchar* udata_p = outputs.ptr(i0+i);
            const float* udata_f = (const float*)udata_p;
            const double* udata_d = (const double*)udata_p;
            const double* xdata = &x[l_count-1][i*ovcount];
            double* gdata = grad1.ptr<double>(i);
            // The sample weight is looked up with the same row index
            // (i0 + i) that selects this sample's input and output rows;
            // si is the batch index, not a sample index.
            double sweight = sw ? sw[i0+i] : inv_count, E1 = 0;

            for( j = 0; j < ovcount; j++ )
            {
                double t = (otype == CV_32F ? (double)udata_f[j] : udata_d[j])*w[j*2] + w[j*2+1] - xdata[j];
                gdata[j] = t*sweight;
                E1 += t*t;
            }
            E += sweight*E1;
        }

        // backward pass: accumulate dEdw layer by layer, last layer first
        for( i = l_count-1; i > 0; i-- )
        {
            int n1 = ann->layer_sizes[i-1], n2 = ann->layer_sizes[i];
            Mat _df(dcount, n2, CV_64F, &df[i][0]);
            multiply(grad1, _df, grad1);

            {
                AutoLock lock(ann->mtx);
                Mat _dEdw = dEdw->at(i).rowRange(0, n1);
                x1 = Mat(dcount, n1, CV_64F, &x[i-1][0]);
                gemm(x1, grad1, 1, _dEdw, 1, _dEdw, GEMM_1_T);

                // update bias part of dEdw
                double* dst = dEdw->at(i).ptr<double>(n1);
                for( k = 0; k < dcount; k++ )
                {
                    const double* src = grad1.ptr<double>(k);
                    for( j = 0; j < n2; j++ )
                        dst[j] += src[j];
                }
            }

            // propagate the gradient to the previous layer (not needed
            // for the input layer)
            Mat grad2( dcount, n1, CV_64F, buf[i&1] );
            if( i > 1 )
            {
                Mat _w = ann->weights[i].rowRange(0, n1);
                gemm(grad1, _w, 1, noArray(), 0, grad2, GEMM_2_T);
            }
            grad1 = grad2;
        }
    }

    {
        AutoLock lock(ann->mtx);
        *pE += E;
    }
}
int train_backprop( const Mat& inputs, const Mat& outputs, const Mat& _sw, TermCriteria termCrit ) { int i, j, k; double prev_E = DBL_MAX*0.5, E = 0; int itype = inputs.type(), otype = outputs.type(); int count = inputs.rows; int iter = -1, max_iter = termCrit.maxCount*count; double epsilon = termCrit.epsilon*count; int l_count = layer_count(); int ivcount = layer_sizes[0]; int ovcount = layer_sizes.back(); // allocate buffers vector<vector<double> > x(l_count); vector<vector<double> > df(l_count); vector<Mat> dw(l_count); for( i = 0; i < l_count; i++ ) { int n = layer_sizes[i]; x[i].resize(n+1); df[i].resize(n); dw[i] = Mat::zeros(weights[i].size(), CV_64F); } Mat _idx_m(1, count, CV_32S); int* _idx = _idx_m.ptr<int>(); for( i = 0; i < count; i++ ) _idx[i] = i; AutoBuffer<double> _buf(max_lsize*2); double* buf[] = { _buf, (double*)_buf + max_lsize }; const double* sw = _sw.empty() ? 0 : _sw.ptr<double>(); // run back-propagation loop /* y_i = w_i*x_{i-1} x_i = f(y_i) E = 1/2*||u - x_N||^2 grad_N = (x_N - u)*f'(y_i) dw_i(t) = momentum*dw_i(t-1) + dw_scale*x_{i-1}*grad_i w_i(t+1) = w_i(t) + dw_i(t) grad_{i-1} = w_i^t*grad_i */ for( iter = 0; iter < max_iter; iter++ ) { int idx = iter % count; double sweight = sw ? count*sw[idx] : 1.; if( idx == 0 ) { //printf("%d. E = %g\n", iter/count, E); if( fabs(prev_E - E) < epsilon ) break; prev_E = E; E = 0; // shuffle indices for( i = 0; i < count; i++ ) { j = rng.uniform(0, count); k = rng.uniform(0, count); std::swap(_idx[j], _idx[k]); } } idx = _idx[idx]; const uchar* x0data_p = inputs.ptr(idx); const float* x0data_f = (const float*)x0data_p; const double* x0data_d = (const double*)x0data_p; double* w = weights[0].ptr<double>(); for( j = 0; j < ivcount; j++ ) x[0][j] = (itype == CV_32F ? 
(double)x0data_f[j] : x0data_d[j])*w[j*2] + w[j*2 + 1]; Mat x1( 1, ivcount, CV_64F, &x[0][0] ); // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i]) for( i = 1; i < l_count; i++ ) { int n = layer_sizes[i]; Mat x2(1, n, CV_64F, &x[i][0] ); Mat _w = weights[i].rowRange(0, x1.cols); gemm(x1, _w, 1, noArray(), 0, x2); Mat _df(1, n, CV_64F, &df[i][0] ); calc_activ_func_deriv( x2, _df, weights[i] ); x1 = x2; } Mat grad1( 1, ovcount, CV_64F, buf[l_count&1] ); w = weights[l_count+1].ptr<double>(); // calculate error const uchar* udata_p = outputs.ptr(idx); const float* udata_f = (const float*)udata_p; const double* udata_d = (const double*)udata_p; double* gdata = grad1.ptr<double>(); for( k = 0; k < ovcount; k++ ) { double t = (otype == CV_32F ? (double)udata_f[k] : udata_d[k])*w[k*2] + w[k*2+1] - x[l_count-1][k]; gdata[k] = t*sweight; E += t*t; } E *= sweight; // backward pass, update weights for( i = l_count-1; i > 0; i-- ) { int n1 = layer_sizes[i-1], n2 = layer_sizes[i]; Mat _df(1, n2, CV_64F, &df[i][0]); multiply( grad1, _df, grad1 ); Mat _x(n1+1, 1, CV_64F, &x[i-1][0]); x[i-1][n1] = 1.; gemm( _x, grad1, params.bpDWScale, dw[i], params.bpMomentScale, dw[i] ); add( weights[i], dw[i], weights[i] ); if( i > 1 ) { Mat grad2(1, n1, CV_64F, buf[i&1]); Mat _w = weights[i].rowRange(0, n1); gemm( grad1, _w, 1, noArray(), 0, grad2, GEMM_2_T ); grad1 = grad2; } } } iter /= count; return iter; }
void pcl::people::HOG::gradMag( float *I, int h, int w, int d, float *M, float *O ) const { #if defined(__SSE2__) int x, y, y1, c, h4, s; float *Gx, *Gy, *M2; __m128 *_Gx, *_Gy, *_M2, _m; float *acost = acosTable(), acMult=25000/2.02f; // allocate memory for storing one column of output (padded so h4%4==0) h4=(h%4==0) ? h : h-(h%4)+4; s=d*h4*sizeof(float); M2=(float*) alMalloc(s,16); _M2=(__m128*) M2; Gx=(float*) alMalloc(s,16); _Gx=(__m128*) Gx; Gy=(float*) alMalloc(s,16); _Gy=(__m128*) Gy; // compute gradient magnitude and orientation for each column for( x=0; x<w; x++ ) { // compute gradients (Gx, Gy) and squared magnitude (M2) for each channel for( c=0; c<d; c++ ) grad1( I+x*h+c*w*h, Gx+c*h4, Gy+c*h4, h, w, x ); for( y=0; y<d*h4/4; y++ ) _M2[y]=pcl::sse_add(pcl::sse_mul(_Gx[y],_Gx[y]),pcl::sse_mul(_Gy[y],_Gy[y])); // store gradients with maximum response in the first channel for(c=1; c<d; c++) { for( y=0; y<h4/4; y++ ) { y1=h4/4*c+y; _m = pcl::sse_cmpgt( _M2[y1], _M2[y] ); _M2[y] = pcl::sse_or( pcl::sse_and(_m,_M2[y1]), pcl::sse_andnot(_m,_M2[y]) ); _Gx[y] = pcl::sse_or( pcl::sse_and(_m,_Gx[y1]), pcl::sse_andnot(_m,_Gx[y]) ); _Gy[y] = pcl::sse_or( pcl::sse_and(_m,_Gy[y1]), pcl::sse_andnot(_m,_Gy[y]) ); } } // compute gradient magnitude (M) and normalize Gx for( y=0; y<h4/4; y++ ) { _m = pcl::sse_min( pcl::sse_rcpsqrt(_M2[y]), pcl::sse_set(1e10f) ); _M2[y] = pcl::sse_rcp(_m); _Gx[y] = pcl::sse_mul( pcl::sse_mul(_Gx[y],_m), pcl::sse_set(acMult) ); _Gx[y] = pcl::sse_xor( _Gx[y], pcl::sse_and(_Gy[y], pcl::sse_set(-0.f)) ); }; memcpy( M+x*h, M2, h*sizeof(float) ); // compute and store gradient orientation (O) via table lookup if(O!=0) for( y=0; y<h; y++ ) O[x*h+y] = acost[(int)Gx[y]]; } alFree(Gx); alFree(Gy); alFree(M2); #else int x, y, y1, c, h4, s; float *Gx, *Gy, *M2; float *acost = acosTable(), acMult=25000/2.02f; // allocate memory for storing one column of output (padded so h4%4==0) h4=(h%4==0) ? 
h : h-(h%4)+4; s=d*h4*sizeof(float); M2=(float*) alMalloc(s,16); Gx=(float*) alMalloc(s,16); Gy=(float*) alMalloc(s,16); float m; // compute gradient magnitude and orientation for each column for( x=0; x<w; x++ ) { // compute gradients (Gx, Gy) and squared magnitude (M2) for each channel for( c=0; c<d; c++ ) grad1( I+x*h+c*w*h, Gx+c*h4, Gy+c*h4, h, w, x ); for( y=0; y<d*h4; y++ ) { M2[y] = Gx[y] * Gx[y] + Gy[y] * Gy[y]; } // store gradients with maximum response in the first channel for(c=1; c<d; c++) { for( y=0; y<h4/4; y++ ) { y1=h4/4*c+y; for (int ii = 0; ii < 4; ++ii) { if (M2[y1 * 4 + ii] > M2[y * 4 + ii]) { M2[y * 4 + ii] = M2[y1 * 4 + ii]; Gx[y * 4 + ii] = Gx[y1 * 4 + ii]; Gy[y * 4 + ii] = Gy[y1 * 4 + ii]; } } } } // compute gradient magnitude (M) and normalize Gx for( y=0; y<h4; y++ ) { m = 1.0f/sqrtf(M2[y]); m = m < 1e10f ? m : 1e10f; M2[y] = 1.0f / m; Gx[y] = ((Gx[y] * m) * acMult); if (Gy[y] < 0) Gx[y] = -Gx[y]; } memcpy( M+x*h, M2, h*sizeof(float) ); // compute and store gradient orientation (O) via table lookup if(O!=0) for( y=0; y<h; y++ ) O[x*h+y] = acost[(int)Gx[y]]; } alFree(Gx); alFree(Gy); alFree(M2); #endif }
/** * @param inarray Input array. * @param outarray Output array. * @param time Current simulation time. * @param lambda Timestep. */ void Bidomain::DoImplicitSolve( const Array<OneD, const Array<OneD, NekDouble> >&inarray, Array<OneD, Array<OneD, NekDouble> >&outarray, const NekDouble time, const NekDouble lambda) { int nvariables = inarray.num_elements(); int nq = m_fields[0]->GetNpoints(); Array<OneD, NekDouble> grad0(nq), grad1(nq), grad2(nq), grad(nq); Array<OneD, NekDouble> ggrad0(nq), ggrad1(nq), ggrad2(nq), ggrad(nq), temp(nq); // We solve ( \sigma\nabla^2 - HHlambda ) Y[i] = rhs [i] // inarray = input: \hat{rhs} -> output: \hat{Y} // outarray = output: nabla^2 \hat{Y} // where \hat = modal coeffs for (int i = 0; i < nvariables; ++i) { // Only apply diffusion to first variable. if (i > 1) { Vmath::Vcopy(nq, &inarray[i][0], 1, &outarray[i][0], 1); continue; } if (i == 0) { StdRegions::ConstFactorMap factors; factors[StdRegions::eFactorLambda] = (1.0/lambda)*(m_capMembrane*m_chi); if (m_spacedim==1) { // Take first partial derivative m_fields[i]->PhysDeriv(inarray[1],ggrad0); // Take second partial derivative m_fields[i]->PhysDeriv(0,ggrad0,ggrad0); // Multiply by Intracellular-Conductivity if (m_session->DefinesFunction("IntracellularConductivity") && m_session->DefinesFunction("ExtracellularConductivity")) { Vmath::Smul(nq, m_session->GetParameter("sigmaix"), ggrad0, 1, ggrad0, 1); } // Add partial derivatives together Vmath::Vcopy(nq, ggrad0, 1, ggrad, 1); Vmath::Smul(nq, -1.0, ggrad, 1, ggrad, 1); // Multiply 1.0/timestep/lambda Vmath::Smul(nq, -factors[StdRegions::eFactorLambda], inarray[i], 1, temp, 1); Vmath::Vadd(nq, ggrad, 1, temp, 1, m_fields[i]->UpdatePhys(), 1); // Solve a system of equations with Helmholtz solver and transform // back into physical space. 
m_fields[i]->HelmSolve(m_fields[i]->GetPhys(), m_fields[i]->UpdateCoeffs(),NullFlagList,factors); m_fields[i]->BwdTrans( m_fields[i]->GetCoeffs(), m_fields[i]->UpdatePhys()); m_fields[i]->SetPhysState(true); // Copy the solution vector (required as m_fields must be set). outarray[i] = m_fields[i]->GetPhys(); } if (m_spacedim==2) { // Take first partial derivative m_fields[i]->PhysDeriv(inarray[1],ggrad0,ggrad1); // Take second partial derivative m_fields[i]->PhysDeriv(0,ggrad0,ggrad0); m_fields[i]->PhysDeriv(1,ggrad1,ggrad1); // Multiply by Intracellular-Conductivity if (m_session->DefinesFunction("IntracellularConductivity") && m_session->DefinesFunction("ExtracellularConductivity")) { Vmath::Smul(nq, m_session->GetParameter("sigmaix"), ggrad0, 1, ggrad0, 1); Vmath::Smul(nq, m_session->GetParameter("sigmaiy"), ggrad1, 1, ggrad1, 1); } // Add partial derivatives together Vmath::Vadd(nq, ggrad0, 1, ggrad1, 1, ggrad, 1); Vmath::Smul(nq, -1.0, ggrad, 1, ggrad, 1); // Multiply 1.0/timestep/lambda Vmath::Smul(nq, -factors[StdRegions::eFactorLambda], inarray[i], 1, temp, 1); Vmath::Vadd(nq, ggrad, 1, temp, 1, m_fields[i]->UpdatePhys(), 1); // Solve a system of equations with Helmholtz solver and transform // back into physical space. m_fields[i]->HelmSolve(m_fields[i]->GetPhys(), m_fields[i]->UpdateCoeffs(),NullFlagList,factors,m_vardiffi); m_fields[i]->BwdTrans( m_fields[i]->GetCoeffs(), m_fields[i]->UpdatePhys()); m_fields[i]->SetPhysState(true); // Copy the solution vector (required as m_fields must be set). 
outarray[i] = m_fields[i]->GetPhys(); } if (m_spacedim==3) { // Take first partial derivative m_fields[i]->PhysDeriv(inarray[1],ggrad0,ggrad1,ggrad2); // Take second partial derivative m_fields[i]->PhysDeriv(0,ggrad0,ggrad0); m_fields[i]->PhysDeriv(1,ggrad1,ggrad1); m_fields[i]->PhysDeriv(2,ggrad2,ggrad2); // Multiply by Intracellular-Conductivity if (m_session->DefinesFunction("IntracellularConductivity") && m_session->DefinesFunction("ExtracellularConductivity")) { Vmath::Smul(nq, m_session->GetParameter("sigmaix"), ggrad0, 1, ggrad0, 1); Vmath::Smul(nq, m_session->GetParameter("sigmaiy"), ggrad1, 1, ggrad1, 1); Vmath::Smul(nq, m_session->GetParameter("sigmaiz"), ggrad2, 1, ggrad2, 1); } // Add partial derivatives together Vmath::Vadd(nq, ggrad0, 1, ggrad1, 1, ggrad, 1); Vmath::Vadd(nq, ggrad2, 1, ggrad, 1, ggrad, 1); Vmath::Smul(nq, -1.0, ggrad, 1, ggrad, 1); // Multiply 1.0/timestep/lambda Vmath::Smul(nq, -factors[StdRegions::eFactorLambda], inarray[i], 1, temp, 1); Vmath::Vadd(nq, ggrad, 1, temp, 1, m_fields[i]->UpdatePhys(), 1); // Solve a system of equations with Helmholtz solver and transform // back into physical space. m_fields[i]->HelmSolve(m_fields[i]->GetPhys(), m_fields[i]->UpdateCoeffs(),NullFlagList,factors,m_vardiffi); m_fields[i]->BwdTrans( m_fields[i]->GetCoeffs(), m_fields[i]->UpdatePhys()); m_fields[i]->SetPhysState(true); // Copy the solution vector (required as m_fields must be set). 
outarray[i] = m_fields[i]->GetPhys(); } } if (i == 1) { StdRegions::ConstFactorMap factors; factors[StdRegions::eFactorLambda] = 0.0; if (m_spacedim==1) { // Take first partial derivative m_fields[i]->PhysDeriv(m_fields[0]->UpdatePhys(),grad0); // Take second derivative m_fields[i]->PhysDeriv(0,grad0,grad0); // Multiply by Intracellular-Conductivity if (m_session->DefinesFunction("IntracellularConductivity") && m_session->DefinesFunction("ExtracellularConductivity")) { Vmath::Smul(nq, m_session->GetParameter("sigmaix"), grad0, 1, grad0, 1); } // and sum terms Vmath::Vcopy(nq, grad0, 1, grad, 1); Vmath::Smul(nq, (-1.0*m_session->GetParameter("sigmaix"))/(m_session->GetParameter("sigmaix")+m_session->GetParameter("sigmaix")), grad, 1, grad, 1); // Now solve Poisson problem for \phi_e m_fields[i]->SetPhys(grad); m_fields[i]->HelmSolve(m_fields[i]->GetPhys(), m_fields[i]->UpdateCoeffs(), NullFlagList, factors); m_fields[i]->BwdTrans( m_fields[i]->GetCoeffs(), m_fields[i]->UpdatePhys()); m_fields[i]->SetPhysState(true); // Copy the solution vector (required as m_fields must be set). 
outarray[i] = m_fields[i]->GetPhys(); } if (m_spacedim==2) { // Take first partial derivative m_fields[i]->PhysDeriv(m_fields[0]->UpdatePhys(),grad0,grad1); // Take second derivative m_fields[i]->PhysDeriv(0,grad0,grad0); m_fields[i]->PhysDeriv(1,grad1,grad1); // Multiply by Intracellular-Conductivity if (m_session->DefinesFunction("IntracellularConductivity") && m_session->DefinesFunction("ExtracellularConductivity")) { Vmath::Smul(nq, m_session->GetParameter("sigmaix"), grad0, 1, grad0, 1); Vmath::Smul(nq, m_session->GetParameter("sigmaiy"), grad1, 1, grad1, 1); } // and sum terms Vmath::Vadd(nq, grad0, 1, grad1, 1, grad, 1); Vmath::Smul(nq, -1.0, grad, 1, grad, 1); // Now solve Poisson problem for \phi_e m_fields[i]->SetPhys(grad); m_fields[i]->HelmSolve(m_fields[i]->GetPhys(), m_fields[i]->UpdateCoeffs(), NullFlagList, factors, m_vardiffie); m_fields[i]->BwdTrans( m_fields[i]->GetCoeffs(), m_fields[i]->UpdatePhys()); m_fields[i]->SetPhysState(true); // Copy the solution vector (required as m_fields must be set). 
outarray[i] = m_fields[i]->GetPhys(); } if (m_spacedim==3) { // Take first partial derivative m_fields[i]->PhysDeriv(m_fields[0]->UpdatePhys(),grad0,grad1,grad2); // Take second derivative m_fields[i]->PhysDeriv(0,grad0,grad0); m_fields[i]->PhysDeriv(1,grad1,grad1); m_fields[i]->PhysDeriv(2,grad2,grad2); // Multiply by Intracellular-Conductivity if (m_session->DefinesFunction("IntracellularConductivity") && m_session->DefinesFunction("ExtracellularConductivity")) { Vmath::Smul(nq, m_session->GetParameter("sigmaix"), grad0, 1, grad0, 1); Vmath::Smul(nq, m_session->GetParameter("sigmaiy"), grad1, 1, grad1, 1); Vmath::Smul(nq, m_session->GetParameter("sigmaiz"), grad2, 1, grad2, 1); } // and sum terms Vmath::Vadd(nq, grad0, 1, grad1, 1, grad, 1); Vmath::Vadd(nq, grad2, 1, grad, 1, grad, 1); Vmath::Smul(nq, -1.0, grad, 1, grad, 1); // Now solve Poisson problem for \phi_e m_fields[i]->SetPhys(grad); m_fields[i]->HelmSolve(m_fields[i]->GetPhys(), m_fields[i]->UpdateCoeffs(), NullFlagList, factors, m_vardiffie); m_fields[i]->BwdTrans( m_fields[i]->GetCoeffs(), m_fields[i]->UpdatePhys()); m_fields[i]->SetPhysState(true); // Copy the solution vector (required as m_fields must be set). outarray[i] = m_fields[i]->GetPhys(); } } } }