void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, void *ivoid, void *ovoid, const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out) { const dt_iop_colorout_data_t *const d = (dt_iop_colorout_data_t *)piece->data; const int ch = piece->colors; const int gamutcheck = (d->mode == DT_PROFILE_GAMUTCHECK); if(!isnan(d->cmatrix[0])) { // fprintf(stderr,"Using cmatrix codepath\n"); // convert to rgb using matrix #ifdef _OPENMP #pragma omp parallel for schedule(static) default(none) shared(roi_in, roi_out, ivoid, ovoid) #endif for(int j = 0; j < roi_out->height; j++) { float *in = (float *)ivoid + (size_t)ch * roi_in->width * j; float *out = (float *)ovoid + (size_t)ch * roi_out->width * j; const __m128 m0 = _mm_set_ps(0.0f, d->cmatrix[6], d->cmatrix[3], d->cmatrix[0]); const __m128 m1 = _mm_set_ps(0.0f, d->cmatrix[7], d->cmatrix[4], d->cmatrix[1]); const __m128 m2 = _mm_set_ps(0.0f, d->cmatrix[8], d->cmatrix[5], d->cmatrix[2]); for(int i = 0; i < roi_out->width; i++, in += ch, out += ch) { const __m128 xyz = dt_Lab_to_XYZ_SSE(_mm_load_ps(in)); const __m128 t = _mm_add_ps(_mm_mul_ps(m0, _mm_shuffle_ps(xyz, xyz, _MM_SHUFFLE(0, 0, 0, 0))), _mm_add_ps(_mm_mul_ps(m1, _mm_shuffle_ps(xyz, xyz, _MM_SHUFFLE(1, 1, 1, 1))), _mm_mul_ps(m2, _mm_shuffle_ps(xyz, xyz, _MM_SHUFFLE(2, 2, 2, 2))))); _mm_stream_ps(out, t); } } _mm_sfence(); // apply profile #ifdef _OPENMP #pragma omp parallel for schedule(static) default(none) shared(roi_in, roi_out, ivoid, ovoid) #endif for(int j = 0; j < roi_out->height; j++) { float *in = (float *)ivoid + (size_t)ch * roi_in->width * j; float *out = (float *)ovoid + (size_t)ch * roi_out->width * j; for(int i = 0; i < roi_out->width; i++, in += ch, out += ch) { for(int i = 0; i < 3; i++) if(d->lut[i][0] >= 0.0f) { out[i] = (out[i] < 1.0f) ? lerp_lut(d->lut[i], out[i]) : dt_iop_eval_exp(d->unbounded_coeffs[i], out[i]); } } } } else { // fprintf(stderr,"Using xform codepath\n"); const __m128 outofgamutpixel = _mm_set_ps(0.0f, 1.0f, 1.0f, 0.0f); #ifdef _OPENMP #pragma omp parallel for schedule(static) default(none) shared(ivoid, ovoid, roi_out) #endif for(int k = 0; k < roi_out->height; k++) { const float *in = ((float *)ivoid) + (size_t)ch * k * roi_out->width; float *out = ((float *)ovoid) + (size_t)ch * k * roi_out->width; if(!gamutcheck) { cmsDoTransform(d->xform, in, out, roi_out->width); } else { void *rgb = dt_alloc_align(16, 4 * sizeof(float) * roi_out->width); cmsDoTransform(d->xform, in, rgb, roi_out->width); float *rgbptr = (float *)rgb; for(int j = 0; j < roi_out->width; j++, rgbptr += 4, out += 4) { const __m128 pixel = _mm_load_ps(rgbptr); __m128 ingamut = _mm_cmplt_ps(pixel, _mm_set_ps(-FLT_MAX, 0.0f, 0.0f, 0.0f)); ingamut = _mm_or_ps(_mm_unpacklo_ps(ingamut, ingamut), _mm_unpackhi_ps(ingamut, ingamut)); ingamut = _mm_or_ps(_mm_unpacklo_ps(ingamut, ingamut), _mm_unpackhi_ps(ingamut, ingamut)); const __m128 result = _mm_or_ps(_mm_and_ps(ingamut, outofgamutpixel), _mm_andnot_ps(ingamut, pixel)); _mm_stream_ps(out, result); } dt_free_align(rgb); } } _mm_sfence(); } if(piece->pipe->mask_display) dt_iop_alpha_copy(ivoid, ovoid, roi_out->width, roi_out->height); }
void process (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, void *ivoid, void *ovoid, const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out) { const dt_iop_colorout_data_t *const d = (dt_iop_colorout_data_t *)piece->data; const int ch = piece->colors; const int gamutcheck = (d->softproof_enabled == DT_SOFTPROOF_GAMUTCHECK); if(!isnan(d->cmatrix[0])) { //fprintf(stderr,"Using cmatrix codepath\n"); // convert to rgb using matrix #ifdef _OPENMP #pragma omp parallel for schedule(static) default(none) shared(roi_in,roi_out, ivoid, ovoid) #endif for(int j=0; j<roi_out->height; j++) { float *in = (float*)ivoid + ch*roi_in->width *j; float *out = (float*)ovoid + ch*roi_out->width*j; const __m128 m0 = _mm_set_ps(0.0f,d->cmatrix[6],d->cmatrix[3],d->cmatrix[0]); const __m128 m1 = _mm_set_ps(0.0f,d->cmatrix[7],d->cmatrix[4],d->cmatrix[1]); const __m128 m2 = _mm_set_ps(0.0f,d->cmatrix[8],d->cmatrix[5],d->cmatrix[2]); for(int i=0; i<roi_out->width; i++, in+=ch, out+=ch ) { const __m128 xyz = dt_Lab_to_XYZ_SSE(_mm_load_ps(in)); const __m128 t = _mm_add_ps(_mm_mul_ps(m0,_mm_shuffle_ps(xyz,xyz,_MM_SHUFFLE(0,0,0,0))),_mm_add_ps(_mm_mul_ps(m1,_mm_shuffle_ps(xyz,xyz,_MM_SHUFFLE(1,1,1,1))),_mm_mul_ps(m2,_mm_shuffle_ps(xyz,xyz,_MM_SHUFFLE(2,2,2,2))))); _mm_stream_ps(out,t); } } _mm_sfence(); // apply profile #ifdef _OPENMP #pragma omp parallel for schedule(static) default(none) shared(roi_in,roi_out, ivoid, ovoid) #endif for(int j=0; j<roi_out->height; j++) { float *in = (float*)ivoid + ch*roi_in->width *j; float *out = (float*)ovoid + ch*roi_out->width*j; for(int i=0; i<roi_out->width; i++, in+=ch, out+=ch ) { for(int i=0; i<3; i++) if (d->lut[i][0] >= 0.0f) { out[i] = (out[i] < 1.0f) ? lerp_lut(d->lut[i], out[i]) : dt_iop_eval_exp(d->unbounded_coeffs[i], out[i]); } } } } else { float *in = (float*)ivoid; float *out = (float*)ovoid; const int rowsize=roi_out->width * 3; //fprintf(stderr,"Using xform codepath\n"); #ifdef _OPENMP #pragma omp parallel for schedule(static) default(none) shared(out, roi_out, in) #endif for (int k=0; k<roi_out->height; k++) { float Lab[rowsize]; float rgb[rowsize]; const int m=(k*(roi_out->width*ch)); for (int l=0; l<roi_out->width; l++) { int li=3*l,ii=ch*l; Lab[li+0] = in[m+ii+0]; Lab[li+1] = in[m+ii+1]; Lab[li+2] = in[m+ii+2]; } cmsDoTransform (d->xform, Lab, rgb, roi_out->width); for (int l=0; l<roi_out->width; l++) { int oi=ch*l, ri=3*l; if(gamutcheck && (rgb[ri+0] < 0.0f || rgb[ri+1] < 0.0f || rgb[ri+2] < 0.0f)) { out[m+oi+0] = 0.0f; out[m+oi+1] = 1.0f; out[m+oi+2] = 1.0f; } else { out[m+oi+0] = rgb[ri+0]; out[m+oi+1] = rgb[ri+1]; out[m+oi+2] = rgb[ri+2]; } } } } if(piece->pipe->mask_display) dt_iop_alpha_copy(ivoid, ovoid, roi_out->width, roi_out->height); }
void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const void *const ivoid, void *const ovoid, const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out) { const dt_iop_colorout_data_t *const d = (dt_iop_colorout_data_t *)piece->data; const int ch = piece->colors; const int gamutcheck = (d->mode == DT_PROFILE_GAMUTCHECK); if(!isnan(d->cmatrix[0])) { // fprintf(stderr,"Using cmatrix codepath\n"); // convert to rgb using matrix #ifdef _OPENMP #pragma omp parallel for schedule(static) default(none) #endif for(int j = 0; j < roi_out->height; j++) { float *in = (float *)ivoid + (size_t)ch * roi_in->width * j; float *out = (float *)ovoid + (size_t)ch * roi_out->width * j; const __m128 m0 = _mm_set_ps(0.0f, d->cmatrix[6], d->cmatrix[3], d->cmatrix[0]); const __m128 m1 = _mm_set_ps(0.0f, d->cmatrix[7], d->cmatrix[4], d->cmatrix[1]); const __m128 m2 = _mm_set_ps(0.0f, d->cmatrix[8], d->cmatrix[5], d->cmatrix[2]); for(int i = 0; i < roi_out->width; i++, in += ch, out += ch) { const __m128 xyz = dt_Lab_to_XYZ_SSE(_mm_load_ps(in)); const __m128 t = _mm_add_ps(_mm_mul_ps(m0, _mm_shuffle_ps(xyz, xyz, _MM_SHUFFLE(0, 0, 0, 0))), _mm_add_ps(_mm_mul_ps(m1, _mm_shuffle_ps(xyz, xyz, _MM_SHUFFLE(1, 1, 1, 1))), _mm_mul_ps(m2, _mm_shuffle_ps(xyz, xyz, _MM_SHUFFLE(2, 2, 2, 2))))); _mm_stream_ps(out, t); } } _mm_sfence(); process_fastpath_apply_tonecurves(self, piece, ivoid, ovoid, roi_in, roi_out); } else { // fprintf(stderr,"Using xform codepath\n"); const __m128 outofgamutpixel = _mm_set_ps(0.0f, 1.0f, 1.0f, 0.0f); #ifdef _OPENMP #pragma omp parallel for schedule(static) default(none) #endif for(int k = 0; k < roi_out->height; k++) { const float *in = ((float *)ivoid) + (size_t)ch * k * roi_out->width; float *out = ((float *)ovoid) + (size_t)ch * k * roi_out->width; cmsDoTransform(d->xform, in, out, roi_out->width); if(gamutcheck) { for(int j = 0; j < roi_out->width; j++, out += 4) { const __m128 pixel = _mm_load_ps(out); __m128 ingamut = _mm_cmplt_ps(pixel, _mm_set_ps(-FLT_MAX, 0.0f, 0.0f, 0.0f)); ingamut = _mm_or_ps(_mm_unpacklo_ps(ingamut, ingamut), _mm_unpackhi_ps(ingamut, ingamut)); ingamut = _mm_or_ps(_mm_unpacklo_ps(ingamut, ingamut), _mm_unpackhi_ps(ingamut, ingamut)); const __m128 result = _mm_or_ps(_mm_and_ps(ingamut, outofgamutpixel), _mm_andnot_ps(ingamut, pixel)); _mm_stream_ps(out, result); } } } _mm_sfence(); } if(piece->pipe->mask_display) dt_iop_alpha_copy(ivoid, ovoid, roi_out->width, roi_out->height); }