/*
 * Process the next piece of an ImageType 1 image.
 *
 * info       the image enumerator; it is cast to gx_image_enum here.
 * planes     per-plane source descriptors; this function reads the
 *            data, data_x and raster members of each entry.
 * height     number of source rows offered by the caller.  At most
 *            (penum->rect.h - penum->y) of them are consumed.
 * rows_used  receives the number of rows by which penum->y advanced
 *            during this call.
 *
 * Returns 0 if height is 0 or if rows remain after this call
 * (penum->y < penum->rect.h); returns the negative code from the render
 * procedure if rendering a row failed; otherwise returns the result of
 * gx_image1_flush, with any non-negative result replaced by 1.
 */
int
gx_image1_plane_data(gx_image_enum_common_t * info,
                     const gx_image_plane_t * planes, int height,
                     int *rows_used)
{
    gx_image_enum *penum = (gx_image_enum *) info;
    gx_device *dev;
    /* Row index on entry; used at the end to compute *rows_used. */
    const int y = penum->y;
    int y_end = min(y + height, penum->rect.h);
    int width_spp = penum->rect.w * penum->spp;
    int num_planes = penum->num_planes;
    int num_components_per_plane = 1;

#define BCOUNT(plane)		/* bytes per data row */\
  (((penum->rect.w + (plane).data_x) * penum->spp * penum->bps / num_planes\
    + 7) >> 3)

    fixed adjust = penum->adjust;
    /* Running byte offset into each plane's data, advanced by raster per row. */
    ulong offsets[GS_IMAGE_MAX_COMPONENTS];
    /* Receives the data_x result of unpacking planes other than the first. */
    int ignore_data_x;
    bool bit_planar = penum->num_planes > penum->spp;
    int code;

    if (height == 0) {
        *rows_used = 0;
        return 0;
    }
    dev = setup_image_device(penum);

    /* Now render complete rows. */

    if (penum->used.y) {
        /*
         * Processing was interrupted by an error.  Skip over rows
         * already processed.
         */
        int px;

        for (px = 0; px < num_planes; ++px)
            offsets[px] = planes[px].raster * penum->used.y;
        penum->used.y = 0;
    } else
        memset(offsets, 0, num_planes * sizeof(offsets[0]));
    if (num_planes == 1 && penum->plane_depths[0] != penum->bps) {
        /* A single plane with multiple components. */
        num_components_per_plane = penum->plane_depths[0] / penum->bps;
    }
    for (; penum->y < y_end; penum->y++) {
        int px;
        const byte *buffer;
        int sourcex;
        /* Pixels of this row already rendered before an earlier interruption. */
        int x_used = penum->used.x;

        if (bit_planar) {
            /* Repack the bit planes into byte-wide samples. */

            buffer = penum->buffer;
            sourcex = 0;
            for (px = 0; px < num_planes; px += penum->bps)
                repack_bit_planes(planes, offsets, penum->bps, penum->buffer,
                                  penum->rect.w, &penum->map[px].table,
                                  penum->spread);
            for (px = 0; px < num_planes; ++px)
                offsets[px] += planes[px].raster;
        } else {
            /*
             * Normally, we unpack the data into the buffer, but if
             * there is only one plane and we don't need to expand the
             * input samples, we may use the data directly.
             */
            sourcex = planes[0].data_x;
            buffer =
                (*penum->unpack)(penum->buffer, &sourcex,
                                 planes[0].data + offsets[0],
                                 planes[0].data_x, BCOUNT(planes[0]),
                                 &penum->map[0], penum->spread,
                                 num_components_per_plane);

            offsets[0] += planes[0].raster;
            /*
             * The remaining planes are unpacked into the same buffer,
             * each starting (px << log2_xbytes) bytes in; their return
             * value and data_x result are not used.
             */
            for (px = 1; px < num_planes; ++px) {
                (*penum->unpack)(penum->buffer + (px << penum->log2_xbytes),
                                 &ignore_data_x,
                                 planes[px].data + offsets[px],
                                 planes[px].data_x, BCOUNT(planes[px]),
                                 &penum->map[px], penum->spread, 1);
                offsets[px] += planes[px].raster;
            }
        }
#ifdef DEBUG
        /*
         * NOTE(review): this prints y, the row index captured on entry,
         * not penum->y, so every row of a multi-row call logs the same
         * value.  Confirm whether penum->y was intended.
         */
        if (gs_debug_c('b'))
            dprintf1("[b]image1 y=%d\n", y);
        if (gs_debug_c('B')) {
            int i, n = width_spp;

            if (penum->bps > 8)
                n *= 2;
            else if (penum->bps == 1 && penum->unpack_bps == 8)
                n = (n + 7) / 8;
            dlputs("[B]row:");
            for (i = 0; i < n; i++)
                dprintf1(" %02x", buffer[i]);
            dputs("\n");
        }
#endif
        /*
         * Record the row DDA position for this row, then step the DDA,
         * so that dda_current(row) below yields the next row's position.
         */
        penum->cur.x = dda_current(penum->dda.row.x);
        dda_next(penum->dda.row.x);
        penum->cur.y = dda_current(penum->dda.row.y);
        dda_next(penum->dda.row.y);
        if (!penum->interpolate)
            switch (penum->posture) {
                case image_portrait:
                    {
                        /*
                         * Precompute integer y and height,
                         * and check for clipping.
                         */
                        fixed yc = penum->cur.y,
                            yn = dda_current(penum->dda.row.y);

                        /* Order the pair so that yc <= yn. */
                        if (yn < yc) {
                            fixed temp = yn;

                            yn = yc;
                            yc = temp;
                        }
                        yc -= adjust;
                        if (yc >= penum->clip_outer.q.y)
                            goto mt;
                        yn += adjust;
                        if (yn <= penum->clip_outer.p.y)
                            goto mt;
                        penum->yci = fixed2int_pixround_perfect(yc);
                        penum->hci = fixed2int_pixround_perfect(yn) - penum->yci;
                        /* Zero height: skip rendering this row. */
                        if (penum->hci == 0)
                            goto mt;
                        if_debug2('b', "[b]yci=%d, hci=%d\n",
                                  penum->yci, penum->hci);
                    }
                    break;
                case image_landscape:
                    {
                        /* Check for no pixel centers in x. */
                        fixed xc = penum->cur.x,
                            xn = dda_current(penum->dda.row.x);

                        /* Order the pair so that xc <= xn. */
                        if (xn < xc) {
                            fixed temp = xn;

                            xn = xc;
                            xc = temp;
                        }
                        xc -= adjust;
                        if (xc >= penum->clip_outer.q.x)
                            goto mt;
                        xn += adjust;
                        if (xn <= penum->clip_outer.p.x)
                            goto mt;
                        penum->xci = fixed2int_pixround_perfect(xc);
                        penum->wci = fixed2int_pixround_perfect(xn) - penum->xci;
                        /* Zero width: skip rendering this row. */
                        if (penum->wci == 0)
                            goto mt;
                        if_debug2('b', "[b]xci=%d, wci=%d\n",
                                  penum->xci, penum->wci);
                    }
                    break;
                case image_skewed:
                    ;
            }
        update_strip(penum);
        if (x_used) {
            /*
             * Processing was interrupted by an error.  Skip over pixels
             * already processed.
             */
            dda_advance(penum->dda.pixel0.x, x_used);
            dda_advance(penum->dda.pixel0.y, x_used);
            penum->used.x = 0;
        }
        if_debug2('b', "[b]pixel0 x=%g, y=%g\n",
                  fixed2float(dda_current(penum->dda.pixel0.x)),
                  fixed2float(dda_current(penum->dda.pixel0.y)));
        /* Render one row, starting after any pixels already consumed. */
        code = (*penum->render)(penum, buffer, sourcex + x_used,
                                width_spp - x_used * penum->spp, 1, dev);
        if (code < 0) {
            /* Error or interrupt, restore original state. */
            penum->used.x += x_used;
            if (!penum->used.y) {
                dda_previous(penum->dda.row.x);
                dda_previous(penum->dda.row.y);
                dda_translate(penum->dda.strip.x,
                              penum->prev.x - penum->cur.x);
                dda_translate(penum->dda.strip.y,
                              penum->prev.y - penum->cur.y);
            }
            /*
             * Leaving here bypasses the loop increment, so the failed
             * row is not included in *rows_used.
             */
            goto out;
        }
        penum->prev = penum->cur;
      mt:;
    }
    if (penum->y < penum->rect.h) {
        code = 0;
    } else {
        /* End of input data.  Render any left-over buffered data. */
        code = gx_image1_flush(info);
        if (code >= 0)
            code = 1;
    }
out:
    /* Note that caller must call end_image */
    /* for both error and normal termination. */
    *rows_used = penum->y - y;
    return code;
}
/*
 * Prepare an image enumerator for threshold-based halftoning.
 *
 * If the device must halftone, a threshold array is constructed for
 * every component of penum->pis->dev_ht.  The contone line buffer
 * (penum->line), the threshold buffer (penum->thresh_buffer) and the
 * halftone bit buffer (penum->ht_buffer) are then allocated from
 * penum->memory, and line_size, ht_stride, ht_offset_bits, ht_landscape
 * and dxx are initialised.  The layout differs between landscape
 * images and everything else (the "portrait" path below).
 *
 * Returns a non-negative code on success.  Returns the rethrown error
 * from gx_ht_construct_threshold if threshold construction fails, and
 * -1 if halftoning is required but no device halftone is available, if
 * any allocation fails, or if the computed per-row device height is not
 * positive.  On a -1 return caused by allocation failure, buffers that
 * were successfully allocated remain stored in penum.
 * NOTE(review): presumably the enumerator's owner releases them --
 * confirm against the end-of-image cleanup.
 */
int
gxht_thresh_image_init(gx_image_enum *penum)
{
    int code = 0;
    fixed ox;
    int temp;
    int dev_width, max_height;
    int spp_out;
    int k;
    gx_ht_order *d_order;

    if (gx_device_must_halftone(penum->dev)) {
        if (penum->pis != NULL && penum->pis->dev_ht != NULL) {
            for (k = 0; k < penum->pis->dev_ht->num_comp; k++) {
                d_order = &(penum->pis->dev_ht->components[k].corder);
                code = gx_ht_construct_threshold(d_order, penum->dev,
                                                 penum->pis, k);
                if (code < 0) {
                    return gs_rethrow(code, "threshold creation failed");
                }
            }
        } else {
            return -1;
        }
    }
    spp_out = penum->dev->color_info.num_components;
    /* If the image is landscaped then we want to maintain a buffer
       that is sufficiently large so that we can hold a byte
       of halftoned data along the column.  This way we avoid doing
       multiple writes into the same position over and over.
       The size of the buffer we need depends upon the bitdepth of
       the output device, the number of device colorants and the
       number of colorants in the source space.  Note we will
       need to eventually consider the multi-level halftone case
       here too.  For now, to make use of the SSE2 stuff, we would
       like to have 16 bytes of data to process at a time.  So we
       will collect the columns of data in a buffer that is 16 wide.
       We will also keep track of the widths of each column.  When
       the total width count reaches 16, we will create our
       threshold array and apply it.  We may have one column that is
       buffered between calls in this case.  Also if a call is made
       with h=0 we will flush the buffer as we are at the end of the
       data.  */
    if (penum->posture == image_landscape) {
        int col_length =
            fixed2int_var_rounded(any_abs(penum->x_extent.y)) * spp_out;

        /*
         * Round the column length up to a multiple of 16.  This is done
         * in integer arithmetic: going through float cannot represent
         * every int above 2^24 and could round the stride down.
         */
        temp = (col_length + 15) / 16;
        penum->line_size = temp * 16;  /* The stride */
        /* Now we need at most 16 of these */
        penum->line = gs_alloc_bytes(penum->memory,
                                     16 * penum->line_size + 16,
                                     "gxht_thresh");
        /* Same with this */
        penum->thresh_buffer = gs_alloc_bytes(penum->memory,
                                              penum->line_size * 16 + 16,
                                              "gxht_thresh");
        /* That maps into 2 bytes of Halftone data */
        penum->ht_buffer = gs_alloc_bytes(penum->memory,
                                          penum->line_size * 2,
                                          "gxht_thresh");
        penum->ht_stride = penum->line_size;
        if (penum->line == NULL || penum->thresh_buffer == NULL
                    || penum->ht_buffer == NULL)
            return -1;
        penum->ht_landscape.count = 0;
        penum->ht_landscape.num_contones = 0;
        if (penum->y_extent.x < 0) {
            /* Going right to left */
            penum->ht_landscape.curr_pos = 15;
            penum->ht_landscape.index = -1;
        } else {
            /* Going left to right */
            penum->ht_landscape.curr_pos = 0;
            penum->ht_landscape.index = 1;
        }
        if (penum->x_extent.y < 0) {
            penum->ht_landscape.flipy = true;
            penum->ht_landscape.y_pos =
                fixed2int_pixround_perfect(dda_current(penum->dda.pixel0.y) +
                                           penum->x_extent.y);
        } else {
            penum->ht_landscape.flipy = false;
            penum->ht_landscape.y_pos =
                fixed2int_pixround_perfect(dda_current(penum->dda.pixel0.y));
        }
        memset(&(penum->ht_landscape.widths[0]), 0, sizeof(int)*16);
        penum->ht_landscape.offset_set = false;
        penum->ht_offset_bits = 0; /* Will get set in call to render */
#if defined(DEBUG) || defined(PACIFY_VALGRIND)
        memset(penum->line, 0, 16 * penum->line_size + 16);
        memset(penum->ht_buffer, 0, penum->line_size * 2);
        memset(penum->thresh_buffer, 0, 16 * penum->line_size + 16);
#endif
    } else {
        /* In the portrait case we allocate a single line buffer
           in device width, a threshold buffer of the same size
           and possibly wider and the buffer for the halftoned
           bits.  We have to do a bit of work to enable 16 byte
           boundary after an offset to ensure that we can make use
           of the SSE2 operations for thresholding.  We do the
           allocations now to avoid doing them with every line */
        /* Initialize the ht_landscape stuff to zero */
        memset(&(penum->ht_landscape), 0, sizeof(ht_landscape_info_t));
        ox = dda_current(penum->dda.pixel0.x);
        dev_width =
           (int) fabs((long) fixed2long_pixround(ox + penum->x_extent.x) -
                    fixed2long_pixround(ox));
        /* Get the bit position so that we can do a copy_mono for
           the left remainder and then 16 bit aligned copies for the
           rest.  The right remainder will be OK as it will land in
           the MSBit positions.  Note the #define chunk bits16 in
           gdevm1.c.  Allow also for a 15 sample over run.
        */
        penum->ht_offset_bits = (-fixed2int_var_pixround(ox)) & 15;
        if (penum->ht_offset_bits > 0) {
            penum->ht_stride = ((7 + (dev_width + 4)) / 8) +
                                ARCH_SIZEOF_LONG;
        } else {
            penum->ht_stride = ((7 + (dev_width + 2)) / 8) +
                            ARCH_SIZEOF_LONG;
        }
        /* We want to figure out the maximum height that we may
           have in taking a single source row and going to device
           space */
        max_height = (int) ceil(fixed2float(any_abs(penum->dst_height)) /
                                            (float) penum->Height);
        /*
         * A non-positive height would make the ht_buffer and
         * thresh_buffer allocations below zero-sized; report it the
         * same way as an allocation failure instead.
         */
        if (max_height <= 0)
            return -1;
        penum->ht_buffer = gs_alloc_bytes(penum->memory,
                                          penum->ht_stride * max_height * spp_out,
                                          "gxht_thresh");
        /* We want to have 128 bit alignment for our contone and
           threshold strips so that we can use SSE operations
           in the threshold operation.  Add in a minor buffer and offset
           to ensure this.  If gs_alloc_bytes provides at least 16
           bit alignment then we may need to move 14 bytes.  However, the
           HT process is split in two operations.  One that involves
           the HT of a left remainder and the rest which ensures that
           we pack in the HT data in the bits with no skew for a fast
           copy into the gdevm1 device (16 bit copies).  So, we
           need to account for those pixels which occur first and which
           are NOT aligned for the contone buffer.  After we offset
           by this remainder portion we should be 128 bit aligned.
           Also allow a 15 sample over run during the execution.  */
        /*
         * ceil((dev_width + 15 + 15) / 16) in exact integer arithmetic;
         * dev_width is non-negative because it comes from fabs above.
         */
        temp = (dev_width + 30 + 15) / 16;
        penum->line_size = temp * 16;  /* The stride */
        penum->line = gs_alloc_bytes(penum->memory,
                                     penum->line_size * spp_out,
                                     "gxht_thresh");
        penum->thresh_buffer = gs_alloc_bytes(penum->memory,
                                              penum->line_size * max_height * spp_out,
                                              "gxht_thresh");
        if (penum->line == NULL || penum->thresh_buffer == NULL
                    || penum->ht_buffer == NULL) {
            return -1;
        } else {
#if defined(DEBUG) || defined(PACIFY_VALGRIND)
            memset(penum->line, 0, penum->line_size * spp_out);
            memset(penum->ht_buffer, 0,
                   penum->ht_stride * max_height * spp_out);
            memset(penum->thresh_buffer, 0,
                   penum->line_size * max_height * spp_out);
#endif
        }
    }
    /* Precompute values needed for rasterizing. */
    penum->dxx =
        float2fixed(penum->matrix.xx + fixed2float(fixed_epsilon) / 2);
    return code;
}