// static
/**
 * Binary-searches a table of property triples for aKey.
 *
 * Each row of aProperties is read as { key, value, length }: element [0] is
 * compared against the key with nsCRT::strcmp, element [1] is the value's
 * character data, and element [2] is the value's length stored in a pointer
 * slot (decoded with NS_PTR_TO_UINT32). The binary search requires the rows
 * to be ordered by element [0] under nsCRT::strcmp.
 *
 * @param aProperties          the table to search
 * @param aNumberOfProperties  number of rows in aProperties
 * @param aKey                 key to look up
 * @param aValue               receives the matching value; truncated to the
 *                             empty string when no row matches
 * @return NS_OK on a match, NS_ERROR_FAILURE otherwise
 */
nsresult
nsUConvPropertySearch::SearchPropertyValue(const char* aProperties[][3],
                                           int32_t aNumberOfProperties,
                                           const nsACString& aKey,
                                           nsACString& aValue)
{
  // Keep the flattened string alive for the whole search. Taking .get() on
  // the unnamed temporary returned by PromiseFlatCString() would leave |key|
  // pointing into an object destroyed at the end of that statement; binding
  // the temporary to a const reference extends its lifetime to this scope.
  const nsCString& flatKey = PromiseFlatCString(aKey);
  const char* key = flatKey.get();

  int32_t lo = 0;
  int32_t hi = aNumberOfProperties - 1;
  while (lo <= hi) {
    // Signed midpoint computed without forming lo + hi, so it cannot overflow
    // and |mid - 1| below stays a plain signed subtraction.
    const int32_t mid = lo + (hi - lo) / 2;
    const int32_t comp = nsCRT::strcmp(aProperties[mid][0], key);
    if (comp > 0) {
      hi = mid - 1;
    } else if (comp < 0) {
      lo = mid + 1;
    } else {
      // The third slot carries the value length encoded as a pointer.
      nsDependentCString val(aProperties[mid][1],
                             NS_PTR_TO_UINT32(aProperties[mid][2]));
      aValue.Assign(val);
      return NS_OK;
    }
  }

  aValue.Truncate();
  return NS_ERROR_FAILURE;
}
Beispiel #2
0
/*
 * Return the value stored for |key|, or NULL when no record in the key's
 * bucket chain has that key. Keys are compared by pointer identity; the
 * bucket is chosen from the key's pointer bits modulo XPT_HASHSIZE.
 */
static void *
XPT_HashTableLookup(XPTHashTable *table, void *key) {
    XPTHashRecord *rec;

    for (rec = table->buckets[NS_PTR_TO_UINT32(key) % XPT_HASHSIZE];
         rec != NULL;
         rec = rec->next) {
        if (rec->key == key)
            return rec->value;
    }

    return NULL;
}
Beispiel #3
0
/*static*/ PLDHashNumber
nsSMILCompositor::HashKey(KeyTypePointer aKey)
{
  // Fold the key's two pointers (element and attribute name) into a single
  // number. The element pointer is shifted right by 2 first: per the original
  // author's note, pointers are word-aligned, so the two lowest bits are
  // always 0 and shifting them off lets the sum vary in its low-order bits.
  const PLDHashNumber elementPart =
    NS_PTR_TO_UINT32(aKey->mElement.get()) >> 2;
  const PLDHashNumber attrNamePart =
    NS_PTR_TO_UINT32(aKey->mAttributeName.get());

  return elementPart + attrNamePart;
}
Beispiel #4
0
/*
 * Append a (key, value) record at the tail of the chain for |key|'s bucket.
 *
 * The bucket is chosen from the key's pointer bits modulo XPT_HASHSIZE. No
 * check is made for an existing record with the same key; a duplicate is
 * simply appended after it.
 *
 * Returns |value| on success, or NULL if the record could not be allocated
 * (in which case the table is left unchanged).
 */
static void *
XPT_HashTableAdd(XPTHashTable *table, void *key, void *value) {
    XPTHashRecord **bucketloc = table->buckets +
        (NS_PTR_TO_UINT32(key) % XPT_HASHSIZE);
    XPTHashRecord *bucket;

    /* Advance to the terminating NULL link so the new record goes last. */
    while (*bucketloc != NULL)
        bucketloc = &((*bucketloc)->next);

    bucket = XPT_NEW(table->arena, XPTHashRecord);
    /* NOTE(review): XPT_NEW is assumed to return NULL on allocation failure;
     * bail out instead of dereferencing a null record. */
    if (bucket == NULL)
        return NULL;

    bucket->key = key;
    bucket->value = value;
    bucket->next = NULL;
    *bucketloc = bucket;
    return value;
}
//--------------------------------------------------------------
/**
 * Resolves a charset alias to its preferred charset name.
 *
 * The hardcoded kAliases table is consulted first (case-insensitively);
 * anything not found there is looked up, lower-cased, in the
 * charsetalias.properties bundle, which is created on first use.
 *
 * @param aAlias   alias to resolve; an empty alias yields
 *                 NS_ERROR_NULL_POINTER
 * @param oResult  receives the preferred name; it is truncated before the
 *                 properties lookup appends its result
 * @return NS_OK for a kAliases hit, NS_ERROR_OUT_OF_MEMORY if the delegate
 *         cannot be created, otherwise the delegate's Get() result
 */
NS_IMETHODIMP nsCharsetAlias2::GetPreferred(const nsACString& aAlias,
                                            nsACString& oResult)
{
   if (aAlias.IsEmpty()) {
     return NS_ERROR_NULL_POINTER;
   }
   NS_TIMELINE_START_TIMER("nsCharsetAlias2:GetPreferred");

   // The most frequent aliases are hardcoded so that loading
   // charsetalias.properties can be postponed.  This function can be
   // re-entered *while* that file is being loaded (see bug 190951), at which
   // point |mDelegate| may exist without being valid yet; the load is
   // guaranteed to ask for "UTF-8", which this table answers.
   const PRUint32 aliasCount = NS_ARRAY_LENGTH(kAliases);
   for (PRUint32 i = 0; i != aliasCount; ++i) {
     if (!aAlias.LowerCaseEqualsASCII(kAliases[i][0])) {
       continue;
     }
     // Third column holds the preferred name's length in a pointer slot.
     const nsDependentCString preferred(kAliases[i][1],
                                        NS_PTR_TO_UINT32(kAliases[i][2]));
     oResult.Assign(preferred);
     NS_TIMELINE_STOP_TIMER("nsCharsetAlias2:GetPreferred");
     return NS_OK;
   }

   oResult.Truncate();

   if (!mDelegate) {
     // Create the string bundle holding all remaining aliases.  This section
     // may need a lock so that 'new nsGREResProperties' is never run from
     // two different threads.
     mDelegate = new nsGREResProperties(NS_LITERAL_CSTRING("charsetalias.properties"));
     NS_ASSERTION(mDelegate, "cannot create nsGREResProperties");
     if (!mDelegate) {
       return NS_ERROR_OUT_OF_MEMORY;
     }
   }

   NS_TIMELINE_STOP_TIMER("nsCharsetAlias2:GetPreferred");
   NS_TIMELINE_MARK_TIMER("nsCharsetAlias2:GetPreferred");

   nsCAutoString lowerAlias(aAlias);
   ToLowerCase(lowerAlias);

   // Hack for now: nsGREResProperties needs fixing, but that has to wait
   // until string bundles use UTF8 keys.
   nsAutoString wideResult;
   const nsresult rv =
     mDelegate->Get(NS_ConvertASCIItoUTF16(lowerAlias), wideResult);
   LossyAppendUTF16toASCII(wideResult, oResult);
   return rv;
}
Beispiel #6
0
/// Expand the colormap from RGB to Packed ARGB as needed by Cairo.
/// And apply any LCMS transformation.
static void
ConvertColormap(uint32_t* aColormap, uint32_t aColors)
{
  // Apply CMS transformation if enabled and available
  if (gfxPlatform::GetCMSMode() == eCMSMode_All) {
    qcms_transform* transform = gfxPlatform::GetCMSRGBTransform();
    if (transform) {
      qcms_transform_data(transform, aColormap, aColormap, aColors);
    }
  }
  // Convert from the GIF's RGB format to the Cairo format.
  // Work from end to begin, because of the in-place expansion
  uint8_t* from = ((uint8_t*)aColormap) + 3 * aColors;
  uint32_t* to = aColormap + aColors;

  // Convert color entries to Cairo format

  // set up for loops below
  if (!aColors) {
    return;
  }
  uint32_t c = aColors;

  // copy as bytes until source pointer is 32-bit-aligned
  // NB: can't use 32-bit reads, they might read off the end of the buffer
  for (; (NS_PTR_TO_UINT32(from) & 0x3) && c; --c) {
    from -= 3;
    *--to = gfxPackedPixel(0xFF, from[0], from[1], from[2]);
  }

  // bulk copy of pixels.
  while (c >= 4) {
    from -= 12;
    to   -=  4;
    c    -=  4;
    GFX_BLOCK_RGB_TO_FRGB(from,to);
  }

  // copy remaining pixel(s)
  // NB: can't use 32-bit reads, they might read off the end of the buffer
  while (c--) {
    from -= 3;
    *--to = gfxPackedPixel(0xFF, from[0], from[1], from[2]);
  }
}
/**
 * Pulls scanlines out of the JPEG decompressor one row at a time, writes
 * them into mImageData as 32-bit packed pixels, and posts an invalidation
 * for the rows produced by this call.
 *
 * Rows that are not delivered directly in the native XRGB layout are read
 * into the image row buffer itself and expanded in place to packed pixels.
 *
 * @param suspend  out-parameter; set to true when jpeg_read_scanlines() did
 *                 not deliver a row (the loop stops there), false otherwise
 */
void
nsJPEGDecoder::OutputScanlines(bool* suspend)
{
  *suspend = false;

  // Remember the first row of this batch for the invalidation rect below.
  const uint32_t top = mInfo.output_scanline;

  while ((mInfo.output_scanline < mInfo.output_height)) {
      // Use the Cairo image buffer as scanline buffer
      uint32_t* imageRow = ((uint32_t*)mImageData) +
                           (mInfo.output_scanline * mInfo.output_width);

      if (mInfo.out_color_space == MOZ_JCS_EXT_NATIVE_ENDIAN_XRGB) {
        // Special case: scanline will be directly converted into packed ARGB
        if (jpeg_read_scanlines(&mInfo, (JSAMPARRAY)&imageRow, 1) != 1) {
          *suspend = true; // suspend
          break;
        }
        continue; // all done for this row!
      }

      // Byte-wise view of the same row; samples are read into it and then
      // expanded to 32-bit pixels from the front of the row.
      JSAMPROW sampleRow = (JSAMPROW)imageRow;
      if (mInfo.output_components == 3) {
        // Put the pixels at end of row to enable in-place expansion
        sampleRow += mInfo.output_width;
      }

      // Request one scanline.  Returns 0 or 1 scanlines.
      if (jpeg_read_scanlines(&mInfo, &sampleRow, 1) != 1) {
        *suspend = true; // suspend
        break;
      }

      if (mTransform) {
        // A decoder-specific colour transform is set: run it on the row.
        JSAMPROW source = sampleRow;
        if (mInfo.out_color_space == JCS_GRAYSCALE) {
          // Convert from the 1byte grey pixels at begin of row
          // to the 3byte RGB byte pixels at 'end' of row
          sampleRow += mInfo.output_width;
        }
        qcms_transform_data(mTransform, source, sampleRow, mInfo.output_width);
        // Move 3byte RGB data to end of row
        if (mInfo.out_color_space == JCS_CMYK) {
          memmove(sampleRow + mInfo.output_width,
                  sampleRow,
                  3 * mInfo.output_width);
          sampleRow += mInfo.output_width;
        }
      } else {
        if (mInfo.out_color_space == JCS_CMYK) {
          // Convert from CMYK to RGB
          // We cannot convert directly to Cairo, as the CMSRGBTransform
          // may wants to do a RGB transform...
          // Would be better to have platform CMSenabled transformation
          // from CMYK to (A)RGB...
          cmyk_convert_rgb((JSAMPROW)imageRow, mInfo.output_width);
          sampleRow += mInfo.output_width;
        }
        if (mCMSMode == eCMSMode_All) {
          // No embedded ICC profile - treat as sRGB
          qcms_transform* transform = gfxPlatform::GetCMSRGBTransform();
          if (transform) {
            qcms_transform_data(transform, sampleRow, sampleRow,
                                mInfo.output_width);
          }
        }
      }

      // At this point sampleRow points at output_width 3-byte RGB pixels;
      // expand them to packed 32-bit pixels starting at imageRow.

      // counter for while() loops below
      uint32_t idx = mInfo.output_width;

      // copy as bytes until source pointer is 32-bit-aligned
      for (; (NS_PTR_TO_UINT32(sampleRow) & 0x3) && idx; --idx) {
        *imageRow++ = gfxPackedPixel(0xFF, sampleRow[0], sampleRow[1],
                                     sampleRow[2]);
        sampleRow += 3;
      }

      // copy pixels in blocks of 4
      while (idx >= 4) {
        GFX_BLOCK_RGB_TO_FRGB(sampleRow, imageRow);
        idx       -=  4;
        sampleRow += 12;
        imageRow  +=  4;
      }

      // copy remaining pixel(s)
      while (idx--) {
        // 32-bit read of final pixel will exceed buffer, so read bytes
        *imageRow++ = gfxPackedPixel(0xFF, sampleRow[0], sampleRow[1],
                                     sampleRow[2]);
        sampleRow += 3;
      }
  }

  // Invalidate the band of rows decoded by this call, if any.
  if (top != mInfo.output_scanline) {
      nsIntRect r(0, top, mInfo.output_width, mInfo.output_scanline-top);
      PostInvalidation(r);
  }

}
/**
 * SSE2 implementation of alpha recovery: combines each pixel of blackSurf
 * with the corresponding pixel of whiteSurf and stores the result back into
 * blackSurf.
 *
 * Pixels are processed singly (via RecoverPixel) until the data pointer is
 * 16-byte aligned, then 8 and 4 pixels at a time with aligned SSE2
 * loads/stores, then singly again for the tail of each row.
 *
 * @param blackSurf  surface read and overwritten in place
 * @param whiteSurf  surface read only
 * @return PR_FALSE without touching the pixels when the sizes differ, when
 *         either surface is neither ARGB32 nor RGB24, or when the two
 *         buffers cannot be kept at the same 16-byte alignment; PR_TRUE
 *         otherwise
 */
PRBool
gfxAlphaRecovery::RecoverAlphaSSE2(gfxImageSurface* blackSurf,
                                   const gfxImageSurface* whiteSurf)
{
    gfxIntSize size = blackSurf->GetSize();

    if (size != whiteSurf->GetSize() ||
            (blackSurf->Format() != gfxASurface::ImageFormatARGB32 &&
             blackSurf->Format() != gfxASurface::ImageFormatRGB24) ||
            (whiteSurf->Format() != gfxASurface::ImageFormatARGB32 &&
             whiteSurf->Format() != gfxASurface::ImageFormatRGB24))
        return PR_FALSE;

    blackSurf->Flush();
    whiteSurf->Flush();

    unsigned char* blackData = blackSurf->Data();
    unsigned char* whiteData = whiteSurf->Data();

    // Both pointers are advanced in lockstep, so they must start at the same
    // offset within a 16-byte line and their strides must differ by a
    // multiple of 16 for the aligned loads below to be valid on every row.
    if ((NS_PTR_TO_UINT32(blackData) & 0xf) != (NS_PTR_TO_UINT32(whiteData) & 0xf) ||
            (blackSurf->Stride() - whiteSurf->Stride()) & 0xf) {
        // Cannot keep these in alignment.
        return PR_FALSE;
    }

    __m128i greenMask = _mm_load_si128((__m128i*)greenMaski);
    __m128i alphaMask = _mm_load_si128((__m128i*)alphaMaski);

    for (PRInt32 i = 0; i < size.height; ++i) {
        // j counts the pixels already processed in the current row.
        PRInt32 j = 0;
        // Loop single pixels until at 16 byte alignment.
        while (NS_PTR_TO_UINT32(blackData) & 0xf && j < size.width) {
            *((PRUint32*)blackData) =
                RecoverPixel(*reinterpret_cast<PRUint32*>(blackData),
                             *reinterpret_cast<PRUint32*>(whiteData));
            blackData += 4;
            whiteData += 4;
            j++;
        }
        // This extra loop allows the compiler to do some more clever register
        // management and makes it about 5% faster than with only the 4 pixel
        // at a time loop.
        for (; j < size.width - 8; j += 8) {
            __m128i black1 = _mm_load_si128((__m128i*)blackData);
            __m128i white1 = _mm_load_si128((__m128i*)whiteData);
            __m128i black2 = _mm_load_si128((__m128i*)(blackData + 16));
            __m128i white2 = _mm_load_si128((__m128i*)(whiteData + 16));

            // Execute the same instructions as described in RecoverPixel, only
            // using an SSE2 packed saturated subtract.
            white1 = _mm_subs_epu8(white1, black1);
            white2 = _mm_subs_epu8(white2, black2);
            white1 = _mm_subs_epu8(greenMask, white1);
            white2 = _mm_subs_epu8(greenMask, white2);
            // Producing the final black pixel in an XMM register and storing
            // that is actually faster than doing a masked store since that
            // does an unaligned storage. We have the black pixel in a register
            // anyway.
            black1 = _mm_andnot_si128(alphaMask, black1);
            black2 = _mm_andnot_si128(alphaMask, black2);
            // Byte-shift the computed value up by two bytes, then keep only
            // the bits selected by alphaMask before merging into black.
            white1 = _mm_slli_si128(white1, 2);
            white2 = _mm_slli_si128(white2, 2);
            white1 = _mm_and_si128(alphaMask, white1);
            white2 = _mm_and_si128(alphaMask, white2);
            black1 = _mm_or_si128(white1, black1);
            black2 = _mm_or_si128(white2, black2);

            _mm_store_si128((__m128i*)blackData, black1);
            _mm_store_si128((__m128i*)(blackData + 16), black2);
            blackData += 32;
            whiteData += 32;
        }
        // Same computation as above, one 16-byte register (4 pixels) at a
        // time.
        for (; j < size.width - 4; j += 4) {
            __m128i black = _mm_load_si128((__m128i*)blackData);
            __m128i white = _mm_load_si128((__m128i*)whiteData);

            white = _mm_subs_epu8(white, black);
            white = _mm_subs_epu8(greenMask, white);
            black = _mm_andnot_si128(alphaMask, black);
            white = _mm_slli_si128(white, 2);
            white = _mm_and_si128(alphaMask, white);
            black = _mm_or_si128(white, black);
            _mm_store_si128((__m128i*)blackData, black);
            blackData += 16;
            whiteData += 16;
        }
        // Loop single pixels until we're done.
        while (j < size.width) {
            *((PRUint32*)blackData) =
                RecoverPixel(*reinterpret_cast<PRUint32*>(blackData),
                             *reinterpret_cast<PRUint32*>(whiteData));
            blackData += 4;
            whiteData += 4;
            j++;
        }
        // Skip any padding between the end of this row's pixels and the
        // start of the next row.
        blackData += blackSurf->Stride() - j * 4;
        whiteData += whiteSurf->Stride() - j * 4;
    }

    blackSurf->MarkDirty();

    return PR_TRUE;
}
Beispiel #9
0
/*
 * Look up |addr| in the cursor pool's offset map and return the stored
 * value converted from a pointer back to a 32-bit integer. A missing entry
 * (NULL from the lookup) therefore converts to 0.
 * NOTE(review): the return-type line of this definition is not visible in
 * this excerpt - confirm against the full file.
 */
XPT_GetOffsetForAddr(XPTCursor *cursor, void *addr)
{
    void *stored = XPT_HashTableLookup(cursor->state->pool->offset_map, addr);

    return NS_PTR_TO_UINT32(stored);
}
Beispiel #10
0
/**
 * libpng progressive-read row callback: converts one decoded PNG row into
 * 32-bit packed pixels in the decoder's image buffer.
 *
 * The decoder instance is recovered from png_ptr's progressive pointer.
 * Rows are ignored when the current frame is hidden, when row_num is outside
 * the frame rect, or when new_row is null.
 *
 * @param png_ptr  libpng read struct
 * @param new_row  the row's data, or null
 * @param row_num  index of the row within the frame
 * @param pass     interlace pass number (unused here)
 */
void
nsPNGDecoder::row_callback(png_structp png_ptr, png_bytep new_row,
                           png_uint_32 row_num, int pass)
{
  /* libpng comments:
   *
   * this function is called for every row in the image.  If the
   * image is interlacing, and you turned on the interlace handler,
   * this function will be called for every row in every pass.
   * Some of these rows will not be changed from the previous pass.
   * When the row is not changed, the new_row variable will be
   * nullptr. The rows and passes are called in order, so you don't
   * really need the row_num and pass, but I'm supplying them
   * because it may make your life easier.
   *
   * For the non-nullptr rows of interlaced images, you must call
   * png_progressive_combine_row() passing in the row and the
   * old row.  You can call this function for nullptr rows (it will
   * just return) and for non-interlaced images (it just does the
   * memcpy for you) if it will make the code easier.  Thus, you
   * can just do this for all cases:
   *
   *    png_progressive_combine_row(png_ptr, old_row, new_row);
   *
   * where old_row is what was displayed for previous rows.  Note
   * that the first pass (pass == 0 really) will completely cover
   * the old row, so the rows do not have to be initialized.  After
   * the first pass (and only for interlaced images), you will have
   * to pass the current row, and the function will combine the
   * old row and the new row.
   */
  nsPNGDecoder* decoder =
               static_cast<nsPNGDecoder*>(png_get_progressive_ptr(png_ptr));

  // skip this frame
  if (decoder->mFrameIsHidden) {
    return;
  }

  // Ignore rows that fall outside the frame rect.
  if (row_num >= (png_uint_32) decoder->mFrameRect.height) {
    return;
  }

  if (new_row) {
    int32_t width = decoder->mFrameRect.width;
    uint32_t iwidth = decoder->mFrameRect.width;

    // For interlaced images, merge the new data into the row kept in
    // interlacebuf and convert from that combined row instead.
    png_bytep line = new_row;
    if (decoder->interlacebuf) {
      line = decoder->interlacebuf + (row_num * decoder->mChannels * width);
      png_progressive_combine_row(png_ptr, line, new_row);
    }

    // Destination: start of this row in the 32-bit-per-pixel image buffer.
    uint32_t bpr = width * sizeof(uint32_t);
    uint32_t* cptr32 = (uint32_t*)(decoder->mImageData + (row_num*bpr));

    if (decoder->mTransform) {
      if (decoder->mCMSLine) {
        // Transform into the separate mCMSLine buffer and continue from it.
        qcms_transform_data(decoder->mTransform, line, decoder->mCMSLine,
                            iwidth);
        // copy alpha over
        uint32_t channels = decoder->mChannels;
        if (channels == 2 || channels == 4) {
          for (uint32_t i = 0; i < iwidth; i++)
            decoder->mCMSLine[4 * i + 3] = line[channels * i + channels - 1];
        }
        line = decoder->mCMSLine;
      } else {
        // No separate buffer: transform the row in place.
        qcms_transform_data(decoder->mTransform, line, line, iwidth);
      }
    }

    switch (decoder->format) {
      case gfx::SurfaceFormat::B8G8R8X8: {
        // Source is read as 3 bytes per pixel; alpha is forced to 0xFF.
        // counter for while() loops below
        uint32_t idx = iwidth;

        // copy as bytes until source pointer is 32-bit-aligned
        for (; (NS_PTR_TO_UINT32(line) & 0x3) && idx; --idx) {
          *cptr32++ = gfxPackedPixel(0xFF, line[0], line[1], line[2]);
          line += 3;
        }

        // copy pixels in blocks of 4
        while (idx >= 4) {
          GFX_BLOCK_RGB_TO_FRGB(line, cptr32);
          idx    -=  4;
          line   += 12;
          cptr32 +=  4;
        }

        // copy remaining pixel(s)
        while (idx--) {
          // 32-bit read of final pixel will exceed buffer, so read bytes
          *cptr32++ = gfxPackedPixel(0xFF, line[0], line[1], line[2]);
          line += 3;
        }
      }
      break;
      case gfx::SurfaceFormat::B8G8R8A8: {
        // Source is read as 4 bytes per pixel with alpha in the fourth byte;
        // the packing helper is chosen by mDisablePremultipliedAlpha.
        if (!decoder->mDisablePremultipliedAlpha) {
          for (uint32_t x=width; x>0; --x) {
            *cptr32++ = gfxPackedPixel(line[3], line[0], line[1], line[2]);
            line += 4;
          }
        } else {
          for (uint32_t x=width; x>0; --x) {
            *cptr32++ = gfxPackedPixelNoPreMultiply(line[3], line[0], line[1],
                                                    line[2]);
            line += 4;
          }
        }
      }
      break;
      default:
        // Any other surface format is treated as an error: jump back to
        // libpng's error handler.
        png_longjmp(decoder->mPNG, 1);
    }

    if (decoder->mNumFrames <= 1) {
      // Only do incremental image display for the first frame
      // XXXbholley - this check should be handled in the superclass
      nsIntRect r(0, row_num, width, 1);
      decoder->PostInvalidation(r);
    }
  }
}
/**
 * VMX (AltiVec) path for widening 8-bit characters to 16-bit: each source
 * byte is zero-extended into one char16_t at mDestination, and mDestination
 * is advanced by the number of characters written.
 *
 * Short inputs, the unaligned head, and the tail are handled with scalar
 * loops; the middle is converted 32 source bytes per iteration with vector
 * loads/stores.
 *
 * @param aSource        source characters
 * @param aSourceLength  number of characters to convert
 */
void
LossyConvertEncoding8to16::write_vmx(const char* aSource,
                                      uint32_t aSourceLength)
{
  char16_t *dest = mDestination;

  // Align dest destination to a 16-byte boundary.  We choose to align dest rather than
  // source because we can store neither safely nor fast to unaligned addresses.
  // We must use unsigned datatypes because aSourceLength is unsigned.
  uint32_t i = 0;
  // Number of leading characters to convert one by one so that dest + alignLen
  // lands on a 16-byte boundary (capped at the input length).
  uint32_t alignLen = XPCOM_MIN<uint32_t>(aSourceLength, uint32_t(-NS_PTR_TO_INT32(dest) & 0xf) / sizeof(char16_t));
  // subtraction result can underflow if aSourceLength < alignLen!!!
  // check for underflow
  if (aSourceLength >= alignLen && aSourceLength - alignLen > 31) {
    // Scalar head: bring the destination up to alignment.
    for (; i < alignLen; i++) {
      dest[i] = static_cast<unsigned char>(aSource[i]);
    }

    // maxIndex can underflow if aSourceLength < 33!!!
    uint32_t maxIndex = aSourceLength - 33;

    // check for underflow
    if (maxIndex <= aSourceLength && i < maxIndex) {
      const char *aOurSource = &aSource[i];
      char16_t *aOurDest = &dest[i];
      register const vector unsigned char zeroes = vec_splat_u8( 0 );
      register vector unsigned char source1, source2, lo1, hi1, lo2, hi2;
      if ((NS_PTR_TO_UINT32(aOurSource) & 15) == 0) {
        // Source is 16-byte aligned too: plain aligned vector loads.
        // Walk 32 bytes (two VMX registers) at a time.
        while (1) {
          source1 = vec_ld(0, (unsigned char *)aOurSource);
          source2 = vec_ld(16, (unsigned char *)aOurSource);

          // Interleave 0s in with the bytes of source to create lo and hi.
          // store lo and hi into dest.
          hi1 = vec_mergeh(zeroes, source1);
          lo1 = vec_mergel(zeroes, source1);
          hi2 = vec_mergeh(zeroes, source2);
          lo2 = vec_mergel(zeroes, source2);

          vec_st(hi1, 0, (unsigned char *)aOurDest);
          vec_st(lo1, 16, (unsigned char *)aOurDest);
          vec_st(hi2, 32, (unsigned char *)aOurDest);
          vec_st(lo2, 48, (unsigned char *)aOurDest);

          i += 32;
          if (i > maxIndex)
            break;
          // 32 source bytes consumed, 32 char16_t (64 bytes) produced.
          aOurSource += 32;
          aOurDest += 32;
        }
      }
      else  {
        // Source is not 16-byte aligned: go through the LoadUnaligned helper,
        // which is handed the previously loaded vector and a permute mask.
        register vector unsigned char mask = vec_lvsl(0, (unsigned char *)aOurSource);
        register vector unsigned char vector1  = vec_ld(0, (unsigned char *)aOurSource);
        register vector unsigned char vector2;
        // Walk 32 bytes (two VMX registers) at a time.
        while (1) {
          LoadUnaligned(source1, 0, (unsigned char *)aOurSource, vector1, vector2, mask);
          LoadUnaligned(source2, 16, (unsigned char *)aOurSource, vector2, vector1, mask);

          // Interleave 0s in with the bytes of source to create lo and hi.
          // store lo and hi into dest.
          hi1 = vec_mergeh(zeroes, source1);
          lo1 = vec_mergel(zeroes, source1);
          hi2 = vec_mergeh(zeroes, source2);
          lo2 = vec_mergel(zeroes, source2);

          vec_st(hi1, 0, (unsigned char *)aOurDest);
          vec_st(lo1, 16, (unsigned char *)aOurDest);
          vec_st(hi2, 32, (unsigned char *)aOurDest);
          vec_st(lo2, 48, (unsigned char *)aOurDest);

          i += 32;
          if (i > maxIndex)
            break;
          aOurSource += 32;
          aOurDest += 32;
        }
      }
    }
  }

  // Finish up whatever's left.
  for (; i < aSourceLength; i++) {
    dest[i] = static_cast<unsigned char>(aSource[i]);
  }

  mDestination += i;
}
/**
 * VMX (AltiVec) path for narrowing 16-bit characters to 8-bit: writes one
 * byte per source char16_t at mDestination and advances mDestination by the
 * number of characters written.
 *
 * The scalar head and tail loops truncate each value with a cast to
 * unsigned char; the vector loop packs with vec_packsu, 32 characters per
 * iteration.
 *
 * @param aSource        source characters
 * @param aSourceLength  number of characters to convert
 */
void
LossyConvertEncoding16to8::write_vmx(const char16_t* aSource,
                                      uint32_t aSourceLength)
{
  char* dest = mDestination;

  // Align destination to a 16-byte boundary.
  // We must use unsigned datatypes because aSourceLength is unsigned.
  uint32_t i = 0;
  // Number of leading characters to convert one by one so that dest + alignLen
  // lands on a 16-byte boundary (capped at the input length).
  uint32_t alignLen = XPCOM_MIN(aSourceLength, uint32_t(-NS_PTR_TO_INT32(dest) & 0xf));
  // subtraction result can underflow if aSourceLength < alignLen!!!
  // check for underflow
  if (aSourceLength >= alignLen && aSourceLength - alignLen > 31) {
    // Scalar head: bring the destination up to alignment.
    for (; i < alignLen; i++) {
      dest[i] = static_cast<unsigned char>(aSource[i]);
    }

    // maxIndex can underflow if aSourceLength < 33!!!
    uint32_t maxIndex = aSourceLength - 33;

    // check for underflow
    if (maxIndex <= aSourceLength && i < maxIndex) {
      const char16_t *aOurSource = &aSource[i];
      char *aOurDest = &dest[i];
      register vector unsigned char packed1, packed2;
      register vector unsigned short source1, source2, source3, source4;
      if ((NS_PTR_TO_UINT32(aOurSource) & 15) == 0) {
        // Source is 16-byte aligned too: plain aligned vector loads.
        // Walk 64 bytes (four VMX registers) at a time.
        while (1) {
          source1 = vec_ld(0, (unsigned short *)aOurSource);
          source2 = vec_ld(16, (unsigned short *)aOurSource);
          source3 = vec_ld(32, (unsigned short *)aOurSource);
          source4 = vec_ld(48, (unsigned short *)aOurSource);
          // Pack pairs of 16-bit vectors down to one 8-bit vector each.
          packed1 = vec_packsu(source1, source2);
          packed2 = vec_packsu(source3, source4);
          vec_st(packed1, 0, (unsigned char *)aOurDest);
          vec_st(packed2, 16, (unsigned char *)aOurDest);
          i += 32;
          if(i > maxIndex)
           break;
          // 32 char16_t (64 bytes) consumed, 32 bytes produced.
          aOurDest += 32;
          aOurSource += 32;
        }
      }
      else {
        // Source is not 16-byte aligned: go through the LoadUnaligned helper,
        // which is handed the previously loaded vector and a permute mask.
        register vector unsigned char mask = vec_lvsl(0, (unsigned short *)aOurSource);
        register vector unsigned short vector1  = vec_ld(0, (unsigned short *)aOurSource);
        register vector unsigned short vector2;
        // Walk 64 bytes (four VMX registers) at a time.
        while (1) {
          LoadUnaligned(source1, 0, (unsigned short *)aOurSource, vector1, vector2, mask);
          LoadUnaligned(source2, 16, (unsigned short *)aOurSource, vector2, vector1, mask);
          LoadUnaligned(source3, 32, (unsigned short *)aOurSource, vector1, vector2, mask);
          LoadUnaligned(source4, 48, (unsigned short *)aOurSource, vector2, vector1, mask);
          packed1 = vec_packsu(source1, source2);
          packed2 = vec_packsu(source3, source4);
          vec_st(packed1, 0, (unsigned char *)aOurDest);
          vec_st(packed2, 16, (unsigned char *)aOurDest);
          i += 32;
          if(i > maxIndex)
            break;
          aOurDest += 32;
          aOurSource += 32;
        }
      }
    }
  }

  // Finish up the rest.
  for (; i < aSourceLength; i++) {
    dest[i] = static_cast<unsigned char>(aSource[i]);
  }

  mDestination += i;
}