PsychError SCREENMakeTexture(void) 
{
    size_t								ix, iters;
    PsychWindowRecordType				*textureRecord;
    PsychWindowRecordType				*windowRecord;
    PsychRectType						rect;
    psych_bool							isImageMatrixBytes, isImageMatrixDoubles;
    int									numMatrixPlanes, xSize, ySize;
    unsigned char						*byteMatrix;
    double								*doubleMatrix;
    GLuint								*texturePointer;
    GLubyte								*texturePointer_b;
	GLfloat								*texturePointer_f;
    double								*rp, *gp, *bp, *ap;
    GLubyte								*rpb, *gpb, *bpb, *apb;
    int									usepoweroftwo, usefloatformat, assume_texorientation, textureShader;
    double								optimized_orientation;
    psych_bool							bigendian;
	psych_bool							planar_storage = FALSE;

    // Detect endianity (byte-order) of machine:
    ix=255;
    rpb=(GLubyte*) &ix;
    bigendian = ( *rpb == 255 ) ? FALSE : TRUE;
    ix = 0; rpb = NULL;

    if(PsychPrefStateGet_DebugMakeTexture())	//MARK #1
        StoreNowTime();
    
    //all subfunctions should have these two lines.  
    PsychPushHelp(useString, synopsisString, seeAlsoString);
    if(PsychIsGiveHelp()){PsychGiveHelp();return(PsychError_none);};
    
    //Get the window structure for the onscreen window.  It holds the onscreein GL context which we will need in the
    //final step when we copy the texture from system RAM onto the screen.
    PsychErrorExit(PsychCapNumInputArgs(7));   	
    PsychErrorExit(PsychRequireNumInputArgs(2)); 	
    PsychErrorExit(PsychCapNumOutputArgs(1)); 
    
    //get the window record from the window record argument and get info from the window record
    PsychAllocInWindowRecordArg(kPsychUseDefaultArgPosition, TRUE, &windowRecord);
    
    if((windowRecord->windowType!=kPsychDoubleBufferOnscreen) && (windowRecord->windowType!=kPsychSingleBufferOnscreen))
        PsychErrorExitMsg(PsychError_user, "MakeTexture called on something else than a onscreen window");
    
	// Get optional texture orientation flag:
	assume_texorientation = 0;
	PsychCopyInIntegerArg(6, FALSE, &assume_texorientation);
	
	// Get optional texture shader handle:
	textureShader = 0;
	PsychCopyInIntegerArg(7, FALSE, &textureShader);
	
    //get the argument and sanity check it.
    isImageMatrixBytes=PsychAllocInUnsignedByteMatArg(2, kPsychArgAnything, &ySize, &xSize, &numMatrixPlanes, &byteMatrix);
    isImageMatrixDoubles=PsychAllocInDoubleMatArg(2, kPsychArgAnything, &ySize, &xSize, &numMatrixPlanes, &doubleMatrix);
    if(numMatrixPlanes > 4)
        PsychErrorExitMsg(PsychError_inputMatrixIllegalDimensionSize, "Specified image matrix exceeds maximum depth of 4 layers");
    if(ySize<1 || xSize <1)
        PsychErrorExitMsg(PsychError_inputMatrixIllegalDimensionSize, "Specified image matrix must be at least 1 x 1 pixels in size");
    if(! (isImageMatrixBytes || isImageMatrixDoubles))
        PsychErrorExitMsg(PsychError_user, "Illegal argument type");  //not  likely. 

	// Is this a special image matrix which is already pre-transposed to fit our optimal format?
	if (assume_texorientation == 2) {
		// Yes. Swap xSize and ySize to take this into account:
		ix = (size_t) xSize;
		xSize = ySize;
		ySize = (int) ix;
		ix = 0;
	}

	// Build defining rect for this texture:
    PsychMakeRect(rect, 0, 0, xSize, ySize);

    // Copy in texture preferred draw orientation hint. We default to zero degrees, if
    // not provided.
    // This parameter is not yet used. It is silently ignorerd for now...
    optimized_orientation = 0;
    PsychCopyInDoubleArg(3, FALSE, &optimized_orientation);
    
    // Copy in special creation mode flag: It defaults to zero. If set to 1 then we
    // always create a power-of-two GL_TEXTURE_2D texture. This is useful if one wants
    // to create and use drifting gratings with no effort - texture wrapping is only available
    // for GL_TEXTURE_2D, not for non-pot types. It is also useful if the texture is to be
    // exported to external OpenGL code to simplify tex coords assignments.
    usepoweroftwo=0;
    PsychCopyInIntegerArg(4, FALSE, &usepoweroftwo);

    // Check if size constraints are fullfilled for power-of-two mode:
    if (usepoweroftwo & 1) {
		for(ix = 1; ix < (size_t) xSize; ix*=2);
		if (ix != (size_t) xSize) {
			PsychErrorExitMsg(PsychError_inputMatrixIllegalDimensionSize, "Power-of-two texture requested but width of imageMatrix is not a power of two!");
		}
		
		for(ix = 1; ix < (size_t) ySize; ix*=2);
		if (ix != (size_t) ySize) {
			PsychErrorExitMsg(PsychError_inputMatrixIllegalDimensionSize, "Power-of-two texture requested but height of imageMatrix is not a power of two!");
		}
    }

	// Check if creation of a floating point texture is requested? We default to non-floating point,
	// standard 8 bpc textures if this parameter is not provided.
	usefloatformat = 0;
    PsychCopyInIntegerArg(5, FALSE, &usefloatformat);
	if (usefloatformat<0 || usefloatformat>2) PsychErrorExitMsg(PsychError_user, "Invalid value for 'floatprecision' parameter provided! Valid values are 0 for 8bpc int, 1 for 16bpc float or 2 for 32bpc float.");
	if (usefloatformat && !isImageMatrixDoubles) {
		// Floating point texture requested. We only support this if our input is a double matrix, not
		// for uint8 matrices - converting them to float precision would be just a waste of ressources
		// without any benefit for precision.
		PsychErrorExitMsg(PsychError_user, "Creation of a floating point precision texture requested, but uint8 matrix provided! Only double matrices are acceptable for this mode.");
	}

    //Create a texture record.  Really just a window record adapted for textures.  
    PsychCreateWindowRecord(&textureRecord);						//this also fills the window index field.
    textureRecord->windowType=kPsychTexture;
    // MK: We need to assign the screen number of the onscreen-window, so PsychCreateTexture()
    // can query the size of the screen/onscreen-window...
    textureRecord->screenNumber=windowRecord->screenNumber;
    textureRecord->depth=32;
    PsychCopyRect(textureRecord->rect, rect);
    
	// Is texture storage in planar format explicitely requested by usercode? Do the gpu and its size
	// constraints on textures support planar storage for this image?
	// Can a proper planar -> interleaved remapping GLSL shader be generated and assigned for this texture?
	if ((usepoweroftwo == 4) && (numMatrixPlanes > 1) && (windowRecord->gfxcaps & kPsychGfxCapFBO) && !(PsychPrefStateGet_ConserveVRAM() & kPsychDontCacheTextures) &&
		(ySize * numMatrixPlanes <= windowRecord->maxTextureSize) && PsychAssignPlanarTextureShaders(textureRecord, windowRecord, numMatrixPlanes)) {
		// Yes: Use the planar texture storage fast-path.
		planar_storage = TRUE;
		if (PsychPrefStateGet_Verbosity() > 6) printf("PTB-DEBUG: Using planar storage for %i layer texture of size %i x %i texels.\n", numMatrixPlanes, xSize, ySize);
	}
	else {
		planar_storage = FALSE;
		if (PsychPrefStateGet_Verbosity() > 7) printf("PTB-DEBUG: Using standard storage for %i layer texture of size %i x %i texels.\n", numMatrixPlanes, xSize, ySize);
	}

    //Allocate the texture memory and copy the MATLAB matrix into the texture memory.
	if (usefloatformat || (planar_storage && !isImageMatrixBytes)) {
		// Allocate a double for each color component and pixel:
		textureRecord->textureMemorySizeBytes = sizeof(double) * (size_t) numMatrixPlanes * (size_t) xSize * (size_t) ySize;		
	}
    else {
		// Allocate one byte per color component and pixel:
		textureRecord->textureMemorySizeBytes = (size_t) numMatrixPlanes * (size_t) xSize * (size_t) ySize;
    }

	// We allocate our own intermediate conversion buffer unless this is
	// creation of a single-layer luminance8 integer texture from a single
	// layer uint8 input matrix and client storage is disabled. In that case, we can use a zero-copy path:
	if ((isImageMatrixBytes && (numMatrixPlanes == 1) && (!usefloatformat) && !(PsychPrefStateGet_ConserveVRAM() & kPsychDontCacheTextures)) ||
		(isImageMatrixBytes && planar_storage)) {
		// Zero copy path:
		texturePointer = NULL;
	}
	else {
		// Allocate memory:
		if(PsychPrefStateGet_DebugMakeTexture()) StoreNowTime();
		textureRecord->textureMemory = malloc(textureRecord->textureMemorySizeBytes);
		if(PsychPrefStateGet_DebugMakeTexture()) StoreNowTime();
		texturePointer = textureRecord->textureMemory;
	}
	
    // Does script explicitely request usage of a GL_TEXTURE_2D power-of-two texture?
    if (usepoweroftwo & 1) {
      // Enforce creation as a power-of-two texture:
      textureRecord->texturetarget=GL_TEXTURE_2D;
    }

	// Now the conversion routines that convert Matlab/Octave matrices into memory
	// buffers suitable for OpenGL:
	if (planar_storage) {
		// Planar texture storage, backed by a LUMINANCE texture container:

		// Zero-Copy possible? Only for uint8 input -> uint8 output:
		if (texturePointer == NULL) {
			texturePointer = (GLuint*) byteMatrix;
			textureRecord->textureMemory = texturePointer;
			// Set size to zero, so PsychCreateTexture() does not free() our
			// input buffer:
			textureRecord->textureMemorySizeBytes = 0;
			
			// This is always a LUMINANCE8 texture, backing our planar uint8 texture:
			textureRecord->depth = 8 * numMatrixPlanes;
			textureRecord->textureexternaltype   = GL_UNSIGNED_BYTE;
			textureRecord->textureexternalformat = GL_LUMINANCE;
			textureRecord->textureinternalformat = GL_LUMINANCE8;
		}
		else {
			// Some cast operation needed from double input format.
			// We always cast from double to float, potentially with
			// normalization and/or checking of value range.
			textureRecord->textureexternalformat = GL_LUMINANCE;

			if (usefloatformat) {
				// Floating point or other high precision format:
				textureRecord->depth = ((usefloatformat == 1) ? 16 : 32) * numMatrixPlanes;
				textureRecord->textureexternaltype = GL_FLOAT;
				textureRecord->textureinternalformat = (usefloatformat == 1) ? GL_LUMINANCE_FLOAT16_APPLE : GL_LUMINANCE_FLOAT32_APPLE;
				
				// Override for missing floating point texture support: Try to use 16 bit fixed point signed normalized textures [-1.0 ; 1.0] resolved at 15 bits:
				if ((usefloatformat == 1) && !(windowRecord->gfxcaps & kPsychGfxCapFPTex16)) textureRecord->textureinternalformat = GL_LUMINANCE16_SNORM;

				// Perform copy with double -> float cast:
				iters = (size_t) xSize * (size_t) ySize * (size_t) numMatrixPlanes;
				texturePointer_f = (GLfloat*) texturePointer;
				for(ix = 0; ix < iters; ix++) {
					*(texturePointer_f++) = (GLfloat) *(doubleMatrix++);
				}
				iters = (size_t) xSize * (size_t) ySize;
			}
			else {
				// 8 Bit format, but from double input matrix -> cast to uint8:
				textureRecord->depth = 8 * numMatrixPlanes;
				textureRecord->textureexternaltype = GL_UNSIGNED_BYTE;
				textureRecord->textureinternalformat = GL_LUMINANCE8;

				iters = (size_t) xSize * (size_t) ySize * (size_t) numMatrixPlanes;
				texturePointer_b = (GLubyte*) texturePointer;
				for(ix = 0; ix < iters; ix++) {
					*(texturePointer_b++) = (GLubyte) *(doubleMatrix++);
				}
				iters = (size_t) xSize * (size_t) ySize;
			}
		}
	}
	else if (usefloatformat) {
		// Conversion routines for HDR 16 bpc or 32 bpc textures -- Slow path.
		// Our input is always double matrices...
		iters = (size_t) xSize * (size_t) ySize;

		// Our input buffer is always of GL_FLOAT precision:
		textureRecord->textureexternaltype = GL_FLOAT;
		texturePointer_f=(GLfloat*) texturePointer;
		
		if(numMatrixPlanes==1) {
			for(ix=0;ix<iters;ix++){
				*(texturePointer_f++)= (GLfloat) *(doubleMatrix++);  
			}
			textureRecord->depth=(usefloatformat==1) ? 16 : 32;

			textureRecord->textureinternalformat = (usefloatformat==1) ? GL_LUMINANCE_FLOAT16_APPLE : GL_LUMINANCE_FLOAT32_APPLE; 
			textureRecord->textureexternalformat = GL_LUMINANCE;

			// Override for missing floating point texture support: Try to use 16 bit fixed point signed normalized textures [-1.0 ; 1.0] resolved at 15 bits:
			if ((usefloatformat==1) && !(windowRecord->gfxcaps & kPsychGfxCapFPTex16)) textureRecord->textureinternalformat = GL_LUMINANCE16_SNORM;
		}

		if(numMatrixPlanes==2) {
			rp=(double*) ((size_t) doubleMatrix);
			ap=(double*) ((size_t) rp + (size_t) iters * sizeof(double));

			for(ix=0;ix<iters;ix++){
				*(texturePointer_f++)= (GLfloat) *(rp++);  
				*(texturePointer_f++)= (GLfloat) *(ap++);  
			}
			textureRecord->depth=(usefloatformat==1) ? 32 : 64;
			textureRecord->textureinternalformat = (usefloatformat==1) ? GL_LUMINANCE_ALPHA_FLOAT16_APPLE : GL_LUMINANCE_ALPHA_FLOAT32_APPLE; 
			textureRecord->textureexternalformat = GL_LUMINANCE_ALPHA;

			// Override for missing floating point texture support: Try to use 16 bit fixed point signed normalized textures [-1.0 ; 1.0] resolved at 15 bits:
			if ((usefloatformat==1) && !(windowRecord->gfxcaps & kPsychGfxCapFPTex16)) textureRecord->textureinternalformat = GL_LUMINANCE16_ALPHA16_SNORM;
		}
		
		if(numMatrixPlanes==3) {
			rp=(double*) ((size_t) doubleMatrix);
			gp=(double*) ((size_t) rp + (size_t) iters * sizeof(double));
			bp=(double*) ((size_t) gp + (size_t) iters * sizeof(double));

			for(ix=0;ix<iters;ix++){
				*(texturePointer_f++)= (GLfloat) *(rp++);  
				*(texturePointer_f++)= (GLfloat) *(gp++);  
				*(texturePointer_f++)= (GLfloat) *(bp++);  
			}
			textureRecord->depth=(usefloatformat==1) ? 48 : 96;
			textureRecord->textureinternalformat = (usefloatformat==1) ? GL_RGB_FLOAT16_APPLE : GL_RGB_FLOAT32_APPLE; 
			textureRecord->textureexternalformat = GL_RGB;

			// Override for missing floating point texture support: Try to use 16 bit fixed point signed normalized textures [-1.0 ; 1.0] resolved at 15 bits:
			if ((usefloatformat==1) && !(windowRecord->gfxcaps & kPsychGfxCapFPTex16)) textureRecord->textureinternalformat = GL_RGB16_SNORM;
		}
		
		if(numMatrixPlanes==4) {
			rp=(double*) ((size_t) doubleMatrix);
			gp=(double*) ((size_t) rp + (size_t) iters * sizeof(double));
			bp=(double*) ((size_t) gp + (size_t) iters * sizeof(double));
			ap=(double*) ((size_t) bp + (size_t) iters * sizeof(double));

			for(ix=0;ix<iters;ix++){
				*(texturePointer_f++)= (GLfloat) *(rp++);  
				*(texturePointer_f++)= (GLfloat) *(gp++);  
				*(texturePointer_f++)= (GLfloat) *(bp++);  
				*(texturePointer_f++)= (GLfloat) *(ap++);  
			}			
			textureRecord->depth=(usefloatformat==1) ? 64 : 128;
			textureRecord->textureinternalformat = (usefloatformat==1) ? GL_RGBA_FLOAT16_APPLE : GL_RGBA_FLOAT32_APPLE; 
			textureRecord->textureexternalformat = GL_RGBA;

			// Override for missing floating point texture support: Try to use 16 bit fixed point signed normalized textures [-1.0 ; 1.0] resolved at 15 bits:
			if ((usefloatformat==1) && !(windowRecord->gfxcaps & kPsychGfxCapFPTex16)) textureRecord->textureinternalformat = GL_RGBA16_SNORM;
		}		
		// End of HDR conversion code...
	}
    else {
		// Standard LDR texture 8 bpc conversion routines -- Fast path.
		iters = (size_t) xSize * (size_t) ySize;

		// Improved implementation: Takes 13 ms on a 800x800 texture...
		if(isImageMatrixDoubles && numMatrixPlanes==1){
			texturePointer_b=(GLubyte*) texturePointer;
			for(ix=0;ix<iters;ix++){
				*(texturePointer_b++)= (GLubyte) *(doubleMatrix++);  
			}
			textureRecord->depth=8;
		}
		
		// Improved version: Takes 3 ms on a 800x800 texture...
		// NB: Implementing memcpy manually by a for-loop takes 10 ms! This is a huge difference.
		// -> That's because memcpy on MacOS-X is implemented with hand-coded, highly tuned Assembler code for PowerPC.
		// -> It's always wise to use system-routines if available, instead of coding it by yourself!
		if(isImageMatrixBytes && numMatrixPlanes==1) {
			if (texturePointer) {
				// Need to do a copy. Use optimized memcpy():
				memcpy((void*) texturePointer, (void*) byteMatrix, iters);
				
				//texturePointer_b=(GLubyte*) texturePointer;
				//for(ix=0;ix<iters;ix++){
				//	*(texturePointer_b++) = *(byteMatrix++);  
				//}
			}
			else {
				// Zero-Copy path. Just pass a pointer to our input matrix:
				texturePointer = (GLuint*) byteMatrix;
				textureRecord->textureMemory = texturePointer;
				// Set size to zero, so PsychCreateTexture() does not free() our
				// input buffer:
				textureRecord->textureMemorySizeBytes = 0;
			}

			textureRecord->depth=8;
		}
		
		// New version: Takes 33 ms on a 800x800 texture...
		if(isImageMatrixDoubles && numMatrixPlanes==2){
			texturePointer_b=(GLubyte*) texturePointer;
			rp=(double*) ((size_t) doubleMatrix);
			ap=(double*) ((size_t) rp + (size_t) iters * sizeof(double));
			for(ix=0;ix<iters;ix++){
				*(texturePointer_b++)= (GLubyte) *(rp++);  
				*(texturePointer_b++)= (GLubyte) *(ap++);  
			}
			textureRecord->depth=16;
		}
		
		// New version: Takes 20 ms on a 800x800 texture...
		if(isImageMatrixBytes && numMatrixPlanes==2){
			texturePointer_b=(GLubyte*) texturePointer;
			rpb=(GLubyte*) ((size_t) byteMatrix);
			apb=(GLubyte*) ((size_t) rpb + (size_t) iters);
			for(ix=0;ix<iters;ix++){
				*(texturePointer_b++)= *(rpb++);  
				*(texturePointer_b++)= *(apb++);  
			}
			textureRecord->depth=16;
		}
		
		// Improved version: Takes 43 ms on a 800x800 texture...
		if(isImageMatrixDoubles && numMatrixPlanes==3){
			texturePointer_b=(GLubyte*) texturePointer;
			rp=(double*) ((size_t) doubleMatrix);
			gp=(double*) ((size_t) rp + (size_t) iters * sizeof(double));
			bp=(double*) ((size_t) gp + (size_t) iters * sizeof(double));
			for(ix=0;ix<iters;ix++){
				*(texturePointer_b++)= (GLubyte) *(rp++);  
				*(texturePointer_b++)= (GLubyte) *(gp++);  
				*(texturePointer_b++)= (GLubyte) *(bp++);  
			}
			textureRecord->depth=24;
		}
		
		// Improved version: Takes 25 ms on a 800x800 texture...
		if(isImageMatrixBytes && numMatrixPlanes==3){
			texturePointer_b=(GLubyte*) texturePointer;			
			rpb=(GLubyte*) ((size_t) byteMatrix);
			gpb=(GLubyte*) ((size_t) rpb + (size_t) iters);
			bpb=(GLubyte*) ((size_t) gpb + (size_t) iters);
			for(ix=0;ix<iters;ix++){
				*(texturePointer_b++)= *(rpb++);  
				*(texturePointer_b++)= *(gpb++);  
				*(texturePointer_b++)= *(bpb++);  
			}
			textureRecord->depth=24;
		}
		
		// Improved version: Takes 55 ms on a 800x800 texture...
		if(isImageMatrixDoubles && numMatrixPlanes==4){
			texturePointer_b=(GLubyte*) texturePointer;			
			rp=(double*) ((size_t) doubleMatrix);
			gp=(double*) ((size_t) rp + (size_t) iters * sizeof(double));
			bp=(double*) ((size_t) gp + (size_t) iters * sizeof(double));
			ap=(double*) ((size_t) bp + (size_t) iters * sizeof(double));
			if (bigendian) {
				// Code for big-endian machines like PowerPC:
				for(ix=0;ix<iters;ix++){
					*(texturePointer_b++)= (GLubyte) *(ap++);  
					*(texturePointer_b++)= (GLubyte) *(rp++);  
					*(texturePointer_b++)= (GLubyte) *(gp++);  
					*(texturePointer_b++)= (GLubyte) *(bp++);  
				}
			}
			else {
				// Code for little-endian machines like Intel Pentium:
				for(ix=0;ix<iters;ix++){
					*(texturePointer_b++)= (GLubyte) *(bp++);  
					*(texturePointer_b++)= (GLubyte) *(gp++);  
					*(texturePointer_b++)= (GLubyte) *(rp++);  
					*(texturePointer_b++)= (GLubyte) *(ap++);  
				}
			}
			
			textureRecord->depth=32;
		}
		
		// Improved version: Takes 33 ms on a 800x800 texture...
		if(isImageMatrixBytes && numMatrixPlanes==4){
			texturePointer_b=(GLubyte*) texturePointer;			
			rpb=(GLubyte*) ((size_t) byteMatrix);
			gpb=(GLubyte*) ((size_t) rpb + (size_t) iters);
			bpb=(GLubyte*) ((size_t) gpb + (size_t) iters);
			apb=(GLubyte*) ((size_t) bpb + (size_t) iters);
			if (bigendian) {
				// Code for big-endian machines like PowerPC:
				for(ix=0;ix<iters;ix++){
					*(texturePointer_b++)= *(apb++);  
					*(texturePointer_b++)= *(rpb++);  
					*(texturePointer_b++)= *(gpb++);  
					*(texturePointer_b++)= *(bpb++);  
				}
			}
			else {
				// Code for little-endian machines like Intel Pentium:
				for(ix=0;ix<iters;ix++){
					*(texturePointer_b++)= *(bpb++);  
					*(texturePointer_b++)= *(gpb++);  
					*(texturePointer_b++)= *(rpb++);  
					*(texturePointer_b++)= *(apb++);  
				}
			}
			
			textureRecord->depth=32;
		}
	} // End of 8 bpc texture conversion code (fast-path for LDR textures)
    
	// Override for missing floating point texture support?
	if ((usefloatformat==1) && !(windowRecord->gfxcaps & kPsychGfxCapFPTex16)) {
		// Override enabled. Instead of a 16bpc float texture with 11 bit linear precision in the
		// range [-1.0 ; 1.0], we use a 16 bit signed normalized texture with a normalized value
		// range of [-1.0; 1.0], encoded with 1 bit sign and 15 bit magnitude. These textures have
		// an effective linear precision of 15 bits - better than 16 bpc float - but they are restricted
		// to a value range of [-1.0 ; 1.0], as opposed to 16 bpc float textures. Tell user about this
		// replacement at high verbosity levels:
		if (PsychPrefStateGet_Verbosity() > 4)
			printf("PTB-INFO:MakeTexture: Code requested 16 bpc float texture, but this is unsupported. Trying to use 16 bit snorm texture instead.\n");
		
		// Signed normalized textures supported? Otherwise we bail...
		if (!(windowRecord->gfxcaps & kPsychGfxCapSNTex16)) {
			printf("PTB-ERROR:MakeTexture: Code requested 16 bpc floating point texture, but this is unsupported by this graphics card.\n");
			printf("PTB-ERROR:MakeTexture: Tried to use 16 bit snorm texture instead, but failed as this is unsupported as well.\n");
			PsychErrorExitMsg(PsychError_user, "Creation of 15 bit linear precision signed normalized texture failed. Not supported by your graphics hardware!");
		}
		
		// Check value range of pixels. This will not work for out of [-1; 1] range values.
		texturePointer_f=(GLfloat*) texturePointer;
		iters = iters * (size_t) numMatrixPlanes;
		for (ix=0; ix<iters; ix++, texturePointer_f++) {
			if(fabs((double) *texturePointer_f) > 1.0) {
				// Game over!
				printf("PTB-ERROR:MakeTexture: Code requested 16 bpc floating point texture, but this is unsupported by this graphics card.\n");
				printf("PTB-ERROR:MakeTexture: Tried to use 16 bit snorm texture instead, but failed because some pixels are outside the\n");
				printf("PTB-ERROR:MakeTexture: representable range -1.0 to 1.0 for this texture type. Change your code or update your graphics hardware.\n");
				PsychErrorExitMsg(PsychError_user, "Creation of 15 bit linear precision signed normalized texture failed due to out of [-1 ; +1] range pixel values!");
			}
		}
	}

	// This is a special workaround for bugs in FLOAT16 texture creation on Mac OS/X 10.4.x and 10.5.x.
	// The OpenGL fails to properly flush very small values (< 1e-9) to zero when creating a FLOAT16
	// type texture. Instead it seems to initialize with trash data, corrupting the texture.
	// Therefore, if FLOAT16 texture creation is requested, we loop over the whole input buffer and
	// set all values with magnitude smaller than 1e-9 to zero. Better safe than sorry...
	if ((usefloatformat==1) && (windowRecord->gfxcaps & kPsychGfxCapFPTex16)) {
		texturePointer_f=(GLfloat*) texturePointer;
		iters = iters * (size_t) numMatrixPlanes;
		for(ix=0; ix<iters; ix++, texturePointer_f++) if(fabs((double) *texturePointer_f) < 1e-9) { *texturePointer_f = 0.0; }
	}

    // The memory buffer now contains our texture data in a format ready to submit to OpenGL.
    
	// Assign parent window and copy its inheritable properties:
	PsychAssignParentWindow(textureRecord, windowRecord);

    // Texture orientation is zero aka transposed aka non-renderswapped.
    textureRecord->textureOrientation = ((assume_texorientation != 2) && (assume_texorientation != 3)) ? 0 : 2;
    
	// This is our best guess about the number of image channels:
	textureRecord->nrchannels = numMatrixPlanes;

	if (planar_storage) {
		// Setup special rect to fake PsychCreateTexture() into creating a luminance
		// texture numMatrixPlanes times the height (in rows) of the texture, to store the
		// numMatrixPlanes layers concatenated to each other.
		if (textureRecord->textureOrientation == 0) {
			// Normal case: Transposed storage.
			PsychMakeRect(&(textureRecord->rect[0]), 0, 0, xSize * numMatrixPlanes, ySize);
		}
		else {
			// Special case: Non-transposed or isotropic storage:
			PsychMakeRect(&(textureRecord->rect[0]), 0, 0, xSize, ySize * numMatrixPlanes);
		}
		
		// Create planar texture:
		PsychCreateTexture(textureRecord);

		// Restore rect and clientrect of texture to effective size:
		PsychMakeRect(&(textureRecord->rect[0]), 0, 0, xSize, ySize);
		PsychCopyRect(textureRecord->clientrect, textureRecord->rect);
		
		textureRecord->specialflags = kPsychPlanarTexture;
	}
	else {
		// Let's create and bind a new texture object and fill it with our new texture data.
		PsychCreateTexture(textureRecord);
		
		// Assign GLSL filter-/lookup-shaders if needed:
		PsychAssignHighPrecisionTextureShaders(textureRecord, windowRecord, usefloatformat, (usepoweroftwo & 2) ? 1 : 0);
	}
	
	// User specified override shader for this texture provided? This is useful for
	// basic image processing and procedural texture shading:
	if (textureShader!=0) {
		// Assign provided shader as filtershader to this texture: We negate it so
		// that the texture blitter routines know this is a custom shader, not our
		// built in filter shader:
		textureRecord->textureFilterShader = -1 * textureShader;
	}
	
    // Texture ready. Mark it valid and return handle to userspace:
    PsychSetWindowRecordValid(textureRecord);
    PsychCopyOutDoubleArg(1, FALSE, textureRecord->windowIndex);
	
	// Swapping the texture to upright orientation requested?
	if (assume_texorientation == 1) {
		// Transform sourceRecord source texture into a normalized, upright texture if it isn't already in
		// that format. We require this standard orientation for simplified shader design.
		PsychSetShader(windowRecord, 0);
		PsychNormalizeTextureOrientation(textureRecord);
	}
	
	// Shall the texture be finally declared "normally oriented"?
	// This is either due to explicit renderswapping if assume_textureorientation == 1,
	// or because it was already pretransposed in Matlab/Octave if assume_textureorientation == 2,
	// or because user space tells us the texture is isotropic if assume_textureorientation == 3.
	if (assume_texorientation > 0) {
		// Yes. Label it as such:
		textureRecord->textureOrientation = 2;
	}
    
    if(PsychPrefStateGet_DebugMakeTexture()) 	//MARK #4
        StoreNowTime();
    
    return(PsychError_none);
}
PsychError SCREENMakeTexture(void) 
{
    int					ix;
    PsychWindowRecordType		*textureRecord;
    PsychWindowRecordType		*windowRecord;
    PsychRectType			rect;
    Boolean				isImageMatrixBytes, isImageMatrixDoubles;
    int					numMatrixPlanes, xSize, ySize, iters; 
    unsigned char			*byteMatrix;
    double				*doubleMatrix;
    GLuint                              *texturePointer;
    GLubyte                             *texturePointer_b;
	GLfloat								*texturePointer_f;
    double *rp, *gp, *bp, *ap;    
    GLubyte *rpb, *gpb, *bpb, *apb;    
    int                                 usepoweroftwo, usefloatformat, assume_texorientation, textureShader;
    double                              optimized_orientation;
    Boolean                             bigendian;

    // Detect endianity (byte-order) of machine:
    ix=255;
    rpb=(GLubyte*) &ix;
    bigendian = ( *rpb == 255 ) ? FALSE : TRUE;
    ix = 0; rpb = NULL;

    if(PsychPrefStateGet_DebugMakeTexture())	//MARK #1
        StoreNowTime();
    
    //all subfunctions should have these two lines.  
    PsychPushHelp(useString, synopsisString, seeAlsoString);
    if(PsychIsGiveHelp()){PsychGiveHelp();return(PsychError_none);};
    
    //Get the window structure for the onscreen window.  It holds the onscreein GL context which we will need in the
    //final step when we copy the texture from system RAM onto the screen.
    PsychErrorExit(PsychCapNumInputArgs(7));   	
    PsychErrorExit(PsychRequireNumInputArgs(2)); 	
    PsychErrorExit(PsychCapNumOutputArgs(1)); 
    
    //get the window record from the window record argument and get info from the window record
    PsychAllocInWindowRecordArg(kPsychUseDefaultArgPosition, TRUE, &windowRecord);
    
    if((windowRecord->windowType!=kPsychDoubleBufferOnscreen) && (windowRecord->windowType!=kPsychSingleBufferOnscreen))
        PsychErrorExitMsg(PsychError_user, "MakeTexture called on something else than a onscreen window");
    
	// Get optional texture orientation flag:
	assume_texorientation = 0;
	PsychCopyInIntegerArg(6, FALSE, &assume_texorientation);
	
	// Get optional texture shader handle:
	textureShader = 0;
	PsychCopyInIntegerArg(7, FALSE, &textureShader);
	
    //get the argument and sanity check it.
    isImageMatrixBytes=PsychAllocInUnsignedByteMatArg(2, kPsychArgAnything, &ySize, &xSize, &numMatrixPlanes, &byteMatrix);
    isImageMatrixDoubles=PsychAllocInDoubleMatArg(2, kPsychArgAnything, &ySize, &xSize, &numMatrixPlanes, &doubleMatrix);
    if(numMatrixPlanes > 4)
        PsychErrorExitMsg(PsychError_inputMatrixIllegalDimensionSize, "Specified image matrix exceeds maximum depth of 4 layers");
    if(ySize<1 || xSize <1)
        PsychErrorExitMsg(PsychError_inputMatrixIllegalDimensionSize, "Specified image matrix must be at least 1 x 1 pixels in size");
    if(! (isImageMatrixBytes || isImageMatrixDoubles))
        PsychErrorExitMsg(PsychError_user, "Illegal argument type");  //not  likely. 

	// Is this a special image matrix which is already pre-transposed to fit our optimal format?
	if (assume_texorientation == 2) {
		// Yes. Swap xSize and ySize to take this into account:
		ix = xSize;
		xSize = ySize;
		ySize = ix;
		ix = 0;
	}

	// Build defining rect for this texture:
    PsychMakeRect(rect, 0, 0, xSize, ySize);

    // Copy in texture preferred draw orientation hint. We default to zero degrees, if
    // not provided.
    // This parameter is not yet used. It is silently ignorerd for now...
    optimized_orientation = 0;
    PsychCopyInDoubleArg(3, FALSE, &optimized_orientation);
    
    // Copy in special creation mode flag: It defaults to zero. If set to 1 then we
    // always create a power-of-two GL_TEXTURE_2D texture. This is useful if one wants
    // to create and use drifting gratings with no effort - texture wrapping is only available
    // for GL_TEXTURE_2D, not for non-pot types. It is also useful if the texture is to be
    // exported to external OpenGL code to simplify tex coords assignments.
    usepoweroftwo=0;
    PsychCopyInIntegerArg(4, FALSE, &usepoweroftwo);

    // Check if size constraints are fullfilled for power-of-two mode:
    if (usepoweroftwo & 1) {
      for(ix = 1; ix < xSize; ix*=2);
      if (ix!=xSize) {
	PsychErrorExitMsg(PsychError_inputMatrixIllegalDimensionSize, "Power-of-two texture requested but width of imageMatrix is not a power of two!");
      }

      for(ix = 1; ix < ySize; ix*=2);
      if (ix!=ySize) {
	PsychErrorExitMsg(PsychError_inputMatrixIllegalDimensionSize, "Power-of-two texture requested but height of imageMatrix is not a power of two!");
      }
    }

	// Check if creation of a floating point texture is requested? We default to non-floating point,
	// standard 8 bpc textures if this parameter is not provided.
	usefloatformat = 0;
    PsychCopyInIntegerArg(5, FALSE, &usefloatformat);
	if (usefloatformat<0 || usefloatformat>2) PsychErrorExitMsg(PsychError_user, "Invalid value for 'floatprecision' parameter provided! Valid values are 0 for 8bpc int, 1 for 16bpc float or 2 for 32bpc float.");
	if (usefloatformat && !isImageMatrixDoubles) {
		// Floating point texture requested. We only support this if our input is a double matrix, not
		// for uint8 matrices - converting them to float precision would be just a waste of ressources
		// without any benefit for precision.
		PsychErrorExitMsg(PsychError_user, "Creation of a floating point precision texture requested, but uint8 matrix provided! Only double matrices are acceptable for this mode.");
	}

    //Create a texture record.  Really just a window record adapted for textures.  
    PsychCreateWindowRecord(&textureRecord);						//this also fills the window index field.
    textureRecord->windowType=kPsychTexture;
    // MK: We need to assign the screen number of the onscreen-window, so PsychCreateTexture()
    // can query the size of the screen/onscreen-window...
    textureRecord->screenNumber=windowRecord->screenNumber;
    textureRecord->depth=32;
    PsychCopyRect(textureRecord->rect, rect);
    
    //Allocate the texture memory and copy the MATLAB matrix into the texture memory.
    // MK: We only allocate the amount really needed for given format, aka numMatrixPlanes - Bytes per pixel.
	if (usefloatformat) {
		// Allocate a double for each color component and pixel:
		textureRecord->textureMemorySizeBytes= sizeof(double) * numMatrixPlanes * xSize * ySize;		
	}
    else {
		// Allocate one byte per color component and pixel:
		textureRecord->textureMemorySizeBytes= numMatrixPlanes * xSize * ySize;
    }
	// MK: Allocate memory page-aligned... -> Helps Apple texture range extensions et al.
    if(PsychPrefStateGet_DebugMakeTexture()) 	//MARK #2
        StoreNowTime();
    textureRecord->textureMemory=malloc(textureRecord->textureMemorySizeBytes);
    if(PsychPrefStateGet_DebugMakeTexture()) 	//MARK #3
        StoreNowTime();	
    texturePointer=textureRecord->textureMemory;
    
    // Does script explicitely request usage of a GL_TEXTURE_2D power-of-two texture?
    if (usepoweroftwo & 1) {
      // Enforce creation as a power-of-two texture:
      textureRecord->texturetarget=GL_TEXTURE_2D;
    }

	// Now the conversion routines that convert Matlab/Octave matrices into memory
	// buffers suitable for OpenGL:
	if (usefloatformat) {
		// Conversion routines for HDR 16 bpc or 32 bpc textures -- Slow path.
		// Our input is always double matrices...
		iters = xSize * ySize;

		// Our input buffer is always of GL_FLOAT precision:
		textureRecord->textureexternaltype = GL_FLOAT;
		texturePointer_f=(GLfloat*) texturePointer;
		
		if(numMatrixPlanes==1) {
			for(ix=0;ix<iters;ix++){
				*(texturePointer_f++)= (GLfloat) *(doubleMatrix++);  
			}
			textureRecord->depth=(usefloatformat==1) ? 16 : 32;

			textureRecord->textureinternalformat = (usefloatformat==1) ? GL_LUMINANCE_FLOAT16_APPLE : GL_LUMINANCE_FLOAT32_APPLE; 
			textureRecord->textureexternalformat = GL_LUMINANCE;
		}

		if(numMatrixPlanes==2) {
			rp=(double*) ((psych_uint64) doubleMatrix);
			ap=(double*) ((psych_uint64) doubleMatrix + (psych_uint64) iters*sizeof(double));

			for(ix=0;ix<iters;ix++){
				*(texturePointer_f++)= (GLfloat) *(rp++);  
				*(texturePointer_f++)= (GLfloat) *(ap++);  
			}
			textureRecord->depth=(usefloatformat==1) ? 32 : 64;
			textureRecord->textureinternalformat = (usefloatformat==1) ? GL_LUMINANCE_ALPHA_FLOAT16_APPLE : GL_LUMINANCE_ALPHA_FLOAT32_APPLE; 
			textureRecord->textureexternalformat = GL_LUMINANCE_ALPHA;
		}
		
		if(numMatrixPlanes==3) {
			rp=(double*) ((psych_uint64) doubleMatrix);
			gp=(double*) ((psych_uint64) doubleMatrix + (psych_uint64) iters*sizeof(double));
			bp=(double*) ((psych_uint64) gp + (psych_uint64) iters*sizeof(double));

			for(ix=0;ix<iters;ix++){
				*(texturePointer_f++)= (GLfloat) *(rp++);  
				*(texturePointer_f++)= (GLfloat) *(gp++);  
				*(texturePointer_f++)= (GLfloat) *(bp++);  
			}
			textureRecord->depth=(usefloatformat==1) ? 48 : 96;
			textureRecord->textureinternalformat = (usefloatformat==1) ? GL_RGB_FLOAT16_APPLE : GL_RGB_FLOAT32_APPLE; 
			textureRecord->textureexternalformat = GL_RGB;
		}
		
		if(numMatrixPlanes==4) {
			rp=(double*) ((psych_uint64) doubleMatrix);
			gp=(double*) ((psych_uint64) doubleMatrix + (psych_uint64) iters*sizeof(double));
			bp=(double*) ((psych_uint64) gp + (psych_uint64) iters*sizeof(double));
			ap=(double*) ((psych_uint64) bp + (psych_uint64) iters*sizeof(double));

			for(ix=0;ix<iters;ix++){
				*(texturePointer_f++)= (GLfloat) *(rp++);  
				*(texturePointer_f++)= (GLfloat) *(gp++);  
				*(texturePointer_f++)= (GLfloat) *(bp++);  
				*(texturePointer_f++)= (GLfloat) *(ap++);  
			}			
			textureRecord->depth=(usefloatformat==1) ? 64 : 128;
			textureRecord->textureinternalformat = (usefloatformat==1) ? GL_RGBA_FLOAT16_APPLE : GL_RGBA_FLOAT32_APPLE; 
			textureRecord->textureexternalformat = GL_RGBA;
		}

		// This is a special workaround for bugs in FLOAT16 texture creation on Mac OS/X 10.4.x and 10.5.x.
		// The OpenGL fails to properly flush very small values (< 1e-9) to zero when creating a FLOAT16
		// type texture. Instead it seems to initialize with trash data, corrupting the texture.
		// Therefore, if FLOAT16 texture creation is requested, we loop over the whole input buffer and
		// set all values with magnitude smaller than 1e-9 to zero. Better safe than sorry...
		if(usefloatformat==1) {
			texturePointer_f=(GLfloat*) texturePointer;
			iters = iters * numMatrixPlanes;
			for(ix=0; ix<iters; ix++, texturePointer_f++) if(fabs((double) *texturePointer_f) < 1e-9) { *texturePointer_f = 0.0; }
		}
		
		// End of HDR conversion code...
	}
    else {
		// Standard LDR texture 8 bpc conversion routines -- Fast path.
	
		// Improved implementation: Takes 13 ms on a 800x800 texture...
		if(isImageMatrixDoubles && numMatrixPlanes==1){
			texturePointer_b=(GLubyte*) texturePointer;
			iters=xSize*ySize;
			for(ix=0;ix<iters;ix++){
				*(texturePointer_b++)= (GLubyte) *(doubleMatrix++);  
			}
			textureRecord->depth=8;
		}
		
		// Improved version: Takes 3 ms on a 800x800 texture...
		// NB: Implementing memcpy manually by a for-loop takes 10 ms! This is a huge difference.
		// -> That's because memcpy on MacOS-X is implemented with hand-coded, highly tuned Assembler code for PowerPC.
		// -> It's always wise to use system-routines if available, instead of coding it by yourself!
		if(isImageMatrixBytes && numMatrixPlanes==1){
			memcpy((void*) texturePointer, (void*) byteMatrix, xSize*ySize);
			textureRecord->depth=8;
		}
		
		// New version: Takes 33 ms on a 800x800 texture...
		if(isImageMatrixDoubles && numMatrixPlanes==2){
			texturePointer_b=(GLubyte*) texturePointer;
			iters=xSize*ySize;
			rp=(double*) ((psych_uint64) doubleMatrix);
			ap=(double*) ((psych_uint64) doubleMatrix + (psych_uint64) iters*sizeof(double));
			for(ix=0;ix<iters;ix++){
				*(texturePointer_b++)= (GLubyte) *(rp++);  
				*(texturePointer_b++)= (GLubyte) *(ap++);  
			}
			textureRecord->depth=16;
		}
		
		// New version: Takes 20 ms on a 800x800 texture...
		if(isImageMatrixBytes && numMatrixPlanes==2){
			texturePointer_b=(GLubyte*) texturePointer;
			iters=xSize*ySize;
			rpb=(GLubyte*) ((psych_uint64) byteMatrix);
			apb=(GLubyte*) ((psych_uint64) byteMatrix + (psych_uint64) iters);
			for(ix=0;ix<iters;ix++){
				*(texturePointer_b++)= *(rpb++);  
				*(texturePointer_b++)= *(apb++);  
			}
			textureRecord->depth=16;
		}
		
		// Improved version: Takes 43 ms on a 800x800 texture...
		if(isImageMatrixDoubles && numMatrixPlanes==3){
			texturePointer_b=(GLubyte*) texturePointer;
			iters=xSize*ySize;
			rp=(double*) ((psych_uint64) doubleMatrix);
			gp=(double*) ((psych_uint64) doubleMatrix + (psych_uint64) iters*sizeof(double));
			bp=(double*) ((psych_uint64) gp + (psych_uint64) iters*sizeof(double));
			for(ix=0;ix<iters;ix++){
				*(texturePointer_b++)= (GLubyte) *(rp++);  
				*(texturePointer_b++)= (GLubyte) *(gp++);  
				*(texturePointer_b++)= (GLubyte) *(bp++);  
			}
			textureRecord->depth=24;
		}
		
		// Improved version: Takes 25 ms on a 800x800 texture...
		if(isImageMatrixBytes && numMatrixPlanes==3){
			texturePointer_b=(GLubyte*) texturePointer;
			iters=xSize*ySize;
			
			rpb=(GLubyte*) ((psych_uint64) byteMatrix);
			gpb=(GLubyte*) ((psych_uint64) byteMatrix + (psych_uint64) iters);
			bpb=(GLubyte*) ((psych_uint64) gpb + (psych_uint64) iters);
			for(ix=0;ix<iters;ix++){
				*(texturePointer_b++)= *(rpb++);  
				*(texturePointer_b++)= *(gpb++);  
				*(texturePointer_b++)= *(bpb++);  
			}
			textureRecord->depth=24;
		}
		
		// Improved version: Takes 55 ms on a 800x800 texture...
		if(isImageMatrixDoubles && numMatrixPlanes==4){
			texturePointer_b=(GLubyte*) texturePointer;
			iters=xSize*ySize;
			
			rp=(double*) ((psych_uint64) doubleMatrix);
			gp=(double*) ((psych_uint64) doubleMatrix + (psych_uint64) iters*sizeof(double));
			bp=(double*) ((psych_uint64) gp + (psych_uint64) iters*sizeof(double));
			ap=(double*) ((psych_uint64) bp + (psych_uint64) iters*sizeof(double));
			if (bigendian) {
				// Code for big-endian machines like PowerPC:
				for(ix=0;ix<iters;ix++){
					*(texturePointer_b++)= (GLubyte) *(ap++);  
					*(texturePointer_b++)= (GLubyte) *(rp++);  
					*(texturePointer_b++)= (GLubyte) *(gp++);  
					*(texturePointer_b++)= (GLubyte) *(bp++);  
				}
			}
			else {
				// Code for little-endian machines like Intel Pentium:
				for(ix=0;ix<iters;ix++){
					*(texturePointer_b++)= (GLubyte) *(bp++);  
					*(texturePointer_b++)= (GLubyte) *(gp++);  
					*(texturePointer_b++)= (GLubyte) *(rp++);  
					*(texturePointer_b++)= (GLubyte) *(ap++);  
				}
			}
			
			textureRecord->depth=32;
		}
		
		// Improved version: Takes 33 ms on a 800x800 texture...
		if(isImageMatrixBytes && numMatrixPlanes==4){
			texturePointer_b=(GLubyte*) texturePointer;
			iters=xSize*ySize;
			
			rpb=(GLubyte*) ((psych_uint64) byteMatrix);
			gpb=(GLubyte*) ((psych_uint64) byteMatrix + (psych_uint64) iters);
			bpb=(GLubyte*) ((psych_uint64) gpb + (psych_uint64) iters);
			apb=(GLubyte*) ((psych_uint64) bpb + (psych_uint64) iters);
			if (bigendian) {
				// Code for big-endian machines like PowerPC:
				for(ix=0;ix<iters;ix++){
					*(texturePointer_b++)= *(apb++);  
					*(texturePointer_b++)= *(rpb++);  
					*(texturePointer_b++)= *(gpb++);  
					*(texturePointer_b++)= *(bpb++);  
				}
			}
			else {
				// Code for little-endian machines like Intel Pentium:
				for(ix=0;ix<iters;ix++){
					*(texturePointer_b++)= *(bpb++);  
					*(texturePointer_b++)= *(gpb++);  
					*(texturePointer_b++)= *(rpb++);  
					*(texturePointer_b++)= *(apb++);  
				}
			}
			
			textureRecord->depth=32;
		}
	} // End of 8 bpc texture conversion code (fast-path for LDR textures)
    
    // The memory buffer now contains our texture data in a format ready to submit to OpenGL.
    
	// Assign parent window and copy its inheritable properties:
	PsychAssignParentWindow(textureRecord, windowRecord);

    // Texture orientation is zero aka transposed aka non-renderswapped.
    textureRecord->textureOrientation = ((assume_texorientation != 2) && (assume_texorientation != 3)) ? 0 : 2;
    
	// This is our best guess about the number of image channels:
	textureRecord->nrchannels = numMatrixPlanes;

    // Let's create and bind a new texture object and fill it with our new texture data.
    PsychCreateTexture(textureRecord);
    
	// Assign GLSL filter-/lookup-shaders if needed:
	PsychAssignHighPrecisionTextureShaders(textureRecord, windowRecord, usefloatformat, (usepoweroftwo & 2) ? 1 : 0);

	// User specified override shader for this texture provided? This is useful for
	// basic image processing and procedural texture shading:
	if (textureShader!=0) {
		// Assign provided shader as filtershader to this texture: We negate it so
		// that the texture blitter routines know this is a custom shader, not our
		// built in filter shader:
		textureRecord->textureFilterShader = -1 * textureShader;
	}
	
    // Texture ready. Mark it valid and return handle to userspace:
    PsychSetWindowRecordValid(textureRecord);
    PsychCopyOutDoubleArg(1, FALSE, textureRecord->windowIndex);
	
	// Swapping the texture to upright orientation requested?
	if (assume_texorientation == 1) {
		// Transform sourceRecord source texture into a normalized, upright texture if it isn't already in
		// that format. We require this standard orientation for simplified shader design.

		PsychSetShader(windowRecord, 0);
		PsychNormalizeTextureOrientation(textureRecord);
	}
	
	// Shall the texture be finally declared "normally oriented"?
	// This is either due to explicit renderswapping if assume_textureorientation == 1,
	// or because it was already pretransposed in Matlab/Octave if assume_textureorientation == 2,
	// or because user space tells us the texture is isotropic if assume_textureorientation == 3.
	if (assume_texorientation > 0) {
		// Yes. Label it as such:
		textureRecord->textureOrientation = 2;
	}
    
    if(PsychPrefStateGet_DebugMakeTexture()) 	//MARK #4
        StoreNowTime();
    
    return(PsychError_none);
}
PsychError SCREENMakeTexture(void) 
{
    int									ix;
    PsychWindowRecordType				*textureRecord;
    PsychWindowRecordType			*windowRecord;
    PsychRectType						rect;
    Boolean								isImageMatrixBytes, isImageMatrixDoubles;
    int									numMatrixPlanes, xSize, ySize; 
    unsigned char						*byteMatrix;
    double								*doubleMatrix;
    GLuint                              *texturePointer;
    GLubyte                             *texturePointer_b;
    
    
    if(PsychPrefStateGet_DebugMakeTexture())	//MARK #1
        StoreNowTime();
    
    //all subfunctions should have these two lines.  
    PsychPushHelp(useString, synopsisString, seeAlsoString);
    if(PsychIsGiveHelp()){PsychGiveHelp();return(PsychError_none);};
    
    //Get the window structure for the onscreen window.  It holds the onscreein GL context which we will need in the
    //final step when we copy the texture from system RAM onto the screen.
    PsychErrorExit(PsychCapNumInputArgs(2));   	
    PsychErrorExit(PsychRequireNumInputArgs(2)); 	
    PsychErrorExit(PsychCapNumOutputArgs(1)); 
    
    //get the window record from the window record argument and get info from the window record
    PsychAllocInWindowRecordArg(kPsychUseDefaultArgPosition, TRUE, &windowRecord);
    
    if((windowRecord->windowType!=kPsychDoubleBufferOnscreen) && (windowRecord->windowType!=kPsychSingleBufferOnscreen))
        PsychErrorExitMsg(PsychError_user, "MakeTexture called on something else than a onscreen window");
    
    //get the argument and sanity check it.
    isImageMatrixBytes=PsychAllocInUnsignedByteMatArg(2, kPsychArgAnything, &ySize, &xSize, &numMatrixPlanes, &byteMatrix);
    isImageMatrixDoubles=PsychAllocInDoubleMatArg(2, kPsychArgAnything, &ySize, &xSize, &numMatrixPlanes, &doubleMatrix);
    if(numMatrixPlanes > 4)
        PsychErrorExitMsg(PsychError_inputMatrixIllegalDimensionSize, "Specified image matrix exceeds maximum depth of 4 layers");
    if(ySize<1 || xSize <1)
        PsychErrorExitMsg(PsychError_inputMatrixIllegalDimensionSize, "Specified image matrix must be at least 1 x 1 pixels in size");
    if(! (isImageMatrixBytes || isImageMatrixDoubles))
        PsychErrorExitMsg(PsychError_user, "Illegal argument type");  //not  likely. 
    PsychMakeRect(rect, 0, 0, xSize, ySize);
    
    
    //Create a texture record.  Really just a window recored adapted for textures.  
    PsychCreateWindowRecord(&textureRecord);						//this also fills the window index field.
    textureRecord->windowType=kPsychTexture;
    // MK: We need to assign the screen number of the onscreen-window, so PsychCreateTexture()
    // can query the size of the screen/onscreen-window...
    textureRecord->screenNumber=windowRecord->screenNumber;
    textureRecord->depth=32;
    PsychCopyRect(textureRecord->rect, rect);
    
    //Allocate the texture memory and copy the MATLAB matrix into the texture memory.
    // MK: We only allocate the amount really needed for given format, aka numMatrixPlanes - Bytes per pixel.
    textureRecord->textureMemorySizeBytes= numMatrixPlanes * xSize * ySize;
    // MK: Allocate memory page-aligned... -> Helps Apple texture range extensions et al.
    if(PsychPrefStateGet_DebugMakeTexture()) 	//MARK #2
        StoreNowTime();
    textureRecord->textureMemory=valloc(textureRecord->textureMemorySizeBytes);
    if(PsychPrefStateGet_DebugMakeTexture()) 	//MARK #3
        StoreNowTime();	
    texturePointer=textureRecord->textureMemory;
    
    // Original implementation: Takes 80 ms on a 800x800 texture...
    /*        if(isImageMatrixDoubles && numMatrixPlanes==1){
        for(ix=0;ix<xSize;ix++){
            for(iy=0;iy<ySize;iy++){
                textureIndex=xSize*iy+ix;
                matrixIndex=iy + ySize * ix;
                textureRecord->textureMemory[textureIndex]= ((((((GLuint)255 << 8) | 
                                                                (GLuint)(doubleMatrix[matrixIndex])) << 8 ) | 
                                                              (GLuint)(doubleMatrix[matrixIndex]) ) << 8) | 
                    (GLuint)(doubleMatrix[matrixIndex]);
            }
        }
    }
*/	
    
    // Improved implementation: Takes 13 ms on a 800x800 texture...
    if(isImageMatrixDoubles && numMatrixPlanes==1){
        texturePointer_b=(GLubyte*) texturePointer;
        int iters=xSize*ySize;
        for(ix=0;ix<iters;ix++){
            *(texturePointer_b++)= (GLubyte) *(doubleMatrix++);  
        }
        textureRecord->depth=8;
    }
    
    
    // Original implementation: Takes 30 ms on a 800x800 texture...
    /*        if(isImageMatrixBytes && numMatrixPlanes==1){
        for(ix=0;ix<xSize;ix++){
            for(iy=0;iy<ySize;iy++){
                textureIndex=xSize*iy+ix;
                matrixIndex=iy + ySize * ix;
                textureRecord->textureMemory[textureIndex]= ((((((GLuint)255 << 8) | 
                                                                (GLuint)(byteMatrix[matrixIndex])) << 8 ) | 
                                                              (GLuint)(byteMatrix[matrixIndex]) ) << 8) | 
                    (GLuint)(byteMatrix[matrixIndex]);
            }
        }
    
    }
*/	
    // Improved version: Takes 3 ms on a 800x800 texture...
    // NB: Implementing memcpy manually by a for-loop takes 10 ms! This is a huge difference.
    // -> That's because memcpy on MacOS-X is implemented with hand-coded, highly tuned Assembler code for PowerPC.
    // -> It's always wise to use system-routines if available, instead of coding it by yourself!
    if(isImageMatrixBytes && numMatrixPlanes==1){
        memcpy((void*) texturePointer, (void*) byteMatrix, xSize*ySize);
        textureRecord->depth=8;
    }
    
    // New version: Takes 33 ms on a 800x800 texture...
    if(isImageMatrixDoubles && numMatrixPlanes==2){
        texturePointer_b=(GLubyte*) texturePointer;
        int iters=xSize*ySize;
        double *rp, *ap;
        rp=(double*) ((unsigned long long) doubleMatrix);
        ap=(double*) ((unsigned long long) doubleMatrix + (unsigned long long) iters*sizeof(double));
        for(ix=0;ix<iters;ix++){
            *(texturePointer_b++)= (GLubyte) *(rp++);  
            *(texturePointer_b++)= (GLubyte) *(ap++);  
        }
        textureRecord->depth=16;
    }
    
    // New version: Takes 20 ms on a 800x800 texture...
    if(isImageMatrixBytes && numMatrixPlanes==2){
        texturePointer_b=(GLubyte*) texturePointer;
        int iters=xSize*ySize;
        GLubyte *rp, *ap;
        rp=(GLubyte*) ((unsigned long long) byteMatrix);
        ap=(GLubyte*) ((unsigned long long) byteMatrix + (unsigned long long) iters);
        for(ix=0;ix<iters;ix++){
            *(texturePointer_b++)= *(rp++);  
            *(texturePointer_b++)= *(ap++);  
        }
        textureRecord->depth=16;
    }
    
    // Original version: Takes 160 ms on a 800x800 texture...
    /*	if(isImageMatrixDoubles && numMatrixPlanes==3){
        for(ix=0;ix<xSize;ix++){
            for(iy=0;iy<ySize;iy++){
                textureIndex=xSize*iy+ix;
                redIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 0);
                greenIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 1);
                blueIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 2);
                red=(GLuint)doubleMatrix[redIndex];
                green=(GLuint)doubleMatrix[greenIndex];
                blue=(GLuint)doubleMatrix[blueIndex];
                alpha=(GLuint)255;
                textureRecord->textureMemory[textureIndex]= (((((alpha << 8) | red) << 8 ) | green ) << 8) | blue;
            }
        }
    textureRecord->depth=24;
    }
*/	
    // Improved version: Takes 43 ms on a 800x800 texture...
    if(isImageMatrixDoubles && numMatrixPlanes==3){
        texturePointer_b=(GLubyte*) texturePointer;
        int iters=xSize*ySize;
        double *rp, *gp, *bp;
        rp=(double*) ((unsigned long long) doubleMatrix);
        gp=(double*) ((unsigned long long) doubleMatrix + (unsigned long long) iters*sizeof(double));
        bp=(double*) ((unsigned long long) gp + (unsigned long long) iters*sizeof(double));
        for(ix=0;ix<iters;ix++){
            *(texturePointer_b++)= (GLubyte) *(rp++);  
            *(texturePointer_b++)= (GLubyte) *(gp++);  
            *(texturePointer_b++)= (GLubyte) *(bp++);  
        }
        textureRecord->depth=24;
    }
    
    /*        // Original version: Takes 94 ms on a 800x800 texture...
        if(isImageMatrixBytes && numMatrixPlanes==3){
            for(ix=0;ix<xSize;ix++){
                for(iy=0;iy<ySize;iy++){
                    textureIndex=xSize*iy+ix;
                    redIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 0);
                    greenIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 1);
                    blueIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 2);
                    red=(GLuint)byteMatrix[redIndex];
                    green=(GLuint)byteMatrix[greenIndex];
                    blue=(GLuint)byteMatrix[blueIndex];
                    alpha=(GLuint)255;
                    textureRecord->textureMemory[textureIndex]= (((((alpha << 8) | red) << 8 ) | green ) << 8) | blue;
                }
            }
	}
    */	
    // Improved version: Takes 25 ms on a 800x800 texture...
    if(isImageMatrixBytes && numMatrixPlanes==3){
        texturePointer_b=(GLubyte*) texturePointer;
        int iters=xSize*ySize;
        GLubyte *rp, *gp, *bp;
        rp=(GLubyte*) ((unsigned long long) byteMatrix);
        gp=(GLubyte*) ((unsigned long long) byteMatrix + (unsigned long long) iters);
        bp=(GLubyte*) ((unsigned long long) gp + (unsigned long long) iters);
        for(ix=0;ix<iters;ix++){
            *(texturePointer_b++)= *(rp++);  
            *(texturePointer_b++)= *(gp++);  
            *(texturePointer_b++)= *(bp++);  
        }
        textureRecord->depth=24;
    }
    
    // Original version: 190 ms on a 800x800 texture...
    /*        if(isImageMatrixDoubles && numMatrixPlanes==4){
        for(ix=0;ix<xSize;ix++){
            for(iy=0;iy<ySize;iy++){
                textureIndex=xSize*iy+ix;
                redIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 0);
                greenIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 1);
                blueIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 2);
                alphaIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 3);
                red=(GLuint)doubleMatrix[redIndex];
                green=(GLuint)doubleMatrix[greenIndex];
                blue=(GLuint)doubleMatrix[blueIndex];
                alpha=(GLuint)doubleMatrix[alphaIndex];
                textureRecord->textureMemory[textureIndex]= (((((alpha << 8) | red) << 8 ) | green ) << 8) | blue;
            }
        }
    }
*/
    
    // Improved version: Takes 55 ms on a 800x800 texture...
    if(isImageMatrixDoubles && numMatrixPlanes==4){
        texturePointer_b=(GLubyte*) texturePointer;
        int iters=xSize*ySize;
        double *rp, *gp, *bp, *ap;
        rp=(double*) ((unsigned long long) doubleMatrix);
        gp=(double*) ((unsigned long long) doubleMatrix + (unsigned long long) iters*sizeof(double));
        bp=(double*) ((unsigned long long) gp + (unsigned long long) iters*sizeof(double));
        ap=(double*) ((unsigned long long) bp + (unsigned long long) iters*sizeof(double));
        for(ix=0;ix<iters;ix++){
            *(texturePointer_b++)= (GLubyte) *(ap++);  
            *(texturePointer_b++)= (GLubyte) *(rp++);  
            *(texturePointer_b++)= (GLubyte) *(gp++);  
            *(texturePointer_b++)= (GLubyte) *(bp++);  
        }
        textureRecord->depth=32;
    }
    
    // Original version: Takes 125 ms on a 800x800 texture...
    /*        if(isImageMatrixBytes && numMatrixPlanes==4){
        for(ix=0;ix<xSize;ix++){
            for(iy=0;iy<ySize;iy++){
                textureIndex=xSize*iy+ix;
                redIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 0);
                greenIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 1);
                blueIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 2);
                alphaIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 3);
                red=(GLuint)byteMatrix[redIndex];
                green=(GLuint)byteMatrix[greenIndex];
                blue=(GLuint)byteMatrix[blueIndex];
                alpha=(GLuint)byteMatrix[alphaIndex];
                textureRecord->textureMemory[textureIndex]= (((((alpha << 8) | red) << 8 ) | green ) << 8) | blue;
            }
        }
    }
*/
    
    // Improved version: Takes 33 ms on a 800x800 texture...
    if(isImageMatrixBytes && numMatrixPlanes==4){
        texturePointer_b=(GLubyte*) texturePointer;
        int iters=xSize*ySize;
        GLubyte *rp, *gp, *bp, *ap;
        rp=(GLubyte*) ((unsigned long long) byteMatrix);
        gp=(GLubyte*) ((unsigned long long) byteMatrix + (unsigned long long) iters);
        bp=(GLubyte*) ((unsigned long long) gp + (unsigned long long) iters);
        ap=(GLubyte*) ((unsigned long long) bp + (unsigned long long) iters);
        for(ix=0;ix<iters;ix++){
            *(texturePointer_b++)= *(ap++);  
            *(texturePointer_b++)= *(rp++);  
            *(texturePointer_b++)= *(gp++);  
            *(texturePointer_b++)= *(bp++);  
        }
        textureRecord->depth=32;
    }
    
    
    // The memory buffer now contains our texture data in a format ready to submit to OpenGL.
    
    // Assign proper OpenGL-Renderingcontext to texture:
    // MK: Is this the proper way to do it???
    textureRecord->targetSpecific.contextObject = windowRecord->targetSpecific.contextObject;
    
    // Let's create and bind a new texture object and fill it with our new texture data.
    PsychCreateTexture(textureRecord);
    
    // Texture ready. Mark it valid and return handle to userspace:
    PsychSetWindowRecordValid(textureRecord);
    PsychCopyOutDoubleArg(1, FALSE, textureRecord->windowIndex);
    
    if(PsychPrefStateGet_DebugMakeTexture()) 	//MARK #4
        StoreNowTime();
    
    return(PsychError_none);
}