Example #1
0
/*
	Accept a pointer to a Psychtoolbox color specifier and fill it with the 
	color information supplied at the specified argument position in module call .
	
	The behavior depends on the value of required:
	
	1. Required = TRUE
		A. If argument is not present exit with an error PsychError_invalidColorArg.
		B. If argument is present and valid then load it into *color and return true.
	2. Required = FALSE 
		A. If argument is not present then don't touch *color and return false.
		B. If argument is present and valid then load it into *color and return true.
*/
psych_bool PsychCopyInColorArg(int position, psych_bool required, PsychColorType *color)
{
	int i,m,n,p,argSize;
	psych_bool isArg;
	double dummyColor[4];
	double *colorArgMat=NULL;
	unsigned char *colorArgMatBytes=NULL;
	
	if(position == kPsychUseDefaultArgPosition)
		position = kPsychDefaultColorArgPosition;
	isArg = PsychIsArgPresent(PsychArgIn, position);
	if(!isArg){
		if(required)
			PsychErrorExitMsg(PsychError_user, "No color argument supplied"); //1A
		else
			return(FALSE);	//2A
	}

	// Try to retrieve double-matrix:
	if (!PsychAllocInDoubleMatArg(position, kPsychArgAnything, &m, &n, &p, &colorArgMat)) {
	  // No double matrix: Try to retrieve uint8 matrix:
	  if (!PsychAllocInUnsignedByteMatArg(position, TRUE, &m, &n, &p, &colorArgMatBytes)) {
	    PsychErrorExitMsg(PsychError_user, "No color argument or invalid color argument supplied");
	  }

	  // Color as uint8 received: Convert to double.
	  if(p!=1) PsychErrorExit(PsychError_invalidColorArg);
	  argSize = m*n;
	  for(i=0; i<argSize; i++) dummyColor[i] = (double) colorArgMatBytes[i];
	  colorArgMat = (double*) (&dummyColor[0]);
	}

	if(p!=1) PsychErrorExit(PsychError_invalidColorArg);
	argSize = m*n;

	if(argSize==4){
		color->mode = kPsychRGBAColor;
		color->value.rgba.r = colorArgMat[0];
		color->value.rgba.g = colorArgMat[1];
		color->value.rgba.b = colorArgMat[2];
		color->value.rgba.a = colorArgMat[3];
                return(TRUE);	//1B, 2B	
	}if(argSize==3){
		color->mode = kPsychRGBColor;
		color->value.rgb.r = colorArgMat[0];
		color->value.rgb.g = colorArgMat[1];
		color->value.rgb.b = colorArgMat[2];
		return(TRUE);	//1B, 2B	
	}else if(argSize==1){
		color->mode = kPsychIndexColor;
		color->value.index.i = colorArgMat[0];
		return(TRUE); //1B, 2B	
	}else{ 
		PsychErrorExit(PsychError_invalidColorArg);
		return(FALSE);
	}
}
/* PsychPrepareRenderBatch()
 *
 * Perform setup for a batch of render requests for a specific primitive. Some 2D Screen
 * drawing commands allow to specify a list of primitives to draw instead of only a single
 * one. E.g. 'DrawDots' allows to draw thousands of dots with one single DrawDots command.
 * This helper routine is called by such batch-capable commands. It checks which input arguments
 * are provided and if its a single one or multiple ones. It sets up the rendering pipe accordingly,
 * performing required conversion steps. The actual drawing routine just needs to perform primitive
 * specific code.
 */
void PsychPrepareRenderBatch(PsychWindowRecordType *windowRecord, int coords_pos, int* coords_count, double** xy, int colors_pos, int* colors_count, int* colorcomponent_count, double** colors, unsigned char** bytecolors, int sizes_pos, int* sizes_count, double** size)
{
	PsychColorType							color;
	int                                     m,n,p,mc,nc,pc;
	int                                     i, nrpoints, nrsize;
	psych_bool                              isArgThere, isdoublecolors, isuint8colors, usecolorvector, needxy;
	double									*tmpcolors, *pcolors, *tcolors;
	double									convfactor, whiteValue;

	needxy = (coords_pos > 0) ? TRUE: FALSE;
	coords_pos = abs(coords_pos);
	colors_pos = abs(colors_pos);
	sizes_pos = abs(sizes_pos);
	
	// Get mandatory or optional xy coordinates argument
	isArgThere = PsychIsArgPresent(PsychArgIn, coords_pos);
	if(!isArgThere && needxy) {
		PsychErrorExitMsg(PsychError_user, "No position argument supplied");
	}
	
	if (isArgThere) {
		PsychAllocInDoubleMatArg(coords_pos, TRUE, &m, &n, &p, xy);
		if(p!=1 || (m!=*coords_count && (m*n)!=*coords_count)) {
			printf("PTB-ERROR: Coordinates must be a %i tuple or a %i rows vector.\n", *coords_count, *coords_count);
			PsychErrorExitMsg(PsychError_user, "Invalid format for coordinate specification.");
		}
		
		if (m!=1) {
			nrpoints=n;
			*coords_count = n;
		}
		else {
			// Special case: 1 row vector provided for single argument.
			nrpoints=1;
			*coords_count = 1;
		}
	}
	else {
		nrpoints = 0;
		*coords_count = 0;
	}
	
	if (size) {
		// Get optional size argument
		isArgThere = PsychIsArgPresent(PsychArgIn, sizes_pos);
		if(!isArgThere){
			// No size provided: Use a default size of 1.0:
			*size = (double *) PsychMallocTemp(sizeof(double));
			*size[0] = 1;
			nrsize=1;
		} else {
			PsychAllocInDoubleMatArg(sizes_pos, TRUE, &m, &n, &p, size);
			if(p!=1) PsychErrorExitMsg(PsychError_user, "Size must be a scalar or a vector with one column or row");
			nrsize=m*n;
			if (nrsize!=nrpoints && nrsize!=1 && *sizes_count!=1) PsychErrorExitMsg(PsychError_user, "Size vector must contain one size value per item.");
		}
		
		*sizes_count = nrsize;
	}	

	// Check if color argument is provided:
	isArgThere = PsychIsArgPresent(PsychArgIn, colors_pos);        
	if(!isArgThere) {
		// No color argument provided - Use defaults:
		whiteValue=PsychGetWhiteValueFromWindow(windowRecord);
		PsychLoadColorStruct(&color, kPsychIndexColor, whiteValue ); //index mode will coerce to any other.
		usecolorvector=false;
	}
	else {
		// Some color argument provided. Check first, if it's a valid color vector:
		isdoublecolors = PsychAllocInDoubleMatArg(colors_pos, kPsychArgAnything, &mc, &nc, &pc, colors);
		isuint8colors  = PsychAllocInUnsignedByteMatArg(colors_pos, kPsychArgAnything, &mc, &nc, &pc, bytecolors);
		
		// Do we have a color vector, aka one element per vertex?
		if((isdoublecolors || isuint8colors) && pc==1 && mc!=1 && nc==nrpoints && nrpoints>1) {
			// Looks like we might have a color vector... ... Double-check it:
			if (mc!=3 && mc!=4) PsychErrorExitMsg(PsychError_user, "Color vector must be a 3 or 4 row vector");
			// Yes. colors is a valid pointer to it.
			usecolorvector=true;
			
			if (isdoublecolors) {
				if (fabs(windowRecord->colorRange)!=1) {
					// We have to loop through the vector and divide all values by windowRecord->colorRange, so the input values
					// 0-colorRange get mapped to the range 0.0-1.0, as OpenGL expects values in range 0-1 when
					// a color vector is passed in Double- or Float format.
					// This is inefficient, as it burns some cpu-cycles, but necessary to keep color
					// specifications consistent in the PTB - API.
					convfactor = 1.0 / fabs(windowRecord->colorRange);
					tmpcolors=PsychMallocTemp(sizeof(double) * nc * mc);
					pcolors = *colors;
					tcolors = tmpcolors;
					for (i=0; i<(nc*mc); i++) {
						*(tcolors++)=(*pcolors++) * convfactor;
					}
				}
				else {
					// colorRange is == 1 --> No remapping needed as colors are already in proper range!
					// Just setup pointer to our unaltered input color vector:
					tmpcolors=*colors;
				}
				
				*colors = tmpcolors;
			}
			else {
				// Color vector in uint8 format. Nothing to do.
			}
		}
		else {
			// No color vector provided: Check for a single valid color triplet or quadruple:
			usecolorvector=false;
			isArgThere=PsychCopyInColorArg(colors_pos, TRUE, &color);                
		}
	}
	
	// Enable this windowRecords framebuffer as current drawingtarget:
	PsychSetDrawingTarget(windowRecord);
	
	// Setup default drawshader:
	PsychSetShader(windowRecord, -1);
	
	// Setup alpha blending properly:
	PsychUpdateAlphaBlendingFactorLazily(windowRecord);
	
 	// Setup common color for all objects if no color vector has been provided:
	if (!usecolorvector) {
		PsychCoerceColorMode(&color);
		PsychSetGLColor(&color, windowRecord);
		*colors_count = 1;
	}
	else {
		*colors_count = nc;
	}
	*colorcomponent_count = mc;
		
	return;
}
Example #3
0
PsychError IOPORTWrite(void)
{
 	static char useString[] = "[nwritten, when, errmsg, prewritetime, postwritetime, lastchecktime] = IOPort('Write', handle, data [, blocking=1]);";
	static char synopsisString[] = 
		"Write data to device, specified by 'handle'.\n"
		"'data' must be a vector of data items to write, or a matrix (in which case data "
		"in the matrix will be transmitted in column-major order, ie., first the first "
		"column, then the 2nd column etc...), either with data elements of uint8 class "
		"or a (1 Byte per char) character string. The optional flag 'blocking' if "
		"set to 0 will ask the write function to not block, but return immediately, ie. "
		"data is sent/written in the background while your code continues to execute - There "
		"may be an arbitrary delay until data transmission is really finished. The default "
		"setting is 1, ie. blocking writes - The function waits until data transmission is really "
		"finished. You can also use blocking == 2 to request a different mode "
		"for blocking writes, where IOPort is polling for write-completion instead of "
		"a more cpu friendly wait. This may decrease latency for certain applications. "
		"Another even more agressive polling method is implemented via blocking == 3 on "
		"Linux systems with some limited set of hardware, e.g., real native serial ports.\n"
		"On systems without any support for specific polling modes, the 2 or 3 settings are treated "
		"as a standard blocking write.\n\n"
		"Optionally, the function returns the following return arguments:\n"
		"'nwritten' Number of bytes written -- Should match amount of data provided on success.\n"
		"'when' A timestamp of write completion: This is only meaningful in blocking mode!\n"
		"'errmsg' A system defined error message if something wen't wrong.\n"
		"The following three timestamps are for low-level debugging and special purpose:\n"
		"'prewritetime' A timestamp taken immediately before submitting the write request. "
		"'postwritetime' A timestamp taken immediately after submitting the write request. "
		"'lastchecktime' A timestamp taken at the time of last check for write completion if applicable. ";
		
	static char seeAlsoString[] = "";
	
	char			errmsg[1024];
	int				handle, blocking, m, n, p, nwritten;
	psych_uint8*	inData = NULL;
	char*			inChars = NULL;
	void*			writedata = NULL;
	double			timestamp[4] = {0, 0, 0, 0};
	errmsg[0] = 0;

	// Setup online help: 
	PsychPushHelp(useString, synopsisString, seeAlsoString);
	if(PsychIsGiveHelp()) {PsychGiveHelp(); return(PsychError_none); };
	
	PsychErrorExit(PsychCapNumInputArgs(3));     // The maximum number of inputs
	PsychErrorExit(PsychRequireNumInputArgs(2)); // The required number of inputs	
	PsychErrorExit(PsychCapNumOutputArgs(6));	 // The maximum number of outputs

	// Get required port handle:
	PsychCopyInIntegerArg(1, kPsychArgRequired, &handle);

	// Get the data:
	switch(PsychGetArgType(2)) {
		case PsychArgType_uint8:
			PsychAllocInUnsignedByteMatArg(2, kPsychArgRequired, &m, &n, &p, &inData);
			if (p!=1 || m * n == 0) PsychErrorExitMsg(PsychError_user, "'data' is not a vector or 2D matrix, but some higher dimensional matrix!");
			n = m * n;
			writedata = (void*) inData;
		break;
		
		case PsychArgType_char:
			PsychAllocInCharArg(2, kPsychArgRequired, &inChars);
			n = strlen(inChars);
			writedata = (void*) inChars;
		break;
		
		default:
			PsychErrorExitMsg(PsychError_user, "Invalid type for 'data' vector: Must be an uint8 or char vector.");
	}
	
	
	// Get optional blocking flag: Defaults to one -- blocking.
	blocking = 1;
	PsychCopyInIntegerArg(3, kPsychArgOptional, &blocking);

	// Write data:
	nwritten = PsychWriteIOPort(handle, writedata, n, blocking, errmsg, &timestamp[0]);
	if (nwritten < 0 && verbosity > 0) printf("IOPort: Error: %s\n", errmsg); 
	
	PsychCopyOutDoubleArg(1, kPsychArgOptional, nwritten);
	PsychCopyOutDoubleArg(2, kPsychArgOptional, timestamp[0]);
	PsychCopyOutCharArg(3, kPsychArgOptional, errmsg);
	PsychCopyOutDoubleArg(4, kPsychArgOptional, timestamp[1]);
	PsychCopyOutDoubleArg(5, kPsychArgOptional, timestamp[2]);
	PsychCopyOutDoubleArg(6, kPsychArgOptional, timestamp[3]);
	
    return(PsychError_none);
}
PsychError SCREENPutImage(void) 
{
	PsychRectType 		windowRect,positionRect;
	int 			ix, iy, numPlanes, bitsPerColor, matrixRedIndex, matrixGreenIndex, matrixBlueIndex, matrixAlphaIndex, matrixGrayIndex;
	int 			inputM, inputN, inputP, positionRectWidth, positionRectHeight; 
	PsychWindowRecordType	*windowRecord;
	unsigned char		*inputMatrixByte;
	double			*inputMatrixDouble;
	GLuint			*compactMat, matrixGrayValue, matrixRedValue, matrixGreenValue, matrixBlueValue, matrixAlphaValue, compactPixelValue;
	PsychArgFormatType	inputMatrixType;
	GLfloat			xZoom=1, yZoom=-1;
        
	//all sub functions should have these two lines
	PsychPushHelp(useString, synopsisString, seeAlsoString);
	if(PsychIsGiveHelp()){PsychGiveHelp();return(PsychError_none);};

	//cap the number of inputs
	PsychErrorExit(PsychCapNumInputArgs(4));   //The maximum number of inputs
	PsychErrorExit(PsychCapNumOutputArgs(0));  //The maximum number of outputs
        
        //get the image matrix
        inputMatrixType=PsychGetArgType(2);
        switch(inputMatrixType){
            case PsychArgType_none : case PsychArgType_default:
                PsychErrorExitMsg(PsychError_user, "imageArray argument required");
                break;
            case PsychArgType_uint8 :
                PsychAllocInUnsignedByteMatArg(2, TRUE, &inputM, &inputN, &inputP, &inputMatrixByte);
                break;
            case PsychArgType_double :
                PsychAllocInDoubleMatArg(2, TRUE, &inputM, &inputN, &inputP, &inputMatrixDouble);
                break;
            default :
                PsychErrorExitMsg(PsychError_user, "imageArray must be uint8 or double type");
                break;
        }
        
        //get the window and get the rect and stuff
        PsychAllocInWindowRecordArg(kPsychUseDefaultArgPosition, TRUE, &windowRecord);
        numPlanes=PsychGetNumPlanesFromWindowRecord(windowRecord);
        bitsPerColor=PsychGetColorSizeFromWindowRecord(windowRecord);
        PsychGetRectFromWindowRecord(windowRect, windowRecord);
	if(PsychCopyInRectArg(3, FALSE, positionRect)){
            positionRectWidth=(int)PsychGetWidthFromRect(positionRect);
            positionRectHeight=(int)PsychGetHeightFromRect(positionRect);
            if(inputP != 1 && inputP != 3 && inputP != 4)
                PsychErrorExitMsg(PsychError_user, "Third dimension of image matrix must be 1, 3, or 4"); 
            if( positionRectWidth != inputN  || positionRectHeight != inputM){
                //calculate the zoom factor
                xZoom=(GLfloat)positionRectWidth/(GLfloat)inputN;
                yZoom=-((GLfloat)positionRectHeight/(GLfloat)inputM);
            }
        }else{
           positionRect[kPsychLeft]=0;
           positionRect[kPsychTop]=0;
           positionRect[kPsychRight]=inputN;
           positionRect[kPsychBottom]=inputM;
           PsychCenterRect(positionRect, windowRect, positionRect);
           //This should be centered  
        }
        
        
        //put up the image
        if(numPlanes==1){  //screen planes, not image matrix planes.  
            PsychErrorExitMsg(PsychError_unimplemented, "Put Image does not yet support indexed mode");
            //remember to test here for inputP==3 because that would be wrong. 
        }else if(numPlanes==4){
            compactMat=(GLuint *)mxMalloc(sizeof(GLuint) * inputN * inputM);
            for(ix=0;ix<inputN;ix++){
                for(iy=0;iy<inputM;iy++){
                    if(inputP==1){
                        matrixGrayIndex=PsychIndexElementFrom3DArray(inputM, inputN, 1, iy, ix, 0);
                        if(inputMatrixType==PsychArgType_uint8)
                            matrixGrayValue=(GLuint)inputMatrixByte[matrixGrayIndex];
                        else //inputMatrixType==PsychArgType_double
                            matrixGrayValue=(GLuint)inputMatrixDouble[matrixGrayIndex];
                
                        compactPixelValue=((matrixGrayValue<<8 | matrixGrayValue)<<8 | matrixGrayValue)<<8 | 255; 
                        compactMat[iy*inputN+ix]=compactPixelValue; 
                    }else if(inputP==3){
                        matrixRedIndex=PsychIndexElementFrom3DArray(inputM, inputN, 3, iy, ix, 0);
                        matrixGreenIndex=PsychIndexElementFrom3DArray(inputM, inputN, 3, iy, ix, 1);
                        matrixBlueIndex=PsychIndexElementFrom3DArray(inputM, inputN, 3, iy, ix, 2);
                        if(inputMatrixType==PsychArgType_uint8){
                            matrixRedValue=(GLuint)inputMatrixByte[matrixRedIndex];
                            matrixGreenValue=(GLuint)inputMatrixByte[matrixGreenIndex];
                            matrixBlueValue=(GLuint)inputMatrixByte[matrixBlueIndex];
                            matrixAlphaValue=(GLuint)255;
                        }else{
                            matrixRedValue=(GLuint)inputMatrixDouble[matrixRedIndex];
                            matrixGreenValue=(GLuint)inputMatrixDouble[matrixGreenIndex];
                            matrixBlueValue=(GLuint)inputMatrixDouble[matrixBlueIndex];
                            matrixAlphaValue=(GLuint)255;
                        }
                        compactPixelValue= ((matrixRedValue<<8 | matrixGreenValue )<<8 | matrixBlueValue)<<8 | matrixAlphaValue; 
                        compactMat[iy*inputN+ix]=compactPixelValue; 
                    }else if(inputP==4){
                        matrixRedIndex=PsychIndexElementFrom3DArray(inputM, inputN, 3, iy, ix, 0);
                        matrixGreenIndex=PsychIndexElementFrom3DArray(inputM, inputN, 3, iy, ix, 1);
                        matrixBlueIndex=PsychIndexElementFrom3DArray(inputM, inputN, 3, iy, ix, 2);
                        matrixAlphaIndex=PsychIndexElementFrom3DArray(inputM, inputN, 3, iy, ix, 3);
                        if(inputMatrixType==PsychArgType_uint8){  
                            matrixRedValue=(GLuint)inputMatrixByte[matrixRedIndex];
                            matrixGreenValue=(GLuint)inputMatrixByte[matrixGreenIndex];
                            matrixBlueValue=(GLuint)inputMatrixByte[matrixBlueIndex];
                            matrixAlphaValue=(GLuint)inputMatrixByte[matrixAlphaIndex];
                        }else{
                            matrixRedValue=(GLuint)inputMatrixDouble[matrixRedIndex];
                            matrixGreenValue=(GLuint)inputMatrixDouble[matrixGreenIndex];
                            matrixBlueValue=(GLuint)inputMatrixDouble[matrixBlueIndex];
                            matrixAlphaValue=(GLuint)inputMatrixDouble[matrixAlphaIndex];
                        }
                        compactPixelValue= ((matrixRedValue<<8 | matrixGreenValue )<<8 | matrixBlueValue)<<8 | matrixAlphaValue; 
                        compactMat[iy*inputN+ix]=compactPixelValue; 

                    }

                 }
            }

            PsychSetGLContext(windowRecord);
			PsychUpdateAlphaBlendingFactorLazily(windowRecord);

            glRasterPos2i((GLint)(positionRect[kPsychLeft]), (GLint)(positionRect[kPsychTop]));
            PsychTestForGLErrors();
            glPixelStorei(GL_UNPACK_ALIGNMENT, (GLint)(sizeof(GLuint)));  //4  
            PsychTestForGLErrors();
            glPixelZoom(xZoom,yZoom);
            PsychTestForGLErrors();            
            glDrawPixels(inputN, inputM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, compactMat);
            free((void *)compactMat);
            PsychTestForGLErrors();
            PsychFlushGL(windowRecord);  //OS X: This does nothing if we are multi buffered, otherwise it glFlushes
						
			
            PsychTestForGLErrors();
        }else if(numPlanes==3)
            PsychErrorExitMsg(PsychError_unimplemented, "PutImage found hardware without an alpha channel.");	

	return(PsychError_none);
}
PsychError SCREENMakeTexture(void) 
{
    int					ix;
    PsychWindowRecordType		*textureRecord;
    PsychWindowRecordType		*windowRecord;
    PsychRectType			rect;
    Boolean				isImageMatrixBytes, isImageMatrixDoubles;
    int					numMatrixPlanes, xSize, ySize, iters; 
    unsigned char			*byteMatrix;
    double				*doubleMatrix;
    GLuint                              *texturePointer;
    GLubyte                             *texturePointer_b;
	GLfloat								*texturePointer_f;
    double *rp, *gp, *bp, *ap;    
    GLubyte *rpb, *gpb, *bpb, *apb;    
    int                                 usepoweroftwo, usefloatformat, assume_texorientation, textureShader;
    double                              optimized_orientation;
    Boolean                             bigendian;

    // Detect endianity (byte-order) of machine:
    ix=255;
    rpb=(GLubyte*) &ix;
    bigendian = ( *rpb == 255 ) ? FALSE : TRUE;
    ix = 0; rpb = NULL;

    if(PsychPrefStateGet_DebugMakeTexture())	//MARK #1
        StoreNowTime();
    
    //all subfunctions should have these two lines.  
    PsychPushHelp(useString, synopsisString, seeAlsoString);
    if(PsychIsGiveHelp()){PsychGiveHelp();return(PsychError_none);};
    
    //Get the window structure for the onscreen window.  It holds the onscreein GL context which we will need in the
    //final step when we copy the texture from system RAM onto the screen.
    PsychErrorExit(PsychCapNumInputArgs(7));   	
    PsychErrorExit(PsychRequireNumInputArgs(2)); 	
    PsychErrorExit(PsychCapNumOutputArgs(1)); 
    
    //get the window record from the window record argument and get info from the window record
    PsychAllocInWindowRecordArg(kPsychUseDefaultArgPosition, TRUE, &windowRecord);
    
    if((windowRecord->windowType!=kPsychDoubleBufferOnscreen) && (windowRecord->windowType!=kPsychSingleBufferOnscreen))
        PsychErrorExitMsg(PsychError_user, "MakeTexture called on something else than a onscreen window");
    
	// Get optional texture orientation flag:
	assume_texorientation = 0;
	PsychCopyInIntegerArg(6, FALSE, &assume_texorientation);
	
	// Get optional texture shader handle:
	textureShader = 0;
	PsychCopyInIntegerArg(7, FALSE, &textureShader);
	
    //get the argument and sanity check it.
    isImageMatrixBytes=PsychAllocInUnsignedByteMatArg(2, kPsychArgAnything, &ySize, &xSize, &numMatrixPlanes, &byteMatrix);
    isImageMatrixDoubles=PsychAllocInDoubleMatArg(2, kPsychArgAnything, &ySize, &xSize, &numMatrixPlanes, &doubleMatrix);
    if(numMatrixPlanes > 4)
        PsychErrorExitMsg(PsychError_inputMatrixIllegalDimensionSize, "Specified image matrix exceeds maximum depth of 4 layers");
    if(ySize<1 || xSize <1)
        PsychErrorExitMsg(PsychError_inputMatrixIllegalDimensionSize, "Specified image matrix must be at least 1 x 1 pixels in size");
    if(! (isImageMatrixBytes || isImageMatrixDoubles))
        PsychErrorExitMsg(PsychError_user, "Illegal argument type");  //not  likely. 

	// Is this a special image matrix which is already pre-transposed to fit our optimal format?
	if (assume_texorientation == 2) {
		// Yes. Swap xSize and ySize to take this into account:
		ix = xSize;
		xSize = ySize;
		ySize = ix;
		ix = 0;
	}

	// Build defining rect for this texture:
    PsychMakeRect(rect, 0, 0, xSize, ySize);

    // Copy in texture preferred draw orientation hint. We default to zero degrees, if
    // not provided.
    // This parameter is not yet used. It is silently ignorerd for now...
    optimized_orientation = 0;
    PsychCopyInDoubleArg(3, FALSE, &optimized_orientation);
    
    // Copy in special creation mode flag: It defaults to zero. If set to 1 then we
    // always create a power-of-two GL_TEXTURE_2D texture. This is useful if one wants
    // to create and use drifting gratings with no effort - texture wrapping is only available
    // for GL_TEXTURE_2D, not for non-pot types. It is also useful if the texture is to be
    // exported to external OpenGL code to simplify tex coords assignments.
    usepoweroftwo=0;
    PsychCopyInIntegerArg(4, FALSE, &usepoweroftwo);

    // Check if size constraints are fullfilled for power-of-two mode:
    if (usepoweroftwo & 1) {
      for(ix = 1; ix < xSize; ix*=2);
      if (ix!=xSize) {
	PsychErrorExitMsg(PsychError_inputMatrixIllegalDimensionSize, "Power-of-two texture requested but width of imageMatrix is not a power of two!");
      }

      for(ix = 1; ix < ySize; ix*=2);
      if (ix!=ySize) {
	PsychErrorExitMsg(PsychError_inputMatrixIllegalDimensionSize, "Power-of-two texture requested but height of imageMatrix is not a power of two!");
      }
    }

	// Check if creation of a floating point texture is requested? We default to non-floating point,
	// standard 8 bpc textures if this parameter is not provided.
	usefloatformat = 0;
    PsychCopyInIntegerArg(5, FALSE, &usefloatformat);
	if (usefloatformat<0 || usefloatformat>2) PsychErrorExitMsg(PsychError_user, "Invalid value for 'floatprecision' parameter provided! Valid values are 0 for 8bpc int, 1 for 16bpc float or 2 for 32bpc float.");
	if (usefloatformat && !isImageMatrixDoubles) {
		// Floating point texture requested. We only support this if our input is a double matrix, not
		// for uint8 matrices - converting them to float precision would be just a waste of ressources
		// without any benefit for precision.
		PsychErrorExitMsg(PsychError_user, "Creation of a floating point precision texture requested, but uint8 matrix provided! Only double matrices are acceptable for this mode.");
	}

    //Create a texture record.  Really just a window record adapted for textures.  
    PsychCreateWindowRecord(&textureRecord);						//this also fills the window index field.
    textureRecord->windowType=kPsychTexture;
    // MK: We need to assign the screen number of the onscreen-window, so PsychCreateTexture()
    // can query the size of the screen/onscreen-window...
    textureRecord->screenNumber=windowRecord->screenNumber;
    textureRecord->depth=32;
    PsychCopyRect(textureRecord->rect, rect);
    
    //Allocate the texture memory and copy the MATLAB matrix into the texture memory.
    // MK: We only allocate the amount really needed for given format, aka numMatrixPlanes - Bytes per pixel.
	if (usefloatformat) {
		// Allocate a double for each color component and pixel:
		textureRecord->textureMemorySizeBytes= sizeof(double) * numMatrixPlanes * xSize * ySize;		
	}
    else {
		// Allocate one byte per color component and pixel:
		textureRecord->textureMemorySizeBytes= numMatrixPlanes * xSize * ySize;
    }
	// MK: Allocate memory page-aligned... -> Helps Apple texture range extensions et al.
    if(PsychPrefStateGet_DebugMakeTexture()) 	//MARK #2
        StoreNowTime();
    textureRecord->textureMemory=malloc(textureRecord->textureMemorySizeBytes);
    if(PsychPrefStateGet_DebugMakeTexture()) 	//MARK #3
        StoreNowTime();	
    texturePointer=textureRecord->textureMemory;
    
    // Does script explicitely request usage of a GL_TEXTURE_2D power-of-two texture?
    if (usepoweroftwo & 1) {
      // Enforce creation as a power-of-two texture:
      textureRecord->texturetarget=GL_TEXTURE_2D;
    }

	// Now the conversion routines that convert Matlab/Octave matrices into memory
	// buffers suitable for OpenGL:
	if (usefloatformat) {
		// Conversion routines for HDR 16 bpc or 32 bpc textures -- Slow path.
		// Our input is always double matrices...
		iters = xSize * ySize;

		// Our input buffer is always of GL_FLOAT precision:
		textureRecord->textureexternaltype = GL_FLOAT;
		texturePointer_f=(GLfloat*) texturePointer;
		
		if(numMatrixPlanes==1) {
			for(ix=0;ix<iters;ix++){
				*(texturePointer_f++)= (GLfloat) *(doubleMatrix++);  
			}
			textureRecord->depth=(usefloatformat==1) ? 16 : 32;

			textureRecord->textureinternalformat = (usefloatformat==1) ? GL_LUMINANCE_FLOAT16_APPLE : GL_LUMINANCE_FLOAT32_APPLE; 
			textureRecord->textureexternalformat = GL_LUMINANCE;
		}

		if(numMatrixPlanes==2) {
			rp=(double*) ((psych_uint64) doubleMatrix);
			ap=(double*) ((psych_uint64) doubleMatrix + (psych_uint64) iters*sizeof(double));

			for(ix=0;ix<iters;ix++){
				*(texturePointer_f++)= (GLfloat) *(rp++);  
				*(texturePointer_f++)= (GLfloat) *(ap++);  
			}
			textureRecord->depth=(usefloatformat==1) ? 32 : 64;
			textureRecord->textureinternalformat = (usefloatformat==1) ? GL_LUMINANCE_ALPHA_FLOAT16_APPLE : GL_LUMINANCE_ALPHA_FLOAT32_APPLE; 
			textureRecord->textureexternalformat = GL_LUMINANCE_ALPHA;
		}
		
		if(numMatrixPlanes==3) {
			rp=(double*) ((psych_uint64) doubleMatrix);
			gp=(double*) ((psych_uint64) doubleMatrix + (psych_uint64) iters*sizeof(double));
			bp=(double*) ((psych_uint64) gp + (psych_uint64) iters*sizeof(double));

			for(ix=0;ix<iters;ix++){
				*(texturePointer_f++)= (GLfloat) *(rp++);  
				*(texturePointer_f++)= (GLfloat) *(gp++);  
				*(texturePointer_f++)= (GLfloat) *(bp++);  
			}
			textureRecord->depth=(usefloatformat==1) ? 48 : 96;
			textureRecord->textureinternalformat = (usefloatformat==1) ? GL_RGB_FLOAT16_APPLE : GL_RGB_FLOAT32_APPLE; 
			textureRecord->textureexternalformat = GL_RGB;
		}
		
		if(numMatrixPlanes==4) {
			rp=(double*) ((psych_uint64) doubleMatrix);
			gp=(double*) ((psych_uint64) doubleMatrix + (psych_uint64) iters*sizeof(double));
			bp=(double*) ((psych_uint64) gp + (psych_uint64) iters*sizeof(double));
			ap=(double*) ((psych_uint64) bp + (psych_uint64) iters*sizeof(double));

			for(ix=0;ix<iters;ix++){
				*(texturePointer_f++)= (GLfloat) *(rp++);  
				*(texturePointer_f++)= (GLfloat) *(gp++);  
				*(texturePointer_f++)= (GLfloat) *(bp++);  
				*(texturePointer_f++)= (GLfloat) *(ap++);  
			}			
			textureRecord->depth=(usefloatformat==1) ? 64 : 128;
			textureRecord->textureinternalformat = (usefloatformat==1) ? GL_RGBA_FLOAT16_APPLE : GL_RGBA_FLOAT32_APPLE; 
			textureRecord->textureexternalformat = GL_RGBA;
		}

		// This is a special workaround for bugs in FLOAT16 texture creation on Mac OS/X 10.4.x and 10.5.x.
		// The OpenGL fails to properly flush very small values (< 1e-9) to zero when creating a FLOAT16
		// type texture. Instead it seems to initialize with trash data, corrupting the texture.
		// Therefore, if FLOAT16 texture creation is requested, we loop over the whole input buffer and
		// set all values with magnitude smaller than 1e-9 to zero. Better safe than sorry...
		if(usefloatformat==1) {
			texturePointer_f=(GLfloat*) texturePointer;
			iters = iters * numMatrixPlanes;
			for(ix=0; ix<iters; ix++, texturePointer_f++) if(fabs((double) *texturePointer_f) < 1e-9) { *texturePointer_f = 0.0; }
		}
		
		// End of HDR conversion code...
	}
    else {
		// Standard LDR texture 8 bpc conversion routines -- Fast path.
	
		// Improved implementation: Takes 13 ms on a 800x800 texture...
		if(isImageMatrixDoubles && numMatrixPlanes==1){
			texturePointer_b=(GLubyte*) texturePointer;
			iters=xSize*ySize;
			for(ix=0;ix<iters;ix++){
				*(texturePointer_b++)= (GLubyte) *(doubleMatrix++);  
			}
			textureRecord->depth=8;
		}
		
		// Improved version: Takes 3 ms on a 800x800 texture...
		// NB: Implementing memcpy manually by a for-loop takes 10 ms! This is a huge difference.
		// -> That's because memcpy on MacOS-X is implemented with hand-coded, highly tuned Assembler code for PowerPC.
		// -> It's always wise to use system-routines if available, instead of coding it by yourself!
		if(isImageMatrixBytes && numMatrixPlanes==1){
			memcpy((void*) texturePointer, (void*) byteMatrix, xSize*ySize);
			textureRecord->depth=8;
		}
		
		// New version: Takes 33 ms on a 800x800 texture...
		if(isImageMatrixDoubles && numMatrixPlanes==2){
			texturePointer_b=(GLubyte*) texturePointer;
			iters=xSize*ySize;
			rp=(double*) ((psych_uint64) doubleMatrix);
			ap=(double*) ((psych_uint64) doubleMatrix + (psych_uint64) iters*sizeof(double));
			for(ix=0;ix<iters;ix++){
				*(texturePointer_b++)= (GLubyte) *(rp++);  
				*(texturePointer_b++)= (GLubyte) *(ap++);  
			}
			textureRecord->depth=16;
		}
		
		// New version: Takes 20 ms on a 800x800 texture...
		if(isImageMatrixBytes && numMatrixPlanes==2){
			texturePointer_b=(GLubyte*) texturePointer;
			iters=xSize*ySize;
			rpb=(GLubyte*) ((psych_uint64) byteMatrix);
			apb=(GLubyte*) ((psych_uint64) byteMatrix + (psych_uint64) iters);
			for(ix=0;ix<iters;ix++){
				*(texturePointer_b++)= *(rpb++);  
				*(texturePointer_b++)= *(apb++);  
			}
			textureRecord->depth=16;
		}
		
		// Improved version: Takes 43 ms on a 800x800 texture...
		if(isImageMatrixDoubles && numMatrixPlanes==3){
			texturePointer_b=(GLubyte*) texturePointer;
			iters=xSize*ySize;
			rp=(double*) ((psych_uint64) doubleMatrix);
			gp=(double*) ((psych_uint64) doubleMatrix + (psych_uint64) iters*sizeof(double));
			bp=(double*) ((psych_uint64) gp + (psych_uint64) iters*sizeof(double));
			for(ix=0;ix<iters;ix++){
				*(texturePointer_b++)= (GLubyte) *(rp++);  
				*(texturePointer_b++)= (GLubyte) *(gp++);  
				*(texturePointer_b++)= (GLubyte) *(bp++);  
			}
			textureRecord->depth=24;
		}
		
		// Improved version: Takes 25 ms on a 800x800 texture...
		if(isImageMatrixBytes && numMatrixPlanes==3){
			texturePointer_b=(GLubyte*) texturePointer;
			iters=xSize*ySize;
			
			rpb=(GLubyte*) ((psych_uint64) byteMatrix);
			gpb=(GLubyte*) ((psych_uint64) byteMatrix + (psych_uint64) iters);
			bpb=(GLubyte*) ((psych_uint64) gpb + (psych_uint64) iters);
			for(ix=0;ix<iters;ix++){
				*(texturePointer_b++)= *(rpb++);  
				*(texturePointer_b++)= *(gpb++);  
				*(texturePointer_b++)= *(bpb++);  
			}
			textureRecord->depth=24;
		}
		
		// Improved version: Takes 55 ms on a 800x800 texture...
		if(isImageMatrixDoubles && numMatrixPlanes==4){
			texturePointer_b=(GLubyte*) texturePointer;
			iters=xSize*ySize;
			
			rp=(double*) ((psych_uint64) doubleMatrix);
			gp=(double*) ((psych_uint64) doubleMatrix + (psych_uint64) iters*sizeof(double));
			bp=(double*) ((psych_uint64) gp + (psych_uint64) iters*sizeof(double));
			ap=(double*) ((psych_uint64) bp + (psych_uint64) iters*sizeof(double));
			if (bigendian) {
				// Code for big-endian machines like PowerPC:
				for(ix=0;ix<iters;ix++){
					*(texturePointer_b++)= (GLubyte) *(ap++);  
					*(texturePointer_b++)= (GLubyte) *(rp++);  
					*(texturePointer_b++)= (GLubyte) *(gp++);  
					*(texturePointer_b++)= (GLubyte) *(bp++);  
				}
			}
			else {
				// Code for little-endian machines like Intel Pentium:
				for(ix=0;ix<iters;ix++){
					*(texturePointer_b++)= (GLubyte) *(bp++);  
					*(texturePointer_b++)= (GLubyte) *(gp++);  
					*(texturePointer_b++)= (GLubyte) *(rp++);  
					*(texturePointer_b++)= (GLubyte) *(ap++);  
				}
			}
			
			textureRecord->depth=32;
		}
		
		// Improved version: Takes 33 ms on a 800x800 texture...
		if(isImageMatrixBytes && numMatrixPlanes==4){
			texturePointer_b=(GLubyte*) texturePointer;
			iters=xSize*ySize;
			
			rpb=(GLubyte*) ((psych_uint64) byteMatrix);
			gpb=(GLubyte*) ((psych_uint64) byteMatrix + (psych_uint64) iters);
			bpb=(GLubyte*) ((psych_uint64) gpb + (psych_uint64) iters);
			apb=(GLubyte*) ((psych_uint64) bpb + (psych_uint64) iters);
			if (bigendian) {
				// Code for big-endian machines like PowerPC:
				for(ix=0;ix<iters;ix++){
					*(texturePointer_b++)= *(apb++);  
					*(texturePointer_b++)= *(rpb++);  
					*(texturePointer_b++)= *(gpb++);  
					*(texturePointer_b++)= *(bpb++);  
				}
			}
			else {
				// Code for little-endian machines like Intel Pentium:
				for(ix=0;ix<iters;ix++){
					*(texturePointer_b++)= *(bpb++);  
					*(texturePointer_b++)= *(gpb++);  
					*(texturePointer_b++)= *(rpb++);  
					*(texturePointer_b++)= *(apb++);  
				}
			}
			
			textureRecord->depth=32;
		}
	} // End of 8 bpc texture conversion code (fast-path for LDR textures)
    
    // The memory buffer now contains our texture data in a format ready to submit to OpenGL.
    
	// Assign parent window and copy its inheritable properties:
	PsychAssignParentWindow(textureRecord, windowRecord);

    // Texture orientation is zero aka transposed aka non-renderswapped.
    textureRecord->textureOrientation = ((assume_texorientation != 2) && (assume_texorientation != 3)) ? 0 : 2;
    
	// This is our best guess about the number of image channels:
	textureRecord->nrchannels = numMatrixPlanes;

    // Let's create and bind a new texture object and fill it with our new texture data.
    PsychCreateTexture(textureRecord);
    
	// Assign GLSL filter-/lookup-shaders if needed:
	PsychAssignHighPrecisionTextureShaders(textureRecord, windowRecord, usefloatformat, (usepoweroftwo & 2) ? 1 : 0);

	// User specified override shader for this texture provided? This is useful for
	// basic image processing and procedural texture shading:
	if (textureShader!=0) {
		// Assign provided shader as filtershader to this texture: We negate it so
		// that the texture blitter routines know this is a custom shader, not our
		// built in filter shader:
		textureRecord->textureFilterShader = -1 * textureShader;
	}
	
    // Texture ready. Mark it valid and return handle to userspace:
    PsychSetWindowRecordValid(textureRecord);
    PsychCopyOutDoubleArg(1, FALSE, textureRecord->windowIndex);
	
	// Swapping the texture to upright orientation requested?
	if (assume_texorientation == 1) {
		// Transform sourceRecord source texture into a normalized, upright texture if it isn't already in
		// that format. We require this standard orientation for simplified shader design.

		PsychSetShader(windowRecord, 0);
		PsychNormalizeTextureOrientation(textureRecord);
	}
	
	// Shall the texture be finally declared "normally oriented"?
	// This is either due to explicit renderswapping if assume_textureorientation == 1,
	// or because it was already pretransposed in Matlab/Octave if assume_textureorientation == 2,
	// or because user space tells us the texture is isotropic if assume_textureorientation == 3.
	if (assume_texorientation > 0) {
		// Yes. Label it as such:
		textureRecord->textureOrientation = 2;
	}
    
    if(PsychPrefStateGet_DebugMakeTexture()) 	//MARK #4
        StoreNowTime();
    
    return(PsychError_none);
}
PsychError SCREENMakeTexture(void) 
{
    size_t								ix, iters;
    PsychWindowRecordType				*textureRecord;
    PsychWindowRecordType				*windowRecord;
    PsychRectType						rect;
    psych_bool							isImageMatrixBytes, isImageMatrixDoubles;
    int									numMatrixPlanes, xSize, ySize;
    unsigned char						*byteMatrix;
    double								*doubleMatrix;
    GLuint								*texturePointer;
    GLubyte								*texturePointer_b;
	GLfloat								*texturePointer_f;
    double								*rp, *gp, *bp, *ap;
    GLubyte								*rpb, *gpb, *bpb, *apb;
    int									usepoweroftwo, usefloatformat, assume_texorientation, textureShader;
    double								optimized_orientation;
    psych_bool							bigendian;
	psych_bool							planar_storage = FALSE;

    // Detect endianity (byte-order) of machine:
    ix=255;
    rpb=(GLubyte*) &ix;
    bigendian = ( *rpb == 255 ) ? FALSE : TRUE;
    ix = 0; rpb = NULL;

    if(PsychPrefStateGet_DebugMakeTexture())	//MARK #1
        StoreNowTime();
    
    //all subfunctions should have these two lines.  
    PsychPushHelp(useString, synopsisString, seeAlsoString);
    if(PsychIsGiveHelp()){PsychGiveHelp();return(PsychError_none);};
    
    //Get the window structure for the onscreen window.  It holds the onscreein GL context which we will need in the
    //final step when we copy the texture from system RAM onto the screen.
    PsychErrorExit(PsychCapNumInputArgs(7));   	
    PsychErrorExit(PsychRequireNumInputArgs(2)); 	
    PsychErrorExit(PsychCapNumOutputArgs(1)); 
    
    //get the window record from the window record argument and get info from the window record
    PsychAllocInWindowRecordArg(kPsychUseDefaultArgPosition, TRUE, &windowRecord);
    
    if((windowRecord->windowType!=kPsychDoubleBufferOnscreen) && (windowRecord->windowType!=kPsychSingleBufferOnscreen))
        PsychErrorExitMsg(PsychError_user, "MakeTexture called on something else than a onscreen window");
    
	// Get optional texture orientation flag:
	assume_texorientation = 0;
	PsychCopyInIntegerArg(6, FALSE, &assume_texorientation);
	
	// Get optional texture shader handle:
	textureShader = 0;
	PsychCopyInIntegerArg(7, FALSE, &textureShader);
	
    //get the argument and sanity check it.
    isImageMatrixBytes=PsychAllocInUnsignedByteMatArg(2, kPsychArgAnything, &ySize, &xSize, &numMatrixPlanes, &byteMatrix);
    isImageMatrixDoubles=PsychAllocInDoubleMatArg(2, kPsychArgAnything, &ySize, &xSize, &numMatrixPlanes, &doubleMatrix);
    if(numMatrixPlanes > 4)
        PsychErrorExitMsg(PsychError_inputMatrixIllegalDimensionSize, "Specified image matrix exceeds maximum depth of 4 layers");
    if(ySize<1 || xSize <1)
        PsychErrorExitMsg(PsychError_inputMatrixIllegalDimensionSize, "Specified image matrix must be at least 1 x 1 pixels in size");
    if(! (isImageMatrixBytes || isImageMatrixDoubles))
        PsychErrorExitMsg(PsychError_user, "Illegal argument type");  //not  likely. 

	// Is this a special image matrix which is already pre-transposed to fit our optimal format?
	if (assume_texorientation == 2) {
		// Yes. Swap xSize and ySize to take this into account:
		ix = (size_t) xSize;
		xSize = ySize;
		ySize = (int) ix;
		ix = 0;
	}

	// Build defining rect for this texture:
    PsychMakeRect(rect, 0, 0, xSize, ySize);

    // Copy in texture preferred draw orientation hint. We default to zero degrees, if
    // not provided.
    // This parameter is not yet used. It is silently ignorerd for now...
    optimized_orientation = 0;
    PsychCopyInDoubleArg(3, FALSE, &optimized_orientation);
    
    // Copy in special creation mode flag: It defaults to zero. If set to 1 then we
    // always create a power-of-two GL_TEXTURE_2D texture. This is useful if one wants
    // to create and use drifting gratings with no effort - texture wrapping is only available
    // for GL_TEXTURE_2D, not for non-pot types. It is also useful if the texture is to be
    // exported to external OpenGL code to simplify tex coords assignments.
    usepoweroftwo=0;
    PsychCopyInIntegerArg(4, FALSE, &usepoweroftwo);

    // Check if size constraints are fullfilled for power-of-two mode:
    if (usepoweroftwo & 1) {
		for(ix = 1; ix < (size_t) xSize; ix*=2);
		if (ix != (size_t) xSize) {
			PsychErrorExitMsg(PsychError_inputMatrixIllegalDimensionSize, "Power-of-two texture requested but width of imageMatrix is not a power of two!");
		}
		
		for(ix = 1; ix < (size_t) ySize; ix*=2);
		if (ix != (size_t) ySize) {
			PsychErrorExitMsg(PsychError_inputMatrixIllegalDimensionSize, "Power-of-two texture requested but height of imageMatrix is not a power of two!");
		}
    }

	// Check if creation of a floating point texture is requested? We default to non-floating point,
	// standard 8 bpc textures if this parameter is not provided.
	usefloatformat = 0;
    PsychCopyInIntegerArg(5, FALSE, &usefloatformat);
	if (usefloatformat<0 || usefloatformat>2) PsychErrorExitMsg(PsychError_user, "Invalid value for 'floatprecision' parameter provided! Valid values are 0 for 8bpc int, 1 for 16bpc float or 2 for 32bpc float.");
	if (usefloatformat && !isImageMatrixDoubles) {
		// Floating point texture requested. We only support this if our input is a double matrix, not
		// for uint8 matrices - converting them to float precision would be just a waste of ressources
		// without any benefit for precision.
		PsychErrorExitMsg(PsychError_user, "Creation of a floating point precision texture requested, but uint8 matrix provided! Only double matrices are acceptable for this mode.");
	}

    //Create a texture record.  Really just a window record adapted for textures.  
    PsychCreateWindowRecord(&textureRecord);						//this also fills the window index field.
    textureRecord->windowType=kPsychTexture;
    // MK: We need to assign the screen number of the onscreen-window, so PsychCreateTexture()
    // can query the size of the screen/onscreen-window...
    textureRecord->screenNumber=windowRecord->screenNumber;
    textureRecord->depth=32;
    PsychCopyRect(textureRecord->rect, rect);
    
	// Is texture storage in planar format explicitely requested by usercode? Do the gpu and its size
	// constraints on textures support planar storage for this image?
	// Can a proper planar -> interleaved remapping GLSL shader be generated and assigned for this texture?
	if ((usepoweroftwo == 4) && (numMatrixPlanes > 1) && (windowRecord->gfxcaps & kPsychGfxCapFBO) && !(PsychPrefStateGet_ConserveVRAM() & kPsychDontCacheTextures) &&
		(ySize * numMatrixPlanes <= windowRecord->maxTextureSize) && PsychAssignPlanarTextureShaders(textureRecord, windowRecord, numMatrixPlanes)) {
		// Yes: Use the planar texture storage fast-path.
		planar_storage = TRUE;
		if (PsychPrefStateGet_Verbosity() > 6) printf("PTB-DEBUG: Using planar storage for %i layer texture of size %i x %i texels.\n", numMatrixPlanes, xSize, ySize);
	}
	else {
		planar_storage = FALSE;
		if (PsychPrefStateGet_Verbosity() > 7) printf("PTB-DEBUG: Using standard storage for %i layer texture of size %i x %i texels.\n", numMatrixPlanes, xSize, ySize);
	}

    //Allocate the texture memory and copy the MATLAB matrix into the texture memory.
	if (usefloatformat || (planar_storage && !isImageMatrixBytes)) {
		// Allocate a double for each color component and pixel:
		textureRecord->textureMemorySizeBytes = sizeof(double) * (size_t) numMatrixPlanes * (size_t) xSize * (size_t) ySize;		
	}
    else {
		// Allocate one byte per color component and pixel:
		textureRecord->textureMemorySizeBytes = (size_t) numMatrixPlanes * (size_t) xSize * (size_t) ySize;
    }

	// We allocate our own intermediate conversion buffer unless this is
	// creation of a single-layer luminance8 integer texture from a single
	// layer uint8 input matrix and client storage is disabled. In that case, we can use a zero-copy path:
	if ((isImageMatrixBytes && (numMatrixPlanes == 1) && (!usefloatformat) && !(PsychPrefStateGet_ConserveVRAM() & kPsychDontCacheTextures)) ||
		(isImageMatrixBytes && planar_storage)) {
		// Zero copy path:
		texturePointer = NULL;
	}
	else {
		// Allocate memory:
		if(PsychPrefStateGet_DebugMakeTexture()) StoreNowTime();
		textureRecord->textureMemory = malloc(textureRecord->textureMemorySizeBytes);
		if(PsychPrefStateGet_DebugMakeTexture()) StoreNowTime();
		texturePointer = textureRecord->textureMemory;
	}
	
    // Does script explicitely request usage of a GL_TEXTURE_2D power-of-two texture?
    if (usepoweroftwo & 1) {
      // Enforce creation as a power-of-two texture:
      textureRecord->texturetarget=GL_TEXTURE_2D;
    }

	// Now the conversion routines that convert Matlab/Octave matrices into memory
	// buffers suitable for OpenGL:
	if (planar_storage) {
		// Planar texture storage, backed by a LUMINANCE texture container:

		// Zero-Copy possible? Only for uint8 input -> uint8 output:
		if (texturePointer == NULL) {
			texturePointer = (GLuint*) byteMatrix;
			textureRecord->textureMemory = texturePointer;
			// Set size to zero, so PsychCreateTexture() does not free() our
			// input buffer:
			textureRecord->textureMemorySizeBytes = 0;
			
			// This is always a LUMINANCE8 texture, backing our planar uint8 texture:
			textureRecord->depth = 8 * numMatrixPlanes;
			textureRecord->textureexternaltype   = GL_UNSIGNED_BYTE;
			textureRecord->textureexternalformat = GL_LUMINANCE;
			textureRecord->textureinternalformat = GL_LUMINANCE8;
		}
		else {
			// Some cast operation needed from double input format.
			// We always cast from double to float, potentially with
			// normalization and/or checking of value range.
			textureRecord->textureexternalformat = GL_LUMINANCE;

			if (usefloatformat) {
				// Floating point or other high precision format:
				textureRecord->depth = ((usefloatformat == 1) ? 16 : 32) * numMatrixPlanes;
				textureRecord->textureexternaltype = GL_FLOAT;
				textureRecord->textureinternalformat = (usefloatformat == 1) ? GL_LUMINANCE_FLOAT16_APPLE : GL_LUMINANCE_FLOAT32_APPLE;
				
				// Override for missing floating point texture support: Try to use 16 bit fixed point signed normalized textures [-1.0 ; 1.0] resolved at 15 bits:
				if ((usefloatformat == 1) && !(windowRecord->gfxcaps & kPsychGfxCapFPTex16)) textureRecord->textureinternalformat = GL_LUMINANCE16_SNORM;

				// Perform copy with double -> float cast:
				iters = (size_t) xSize * (size_t) ySize * (size_t) numMatrixPlanes;
				texturePointer_f = (GLfloat*) texturePointer;
				for(ix = 0; ix < iters; ix++) {
					*(texturePointer_f++) = (GLfloat) *(doubleMatrix++);
				}
				iters = (size_t) xSize * (size_t) ySize;
			}
			else {
				// 8 Bit format, but from double input matrix -> cast to uint8:
				textureRecord->depth = 8 * numMatrixPlanes;
				textureRecord->textureexternaltype = GL_UNSIGNED_BYTE;
				textureRecord->textureinternalformat = GL_LUMINANCE8;

				iters = (size_t) xSize * (size_t) ySize * (size_t) numMatrixPlanes;
				texturePointer_b = (GLubyte*) texturePointer;
				for(ix = 0; ix < iters; ix++) {
					*(texturePointer_b++) = (GLubyte) *(doubleMatrix++);
				}
				iters = (size_t) xSize * (size_t) ySize;
			}
		}
	}
	else if (usefloatformat) {
		// Conversion routines for HDR 16 bpc or 32 bpc textures -- Slow path.
		// Our input is always double matrices...
		iters = (size_t) xSize * (size_t) ySize;

		// Our input buffer is always of GL_FLOAT precision:
		textureRecord->textureexternaltype = GL_FLOAT;
		texturePointer_f=(GLfloat*) texturePointer;
		
		if(numMatrixPlanes==1) {
			for(ix=0;ix<iters;ix++){
				*(texturePointer_f++)= (GLfloat) *(doubleMatrix++);  
			}
			textureRecord->depth=(usefloatformat==1) ? 16 : 32;

			textureRecord->textureinternalformat = (usefloatformat==1) ? GL_LUMINANCE_FLOAT16_APPLE : GL_LUMINANCE_FLOAT32_APPLE; 
			textureRecord->textureexternalformat = GL_LUMINANCE;

			// Override for missing floating point texture support: Try to use 16 bit fixed point signed normalized textures [-1.0 ; 1.0] resolved at 15 bits:
			if ((usefloatformat==1) && !(windowRecord->gfxcaps & kPsychGfxCapFPTex16)) textureRecord->textureinternalformat = GL_LUMINANCE16_SNORM;
		}

		if(numMatrixPlanes==2) {
			rp=(double*) ((size_t) doubleMatrix);
			ap=(double*) ((size_t) rp + (size_t) iters * sizeof(double));

			for(ix=0;ix<iters;ix++){
				*(texturePointer_f++)= (GLfloat) *(rp++);  
				*(texturePointer_f++)= (GLfloat) *(ap++);  
			}
			textureRecord->depth=(usefloatformat==1) ? 32 : 64;
			textureRecord->textureinternalformat = (usefloatformat==1) ? GL_LUMINANCE_ALPHA_FLOAT16_APPLE : GL_LUMINANCE_ALPHA_FLOAT32_APPLE; 
			textureRecord->textureexternalformat = GL_LUMINANCE_ALPHA;

			// Override for missing floating point texture support: Try to use 16 bit fixed point signed normalized textures [-1.0 ; 1.0] resolved at 15 bits:
			if ((usefloatformat==1) && !(windowRecord->gfxcaps & kPsychGfxCapFPTex16)) textureRecord->textureinternalformat = GL_LUMINANCE16_ALPHA16_SNORM;
		}
		
		if(numMatrixPlanes==3) {
			rp=(double*) ((size_t) doubleMatrix);
			gp=(double*) ((size_t) rp + (size_t) iters * sizeof(double));
			bp=(double*) ((size_t) gp + (size_t) iters * sizeof(double));

			for(ix=0;ix<iters;ix++){
				*(texturePointer_f++)= (GLfloat) *(rp++);  
				*(texturePointer_f++)= (GLfloat) *(gp++);  
				*(texturePointer_f++)= (GLfloat) *(bp++);  
			}
			textureRecord->depth=(usefloatformat==1) ? 48 : 96;
			textureRecord->textureinternalformat = (usefloatformat==1) ? GL_RGB_FLOAT16_APPLE : GL_RGB_FLOAT32_APPLE; 
			textureRecord->textureexternalformat = GL_RGB;

			// Override for missing floating point texture support: Try to use 16 bit fixed point signed normalized textures [-1.0 ; 1.0] resolved at 15 bits:
			if ((usefloatformat==1) && !(windowRecord->gfxcaps & kPsychGfxCapFPTex16)) textureRecord->textureinternalformat = GL_RGB16_SNORM;
		}
		
		if(numMatrixPlanes==4) {
			rp=(double*) ((size_t) doubleMatrix);
			gp=(double*) ((size_t) rp + (size_t) iters * sizeof(double));
			bp=(double*) ((size_t) gp + (size_t) iters * sizeof(double));
			ap=(double*) ((size_t) bp + (size_t) iters * sizeof(double));

			for(ix=0;ix<iters;ix++){
				*(texturePointer_f++)= (GLfloat) *(rp++);  
				*(texturePointer_f++)= (GLfloat) *(gp++);  
				*(texturePointer_f++)= (GLfloat) *(bp++);  
				*(texturePointer_f++)= (GLfloat) *(ap++);  
			}			
			textureRecord->depth=(usefloatformat==1) ? 64 : 128;
			textureRecord->textureinternalformat = (usefloatformat==1) ? GL_RGBA_FLOAT16_APPLE : GL_RGBA_FLOAT32_APPLE; 
			textureRecord->textureexternalformat = GL_RGBA;

			// Override for missing floating point texture support: Try to use 16 bit fixed point signed normalized textures [-1.0 ; 1.0] resolved at 15 bits:
			if ((usefloatformat==1) && !(windowRecord->gfxcaps & kPsychGfxCapFPTex16)) textureRecord->textureinternalformat = GL_RGBA16_SNORM;
		}		
		// End of HDR conversion code...
	}
    else {
		// Standard LDR texture 8 bpc conversion routines -- Fast path.
		iters = (size_t) xSize * (size_t) ySize;

		// Improved implementation: Takes 13 ms on a 800x800 texture...
		if(isImageMatrixDoubles && numMatrixPlanes==1){
			texturePointer_b=(GLubyte*) texturePointer;
			for(ix=0;ix<iters;ix++){
				*(texturePointer_b++)= (GLubyte) *(doubleMatrix++);  
			}
			textureRecord->depth=8;
		}
		
		// Improved version: Takes 3 ms on a 800x800 texture...
		// NB: Implementing memcpy manually by a for-loop takes 10 ms! This is a huge difference.
		// -> That's because memcpy on MacOS-X is implemented with hand-coded, highly tuned Assembler code for PowerPC.
		// -> It's always wise to use system-routines if available, instead of coding it by yourself!
		if(isImageMatrixBytes && numMatrixPlanes==1) {
			if (texturePointer) {
				// Need to do a copy. Use optimized memcpy():
				memcpy((void*) texturePointer, (void*) byteMatrix, iters);
				
				//texturePointer_b=(GLubyte*) texturePointer;
				//for(ix=0;ix<iters;ix++){
				//	*(texturePointer_b++) = *(byteMatrix++);  
				//}
			}
			else {
				// Zero-Copy path. Just pass a pointer to our input matrix:
				texturePointer = (GLuint*) byteMatrix;
				textureRecord->textureMemory = texturePointer;
				// Set size to zero, so PsychCreateTexture() does not free() our
				// input buffer:
				textureRecord->textureMemorySizeBytes = 0;
			}

			textureRecord->depth=8;
		}
		
		// New version: Takes 33 ms on a 800x800 texture...
		if(isImageMatrixDoubles && numMatrixPlanes==2){
			texturePointer_b=(GLubyte*) texturePointer;
			rp=(double*) ((size_t) doubleMatrix);
			ap=(double*) ((size_t) rp + (size_t) iters * sizeof(double));
			for(ix=0;ix<iters;ix++){
				*(texturePointer_b++)= (GLubyte) *(rp++);  
				*(texturePointer_b++)= (GLubyte) *(ap++);  
			}
			textureRecord->depth=16;
		}
		
		// New version: Takes 20 ms on a 800x800 texture...
		if(isImageMatrixBytes && numMatrixPlanes==2){
			texturePointer_b=(GLubyte*) texturePointer;
			rpb=(GLubyte*) ((size_t) byteMatrix);
			apb=(GLubyte*) ((size_t) rpb + (size_t) iters);
			for(ix=0;ix<iters;ix++){
				*(texturePointer_b++)= *(rpb++);  
				*(texturePointer_b++)= *(apb++);  
			}
			textureRecord->depth=16;
		}
		
		// Improved version: Takes 43 ms on a 800x800 texture...
		if(isImageMatrixDoubles && numMatrixPlanes==3){
			texturePointer_b=(GLubyte*) texturePointer;
			rp=(double*) ((size_t) doubleMatrix);
			gp=(double*) ((size_t) rp + (size_t) iters * sizeof(double));
			bp=(double*) ((size_t) gp + (size_t) iters * sizeof(double));
			for(ix=0;ix<iters;ix++){
				*(texturePointer_b++)= (GLubyte) *(rp++);  
				*(texturePointer_b++)= (GLubyte) *(gp++);  
				*(texturePointer_b++)= (GLubyte) *(bp++);  
			}
			textureRecord->depth=24;
		}
		
		// Improved version: Takes 25 ms on a 800x800 texture...
		if(isImageMatrixBytes && numMatrixPlanes==3){
			texturePointer_b=(GLubyte*) texturePointer;			
			rpb=(GLubyte*) ((size_t) byteMatrix);
			gpb=(GLubyte*) ((size_t) rpb + (size_t) iters);
			bpb=(GLubyte*) ((size_t) gpb + (size_t) iters);
			for(ix=0;ix<iters;ix++){
				*(texturePointer_b++)= *(rpb++);  
				*(texturePointer_b++)= *(gpb++);  
				*(texturePointer_b++)= *(bpb++);  
			}
			textureRecord->depth=24;
		}
		
		// Improved version: Takes 55 ms on a 800x800 texture...
		if(isImageMatrixDoubles && numMatrixPlanes==4){
			texturePointer_b=(GLubyte*) texturePointer;			
			rp=(double*) ((size_t) doubleMatrix);
			gp=(double*) ((size_t) rp + (size_t) iters * sizeof(double));
			bp=(double*) ((size_t) gp + (size_t) iters * sizeof(double));
			ap=(double*) ((size_t) bp + (size_t) iters * sizeof(double));
			if (bigendian) {
				// Code for big-endian machines like PowerPC:
				for(ix=0;ix<iters;ix++){
					*(texturePointer_b++)= (GLubyte) *(ap++);  
					*(texturePointer_b++)= (GLubyte) *(rp++);  
					*(texturePointer_b++)= (GLubyte) *(gp++);  
					*(texturePointer_b++)= (GLubyte) *(bp++);  
				}
			}
			else {
				// Code for little-endian machines like Intel Pentium:
				for(ix=0;ix<iters;ix++){
					*(texturePointer_b++)= (GLubyte) *(bp++);  
					*(texturePointer_b++)= (GLubyte) *(gp++);  
					*(texturePointer_b++)= (GLubyte) *(rp++);  
					*(texturePointer_b++)= (GLubyte) *(ap++);  
				}
			}
			
			textureRecord->depth=32;
		}
		
		// Improved version: Takes 33 ms on a 800x800 texture...
		if(isImageMatrixBytes && numMatrixPlanes==4){
			texturePointer_b=(GLubyte*) texturePointer;			
			rpb=(GLubyte*) ((size_t) byteMatrix);
			gpb=(GLubyte*) ((size_t) rpb + (size_t) iters);
			bpb=(GLubyte*) ((size_t) gpb + (size_t) iters);
			apb=(GLubyte*) ((size_t) bpb + (size_t) iters);
			if (bigendian) {
				// Code for big-endian machines like PowerPC:
				for(ix=0;ix<iters;ix++){
					*(texturePointer_b++)= *(apb++);  
					*(texturePointer_b++)= *(rpb++);  
					*(texturePointer_b++)= *(gpb++);  
					*(texturePointer_b++)= *(bpb++);  
				}
			}
			else {
				// Code for little-endian machines like Intel Pentium:
				for(ix=0;ix<iters;ix++){
					*(texturePointer_b++)= *(bpb++);  
					*(texturePointer_b++)= *(gpb++);  
					*(texturePointer_b++)= *(rpb++);  
					*(texturePointer_b++)= *(apb++);  
				}
			}
			
			textureRecord->depth=32;
		}
	} // End of 8 bpc texture conversion code (fast-path for LDR textures)
    
	// Override for missing floating point texture support?
	if ((usefloatformat==1) && !(windowRecord->gfxcaps & kPsychGfxCapFPTex16)) {
		// Override enabled. Instead of a 16bpc float texture with 11 bit linear precision in the
		// range [-1.0 ; 1.0], we use a 16 bit signed normalized texture with a normalized value
		// range of [-1.0; 1.0], encoded with 1 bit sign and 15 bit magnitude. These textures have
		// an effective linear precision of 15 bits - better than 16 bpc float - but they are restricted
		// to a value range of [-1.0 ; 1.0], as opposed to 16 bpc float textures. Tell user about this
		// replacement at high verbosity levels:
		if (PsychPrefStateGet_Verbosity() > 4)
			printf("PTB-INFO:MakeTexture: Code requested 16 bpc float texture, but this is unsupported. Trying to use 16 bit snorm texture instead.\n");
		
		// Signed normalized textures supported? Otherwise we bail...
		if (!(windowRecord->gfxcaps & kPsychGfxCapSNTex16)) {
			printf("PTB-ERROR:MakeTexture: Code requested 16 bpc floating point texture, but this is unsupported by this graphics card.\n");
			printf("PTB-ERROR:MakeTexture: Tried to use 16 bit snorm texture instead, but failed as this is unsupported as well.\n");
			PsychErrorExitMsg(PsychError_user, "Creation of 15 bit linear precision signed normalized texture failed. Not supported by your graphics hardware!");
		}
		
		// Check value range of pixels. This will not work for out of [-1; 1] range values.
		texturePointer_f=(GLfloat*) texturePointer;
		iters = iters * (size_t) numMatrixPlanes;
		for (ix=0; ix<iters; ix++, texturePointer_f++) {
			if(fabs((double) *texturePointer_f) > 1.0) {
				// Game over!
				printf("PTB-ERROR:MakeTexture: Code requested 16 bpc floating point texture, but this is unsupported by this graphics card.\n");
				printf("PTB-ERROR:MakeTexture: Tried to use 16 bit snorm texture instead, but failed because some pixels are outside the\n");
				printf("PTB-ERROR:MakeTexture: representable range -1.0 to 1.0 for this texture type. Change your code or update your graphics hardware.\n");
				PsychErrorExitMsg(PsychError_user, "Creation of 15 bit linear precision signed normalized texture failed due to out of [-1 ; +1] range pixel values!");
			}
		}
	}

	// This is a special workaround for bugs in FLOAT16 texture creation on Mac OS/X 10.4.x and 10.5.x.
	// The OpenGL fails to properly flush very small values (< 1e-9) to zero when creating a FLOAT16
	// type texture. Instead it seems to initialize with trash data, corrupting the texture.
	// Therefore, if FLOAT16 texture creation is requested, we loop over the whole input buffer and
	// set all values with magnitude smaller than 1e-9 to zero. Better safe than sorry...
	if ((usefloatformat==1) && (windowRecord->gfxcaps & kPsychGfxCapFPTex16)) {
		texturePointer_f=(GLfloat*) texturePointer;
		iters = iters * (size_t) numMatrixPlanes;
		for(ix=0; ix<iters; ix++, texturePointer_f++) if(fabs((double) *texturePointer_f) < 1e-9) { *texturePointer_f = 0.0; }
	}

    // The memory buffer now contains our texture data in a format ready to submit to OpenGL.
    
	// Assign parent window and copy its inheritable properties:
	PsychAssignParentWindow(textureRecord, windowRecord);

    // Texture orientation is zero aka transposed aka non-renderswapped.
    textureRecord->textureOrientation = ((assume_texorientation != 2) && (assume_texorientation != 3)) ? 0 : 2;
    
	// This is our best guess about the number of image channels:
	textureRecord->nrchannels = numMatrixPlanes;

	if (planar_storage) {
		// Setup special rect to fake PsychCreateTexture() into creating a luminance
		// texture numMatrixPlanes times the height (in rows) of the texture, to store the
		// numMatrixPlanes layers concatenated to each other.
		if (textureRecord->textureOrientation == 0) {
			// Normal case: Transposed storage.
			PsychMakeRect(&(textureRecord->rect[0]), 0, 0, xSize * numMatrixPlanes, ySize);
		}
		else {
			// Special case: Non-transposed or isotropic storage:
			PsychMakeRect(&(textureRecord->rect[0]), 0, 0, xSize, ySize * numMatrixPlanes);
		}
		
		// Create planar texture:
		PsychCreateTexture(textureRecord);

		// Restore rect and clientrect of texture to effective size:
		PsychMakeRect(&(textureRecord->rect[0]), 0, 0, xSize, ySize);
		PsychCopyRect(textureRecord->clientrect, textureRecord->rect);
		
		textureRecord->specialflags = kPsychPlanarTexture;
	}
	else {
		// Let's create and bind a new texture object and fill it with our new texture data.
		PsychCreateTexture(textureRecord);
		
		// Assign GLSL filter-/lookup-shaders if needed:
		PsychAssignHighPrecisionTextureShaders(textureRecord, windowRecord, usefloatformat, (usepoweroftwo & 2) ? 1 : 0);
	}
	
	// User specified override shader for this texture provided? This is useful for
	// basic image processing and procedural texture shading:
	if (textureShader!=0) {
		// Assign provided shader as filtershader to this texture: We negate it so
		// that the texture blitter routines know this is a custom shader, not our
		// built in filter shader:
		textureRecord->textureFilterShader = -1 * textureShader;
	}
	
    // Texture ready. Mark it valid and return handle to userspace:
    PsychSetWindowRecordValid(textureRecord);
    PsychCopyOutDoubleArg(1, FALSE, textureRecord->windowIndex);
	
	// Swapping the texture to upright orientation requested?
	if (assume_texorientation == 1) {
		// Transform sourceRecord source texture into a normalized, upright texture if it isn't already in
		// that format. We require this standard orientation for simplified shader design.
		PsychSetShader(windowRecord, 0);
		PsychNormalizeTextureOrientation(textureRecord);
	}
	
	// Shall the texture be finally declared "normally oriented"?
	// This is either due to explicit renderswapping if assume_textureorientation == 1,
	// or because it was already pretransposed in Matlab/Octave if assume_textureorientation == 2,
	// or because user space tells us the texture is isotropic if assume_textureorientation == 3.
	if (assume_texorientation > 0) {
		// Yes. Label it as such:
		textureRecord->textureOrientation = 2;
	}
    
    if(PsychPrefStateGet_DebugMakeTexture()) 	//MARK #4
        StoreNowTime();
    
    return(PsychError_none);
}
PsychError SCREENPutImage(void) 
{
	PsychRectType			windowRect, positionRect;
	int						ix, iy;
	size_t					matrixRedIndex, matrixGreenIndex, matrixBlueIndex, matrixAlphaIndex, matrixGrayIndex;
	int						inputM, inputN, inputP, positionRectWidth, positionRectHeight;
	size_t					pixelIndex = 0;
	PsychWindowRecordType	*windowRecord;
	unsigned char			*inputMatrixByte;
	double					*inputMatrixDouble;
	GLfloat					*pixelData;
	GLfloat					matrixGrayValue, matrixRedValue, matrixGreenValue, matrixBlueValue, matrixAlphaValue;
	PsychArgFormatType		inputMatrixType;
	GLfloat					xZoom = 1, yZoom = -1;
        
	// All sub functions should have these two lines.
	PsychPushHelp(useString, synopsisString, seeAlsoString);
	if (PsychIsGiveHelp()) {
		PsychGiveHelp();
		return PsychError_none;
	};

	// Cap the number of inputs.
	PsychErrorExit(PsychCapNumInputArgs(4));   //The maximum number of inputs
	PsychErrorExit(PsychCapNumOutputArgs(0));  //The maximum number of outputs
        
	// Get the image matrix.
	inputMatrixType = PsychGetArgType(2);
	switch (inputMatrixType) {
		case PsychArgType_none:
		case PsychArgType_default:
			PsychErrorExitMsg(PsychError_user, "imageArray argument required");
			break;
		case PsychArgType_uint8:
			PsychAllocInUnsignedByteMatArg(2, TRUE, &inputM, &inputN, &inputP, &inputMatrixByte);
			break;
		case PsychArgType_double:
			PsychAllocInDoubleMatArg(2, TRUE, &inputM, &inputN, &inputP, &inputMatrixDouble);
			break;
		default:
			PsychErrorExitMsg(PsychError_user, "imageArray must be uint8 or double type");
			break;
	}

    if (inputP != 1 && inputP != 3 && inputP != 4) {
        PsychErrorExitMsg(PsychError_user, "Third dimension of image matrix must be 1, 3, or 4");
    }
        
	// Get the window and get the rect and stuff.
	PsychAllocInWindowRecordArg(kPsychUseDefaultArgPosition, TRUE, &windowRecord);

    // A no-go on OES:
    if (PsychIsGLES(windowRecord)) {
        PsychErrorExitMsg(PsychError_unimplemented, "Sorry, Screen('PutImage') is not supported on OpenGL-ES embedded graphics hardware. Use 'MakeTexture' and 'DrawTexture' instead.");
    }

	PsychGetRectFromWindowRecord(windowRect, windowRecord);
	if (PsychCopyInRectArg(3, FALSE, positionRect)) {
		if (IsPsychRectEmpty(positionRect)) {
			return PsychError_none;
		}
		positionRectWidth  = (int) PsychGetWidthFromRect(positionRect);
		positionRectHeight = (int) PsychGetHeightFromRect(positionRect);
		if (positionRectWidth != inputN  || positionRectHeight != inputM) {
			// Calculate the zoom factor.
			xZoom = (GLfloat)   positionRectWidth  / (GLfloat) inputN;
			yZoom = -((GLfloat) positionRectHeight / (GLfloat) inputM);
		}
	}
	else {
	   positionRect[kPsychLeft] = 0;
	   positionRect[kPsychTop] = 0;
	   positionRect[kPsychRight] = inputN;
	   positionRect[kPsychBottom] = inputM;
	   PsychCenterRect(positionRect, windowRect, positionRect);
	}
        
	// Allocate memory to hold the pixel data that we'll later pass to OpenGL.
	pixelData = (GLfloat*) PsychMallocTemp(sizeof(GLfloat) * (size_t) inputN * (size_t) inputM * 4);
	
	// Loop through all rows and columns of the pixel data passed from Matlab, extract it,
	// and stick it into 'pixelData'.
	for (iy = 0; iy < inputM; iy++) {
		for (ix = 0; ix < inputN; ix++) {
			if (inputP == 1) { // Grayscale
							   // Extract the grayscale value.
				matrixGrayIndex = PSYCHINDEXELEMENTFROM3DARRAY((size_t) inputM, (size_t) inputN, 1, (size_t) iy, (size_t) ix, 0);
				if (inputMatrixType == PsychArgType_uint8) {
					// If the color range is > 255, then force it to 255 for 8-bit values.
					matrixGrayValue = (GLfloat)inputMatrixByte[matrixGrayIndex];
					if (windowRecord->colorRange > 255) {
						matrixGrayValue /= (GLfloat)255;
					}
					else {
						matrixGrayValue /= (GLfloat)windowRecord->colorRange;
					}
				}
				else {
					matrixGrayValue = (GLfloat)(inputMatrixDouble[matrixGrayIndex] / windowRecord->colorRange);
				}
				
				// RGB will all be the same for grayscale.  We'll go ahead and fix alpha to the max value.
				pixelData[pixelIndex++] = matrixGrayValue; // R
				pixelData[pixelIndex++] = matrixGrayValue; // G
				pixelData[pixelIndex++] = matrixGrayValue; // B
				pixelData[pixelIndex++] = (GLfloat) 1.0; // A
			}
			else if (inputP == 3) { // RGB
				matrixRedIndex = PSYCHINDEXELEMENTFROM3DARRAY((size_t) inputM, (size_t) inputN, 3, (size_t) iy, (size_t) ix, 0);
				matrixGreenIndex = PSYCHINDEXELEMENTFROM3DARRAY((size_t) inputM, (size_t) inputN, 3, (size_t) iy, (size_t) ix, 1);
				matrixBlueIndex = PSYCHINDEXELEMENTFROM3DARRAY((size_t) inputM, (size_t) inputN, 3, (size_t) iy, (size_t) ix, 2);
				
				if (inputMatrixType == PsychArgType_uint8) {
					// If the color range is > 255, then force it to 255 for 8-bit values.
					matrixRedValue = (GLfloat)inputMatrixByte[matrixRedIndex];
					matrixGreenValue = (GLfloat)inputMatrixByte[matrixGreenIndex];
					matrixBlueValue = (GLfloat)inputMatrixByte[matrixBlueIndex];
					if (windowRecord->colorRange > 255) {
						matrixRedValue /= (GLfloat)255;
						matrixGreenValue /= (GLfloat)255;
						matrixBlueValue /= (GLfloat)255;
					}
					else {
						matrixRedValue /= (GLfloat)windowRecord->colorRange;
						matrixGreenValue /= (GLfloat)windowRecord->colorRange;
						matrixBlueValue /= (GLfloat)windowRecord->colorRange;
					}
				}
				else {
					matrixRedValue = (GLfloat)(inputMatrixDouble[matrixRedIndex] / windowRecord->colorRange);
					matrixGreenValue = (GLfloat)(inputMatrixDouble[matrixGreenIndex] / windowRecord->colorRange);
					matrixBlueValue = (GLfloat)(inputMatrixDouble[matrixBlueIndex] / windowRecord->colorRange);
				}
				
				pixelData[pixelIndex++] = matrixRedValue;
				pixelData[pixelIndex++] = matrixGreenValue;
				pixelData[pixelIndex++] = matrixBlueValue;
				pixelData[pixelIndex++] = (GLfloat)1.0;
			}
			else if (inputP == 4) { // RGBA
				matrixRedIndex = PSYCHINDEXELEMENTFROM3DARRAY((size_t) inputM, (size_t) inputN, 4, (size_t) iy, (size_t) ix, 0);
				matrixGreenIndex = PSYCHINDEXELEMENTFROM3DARRAY((size_t) inputM, (size_t) inputN, 4, (size_t) iy, (size_t) ix, 1);
				matrixBlueIndex = PSYCHINDEXELEMENTFROM3DARRAY((size_t) inputM, (size_t) inputN, 4, (size_t) iy, (size_t) ix, 2);
				matrixAlphaIndex = PSYCHINDEXELEMENTFROM3DARRAY((size_t) inputM, (size_t) inputN, 4, (size_t) iy, (size_t) ix, 3);
				
				if (inputMatrixType == PsychArgType_uint8) {
					// If the color range is > 255, then force it to 255 for 8-bit values.
					matrixRedValue = (GLfloat)inputMatrixByte[matrixRedIndex];
					matrixGreenValue = (GLfloat)inputMatrixByte[matrixGreenIndex];
					matrixBlueValue = (GLfloat)inputMatrixByte[matrixBlueIndex];
					matrixAlphaValue = (GLfloat)inputMatrixByte[matrixAlphaIndex];
					if (windowRecord->colorRange > 255) {
						matrixRedValue /= (GLfloat)255;
						matrixGreenValue /= (GLfloat)255;
						matrixBlueValue /= (GLfloat)255;
						matrixAlphaValue /= (GLfloat)255;
					}
					else {
						matrixRedValue /= (GLfloat)windowRecord->colorRange;
						matrixGreenValue /= (GLfloat)windowRecord->colorRange;
						matrixBlueValue /= (GLfloat)windowRecord->colorRange;
						matrixAlphaValue /= (GLfloat)windowRecord->colorRange;
					}
				}
				else {
					matrixRedValue = (GLfloat)(inputMatrixDouble[matrixRedIndex] / windowRecord->colorRange);
					matrixGreenValue = (GLfloat)(inputMatrixDouble[matrixGreenIndex] / (GLfloat)windowRecord->colorRange);
					matrixBlueValue = (GLfloat)(inputMatrixDouble[matrixBlueIndex] / (GLfloat)windowRecord->colorRange);
					matrixAlphaValue = (GLfloat)(inputMatrixDouble[matrixAlphaIndex] / (GLfloat)windowRecord->colorRange);
				}
				
				pixelData[pixelIndex++] = matrixRedValue;
				pixelData[pixelIndex++] = matrixGreenValue;
				pixelData[pixelIndex++] = matrixBlueValue;
				pixelData[pixelIndex++] = matrixAlphaValue;
			}
		} // for (iy = 0; iy < inputM; iy++)
	} // for (ix = 0; ix < inputN; ix++)
	
	// Enable this windowRecords framebuffer as current drawingtarget:
	PsychSetDrawingTarget(windowRecord);
	
	// Disable draw shader:
	PsychSetShader(windowRecord, 0);
	
	PsychUpdateAlphaBlendingFactorLazily(windowRecord);
	
	// Set the raster position so that we can draw starting at this location.
	glRasterPos2f((GLfloat)(positionRect[kPsychLeft]), (GLfloat)(positionRect[kPsychTop]));
	
	// Tell glDrawPixels to unpack the pixel array along GLfloat boundaries.
	glPixelStorei(GL_UNPACK_ALIGNMENT, (GLint)sizeof(GLfloat));
	
	// Dump the pixels onto the screen.
	glPixelZoom(xZoom, yZoom);
	glDrawPixels(inputN, inputM, GL_RGBA, GL_FLOAT, pixelData);
	glPixelZoom(1,1);
	
	PsychFlushGL(windowRecord);  // This does nothing if we are multi buffered, otherwise it glFlushes
	PsychTestForGLErrors();
	
	return PsychError_none;
}
PsychError SCREENMakeTexture(void) 
{
    int									ix;
    PsychWindowRecordType				*textureRecord;
    PsychWindowRecordType			*windowRecord;
    PsychRectType						rect;
    Boolean								isImageMatrixBytes, isImageMatrixDoubles;
    int									numMatrixPlanes, xSize, ySize; 
    unsigned char						*byteMatrix;
    double								*doubleMatrix;
    GLuint                              *texturePointer;
    GLubyte                             *texturePointer_b;
    
    
    if(PsychPrefStateGet_DebugMakeTexture())	//MARK #1
        StoreNowTime();
    
    //all subfunctions should have these two lines.  
    PsychPushHelp(useString, synopsisString, seeAlsoString);
    if(PsychIsGiveHelp()){PsychGiveHelp();return(PsychError_none);};
    
    //Get the window structure for the onscreen window.  It holds the onscreein GL context which we will need in the
    //final step when we copy the texture from system RAM onto the screen.
    PsychErrorExit(PsychCapNumInputArgs(2));   	
    PsychErrorExit(PsychRequireNumInputArgs(2)); 	
    PsychErrorExit(PsychCapNumOutputArgs(1)); 
    
    //get the window record from the window record argument and get info from the window record
    PsychAllocInWindowRecordArg(kPsychUseDefaultArgPosition, TRUE, &windowRecord);
    
    if((windowRecord->windowType!=kPsychDoubleBufferOnscreen) && (windowRecord->windowType!=kPsychSingleBufferOnscreen))
        PsychErrorExitMsg(PsychError_user, "MakeTexture called on something else than a onscreen window");
    
    //get the argument and sanity check it.
    isImageMatrixBytes=PsychAllocInUnsignedByteMatArg(2, kPsychArgAnything, &ySize, &xSize, &numMatrixPlanes, &byteMatrix);
    isImageMatrixDoubles=PsychAllocInDoubleMatArg(2, kPsychArgAnything, &ySize, &xSize, &numMatrixPlanes, &doubleMatrix);
    if(numMatrixPlanes > 4)
        PsychErrorExitMsg(PsychError_inputMatrixIllegalDimensionSize, "Specified image matrix exceeds maximum depth of 4 layers");
    if(ySize<1 || xSize <1)
        PsychErrorExitMsg(PsychError_inputMatrixIllegalDimensionSize, "Specified image matrix must be at least 1 x 1 pixels in size");
    if(! (isImageMatrixBytes || isImageMatrixDoubles))
        PsychErrorExitMsg(PsychError_user, "Illegal argument type");  //not  likely. 
    PsychMakeRect(rect, 0, 0, xSize, ySize);
    
    
    //Create a texture record.  Really just a window recored adapted for textures.  
    PsychCreateWindowRecord(&textureRecord);						//this also fills the window index field.
    textureRecord->windowType=kPsychTexture;
    // MK: We need to assign the screen number of the onscreen-window, so PsychCreateTexture()
    // can query the size of the screen/onscreen-window...
    textureRecord->screenNumber=windowRecord->screenNumber;
    textureRecord->depth=32;
    PsychCopyRect(textureRecord->rect, rect);
    
    //Allocate the texture memory and copy the MATLAB matrix into the texture memory.
    // MK: We only allocate the amount really needed for given format, aka numMatrixPlanes - Bytes per pixel.
    textureRecord->textureMemorySizeBytes= numMatrixPlanes * xSize * ySize;
    // MK: Allocate memory page-aligned... -> Helps Apple texture range extensions et al.
    if(PsychPrefStateGet_DebugMakeTexture()) 	//MARK #2
        StoreNowTime();
    textureRecord->textureMemory=valloc(textureRecord->textureMemorySizeBytes);
    if(PsychPrefStateGet_DebugMakeTexture()) 	//MARK #3
        StoreNowTime();	
    texturePointer=textureRecord->textureMemory;
    
    // Original implementation: Takes 80 ms on a 800x800 texture...
    /*        if(isImageMatrixDoubles && numMatrixPlanes==1){
        for(ix=0;ix<xSize;ix++){
            for(iy=0;iy<ySize;iy++){
                textureIndex=xSize*iy+ix;
                matrixIndex=iy + ySize * ix;
                textureRecord->textureMemory[textureIndex]= ((((((GLuint)255 << 8) | 
                                                                (GLuint)(doubleMatrix[matrixIndex])) << 8 ) | 
                                                              (GLuint)(doubleMatrix[matrixIndex]) ) << 8) | 
                    (GLuint)(doubleMatrix[matrixIndex]);
            }
        }
    }
*/	
    
    // Improved implementation: Takes 13 ms on a 800x800 texture...
    if(isImageMatrixDoubles && numMatrixPlanes==1){
        texturePointer_b=(GLubyte*) texturePointer;
        int iters=xSize*ySize;
        for(ix=0;ix<iters;ix++){
            *(texturePointer_b++)= (GLubyte) *(doubleMatrix++);  
        }
        textureRecord->depth=8;
    }
    
    
    // Original implementation: Takes 30 ms on a 800x800 texture...
    /*        if(isImageMatrixBytes && numMatrixPlanes==1){
        for(ix=0;ix<xSize;ix++){
            for(iy=0;iy<ySize;iy++){
                textureIndex=xSize*iy+ix;
                matrixIndex=iy + ySize * ix;
                textureRecord->textureMemory[textureIndex]= ((((((GLuint)255 << 8) | 
                                                                (GLuint)(byteMatrix[matrixIndex])) << 8 ) | 
                                                              (GLuint)(byteMatrix[matrixIndex]) ) << 8) | 
                    (GLuint)(byteMatrix[matrixIndex]);
            }
        }
    
    }
*/	
    // Improved version: Takes 3 ms on a 800x800 texture...
    // NB: Implementing memcpy manually by a for-loop takes 10 ms! This is a huge difference.
    // -> That's because memcpy on MacOS-X is implemented with hand-coded, highly tuned Assembler code for PowerPC.
    // -> It's always wise to use system-routines if available, instead of coding it by yourself!
    if(isImageMatrixBytes && numMatrixPlanes==1){
        memcpy((void*) texturePointer, (void*) byteMatrix, xSize*ySize);
        textureRecord->depth=8;
    }
    
    // New version: Takes 33 ms on a 800x800 texture...
    if(isImageMatrixDoubles && numMatrixPlanes==2){
        texturePointer_b=(GLubyte*) texturePointer;
        int iters=xSize*ySize;
        double *rp, *ap;
        rp=(double*) ((unsigned long long) doubleMatrix);
        ap=(double*) ((unsigned long long) doubleMatrix + (unsigned long long) iters*sizeof(double));
        for(ix=0;ix<iters;ix++){
            *(texturePointer_b++)= (GLubyte) *(rp++);  
            *(texturePointer_b++)= (GLubyte) *(ap++);  
        }
        textureRecord->depth=16;
    }
    
    // New version: Takes 20 ms on a 800x800 texture...
    if(isImageMatrixBytes && numMatrixPlanes==2){
        texturePointer_b=(GLubyte*) texturePointer;
        int iters=xSize*ySize;
        GLubyte *rp, *ap;
        rp=(GLubyte*) ((unsigned long long) byteMatrix);
        ap=(GLubyte*) ((unsigned long long) byteMatrix + (unsigned long long) iters);
        for(ix=0;ix<iters;ix++){
            *(texturePointer_b++)= *(rp++);  
            *(texturePointer_b++)= *(ap++);  
        }
        textureRecord->depth=16;
    }
    
    // Original version: Takes 160 ms on a 800x800 texture...
    /*	if(isImageMatrixDoubles && numMatrixPlanes==3){
        for(ix=0;ix<xSize;ix++){
            for(iy=0;iy<ySize;iy++){
                textureIndex=xSize*iy+ix;
                redIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 0);
                greenIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 1);
                blueIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 2);
                red=(GLuint)doubleMatrix[redIndex];
                green=(GLuint)doubleMatrix[greenIndex];
                blue=(GLuint)doubleMatrix[blueIndex];
                alpha=(GLuint)255;
                textureRecord->textureMemory[textureIndex]= (((((alpha << 8) | red) << 8 ) | green ) << 8) | blue;
            }
        }
    textureRecord->depth=24;
    }
*/	
    // Improved version: Takes 43 ms on a 800x800 texture...
    if(isImageMatrixDoubles && numMatrixPlanes==3){
        texturePointer_b=(GLubyte*) texturePointer;
        int iters=xSize*ySize;
        double *rp, *gp, *bp;
        rp=(double*) ((unsigned long long) doubleMatrix);
        gp=(double*) ((unsigned long long) doubleMatrix + (unsigned long long) iters*sizeof(double));
        bp=(double*) ((unsigned long long) gp + (unsigned long long) iters*sizeof(double));
        for(ix=0;ix<iters;ix++){
            *(texturePointer_b++)= (GLubyte) *(rp++);  
            *(texturePointer_b++)= (GLubyte) *(gp++);  
            *(texturePointer_b++)= (GLubyte) *(bp++);  
        }
        textureRecord->depth=24;
    }
    
    /*        // Original version: Takes 94 ms on a 800x800 texture...
        if(isImageMatrixBytes && numMatrixPlanes==3){
            for(ix=0;ix<xSize;ix++){
                for(iy=0;iy<ySize;iy++){
                    textureIndex=xSize*iy+ix;
                    redIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 0);
                    greenIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 1);
                    blueIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 2);
                    red=(GLuint)byteMatrix[redIndex];
                    green=(GLuint)byteMatrix[greenIndex];
                    blue=(GLuint)byteMatrix[blueIndex];
                    alpha=(GLuint)255;
                    textureRecord->textureMemory[textureIndex]= (((((alpha << 8) | red) << 8 ) | green ) << 8) | blue;
                }
            }
	}
    */	
    // Improved version: Takes 25 ms on a 800x800 texture...
    if(isImageMatrixBytes && numMatrixPlanes==3){
        texturePointer_b=(GLubyte*) texturePointer;
        int iters=xSize*ySize;
        GLubyte *rp, *gp, *bp;
        rp=(GLubyte*) ((unsigned long long) byteMatrix);
        gp=(GLubyte*) ((unsigned long long) byteMatrix + (unsigned long long) iters);
        bp=(GLubyte*) ((unsigned long long) gp + (unsigned long long) iters);
        for(ix=0;ix<iters;ix++){
            *(texturePointer_b++)= *(rp++);  
            *(texturePointer_b++)= *(gp++);  
            *(texturePointer_b++)= *(bp++);  
        }
        textureRecord->depth=24;
    }
    
    // Original version: 190 ms on a 800x800 texture...
    /*        if(isImageMatrixDoubles && numMatrixPlanes==4){
        for(ix=0;ix<xSize;ix++){
            for(iy=0;iy<ySize;iy++){
                textureIndex=xSize*iy+ix;
                redIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 0);
                greenIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 1);
                blueIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 2);
                alphaIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 3);
                red=(GLuint)doubleMatrix[redIndex];
                green=(GLuint)doubleMatrix[greenIndex];
                blue=(GLuint)doubleMatrix[blueIndex];
                alpha=(GLuint)doubleMatrix[alphaIndex];
                textureRecord->textureMemory[textureIndex]= (((((alpha << 8) | red) << 8 ) | green ) << 8) | blue;
            }
        }
    }
*/
    
    // Improved version: Takes 55 ms on a 800x800 texture...
    if(isImageMatrixDoubles && numMatrixPlanes==4){
        texturePointer_b=(GLubyte*) texturePointer;
        int iters=xSize*ySize;
        double *rp, *gp, *bp, *ap;
        rp=(double*) ((unsigned long long) doubleMatrix);
        gp=(double*) ((unsigned long long) doubleMatrix + (unsigned long long) iters*sizeof(double));
        bp=(double*) ((unsigned long long) gp + (unsigned long long) iters*sizeof(double));
        ap=(double*) ((unsigned long long) bp + (unsigned long long) iters*sizeof(double));
        for(ix=0;ix<iters;ix++){
            *(texturePointer_b++)= (GLubyte) *(ap++);  
            *(texturePointer_b++)= (GLubyte) *(rp++);  
            *(texturePointer_b++)= (GLubyte) *(gp++);  
            *(texturePointer_b++)= (GLubyte) *(bp++);  
        }
        textureRecord->depth=32;
    }
    
    // Original version: Takes 125 ms on a 800x800 texture...
    /*        if(isImageMatrixBytes && numMatrixPlanes==4){
        for(ix=0;ix<xSize;ix++){
            for(iy=0;iy<ySize;iy++){
                textureIndex=xSize*iy+ix;
                redIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 0);
                greenIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 1);
                blueIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 2);
                alphaIndex=PsychIndexElementFrom3DArray(ySize, xSize, 3, iy, ix, 3);
                red=(GLuint)byteMatrix[redIndex];
                green=(GLuint)byteMatrix[greenIndex];
                blue=(GLuint)byteMatrix[blueIndex];
                alpha=(GLuint)byteMatrix[alphaIndex];
                textureRecord->textureMemory[textureIndex]= (((((alpha << 8) | red) << 8 ) | green ) << 8) | blue;
            }
        }
    }
*/
    
    // Improved version: Takes 33 ms on a 800x800 texture...
    if(isImageMatrixBytes && numMatrixPlanes==4){
        texturePointer_b=(GLubyte*) texturePointer;
        int iters=xSize*ySize;
        GLubyte *rp, *gp, *bp, *ap;
        rp=(GLubyte*) ((unsigned long long) byteMatrix);
        gp=(GLubyte*) ((unsigned long long) byteMatrix + (unsigned long long) iters);
        bp=(GLubyte*) ((unsigned long long) gp + (unsigned long long) iters);
        ap=(GLubyte*) ((unsigned long long) bp + (unsigned long long) iters);
        for(ix=0;ix<iters;ix++){
            *(texturePointer_b++)= *(ap++);  
            *(texturePointer_b++)= *(rp++);  
            *(texturePointer_b++)= *(gp++);  
            *(texturePointer_b++)= *(bp++);  
        }
        textureRecord->depth=32;
    }
    
    
    // The memory buffer now contains our texture data in a format ready to submit to OpenGL.
    
    // Assign proper OpenGL-Renderingcontext to texture:
    // MK: Is this the proper way to do it???
    textureRecord->targetSpecific.contextObject = windowRecord->targetSpecific.contextObject;
    
    // Let's create and bind a new texture object and fill it with our new texture data.
    PsychCreateTexture(textureRecord);
    
    // Texture ready. Mark it valid and return handle to userspace:
    PsychSetWindowRecordValid(textureRecord);
    PsychCopyOutDoubleArg(1, FALSE, textureRecord->windowIndex);
    
    if(PsychPrefStateGet_DebugMakeTexture()) 	//MARK #4
        StoreNowTime();
    
    return(PsychError_none);
}