Example #1
/*{
** Name:	IIME_atAddTag	- Add a node of allocated memory to a tag.
**
** Description:
**	This routine is called when a new block of dynamic memory is being
**	allocated under a tag.  It is called by MEreqmem.  The job
**	of this routine is to store the allocated memory so that it
**	will be freed with the other blocks allocated under this tag when
**	MEtfree is called.
**
**	It works by checking the hash table for an METAGNODE for this tag.
**	If none is found, a new METAGNODE is allocated for this tag.
**	Then the block of memory is put on the QUEUE for the METAGNODE.
**
** Inputs:
**	tag		The tag under which this block of memory is
**			being allocated.
**
**	node		The block of memory being allocated.
**
** Side Effects:
**	This will take a node off the freelist, and if necessary will allocate
**	dynamic memory.
**
** History:
**	5-dec-1989 (Joe)
**	    First Written
*/
VOID
IIME_atAddTag(
	i4	tag,
	ME_NODE	*node)
{
    register METAGNODE	**first;

# ifdef OS_THREADS_USED
    CS_synch_lock( &MEtaglist_mutex );
# endif /* OS_THREADS_USED */

    /*
    ** Note that first is a pointer to a pointer.
    ** The loop will cause a return from the routine if the tag already
    ** has an METAGNODE in the hash table.
    ** If the loop finishes, then first will point to the pointer
    ** that must contain the METAGNODE.
    */
    for (first = &(htab[tag%256]);
	 *first != NULL;
	 first = &((*first)->met_hash))
    {
	if ((*first)->met_tag == tag)
	{
	    (void)QUinsert((QUEUE *) node, (QUEUE *) (*first)->met_list.MElast);
# ifdef OS_THREADS_USED
	    CS_synch_unlock( &MEtaglist_mutex );
# endif /* OS_THREADS_USED */
	    return;
	}
    }
    if (freelist == NULL)
    {
	register METAGNODE	*next;
	register int		i;

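	/*
	** Replenish the freelist: carve 50 METAGNODEs out of a single
	** dynamic allocation and chain them together through met_hash.
	** (MEreqmem is assumed to succeed here; a NULL return would be
	** dereferenced by the loop below.)
	*/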
	freelist = (METAGNODE *)
			MEreqmem(0, sizeof(METAGNODE)*50, TRUE, NULL);
	for (i = 0, next = freelist; i < 49; i++)
	{
	    next->met_hash = next + 1;
	    next = next->met_hash;
	}
	next->met_hash = NULL;
    }
    *first = freelist;
    freelist = freelist->met_hash;
    (*first)->met_hash = NULL;
    (*first)->met_tag = tag;
    QUinit((QUEUE *)&((*first)->met_list));
    (void)QUinsert((QUEUE *) node, (QUEUE *) (*first)->met_list.MElast);
# ifdef OS_THREADS_USED
    CS_synch_unlock( &MEtaglist_mutex );
# endif /* OS_THREADS_USED */
    return;
}
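A minimal usage sketch (not from the original source) tying this routine to
the public interface: MEreqmem with a nonzero tag routes each block through
IIME_atAddTag, and a single MEtfree call later releases the whole group.
MEreqmem's signature appears in Example #9; MEtfree is assumed here to take
the tag and return STATUS.

STATUS
example_tagged_alloc(void)
{
    STATUS	status;
    PTR		a;
    PTR		b;

    /* Both blocks land on tag 42's METAGNODE queue via IIME_atAddTag. */
    a = MEreqmem(42, 128, TRUE, &status);
    if (a == NULL)
	return (status);
    b = MEreqmem(42, 256, TRUE, &status);
    if (b == NULL)
    {
	(void) MEtfree(42);
	return (status);
    }

    /* ... use a and b ... */

    /* One call walks the tag's QUEUE and frees every block on it. */
    return (MEtfree(42));
}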
Example #2
/*
**
**  Name: PCisthread_alive - is a thread alive?
**
**  Description:
**	This function grabs a thread id's handle off the PID queue and
**	tests whether the thread is still alive. If the id cannot be
**	found in the queue, it falls back to PCis_alive().
**
** History:
**      10-nov-1999 (somsa01)
**	    Created.
*/
bool
PCisthread_alive(PID pid)
{
    PIDQUE	*qp;
    bool	pid_found = FALSE;
    HANDLE	hPid;

    /* No queue means nothing has been started */
    if (!Pidq_init)
	return(PCis_alive(pid));

    /*
    ** Find the thread id in the PID queue.
    */
    CS_synch_lock(&Pidq_mutex);
    for (qp = (PIDQUE *)Pidq.q_next;
	 qp != (PIDQUE *)&Pidq;
	 qp = (PIDQUE *)qp->pidq.q_next)
    {
	if ((qp->pid == pid) && qp->hPid)
	{
	    pid_found = TRUE;
	    hPid = qp->hPid;
	    break;
	}
    }
    CS_synch_unlock(&Pidq_mutex);

    if (!pid_found)
	return(PCis_alive(pid));
    else
    {
	DWORD	status;

	/*
	** Let's see if this thread is still alive.
	*/
	GetExitCodeThread(hPid, &status);
	if (status == STILL_ACTIVE)
	    return(TRUE);
	else
	{
	    /*
	    ** Set the thread's exit status in the queue.
	    */
	    CS_synch_lock(&Pidq_mutex);
	    qp->stat = status;
	    qp->hPid = NULL;
	    CS_synch_unlock(&Pidq_mutex);
	    CloseHandle(hPid);

	    return(FALSE);
	}
    }
}
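A usage sketch (hypothetical caller, not from the original source): polling
until the thread goes away.  PCsleep is assumed to be the usual CL
millisecond-sleep routine.

void
example_wait_until_gone(PID pid)
{
    /* Returns once the thread has terminated; PCisthread_alive records
    ** the exit status in the PID queue when it sees the thread die.
    */
    while (PCisthread_alive(pid))
	PCsleep(100);
}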
Example #3
/*{
** Name:	CS_scb_attach	- make an SCB known to MO/IMA
**
** Description:
**	Links the specified SCB into the known thread tree.  Logs an
**	error to the server log if it's already present (it shouldn't be).
**
** Re-entrancy:
**	no.  Called with inkernel set, presumably.
**	(OS-threads: yes, locks global tree mutex)
**
** Inputs:
**	scb		the thread to link in.
**
** Outputs:
**	scb		cs_spblk is updated.
**
** Returns:
**	none.	
**
** Side Effects:
**	May TRdisplay debug information.
**
** History:
**	26-Oct-1992 (daveb)
**	    documented.
**	06-oct-1993 (tad)
**	    Bug #56449
**	    Changed %x to %p for pointer values.
**	13-Feb-98 (fanra01)
**	    Modified to use the SID as the key.
*/
void
CS_scb_attach( CS_SCB *scb )
{
    SPBLK node;
    SPBLK *sp;

# ifdef OS_THREADS_USED
    CS_synch_lock( &Cs_srv_block.cs_scb_mutex );
# endif /* OS_THREADS_USED */
    node.key = (PTR) scb->cs_self;
    sp = SPlookup( &node, Cs_srv_block.cs_scb_ptree );
    if( sp != NULL )
    {
	TRdisplay("CS_scb_attach: attempt to attach existing SCB %p!!!\n",
		  scb );
    }
    else
    {
        scb->cs_spblk.uplink = NULL;
	scb->cs_spblk.leftlink = NULL;
	scb->cs_spblk.rightlink = NULL;
	scb->cs_spblk.key = (PTR) scb->cs_self;
	SPinstall( &scb->cs_spblk, Cs_srv_block.cs_scb_ptree );
    }
# ifdef OS_THREADS_USED
    CS_synch_unlock( &Cs_srv_block.cs_scb_mutex );
# endif /* OS_THREADS_USED */
}
Example #4
STATUS
TMhrnow(HRSYSTIME *stime)
{
#if defined(sqs_ptx)
	struct timespec cur_syst;
#else
	SYSTIME cur_syst;
#endif /* sqs_ptx */

#ifdef TMHRNOW_WRAPPED_CLOCKGETTIME
	return clock_gettime( CLOCK_REALTIME, stime );
#endif

#ifndef TMHRNOW_WRAPPED_CLOCKGETTIME

	if ( !initialized )
	{
		initialized = TRUE;
#ifdef OS_THREADS_USED
		CS_synch_init(&nanomutex);
#endif /* OS_THREADS_USED */
	}

#ifdef sqs_ptx
	getclock(TIMEOFDAY, &cur_syst);
	stime->tv_sec = cur_syst.tv_sec;
	stime->tv_nsec = cur_syst.tv_nsec;
#else
    	TMet(&cur_syst);
	stime->tv_sec = cur_syst.TM_secs;
	stime->tv_nsec = cur_syst.TM_msecs * NANO_PER_MILLI;
#endif /* sqs_ptx */

	/*
	** if we have been called twice within the same 
	** interval, increment the time by one nanosecond.
	*/
#ifdef OS_THREADS_USED
	CS_synch_lock(&nanomutex);
#endif /* OS_THREADS_USED */
	if ( stime->tv_sec == lasttime.tv_sec &&
	     stime->tv_nsec <= lasttime.tv_nsec )
	{
		stime->tv_nsec = lasttime.tv_nsec + 1;
	}
	lasttime.tv_sec = stime->tv_sec;
	lasttime.tv_nsec = stime->tv_nsec;
#ifdef OS_THREADS_USED
	CS_synch_unlock(&nanomutex);
#endif /* OS_THREADS_USED */
 
	return OK;
#endif /* TMHRNOW_WRAPPED_CLOCKGETTIME */

}
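A sketch (not from the original source) of the property the nanomutex block
above provides: two back-to-back calls never return the same
(tv_sec, tv_nsec) pair, so HRSYSTIME stamps can double as unique,
ascending identifiers.

void
example_unique_stamps(void)
{
    HRSYSTIME	t1, t2;

    (void) TMhrnow(&t1);
    (void) TMhrnow(&t2);

    /* t2 always compares greater than t1: if the clock did not advance
    ** between the calls, TMhrnow bumped tv_nsec past the remembered
    ** lasttime value.
    */
}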
Example #5
STATUS
CS_scb_index(i4 msg,
	     PTR cdata,
	     i4  linstance,
	     char *instance, 
	     PTR *instdata )
{
    STATUS stat = OK;
    PTR ptr;

# ifdef OS_THREADS_USED
    CS_synch_lock( &Cs_srv_block.cs_scb_mutex );
# endif /* OS_THREADS_USED */
    switch( msg )
    {
    case MO_GET:
	if( OK == (stat = CS_get_block( instance,
				       Cs_srv_block.cs_scb_ptree,
				       &ptr ) ) )
	    *instdata = (PTR) CS_find_scb((CS_SID) ptr);
	break;

    case MO_GETNEXT:
	if( OK == ( stat = CS_nxt_block( instance,
					Cs_srv_block.cs_scb_ptree,
					&ptr ) ) )
	{
	    *instdata = (PTR) CS_find_scb((CS_SID) ptr);
	    stat = MOptrout( MO_INSTANCE_TRUNCATED,
			      ptr,
			      linstance,
			      instance );
	}
	break;

    default:
	stat = MO_BAD_MSG;
	break;
    }
# ifdef OS_THREADS_USED
    CS_synch_unlock( &Cs_srv_block.cs_scb_mutex );
# endif /* OS_THREADS_USED */
    return( stat );
}
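A sketch (hypothetical caller, not from the original source) of the
MO_GETNEXT protocol implemented above: an empty instance string asks for the
first entry, and each successful call overwrites it with the key of the SCB
just returned, ready for the next call.  EOS and the exact end-of-scan
status are assumptions from general MO usage.

void
example_walk_scbs(void)
{
    char	instance[32];
    PTR		instdata;
    STATUS	stat;

    instance[0] = EOS;		/* start before the first entry */
    for (;;)
    {
	stat = CS_scb_index(MO_GETNEXT, NULL, sizeof(instance),
			    instance, &instdata);
	if (stat != OK)		/* e.g. MO_NO_NEXT when exhausted */
	    break;
	/* instdata now points at the next CS_SCB */
    }
}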
Example #6
/******************************************************************************
**
** Name: DIalloc - Allocates a page to a direct access file.
**
** Description:
**      The DIalloc routine is used to add pages to a direct
**      access file.  This routine can add more than one page
**      at a time by accepting a count of the number of pages to add.
**
**      The end-of-file and allocation information are not updated on disk
**      until a DIflush call is issued.  This ensures that pages are not
**      considered valid until after they are formatted.  The allocation can
**      be ignored if the file is closed or the system crashes before the
**      DIflush.
**
** Inputs:
**      f                Pointer to the DI file
**                       context needed to do I/O.
**      n                The number of pages to allocate.
**
** Outputs:
**      page             Pointer to variable used to
**                       return the page number of the
**                       first page allocated.
**      err_code         Pointer to a variable used
**                       to return operating system
**                       errors.
**    Returns:
**        OK
**        DI_BADEXTEND      Can't allocate disk space
**        DI_BADFILE        Bad file context.
**        DI_EXCEED_LIMIT   Too many open files.
**    Exceptions:
**        none
**
** Side Effects:
**        none
** History:
**	09-feb-1996 (canor01)
**	    Get exclusive semaphore on DI_IO before updating it in DI_sense
**	08-dec-1997 (canor01)
**	    Implement LRU for open files (initial copy from Unix).
**	28-jan-1998 (canor01)
**	    Optimize LRU--only call DIlru_open if file has been closed.
**      06-aug-1999 (mcgem01)
**          Replace nat and longnat with i4.
**	13-Nov-2009 (kschendel) SIR 122757
**	    Make io-sem a SYNCH.
**
******************************************************************************/
STATUS
DIalloc(DI_IO      *f,
        i4         n,
        i4         *page,
        CL_SYS_ERR *err_code)
{
    STATUS status = OK;

    CLEAR_ERR(err_code);

    /*
     * Check file control block pointer, return if bad.
     */

    if (f->io_type != DI_IO_ASCII_ID)
    	return (DI_BADFILE);

    CS_synch_lock( &f->io_sem );

    /* get file descriptor for this file */
    do
    {
	if ( f->io_nt_fh == INVALID_HANDLE_VALUE )
	    status = DIlru_open( f, FALSE, err_code );
	if ( status != OK )
	    break;
 
        status = DI_sense( f, page, err_code );
 
	if ( status != OK )
	    break;

	*page = (i4) (f->io_system_eof + 1);
	f->io_system_eof += n;
 
    } while (FALSE);
 
    CS_synch_unlock( &f->io_sem );
 
    return( status );

}
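A sketch (not from the original source) of the allocate/format/flush
protocol the Description implies.  The DIwrite and DIflush signatures are
assumptions modeled on DIalloc's.

STATUS
example_extend(DI_IO *f, char *fmt_page, CL_SYS_ERR *err_code)
{
    STATUS	status;
    i4		page;
    i4		n = 1;

    /* Reserve one new page; the on-disk EOF has not moved yet. */
    if ((status = DIalloc(f, 1, &page, err_code)) != OK)
	return (status);

    /* Format the page while it is still invisible to other readers. */
    if ((status = DIwrite(f, &n, page, fmt_page, err_code)) != OK)
	return (status);

    /* Commit the new EOF; before this, a crash simply discards it. */
    return (DIflush(f, err_code));
}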
Example #7
/******************************************************************************
** Name:
** 	MEfree.c
**
** Function:
** 	MEfree
**
** Arguments:
** 	void *	block;
**
** Result:
** 	Frees the block of memory pointed to by 'block'.
**
** 	Removes the block from the tag queue if appropriate.
**
** Returns:
**	STATUS: OK, ME_00_FREE, ME_NO_FREE
**
** Side Effects:
** 	None
**
** History:
**	21-mar-1996 (canor01)
**	    Free memory from calling process's heap.  Compact the
**	    heap after every several frees.
**	03-jun-1996 (canor01)
**	    Internally, store the tag as an i4 instead of an i2. This makes
**	    for more efficient code on byte-aligned platforms that do fixups.
**	08-aug-1999 (mcgem01)
**	    Changed longnat to i4.
**	08-feb-2001 (somsa01)
**	    Changed types of i_meactual and i_meuser  to be SIZE_TYPE.
**	21-jun-2002 (somsa01)
**	    Sync'ed up with UNIX. Rely on ME_NODE rather than a ptr UNION.
**	    Also, removed call to HeapCompact() logic.
**	05-Jul-2005 (drivi01)
**		Replaced HeapFree call with free.
**	11-May-2009 (kschendel) b122041
**	    Change pointer arg to void *, more appropriate.
** 	23-Sep-2009 (wonst02) Bug 122427
** 	    Fix possibly trashing memory (alloc'd by tag) by using taglist mutex
**
******************************************************************************/
STATUS
MEfree(void *block)
{
    register ME_NODE	*this;
    STATUS		rv = OK;

    if ( block == NULL )
	rv = ME_00_FREE;
    else 
    {
	this = (ME_NODE *)((char *)block - sizeof(ME_NODE));

	/*
	** assume block is legit if the node looks like it points to an
	** allocated block.
	*/
	if (this->MEaskedfor == 0)
	    rv = ME_NO_FREE;

	if (rv == OK)
	{
	    i_meactual -= this->MEsize;
	    i_meuser -= this->MEaskedfor;

	    if (this->MEtag)
	    {
	    	CS_synch_lock(&MEtaglist_mutex);	  
		QUremove((QUEUE *)this);
		CS_synch_unlock(&MEtaglist_mutex);
	    }

	    free((char *)this);
	}
    }

    return(rv);
}
Example #8
void
CS_detach_scb( CS_SCB *scb )
{
    SPBLK node;
    SPBLK *sp;

# ifdef OS_THREADS_USED
    CS_synch_lock( &Cs_srv_block.cs_scb_mutex );
# endif /* OS_THREADS_USED */
    node.key = (PTR) scb->cs_self;
    sp = SPlookup( &node, Cs_srv_block.cs_scb_ptree );
    if( sp == NULL )
    {
	TRdisplay("CS_detach_scb: attempt to detach unknown SCB %p\n",
		  scb );
    }
    else
    {
	SPdelete( &scb->cs_spblk, Cs_srv_block.cs_scb_ptree );
    }
# ifdef OS_THREADS_USED
    CS_synch_unlock( &Cs_srv_block.cs_scb_mutex );
# endif /* OS_THREADS_USED */
}
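A sketch (hypothetical lifecycle, not from the original source) of how this
pairs with CS_scb_attach from Example #3:

static void
example_scb_lifecycle(CS_SCB *scb)
{
    CS_scb_attach(scb);		/* make the thread visible to MO/IMA */

    /* ... thread runs; MO/IMA queries can find the SCB by SID ... */

    CS_detach_scb(scb);		/* unlink it before the SCB is freed */
}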
Example #9
PTR
MEreqmem(
	u_i2	tag,
	SIZE_TYPE size,
	bool	zero,
	STATUS	*status)
{
    PTR	block=NULL;
    register ME_NODE *node;		/* the node to return */
    register ME_NODE *start;		/* for searching free list */
    register ME_NODE *this;		/* block to get node from */
    register ME_NODE *frag;		/* fragment block */
    
    ME_NODE 	*tmp;			/* not register for MEadd */
    SIZE_TYPE	nsize;			/* size of node to obtain */
    SIZE_TYPE	fsize;			/* size of 'this' fragment block */
    SIZE_TYPE	newstuff;		/* size to add to process, or  0 */
    SIZE_TYPE	prev_actual;		/* rescan free list? */
    SIZE_TYPE	alloc_pages;
    CL_ERR_DESC	err_code;

    STATUS	MEstatus = OK;
    
    i_meuser += size;
    
    if (!size)
	MEstatus = ME_NO_ALLOC;
    
    if( !MEsetup )
        MEinitLists();
    
    /*
    **	Try to do the allocation.
    */
    if( MEstatus == OK )
    {
	nsize = SIZE_ROUND( size );
	/*
	** Get memory with malloc().
	*/
	if( MEadvice == ME_USER_ALLOC )
	{
	    if( (node = (ME_NODE *)malloc( nsize )) == NULL )
	    	MEstatus = ME_GONE;
	}
	/*
	** Get block from private free list.
	*/
	else
	{
# ifdef OS_THREADS_USED
	    CS_synch_lock( &MEfreelist_mutex );
# endif /* OS_THREADS_USED */

	    /*
	    **  Look on free list for 1st block big enough
	    **  to hold request.  This linear search can be slow.
	    */
	    start = (ME_NODE *)&MEfreelist;
	    this = MEfreelist.MEfirst;
	    while ( this != NULL && this != start && this->MEsize < nsize )
		this = this->MEnext;
	    
	    if( this == NULL )
	        MEstatus = ME_CORRUPTED;
	    
	    /*
	    ** At this point, we are in one of three states:
	    ** 1)  Corrupted memory; MEstatus != OK
	    ** 2)  this is good node, this != start
	    ** 3)  No good node; this == start;
	    */
	    if ( MEstatus == OK )
	    {
		/*
		** If nothing on free list is big enough
		** get one or more standard blocks from system,
		** take what is needed and add remainder
		** to free list.
		*/
		if (this != start)
		{
		    /* take right off the free list */
		    newstuff = 0;
		}
		else	/* this == start */
		{
		    /*
		     * Expand the free list by calling getpages
		     * newstuff holds the number of pages needed
		     */
		    newstuff = (nsize + ME_MPAGESIZE-1)/ME_MPAGESIZE;
		    /* if first time allocation, get enough for MO overhead */
		    if ( (prev_actual = i_meactual) == (SIZE_TYPE) 0 )
			newstuff += 4;
# ifdef OS_THREADS_USED
	            CS_synch_unlock( &MEfreelist_mutex );
# endif /* OS_THREADS_USED */
		    MEstatus = MEget_pages(ME_SPARSE_MASK, newstuff, NULL, 
			(PTR *)&tmp, &alloc_pages, &err_code);
# ifdef OS_THREADS_USED
	            CS_synch_lock( &MEfreelist_mutex );
# endif /* OS_THREADS_USED */
		    if (MEstatus == OK)
		    {
			/* now we need to find where to put this new memory
			   on the sorted free list - we search in reverse */
			tmp->MEsize = newstuff * ME_MPAGESIZE;
			this = MEfreelist.MElast;
			while (start != this && this != NULL &&
			       this > tmp)
			    this = this->MEprev;
			if (this != start && NEXT_NODE(this) == tmp)
			{
			    this->MEsize += tmp->MEsize;
			}
			else
			{
			    (void)QUinsert( (QUEUE *) tmp, (QUEUE *)this );
			    this = tmp;
			}
			if (this->MEnext != start &&
			    NEXT_NODE(this) == this->MEnext)
			{
			    this->MEsize += this->MEnext->MEsize;
			    (void)QUremove( (QUEUE *) this->MEnext);
			}
			/*
			** While the free list mutex was released, another
			** thread may have freed up a big enough piece of
			** memory for our needs, or may have extended the
			** free list.
			** If that's the case, research the free list;
			** we'll find either a right-sized node or 
			** the new memory we just added to the free list.
			*/
			if ( prev_actual != i_meactual )
			{
			    this = MEfreelist.MEfirst;
			    while ( this != NULL && this != start && this->MEsize < nsize )
				this = this->MEnext;
		
			    if( this == NULL )
				MEstatus = ME_CORRUPTED;
			}
		    }
		    else
			if (MEstatus == ME_OUT_OF_MEM)
			    MEstatus = ME_GONE;
		}

		/*
		** At this point, we can be in two states.
		** 1)  Corrupted memory, MEstatus != OK
		** 2)  'this' is an OK node from the free list.
		*/
		
		if ( MEstatus == OK )
		{
		    node = this;
		    
		    /*
		    ** if this is correct size or would
		    **   leave useless block in chain
		    **	just move block to allocated list
		    ** else
		    **	grab what is needed from 'this'
		    **	  block and then update 'this'
		    */
		    
		    fsize = node->MEsize - nsize;
		    if ( fsize <= sizeof(ME_NODE) )
		    {
			(void)QUremove( (QUEUE *) node );
			
			/* fudge size in node to eat leftover amount. */
			fsize = 0;
			nsize = node->MEsize;
		    }
		    else	/* make fragment block */
		    {
			/*
			** Make a leftover block after the
			** allocated space in node, in 'this'
			*/
			frag = (ME_NODE *)((char *) node + nsize );
			frag->MEsize = fsize;
			frag->MEtag = 0;
			
			/* remove node, add fragment to free list */
			(void)QUremove( (QUEUE *) node );
			MEstatus = MEfadd( frag, FALSE );
			
		    }  /* fragment left over */
		    /* Increment meactual while mutex held */
		    i_meactual += nsize;
		}  /* Got a node */
	    }  /* free list search OK */
# ifdef OS_THREADS_USED
	    CS_synch_unlock( &MEfreelist_mutex );
# endif /* OS_THREADS_USED */
	}  /* ME_USER_ALLOC */
	
	/*
	** At this point we are in one of two states:
	** 1.  Corrupted, MEstatus != OK.
	** 2.  Have a 'node' to use, from freelist or malloc.
	**     The freelist is consistent, but the allocated list is
	**     not setup for the node. "nsize" is the actual size of "node".
	*/
	
	if( MEstatus == OK )
	{
	    /* splice into allocated object queue */
	    if (0 == tag)
	    {
# ifdef OS_THREADS_USED
	    	CS_synch_lock( &MElist_mutex );
# endif /* OS_THREADS_USED */
	    	(void)QUinsert( (QUEUE *) node, (QUEUE *) MElist.MElast );
# ifdef OS_THREADS_USED
		CS_synch_unlock( &MElist_mutex );
# endif /* OS_THREADS_USED */
	    }
	    else
	    {
		IIME_atAddTag(tag, node);
	    }
	    /* Set values in block to be returned */
	    node->MEtag = tag;
	    node->MEsize = nsize;
	    node->MEaskedfor = size;
	    
	    /* Fill in the returned pointer */
	    block = (PTR)((char *)node + sizeof(ME_NODE));
	    
	    if (zero)
		MEfill( (nsize - sizeof(ME_NODE)), 0, block);
	}  /* got node OK */
    }
    if (status != NULL)
	*status = MEstatus;
    if (MEstatus != OK)
	return((PTR)NULL);
    else
	return(block);
}
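Layout note (added; derived from the pointer arithmetic above and in
MEfree): every allocation carries a hidden ME_NODE header, so the pointer
handed back is sizeof(ME_NODE) past the start of the real block, and MEfree
recovers the node by subtracting it again.

/*
**	node				block = (char *)node + sizeof(ME_NODE)
**	v				v
**	+-------------------------------+-------------------------------+
**	| ME_NODE: queue links, MEtag,	| caller's "size" bytes		|
**	| MEsize (rounded), MEaskedfor	| (zero-filled if zero == TRUE)	|
**	+-------------------------------+-------------------------------+
*/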
Example #10
/*{
** Name: DI_inproc_write -   writes page(s) to a file on disk.
**
** Description:
**	This routine was created to make DIwrite more readable once
**	error checking had been added. See DIwrite for comments.
**
** Inputs:
**      f                    Pointer to the DI file
**                           context needed to do I/O.
**	diop		     Pointer to dilru file context.
**      buf                  Pointer to page(s) to write.
**      page                 Value indicating page(s) to write.
**	num_of_pages	     number of pages to write
**      
** Outputs:
**      err_code             Pointer to a variable used
**                           to return operating system 
**                           errors.
**    Returns:
**          OK
**	    other errors.
**    Exceptions:
**        none
**
** Side Effects:
**        none
**
** History:
**    30-nov-1992 (rmuth)
**	    Created.
**    03-jun-1996 (canor01)
**	    Note in the scb that this is a DI wait.
**    05-May-1997 (merja01)
**      Changed preprocessor stmt for pwrite.  Not all platforms
**      using OS_THREADS have a pwrite function.  This function
**      seems to only be available on Solaris 2.4 where async IO
**      is not yet supported.
**    14-July-1997 (schte01)
**      For those platforms that do direct i/o (where the
**      seek and the write are separate functions), do not release and
**      reaquire the semaphore on the DI_IO block. This will protect
**      against i/o being done by a different thread in between the 
**      lseek and the write.
**    14-Aug-1997 (schte01)    
**      Add xCL_DIRECT_IO as a condition to the 14-July-1997 change
**      instead of the test for !xCL_ASYNCH_IO.
**	22-Dec-1998 (jenjo02)
**	    If DI_FD_PER_THREAD is defined, call IIdio_write() instead of
**	    pwrite().
**	01-oct-1998 (somsa01)
**	    Return DI_NODISKSPACE when we are out of disk space.
**  01-Apr-2004 (fanch01)
**      Add O_DIRECT support on Linux depending on the filesystem
**      properties, pagesize.  Fixups for misaligned buffers on read()
**      and write() operations.
**    13-apr-04 (toumi01)
**	Move stack variable declaration to support "standard" C compilers.
**	29-Jan-2005 (schka24)
**	    Ditch attempt to gather diow timing stats, not useful in
**	    the real world and generates excess syscalls on some platforms.
**	15-Mar-2006 (jenjo02)
**	    io_sem is not needed with thread affinity.
**	6-Nov-2009 (kschendel) SIR 122757
**	    Make io-sem a SYNCH, avoid entirely if PRIVATE.
**	    Delete copy-to-align, caller is supposed to do it now.
**	    Don't attempt SCB updating if not backend.
*/
static STATUS
DI_inproc_write(
    DI_IO	*f,
    DI_OP	*diop,
    char        *buf,
    i4	page,
    i4	num_of_pages,
    CL_ERR_DESC *err_code )
{
    STATUS	status = OK;
    CS_SCB	*scb;
    i4		saved_state;

    /* unix variables */
    int		bytes_written;
    int		bytes_to_write;
    OFFSET_TYPE lseek_offset;
    /* 
    ** seek to place to write 
    */
    lseek_offset = 
	(OFFSET_TYPE)(f->io_bytes_per_page) * (OFFSET_TYPE)page;

    bytes_to_write = (f->io_bytes_per_page * (num_of_pages));

    if (Di_backend)
    {
	CSget_scb(&scb);
	if ( scb )
	{
	    saved_state = scb->cs_state;
	    scb->cs_state = CS_EVENT_WAIT;

	    if (f->io_open_flags & DI_O_LOG_FILE_MASK)
	    {
		scb->cs_memory = CS_LIOW_MASK;
		scb->cs_liow++;
		Cs_srv_block.cs_wtstatistics.cs_liow_done++;
		Cs_srv_block.cs_wtstatistics.cs_liow_waits++;
		Cs_srv_block.cs_wtstatistics.cs_liow_kbytes
		    += bytes_to_write / 1024;
	    }
	    else
	    {
		scb->cs_memory = CS_DIOW_MASK;
		scb->cs_diow++;
		Cs_srv_block.cs_wtstatistics.cs_diow_done++;
		Cs_srv_block.cs_wtstatistics.cs_diow_waits++;
		Cs_srv_block.cs_wtstatistics.cs_diow_kbytes
		    += bytes_to_write / 1024;
	    }
	}
    }

# if  defined(OS_THREADS_USED) && defined(xCL_NO_ATOMIC_READ_WRITE_IO)
    if ( !Di_thread_affinity && (f->io_fprop & FPROP_PRIVATE) == 0)
	CS_synch_lock( &f->io_sem );
# endif /* OS_THREADS_USED && xCL_NO_ATOMIC_READ_WRITE_IO */

# if  defined(OS_THREADS_USED) && !defined(xCL_NO_ATOMIC_READ_WRITE_IO)
    bytes_written =
#ifdef LARGEFILE64
 	 pwrite64( (int)diop->di_fd, buf, bytes_to_write, lseek_offset );
#else /*  LARGEFILE64 */
 	 pwrite( (int)diop->di_fd, buf, bytes_to_write, lseek_offset );
#endif /* LARGEFILE64 */
# else /* OS_THREADS_USED  !xCL_NO_ATOMIC_READ_WRITE_IO */
    bytes_written =
 	 IIdio_write( (int)diop->di_fd, buf, bytes_to_write,
 	 	       lseek_offset, 0, 
		       f->io_fprop,
		       err_code );
# endif /* OS_THREADS_USED */

    if ( bytes_written != bytes_to_write )
    {
	SETCLERR(err_code, 0, ER_write);

	switch( err_code->errnum )
	{
	case EFBIG:
	    status = DI_BADEXTEND;
	    break;
	case ENOSPC:
	    status = DI_NODISKSPACE;
	    break;
#ifdef EDQUOTA
	case EDQUOT:
	    status = DI_EXCEED_LIMIT;
	    break;
#endif
	default:
	    if (err_code->errnum == 0)
		status = DI_ENDFILE;
	    else
		status = DI_BADWRITE;
	    break;
	}
    }

# if  defined(OS_THREADS_USED) && defined(xCL_NO_ATOMIC_READ_WRITE_IO)
    if ( !Di_thread_affinity && (f->io_fprop & FPROP_PRIVATE) == 0)
	CS_synch_unlock( &f->io_sem );
# endif /* OS_THREADS_USED && xCL_NO_ATOMIC_READ_WRITE_IO */

    if ( Di_backend && scb )
    {

	scb->cs_memory &= ~(CS_DIOW_MASK | CS_LIOW_MASK);
	scb->cs_state = saved_state;
    }

    return( status );
}
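/*
** Concurrency note (added summary of the conditional code above):
** pwrite(64) seeks and writes atomically, so that path needs no mutex;
** the IIdio_write() path issues a separate lseek, so io_sem must
** serialize the seek+write pair whenever the fd can be shared between
** threads (i.e. not thread-affinity and not FPROP_PRIVATE).
*/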
Example #11
static STATUS
DI_galloc(
    DI_IO	*f,
    i4	n,
    DI_OP	*diop,
    i4		*end_of_file,
    CL_ERR_DESC *err_code)
{
    STATUS                      big_status = OK, small_status =OK;
    STATUS                      intern_status = OK;
    register DI_SLAVE_CB        *disl;
    i4				last_page;
    OFFSET_TYPE			lseek_ret;

    do
    {
# ifdef OS_THREADS_USED
	/* Seek/write must be semaphore protected */
	if ((f->io_fprop & FPROP_PRIVATE) == 0)
	    CS_synch_lock( &f->io_sem );
# endif /* OS_THREADS_USED */

        if (Di_slave)
        {
	    disl = diop->di_evcb;

	    disl->file_op = DI_SL_ZALLOC;
	    disl->length = n * f->io_bytes_per_page;
	    /* Pass file properties to slave */
	    FPROP_COPY(f->io_fprop,disl->io_fprop);

	    DI_slave_send( disl->dest_slave_no, diop,
			   &big_status, &small_status, &intern_status );

	    if (( big_status != OK ) || ( small_status != OK ))
		break;

	    if ( disl->status != OK )
	    {
	        STRUCT_ASSIGN_MACRO(disl->errcode, *err_code);
	        small_status = DI_BADEXTEND;
	        break;
	     }
	     else
	     {
	        lseek_ret = disl->length;
	     }

	}
    	else
    	{
	    /* 
	    ** Running without slaves 
	    */
	    OFFSET_TYPE	lseek_offset;
	    i8		reservation;
	    i4		buf_size;
	    i4		bytes_written;
	    i4		pages_remaining = n;
	    i4		pages_at_a_time = Di_zero_bufsize /
					  f->io_bytes_per_page;

	    /* find current end-of-file */

	    lseek_ret = IIdio_get_file_eof(diop->di_fd, f->io_fprop);
	    if ( lseek_ret == (OFFSET_TYPE)-1L )
	    {
	    	SETCLERR(err_code, 0, ER_lseek);
	    	small_status = DI_BADINFO;
		break;
	    }
	    else
	    {
		lseek_offset = lseek_ret;
		/* If this filesystem can do reservations, see if we
		** should reserve more space.
		** Even though we have to write the zeros anyway, the
		** reservation may well be larger than the zeroing
		** buffer, and this way helps maintain contiguity.
		** Not worth it for tiny writes.
		*/
		if (pages_remaining > 2
		  && FPROP_ALLOCSTRATEGY_GET(f->io_fprop) == FPROP_ALLOCSTRATEGY_RESV)
		{
		    reservation = lseek_offset + (pages_remaining * f->io_bytes_per_page);
		    if (reservation > f->io_reserved_bytes)
		    {
			/* Re-check in case some other server reserved */
			small_status = IIdio_get_reserved(diop->di_fd,
				&f->io_reserved_bytes, err_code);
			if (small_status == OK && reservation > f->io_reserved_bytes)
			{
			    small_status = IIdio_reserve(diop->di_fd,
					f->io_reserved_bytes,
					reservation - f->io_reserved_bytes,
					err_code);
			    if (small_status == OK)
			    {
				f->io_reserved_bytes = reservation;
			    }
			    else
			    {
				if (small_status != DI_BADFILE)
				    break;
				/* Fallocate not supported, turn off
				** "reserve" strategy, continue without.
				*/
				small_status = OK;
				FPROP_ALLOCSTRATEGY_SET(f->io_fprop, FPROP_ALLOCSTRATEGY_VIRT);
			    }
			}
		    }
		} /* end reservations */

		while ( pages_remaining > 0 )
		{
		    if ( pages_remaining < pages_at_a_time )
			buf_size = pages_remaining *
				    f->io_bytes_per_page;
		    else
			buf_size = Di_zero_bufsize;

# if  defined(OS_THREADS_USED) && !defined(xCL_NO_ATOMIC_READ_WRITE_IO)
		    bytes_written =
#ifdef LARGEFILE64
			pwrite64( diop->di_fd, Di_zero_buffer, 
				    buf_size, lseek_offset );
#else /*  LARGEFILE64 */
			pwrite( diop->di_fd, Di_zero_buffer, 
				    buf_size, lseek_offset );
#endif /* LARGEFILE64 */
# else /* OS_THREADS_USED  !xCL_NO_ATOMIC_READ_WRITE_IO */
		    bytes_written =
			IIdio_write( diop->di_fd, Di_zero_buffer, 
				    buf_size, 
				    lseek_offset, 
				    &lseek_offset, 
				    f->io_fprop,
				    err_code );
# endif /* OS_THREADS_USED */

		    if ( bytes_written != buf_size )
		    {
			SETCLERR(err_code, 0, ER_write);
			small_status = DI_BADEXTEND;
			break;
		    }

		    lseek_offset += buf_size;
		    pages_remaining -= pages_at_a_time;
		}

		if ( small_status != OK )
		    break;
	    }
	}

	*end_of_file = ( lseek_ret / f->io_bytes_per_page) - 1;

    } while (FALSE);

    if (big_status == OK && small_status == OK)
    {
	/*
	** Update the current allocated end-of-file under mutex protection
	*/
	last_page = *end_of_file + n;
	if (last_page > f->io_alloc_eof)
	    f->io_alloc_eof = last_page;
    }

# ifdef OS_THREADS_USED
    if ((f->io_fprop & FPROP_PRIVATE) == 0)
	CS_synch_unlock( &f->io_sem );
# endif /* OS_THREADS_USED */

    if ( big_status != OK )
	small_status = big_status;

    if ( small_status != OK )
	DIlru_set_di_error( &small_status, err_code, intern_status,
			    DI_GENERAL_ERR);

    return(small_status);

}
Example #12
/*{
** Name: DIrename - Renames a file. 
**
** Description:
**      The DIrename will change the name of a file. 
**	The file MUST be closed.  The file can be renamed
**      but the path cannot be changed.  A fully qualified
**      filename must be provided for old and new names.
**      This includes the type qualifier extension.
**   
** Inputs:
**	di_io_unused	     UNUSED DI_IO pointer (always set to 0 by caller)
**      path                 Pointer to the path name.
**      pathlength           Length of path name.
**      oldfilename          Pointer to old file name.
**      oldlength            Length of old file name.
**      newfilename          Pointer to new file name.
**      newlength            Length of new file name.
** Outputs:
**      err_code             Pointer to a variable used
**                           to return operating system 
**                           errors.
**    Returns:
**        OK
**        DI_BADRNAME        Any i/o error during rename.
**        DI_BADPARAM        Parameter(s) in error.
**        DI_DIRNOTFOUND     Path not found.
**    Exceptions:
**        none
**
** Side Effects:
**        none
**
** History:
**    26-mar-87 (mmm)    
**          Created new for 6.0.
**    06-feb-89 (mikem)
**	    Clear the CL_ERR_DESC.
**	15-apr-1992 (bryanp)
**	    Remove DI_IO argument and no longer support renaming open files.
**    30-nov-1992 (rmuth)
**	    - Prototype.
**	    - DIlru error checking
**    17-sep-1994 (nanpr01)
**          - Needs to check for interrupted system calls, specially for
**            SIGUSR2. Current implementation of 1 more retry is optimistic.
**            In a lot of UNIX systems, link, unlink, rename cannot be
**            interrupted (HP-UX). But Solaris returns EINTR. Bug # 57938.
**    10-oct-1994 (nanpr01)
**          - Wrong number of parameter in DIlru_flush. Bug # 64169
**	20-Feb-1998 (jenjo02)
**	    DIlru_flush() prototype changed, it now computes the number of
**	    FDs to close instead of being passed an arbitrary number.
**	    Cleaned up handling of errno, which will be invalid after calling
**	    DIlru_flush().
**  15-Apr-2004 (fanch01)
**      Force closing of LRU file descriptors when a rename error is
**      encountered.  Only occurs on a rename failure and the only
**      file that is closed is the file associated with the error.
**      Relieves problems on filesystems which don't accommodate renaming
**      open files.  "Interesting" semaphore usage is consistent with other
**      DI usage.
**	21-Apr-2004 (schka24)
**	    retry declaration got misplaced somehow, fix so it compiles.
**	26-Jul-2005 (schka24)
**	    Don't flush fd's on any random rename failure.  Do a better job
**	    of re-verifying the fd and di-io after locking the fd when we're
**	    searching for a file-open conflict.
**	30-Sep-2005 (jenjo02)
**	    htb_fd_list_mutex, fd_mutex are now CS_SYNCH objects.
**	15-Nov-2010 (kschendel) SIR 124685
**	    Delete unused variables.
*/
STATUS
DIrename(
    DI_IO	   *di_io_unused,
    char           *path,
    u_i4          pathlength,
    char           *oldfilename,
    u_i4          oldlength,
    char           *newfilename,
    u_i4          newlength,
    CL_ERR_DESC     *err_code)
{
    char    oldfile[DI_FULL_PATH_MAX];
    char    newfile[DI_FULL_PATH_MAX];
    STATUS  ret_val;
    CL_ERR_DESC	    local_err;

    /* unix variables */
    int	    os_ret;

	/* retry variables */
	i4 retry = 0, failflag = 0;

    /* default returns */
    ret_val = OK;

    if ((pathlength > DI_PATH_MAX)	|| 
	(pathlength == 0)		||
	(oldlength > DI_FILENAME_MAX)	|| 
	(oldlength == 0)		|| 
	(newlength > DI_FILENAME_MAX)	||
	(newlength == 0))
	return (DI_BADPARAM);		

    /* get null terminated path and filename for old file */

    MEcopy((PTR) path, pathlength, (PTR) oldfile);
    oldfile[pathlength] = '/';
    MEcopy((PTR) oldfilename, oldlength, (PTR) &oldfile[pathlength + 1]);
    oldfile[pathlength + oldlength + 1] = '\0';

    /* get null terminated path and filename for new file */

    MEcopy((PTR) path, pathlength, (PTR) newfile);
    newfile[pathlength] = '/';
    MEcopy((PTR) newfilename, newlength, (PTR) &newfile[pathlength + 1]);
    newfile[pathlength + newlength + 1] = '\0';

	do
	{
		if (retry > 0 && failflag++ == 0)
			TRdisplay("%@ DIrename: retry on %t/%t\n",
					  pathlength, path, oldlength, oldfilename);
		retry = 0;
		CL_CLEAR_ERR( err_code );
#ifdef	xCL_035_RENAME_EXISTS
		/* Now rename the file. */    
		while  ((os_ret = rename(oldfile, newfile)) == -1) 
		{
			SETCLERR(err_code, 0, ER_rename);
			if (err_code->errnum != EINTR)
				break;
		}
#else /* xCL_035_RENAME_EXISTS */
		/* Now rename the file. */    
		while ((os_ret = link(oldfile, newfile)) == -1) 
		{
			SETCLERR(err_code, 0, ER_rename);
			if (err_code->errnum != EINTR)
				break;
		}
		if (os_ret != -1)
		{
			while ((os_ret = unlink(oldfile)) == -1) 
			{
				SETCLERR(err_code, 0, ER_rename);
				if (err_code->errnum != EINTR)
					break;
			}
		}
#endif /* xCL_035_RENAME_EXISTS */

		/* if the rename failed, see if we're holding the file open */
		if (os_ret == -1 && htb_initialized)
		{
			QUEUE *p, *q, *next;
			CS_synch_lock(&htb->htb_fd_list_mutex);
			q = &htb->htb_fd_list;
			for (p = q->q_prev; p != q; p = next)
			{
				DI_FILE_DESC *di_file = (DI_FILE_DESC *) p;
				DI_IO *di_io = (DI_IO *) di_file->fd_uniq.uniq_di_file;
				next = p->q_prev;
				if (di_io != NULL && di_file->fd_state == FD_IN_USE
				  && di_io->io_type == DI_IO_ASCII_ID
				  && pathlength == di_io->io_l_pathname
				  && oldlength == di_io->io_l_filename)
				{
					CS_synch_unlock(&htb->htb_fd_list_mutex);
					CS_synch_lock(&di_file->fd_mutex);
					/* Make sure it's still the right
					** DI_IO and compare the filename */
					if ((DI_IO *) di_file->fd_uniq.uniq_di_file == di_io &&
					      di_file->fd_state == FD_IN_USE &&
					      di_file->fd_unix_fd != -1 &&
					      !(di_io->io_open_flags & DI_O_NOT_LRU_MASK) &&
						di_io->io_type == DI_IO_ASCII_ID &&
						pathlength == di_io->io_l_pathname &&
						MEcmp((PTR) di_io->io_pathname, path, pathlength) == 0
						&& oldlength == di_io->io_l_filename &&
						MEcmp((PTR) di_io->io_filename, oldfilename,
							  oldlength) == 0)
					{
						/* have a match, print out stats */
						/* try to close it */
						CS_synch_unlock(&di_file->fd_mutex);
						DIlru_close(di_io, &local_err);
						retry++;
					}
					else
						CS_synch_unlock(&di_file->fd_mutex);
					CS_synch_lock(&htb->htb_fd_list_mutex);
				}
			}
			CS_synch_unlock(&htb->htb_fd_list_mutex);
		}
	} while (retry);

    if (os_ret == -1)
    {
	if ((err_code->errnum == ENOTDIR) || (err_code->errnum == EACCES))
	{
	    ret_val = DI_DIRNOTFOUND;
	}
	else
	{
	    ret_val = DI_BADRNAME;
	}
    }
    else
	CL_CLEAR_ERR( err_code );

    return(ret_val);
}
Example #13
/*{
** Name:	IIME_ftFreeTag	- Free all allocated memory for a tag.
**
** Description:
**	This routine is called by MEtfree to free all the allocated 
**	memory for a tag.
**
**	It works by finding the METAGNODE for the tag in the hash table
**	and then traversing the QUEUE of allocated blocks freeing
**	each block.
**
** Inputs:
**	tag		The tag whose memory is to be freed.
**
** Outputs:
**	Returns:
**		OK if all the allocated memory for the tag was freed.
**		ME_NO_TFREE if the tag does not have a record in the hash table.
**		other failure status if the nodes can't be freed.
**
** Side Effects:
**	Will return the METAGNODE for the tag to freelist.
**
** History:
**	5-dec-1989 (Joe)
**	    First Written
**      30-May-96 (stial01)
**          New advice ME_TUXEDO_ALLOC should behave like ME_INGRES_ALLOC
**	12-feb-1997 (canor01)
**	    Initialize local MEstatus.
**      27-Jan-1999 (fanra01)
**          Add thread alloc case for tag free.  Otherwise our memory is
**          returned to the system heap causing wonderfully esoteric execution.
*/
STATUS
IIME_ftFreeTag(
	i4	tag )
{
    register METAGNODE	**first;
    STATUS MEstatus = OK;

# ifdef OS_THREADS_USED
    CS_synch_lock( &MEtaglist_mutex );
# endif /* OS_THREADS_USED */
    for (first = &(htab[tag%256]);
	 *first != NULL;
	 first = &((*first)->met_hash))
    {
	if ((*first)->met_tag == tag)
	{
	    register ME_NODE	*this;
	    register ME_NODE	*next;
	    register METAGNODE	*freenode;

	    for (this = (*first)->met_list.MEfirst;
		 this != NULL && this != (ME_NODE *) &((*first)->met_list);)
	    {
		next = this->MEnext;
		if ( MEstatus == OK )
		{
		    i_meactual -= this->MEsize;
		    i_meuser -= this->MEaskedfor;
		    (void)QUremove( (QUEUE *) this );
		    if( (MEadvice == ME_INGRES_ALLOC )
			|| (MEadvice == ME_INGRES_THREAD_ALLOC)
			|| (MEadvice == ME_TUXEDO_ALLOC) )
		    {
# ifdef OS_THREADS_USED
			CS_synch_lock( &MEfreelist_mutex );
# endif /* OS_THREADS_USED */
			MEstatus = MEfadd(this, TRUE);
# ifdef OS_THREADS_USED
			CS_synch_unlock( &MEfreelist_mutex );
# endif /* OS_THREADS_USED */
		    }
		    else
			free( (char *)this );
		}
		if (MEstatus == OK)
		    this = next;
		else
		    break;
	    }
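	    /*
	    ** Unlink this METAGNODE from its hash chain and push it
	    ** onto the METAGNODE freelist for reuse by IIME_atAddTag.
	    */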
	    freenode =  *first;
	    *first = freenode->met_hash;
	    freenode->met_hash = freelist;
	    freelist = freenode;
# ifdef OS_THREADS_USED
	    CS_synch_unlock( &MEtaglist_mutex );
# endif /* OS_THREADS_USED */
	    return MEstatus;
	}
    }
# ifdef OS_THREADS_USED
    CS_synch_unlock( &MEtaglist_mutex );
# endif /* OS_THREADS_USED */
    return ME_NO_TFREE;
}
Example #14
/*{
** Name: do_writev -  Perform writev() call.
**
** Description:
**	This function collects the queued write requests, 
**	chooses the optimum function to perform the write(s),
**	and invokes the completion handler for each request.
**
** Inputs:
**	DI_TGIO * tgio  - Control block for current thread. 
**      
** Outputs:
**    None.
**
** Returns:
**    OK
**    FAIL - One of more of the write requests failed.
**
**    Exceptions:
**        none
**
** Side Effects:
**	  The completion handler for each I/O request is invoked.
**
** History:
**	19-May-1999 (jenjo02)
**	    Created.
**	09-Jul-1999 (jenjo02)
**	    If queued list is ordered, skip the quicksort.
**	09-Apr-2001 (jenjo02)
**	    Increment first gio's io_count stat for each physical I/O,
**	    gw_pages for multi-page writes.
**	05-Nov-2002 (jenjo02)
**	    Cleaned up use of io_sem: only write() and writev() need
**	    the mutex to protect the (separate) seek. pwrite(64)
**	    atomically seeks and does not need the mutex.
**	25-Aug-2005 (schka24)
**	    Don't bother with IO timing, too slow on some platforms (Linux)
**	    and the results aren't all that interesting.
**	14-Oct-2005 (jenjo02)
**	    Chris's file descriptor properties now cached in io_fprop
**	    (file properties) and established on the first open, 
**	    not every open.
**	24-Jan-2006 (jenjo02)
**	    Break on change in file ("f"), then lru-open to get an
**	    FD, do the write(v), and lru_release the FD. This keeps
**	    gather_write from hogging FDs while waiting for the
**	    signal to actually do something.
**	15-Mar-2006 (jenjo02)
**	    f->io_sem not needed if running with thread affinity,
**	    the fd is not shared by multiple threads.
*/
static STATUS
do_writev( DI_TGIO * tgio, CL_ERR_DESC *err_code )
{
    CL_ERR_DESC lerr_code;
    STATUS 	big_status = OK, small_status = OK;
    i4 		i, j, k;
    DI_GIO 	*gio, *first_gio;
    DI_IO	*f;
    DI_OP	*diop;
    OFFSET_TYPE next_offset, lseek_offset;
    i4		bytes_to_write, bytes_written;
    i4		saved_state;

    i4		num_writev = 0, num_write = 0;
    i4		num_writev_gio = 0, num_write_gio = 0;

#if defined(sgi_us5)
    if( iov_max == 0 )
    {
	iov_max = sysconf(_SC_IOV_MAX);
	if( iov_max <= 0 )
        {
	    iov_max = 16;	/* arbitrary minimum value */
#ifdef DEBUG_THIS_PUPPY
	    TRdisplay("%@ %x do_writev: %t ERROR sysconf failed with %d\n",
		    tgio->tgio_scb->cs_self, 
		    f->io_l_filename, f->io_filename,
		    iov_max);
#endif /* DEBUG_THIS_PUPPY */
        }
        else if( iov_max > 2048 )
        {
	    iov_max = 2048;	/* arbitrary maximum value */
        }
    }
#else
    iov_max = IOV_MAX;
#endif

    /* If unordered, sort the queued list into file,offset order */
    if ( tgio->tgio_state & TGIO_UNORDERED )
    {
	gio_sort( tgio->tgio_queue, 0, tgio->tgio_queued-1 );
	tgio->tgio_state &= ~(TGIO_UNORDERED);
    }


    /*
    ** Method:
    **
    **	Collect requests by file/offset into an iovec until
    **	the next file offset becomes discontiguous. Additionally, if
**	the buffer addresses are contiguous, coalesce those requests.
    **
    **  Up to IOV_MAX iovecs can be written by a single writev().
    **
    **	If but a single iovec results, the probably-more-efficient
    **	function (p)write() is called instead of writev().
    */
    k = 0;

    while ( (j = k) < tgio->tgio_queued )
    {
	#if defined(sgi_us5)
		struct iovec iov[iov_max];
	#else
		struct iovec iov[IOV_MAX];
	#endif

	/*
	** "i" indexes the current iovec element
	** "j" is the first GIO used in this iovec array
	** "k" is the current GIO in the queue
	*/
	i = 0;
	
	gio = first_gio = tgio->tgio_queue[j];
	f = gio->gio_f;
	lseek_offset = next_offset = gio->gio_offset;
	small_status = OK;

	iov[0].iov_base = gio->gio_buf;
	iov[0].iov_len  = 0;

	do
	{
	    /* If this buffer is contiguous with previous, coalesce it */
	    if ( (char *)iov[i].iov_base + iov[i].iov_len == gio->gio_buf )
	    {
		iov[i].iov_len += gio->gio_len;
	    }
	    /* Initialize next iovec if any remain */
	    else if ( i < iov_max - 1 )
	    {
		i++;
		iov[i].iov_base = gio->gio_buf;
		iov[i].iov_len  = gio->gio_len;
	    }
	    else
		break;

	    next_offset += gio->gio_len;

	} while ( ++k < tgio->tgio_queued
		    && (gio = tgio->tgio_queue[k])
		    && gio->gio_f == f
		    && gio->gio_offset == next_offset );

	/* "k" indexes the next, unprocessed GIO */

	bytes_to_write = next_offset - lseek_offset;
	
	saved_state = tgio->tgio_scb->cs_state;
	tgio->tgio_scb->cs_state = CS_EVENT_WAIT;
	tgio->tgio_scb->cs_memory = CS_DIOW_MASK;

	/* Accumulate multi-page write stats */
	if ( k - j > 1 )
	{
	    /*
	    ** Using the first gio, count
	    ** the number of multi-pages written (k-j)
	    ** and a single I/O.
	    */
	    if ( first_gio->gio_io_count )
		++*first_gio->gio_io_count;
	    if ( first_gio->gio_gw_pages )
		*first_gio->gio_gw_pages += k - j;
	}

	/* Count a single I/O write for server */
	tgio->tgio_scb->cs_diow++;
	Cs_srv_block.cs_wtstatistics.cs_diow_done++;

	/* Count a single I/O wait for server */
	Cs_srv_block.cs_wtstatistics.cs_diow_waits++;

	/* Accumulate number of KB written by this I/O */
	Cs_srv_block.cs_wtstatistics.cs_diow_kbytes
	    += bytes_to_write / 1024;
	
	/* Now get an FD to do the write(v) */
	diop = (DI_OP*)&first_gio->gio_diop;
	if ( big_status = DIlru_open(f, FALSE, diop, err_code) )
	    return(big_status);

#ifdef DEBUG_THIS_PUPPY
	{
	    i4	x;
	    i8	offset = lseek_offset;

	    TRdisplay("%@ %p do_writev: %~t doing %d todo %d fd %d lseek from %ld\n",
		    tgio->tgio_scb->cs_self, 
		    f->io_l_filename, f->io_filename,
		    i+1, tgio->tgio_queued - j,
		    diop->di_fd, offset);
	    for (x = 0; x <= i; x++)
	    {
	TRdisplay("%@ do_writev: iovec[%d] base %p bytes %d (page %d for %d)\n",
			x,
			iov[x].iov_base, iov[x].iov_len,
			(i4)(offset/f->io_bytes_per_page),
			iov[x].iov_len/f->io_bytes_per_page);
		offset += iov[x].iov_len;
	    }
	}
#endif /* DEBUG_THIS_PUPPY */

	/* If more than one iovec, seek and use writev */
	if ( i++ )
	{
	    /* writev needs seek mutex protection */
	    if ( !Di_thread_affinity )
		CS_synch_lock( &f->io_sem );
	    
	    num_writev++;
	    num_writev_gio += k - j;

	    bytes_written = 
		IIdio_writev( diop->di_fd, 
				(char *)iov,
				i,
				lseek_offset, 0, 
				f->io_fprop,
				err_code);
	    if ( !Di_thread_affinity )
		CS_synch_unlock( &f->io_sem );
	}
	else
	{
	    num_write++;
	    num_write_gio += k - j;

# if  !defined(xCL_NO_ATOMIC_READ_WRITE_IO)
	    /* pwrite(64) needs no seek mutex protection */
	    bytes_written =
#ifdef LARGEFILE64
	     pwrite64( diop->di_fd, 
			iov[0].iov_base, 
			bytes_to_write, 
			lseek_offset );
#else /*  LARGEFILE64 */
	     pwrite( diop->di_fd,
			iov[0].iov_base, 
			bytes_to_write, 
			lseek_offset );
#endif /* LARGEFILE64 */
	    if (bytes_written != bytes_to_write)
		SETCLERR(err_code, 0, ER_write);
# else /* !xCL_NO_ATOMIC_READ_WRITE_IO */
	    /* write() needs seek mutex protection */
	    if ( !Di_thread_affinity )
		CS_synch_lock( &f->io_sem );

	    bytes_written =
	     IIdio_write( diop->di_fd,
			    iov[0].iov_base, 
			    bytes_to_write, 
			    lseek_offset, 0, 
			    f->io_fprop,
			    err_code );
	    if ( !Di_thread_affinity )
		CS_synch_unlock( &f->io_sem );

# endif /* !xCL_NO_ATOMIC_READ_WRITE_IO */
	}

	/* Release the FD */
	(VOID)DIlru_release( diop, &lerr_code );
	    
	tgio->tgio_scb->cs_memory &= ~(CS_DIOW_MASK);
	tgio->tgio_scb->cs_state = saved_state;

	if (bytes_written != bytes_to_write)
	{
	    switch ( err_code->errnum )
	    {
		case EFBIG:
		    small_status = DI_BADEXTEND;
		    break;
		case ENOSPC:
		    small_status = DI_EXCEED_LIMIT;
		    break;
#ifdef EDQUOTA
		case EDQUOT:
		    small_status = DI_EXCEED_LIMIT;
		    break;
#endif
		default:
		    if (err_code->errnum == 0)
			small_status = DI_ENDFILE;
		    else
			small_status = DI_BADWRITE;
		    break;
	    }
	    /* Preserve the worst status from all the writes */
	    big_status = (big_status) ? big_status : small_status;
	}

	/* Invoke completion handler for each GIO written */
	do 
	{
	    gio = tgio->tgio_queue[j];
	    (gio->gio_evcomp)( gio->gio_data, small_status, err_code );

	} while ( ++j < k );
    }

#ifdef DEBUG_THIS_PUPPY
    TRdisplay("%@ %p do_writev: %d write requests completed using %d(%d) writev, %d(%d) write\n",
		tgio->tgio_scb->cs_self, 
		tgio->tgio_queued, 
		num_writev, num_writev_gio,
		num_write, num_write_gio);
#endif /* DEBUG_THIS_PUPPY */

    /* Clear the queued count(s) */
    tgio->tgio_queued = *tgio->tgio_uqueued = 0;

    return( big_status );
}
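A minimal sketch (not from the original source; assumes <sys/uio.h>) of the
coalescing rule from the Method comment above: a request whose buffer
starts exactly where the current iovec ends extends that iovec in place;
otherwise it opens a new element until the array is full.  Error and
array-full handling are elided.

static int
example_coalesce(struct iovec *iov, int i, char *buf, int len, int maxv)
{
    if ((char *)iov[i].iov_base + iov[i].iov_len == buf)
	iov[i].iov_len += len;		/* contiguous: extend in place */
    else if (i < maxv - 1)
    {
	i++;				/* start the next iovec element */
	iov[i].iov_base = buf;
	iov[i].iov_len = len;
    }
    return (i);				/* index of the element now in use */
}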
Example #15
/*{
** Name: gather_list -  Gather write requests together.
**
** Description:
**	This routine batches up write requests for later submission via     
**	the writev() routine.
**
** Inputs:
**	DI_GIO * gio	  - gio Control block for write operation.
**      
** Outputs:
**      err_code             Pointer to a variable used
**                           to return operating system 
**                           errors.
**    Returns:
**          OK
**          DI_MEREQMEN_ERR       - MEreqmem failed.
**    Exceptions:
**        none
**
** Side Effects:
**        Will call do_writev if number of write requests has reached
**        GIO_MAX_QUEUED.
**
** History:
**	19-May-1999 (jenjo02)
**	    Created.
**	09-Jul-1999 (jenjo02)
**	    Watch I/O queue as it's being constructed. If pre-ordered,
**	    skip the quicksort.
**	25-Aug-2005 (schka24)
**	    Don't blindly lru-open each request;  instead, see if the
**	    file (DI_IO) is already on the queue, and share its fd with the
**	    queued request.  This is essential when doing fd-per-thread,
**	    as each call to lru-open would allocate a new fd, thus negating
**	    the ability to do a writev!  When not doing fd-per-thread,
**	    this change is effectively a no-op.
**	    Also, return without queueing if queue-flush fails.
**	25-Jan-2006 (jenjo02)
**	    Defer lru-open until do_writev to prevent hogging FDs
**	    while waiting for futhur writes.
*/
static STATUS
gather_list( DI_GIO *gio, i4 *uqueued, CL_ERR_DESC *err_code)
{
    DI_GIO	*qgio;			/* a GIO on the queue already */
    DI_TGIO     *tgio;
    STATUS 	status = OK;
    CS_SCB	*scb;

    CSget_scb(&scb);

    if ( (tgio = (DI_TGIO *)scb->cs_ditgiop) == (DI_TGIO *)NULL ||
	  tgio->tgio_scb != scb )
    {
	/*
	** No TGIO for this thread, so reuse an inactive one
	** or allocate a new one.
	*/
	CS_synch_lock( &GWthreadsSem );

	for ( tgio = GWthreads; 
	      tgio && tgio->tgio_state != TGIO_INACTIVE;
	      tgio = tgio->tgio_next );
    
	if (tgio == NULL)
	{
	    tgio = (DI_TGIO *)MEreqmem(0,
		    sizeof( DI_TGIO ),
		      TRUE, NULL);
	    if (tgio == NULL)
	    {
		CS_synch_unlock( &GWthreadsSem );
		return( DI_MEREQMEM_ERR);
	    }
	    tgio->tgio_next = GWthreads;
	    GWthreads = tgio;
	}

	scb->cs_ditgiop = (PTR)tgio;
	tgio->tgio_scb = scb;
	tgio->tgio_uqueued = uqueued;
	*tgio->tgio_uqueued = tgio->tgio_queued = 0;

	tgio->tgio_state = TGIO_ACTIVE;

	CS_synch_unlock( &GWthreadsSem );
    }

    /* If the queue is full, force the writes.
    ** If this fails, we get blamed, but someone has to report it.
    */
    if ( tgio->tgio_queued == GIO_MAX_QUEUED )
    {
	status = do_writev( tgio, err_code );
	if (status != OK)
	    return (status);
    }

    /*
    ** Check for out of sequence GIO.
    ** If all I/O's are presented in file/offset order,
    ** a sort won't be needed.
    */
    if ( (tgio->tgio_state & TGIO_UNORDERED) == 0 && tgio->tgio_queued )
    {
	qgio = tgio->tgio_queue[tgio->tgio_queued - 1];
	
	if ( gio->gio_f < qgio->gio_f ||
	    (gio->gio_f == qgio->gio_f &&
	     gio->gio_offset < qgio->gio_offset) )
	{
	    tgio->tgio_state |= TGIO_UNORDERED;
	}
    }

    /* Add this request to the queue */
    tgio->tgio_queue[tgio->tgio_queued++] = gio;
    
    /* Update caller's queued count */
    *tgio->tgio_uqueued = tgio->tgio_queued;

    return( status );
}
Example #16
/*{
** Name: DI_inproc_read -   read page(s) from a file on disk.
**
** Description:
**	This routine was created to make DIread more readable once
**	error checking had been added. See DIread for comments.
**
** Inputs:
**      f                    Pointer to the DI file
**                           context needed to do I/O.
**	diop		     Pointer to dilru file context.
**      buf                  Pointer to page(s) to read.
**      page                 Value indicating page(s) to read.
**	num_of_pages	     number of pages to read
**      
** Outputs:
**      err_code             Pointer to a variable used
**                           to return operating system 
**                           errors.
**    Returns:
**          OK
**	    other errors.
**    Exceptions:
**        none
**
** Side Effects:
**        none
**
** History:
**    30-nov-1992 (rmuth)
**	    Created.
**    03-jun-1996 (canor01)
**	    Note in the scb that this is a DI wait.
**    14-July-1997 (schte01)
**      For those platforms that do direct i/o (where the
**      seek and the read are separate functions), do not release and
**      reaquire the semaphore on the DI_IO block. This will protect
**      against i/o being done by a different thread in between the 
**      lseek and the read.
**    14-Aug-1997 (schte01)    
**      Add xCL_DIRECT_IO as a condition to the 14-July-1997 change
**      instead of the test for !xCL_ASYNCH_IO.
**	22-Dec-1998 (jenjo02)
**	    If DI_FD_PER_THREAD is defined, call IIdio_read() instead of
**	    pread().
**  01-Apr-2004 (fanch01)
**      Add O_DIRECT support on Linux depending on the filesystem
**      properties, pagesize.  Fixups for misaligned buffers on read()
**      and write() operations.
**    13-apr-04 (toumi01)
**	Move stack variable declaration to support "standard" C compilers.
**	29-Jan-2005 (schka24)
**	    Ditch attempt to gather dior timing stats, not useful in
**	    the real world and generates excess syscalls on some platforms.
**	15-Mar-2006 (jenjo02)
**	    io_sem is not needed with thread affinity.
**	6-Nov-2009 (kschendel) SIR 122757
**	    Remove copy to aligned buffer, caller is supposed to do it.
*/
static STATUS
DI_inproc_read(
    DI_IO	*f,
    DI_OP	*diop,
    char        *buf,
    i4	page,
    i4	num_of_pages,
    i4	*n,
    CL_ERR_DESC *err_code )
{
    STATUS	status = OK;
    CS_SCB	*scb;
    i4		saved_state;

    /* unix variables */
    int		unix_fd;
    int		bytes_read = 0;
    int		bytes_to_read;
    OFFSET_TYPE	lseek_offset;

    /*
    ** Seek to place to read
    */
    lseek_offset  = (OFFSET_TYPE)f->io_bytes_per_page * (OFFSET_TYPE)page;

    bytes_to_read = f->io_bytes_per_page * num_of_pages;
    unix_fd = diop->di_fd;

    if (Di_backend)
    {
	CSget_scb(&scb);
	if ( scb )
	{
	    saved_state = scb->cs_state;
	    scb->cs_state = CS_EVENT_WAIT;

	    if (f->io_open_flags & DI_O_LOG_FILE_MASK)
	    {
		scb->cs_memory = CS_LIOR_MASK;
		scb->cs_lior++;
		Cs_srv_block.cs_wtstatistics.cs_lior_done++;
		Cs_srv_block.cs_wtstatistics.cs_lior_waits++;
		Cs_srv_block.cs_wtstatistics.cs_lior_kbytes
		    += bytes_to_read / 1024;
	    }
	    else
	    {
		scb->cs_memory = CS_DIOR_MASK;
		scb->cs_dior++;
		Cs_srv_block.cs_wtstatistics.cs_dior_done++;
		Cs_srv_block.cs_wtstatistics.cs_dior_waits++;
		Cs_srv_block.cs_wtstatistics.cs_dior_kbytes
		    += bytes_to_read / 1024;
	    }
	}
    }

# if defined( OS_THREADS_USED ) && (defined (xCL_NO_ATOMIC_READ_WRITE_IO))
    if ( !Di_thread_affinity && (f->io_fprop & FPROP_PRIVATE) == 0)
    {
	CS_synch_lock( &f->io_sem );
    }
# endif /* OS_THREADS_USED && xCL_NO_ATOMIC_READ_WRITE_IO */

# if defined( OS_THREADS_USED ) && (! defined (xCL_NO_ATOMIC_READ_WRITE_IO))
#ifdef LARGEFILE64
    bytes_read = pread64( unix_fd, buf, bytes_to_read, lseek_offset );
#else /* LARGEFILE64 */
    bytes_read = pread( unix_fd, buf, bytes_to_read, lseek_offset );
#endif /* LARGEFILE64 */

    if ( bytes_read != bytes_to_read )
    {
	SETCLERR(err_code, 0, ER_read);
# else /* OS_THREADS_USED */

    bytes_read = IIdio_read( unix_fd, buf, bytes_to_read,
 	    			  lseek_offset, 0, 
				  f->io_fprop,
				  err_code );

    if ( bytes_read != bytes_to_read )
    {
# endif /* OS_THREADS_USED && ! xCL_NO_ATOMIC_READ_WRITE_IO */

	if (bytes_read == -1)
	{
	    status = DI_BADREAD;
	}
	else
	{
	    status = DI_ENDFILE;
	}
    }
# if defined( OS_THREADS_USED ) && (defined (xCL_NO_ATOMIC_READ_WRITE_IO) )
    if ( !Di_thread_affinity && (f->io_fprop & FPROP_PRIVATE) == 0)
	CS_synch_unlock( &f->io_sem );
# endif /* OS_THREADS_USED && xCL_NO_ATOMIC_READ_WRITE_IO */

    if (Di_backend)
    {
	if ( scb )
	{
	    scb->cs_memory &= ~(CS_DIOR_MASK | CS_LIOR_MASK);
	    scb->cs_state = saved_state;
	}
    }

    if ( bytes_read > 0 )
	*n = bytes_read / f->io_bytes_per_page;

    return(status);
}

# if defined(OS_THREADS_USED) || defined(xCL_ASYNC_IO)
/*{
** Name: DI_async_read -   read page(s) asynchronously from a file on disk.
**
** Description:
**	This routine was created to interface with async io routines
**	where such routines are available.
**
** Inputs:
**      f                    Pointer to the DI file
**                           context needed to do I/O.
**	diop		     Pointer to dilru file context.
**      buf                  Pointer to page(s) to read.
**      page                 Value indicating page(s) to read.
**	num_of_pages	     number of pages to read
**      
** Outputs:
**      err_code             Pointer to a variable used
**                           to return operating system 
**                           errors.
**    Returns:
**          OK
**	    other errors.
**    Exceptions:
**        none
**
** Side Effects:
**        none
**
** History:
**    20-jun-1995 (amo ICL)
**	    Created.
*/
static STATUS
DI_async_read(
    DI_IO	*f,
    DI_OP	*diop,
    char        *buf,
    i4	page,
    i4	num_of_pages,
    i4	*n,
    CL_ERR_DESC *err_code )
{
    STATUS	status = OK;
    CS_SCB	*scb;
    int		saved_state;
    i4 		start_time;

    /* unix variables */
    int		bytes_read = 0;
    int		bytes_to_read;
    OFFSET_TYPE	lseek_offset;

    /*
    ** Seek to place to read
    */
    lseek_offset  = (OFFSET_TYPE)(f->io_bytes_per_page) * (OFFSET_TYPE)(page);
    bytes_to_read = f->io_bytes_per_page * num_of_pages;

    CSget_scb(&scb);
    if ( scb )
    {
	saved_state = scb->cs_state;
	scb->cs_state = CS_EVENT_WAIT;

	if (f->io_open_flags & DI_O_LOG_FILE_MASK)
	{
	    scb->cs_memory = CS_LIOR_MASK;
	    scb->cs_lior++;
	    Cs_srv_block.cs_wtstatistics.cs_lior_done++;
	    Cs_srv_block.cs_wtstatistics.cs_lior_waits++;
	    Cs_srv_block.cs_wtstatistics.cs_lior_kbytes
		+= bytes_to_read / 1024;
	}
	else
	{
	    scb->cs_memory = CS_DIOR_MASK;
	    scb->cs_dior++;
	    Cs_srv_block.cs_wtstatistics.cs_dior_done++;
	    Cs_srv_block.cs_wtstatistics.cs_dior_waits++;
	    Cs_srv_block.cs_wtstatistics.cs_dior_kbytes
		+= bytes_to_read / 1024;
	}
	/* Clock the read */
	start_time = CS_checktime();
    }

# if defined(OS_THREADS_USED) && !defined(xCL_ASYNC_IO)
    bytes_read = DI_thread_rw( O_RDONLY, diop, buf, bytes_to_read,
 	    			      lseek_offset, NULL, err_code);
# else /* OS_THREADS_USED */
    bytes_read = DI_aio_rw( O_RDONLY, diop, buf, bytes_to_read,
 	    			  lseek_offset, NULL, err_code);
# endif /* OS_THREADS_USED */
    if ( bytes_read != bytes_to_read )
    {
	SETCLERR(err_code, 0, ER_read);

	if (bytes_read == -1)
	{
	    status = DI_BADREAD;
	}
	else
	{
	    status = DI_ENDFILE;
	}
    }

    if ( scb )
    {
	scb->cs_memory &= ~(CS_DIOR_MASK | CS_LIOR_MASK);
	scb->cs_state = saved_state;
	if (f->io_open_flags & DI_O_LOG_FILE_MASK)
	    Cs_srv_block.cs_wtstatistics.cs_lior_time 
		+= CS_checktime() - start_time;
	else
	    Cs_srv_block.cs_wtstatistics.cs_dior_time
		+= CS_checktime() - start_time;
    }

    if ( bytes_read > 0 )
	*n = bytes_read / f->io_bytes_per_page;

    return(status);
}