Пример #1
0
/*{
** Name: DIwrite -  Writes  page(s) of a file to disk.
**
** Description:
**      The DIwrite routine is used to write pages of a direct access 
**      file.  This routine should be flexible enough to write multiple
**      contiguous pages.  The number of pages to write is indicated
**      as an input parameter.  This value is updated to indicate the
**      actual number of pages written.  A synchronous write is preferred
**      but not required.
**   
**	The buffer address from which the data is to be written is examined
**	to see if it is in shared memory. If so, we then instruct the slave
**	to write the page(s) directly from the target buffer. Otherwise, we
**	copy the page(s) from the buffer into the server segment, and then
**	instruct the slave to write the page(s) from the server segment.
**
** Inputs:
**      f                    Pointer to the DI file
**                           context needed to do I/O.
**      n                    Pointer to value indicating number of pages to 
**			     write.
**      page                 Value indicating page(s) to write.
**      buf                  Pointer to page(s) to write.
**      
** Outputs:
**      f                    Updates the file control block.
**      n                    Pointer to value indicating number of pages 
**			     written.
**      err_code             Pointer to a variable used
**                           to return operating system 
**                           errors.
**    Returns:
**          OK
**          DI_BADFILE          Bad file context.
**          DI_BADWRITE         Error writing.
**          DI_BADPARAM         Parameter(s) in error.
**          DI_ENDFILE          Write past end of file.
**	    DI_BADLRU_RELEASE	Error releasing open file.
**    Exceptions:
**        none
**
** Side Effects:
**        none
**
** History:
**    26-mar-87 (mmm)    
**          Created new for 6.0.
**    06-feb-89 (mikem)
**	    Return CL_ERR_DESC from DIlru_open().
**    23-mar-89 (mikem)
**	    update io_system_eof when necessary (bug 4854).
**    10-jul-89 (rogerk & mikem)
**	    When asked to write a page that is past our cached EOF marker, call
**	    DIsense to check the actual EOF before signalling an error.  The
**	    page may have been allocated by a different server and our copy of
**	    the EOF has just not been updated yet.
**
**	    This case can also come up in a single server case now, as we 
**	    continue to cache pages in the buffer manager even when the table 
**	    is closed.  This can result in a DIwrite() being performed on a
**	    newly opened file without ever doing a DIread() (where eof info
**	    was previously obtained).
**    10-jul-89 (mikem)
**	    Return DI_EXCEED_LIMIT if out of disk space, rather than
**	    BAD_WRITE.  Also add some debugging code to make it easier to
**	    test that the server handles out of disk space correctly (by
**	    returning out of disk space out of DIwrite based on a global set
**	    by DIalloc every N times called).
**    23-Jan-90 (anton)
**	    Call DI_sense instead of DIsense to prevent multiple DIlru_opens
**	    and use of two CSevcbs.
**	2-Feb-90 (anton)
**	    Don't always copy CL_ERR_DESC
**	6-Feb-90 (jkb)
**	    Change write to IIdio_write which combines the write and lseek
**	    commands and makes direct io available for Sequent
**	5-aug-1991 (bryanp)
**	    Added support for I/O directly from server shared memory,
**	    bypassing the copy through the server segment if possible.
**      03-mar-1992 (jnash)
**          Fix LG slave problem noted when Sun mmap() support
**          introduced, change slave logic to send to the slave the
**          segment id "key" rather than "segid" (segid value not
**          the same in the slave).
**	30-October-1992 (rmuth)
**	    Prototype and make sure we have opened the file before
**	    we close it.
**	30-nov-1992 (rmuth)
**	    - Include <cldio.h>
**	    - DIlru error checking, this was a major restructuring of the
**	      code. No Change in functionality.
**	10-dec-1993 (rmuth)
**	    If fail the past io_allocated_eof test then make sure that we
**	    unset the errno value in CL_ERR_DESC set by SETCLERR. This was
**	    causing confusion as we were logging random errno's to the
**	    errlog.log
**      31-jan-94 (mikem)
**          sir #57671
**          The transfer size of slave I/O is now stored in
**          Cs_srv_block.cs_size_io_buf, rather than a constant
**          DI_FILE_BUF_SIZE.
**	18-apr-1994 (jnash)
**	    fsync project.  Call DIforce() on systems where fsync() exists 
**	    but O_SYNC does not (hopefully never). 
**	20-jun-1995 (amo ICL)
**	    Added call on DI_async_write for async io
**	20-Apr-1998 (merja01)
**		Move "#" to column 1 to correct compile errors on axp_osf.
**	01-oct-1998 (somsa01)
**	    Return DI_NODISKSPACE when we are out of disk space.
**	29-Oct-1998 (schte01)
**		Move "#" to column 1 to correct compile errors on axp_osf.
**	14-Oct-2005 (jenjo02)
**	    Chris's file descriptor properties now cached in io_fprop
**	    (file properties) and established on the first open, 
**	    not every open.
*/
STATUS
DIwrite(
    DI_IO	   *f,
    i4             *n,
    i4        page,
    char           *buf,
    CL_ERR_DESC    *err_code)
{
    /*
    ** io_status    - failure reported by an underlying DI call.
    ** range_status - request rejected locally (e.g. past end of file).
    ** io_status takes precedence over range_status on return.
    */
    STATUS		io_status = OK;
    STATUS		range_status = OK;
    STATUS		release_status;
    i4			pages_requested;
    i4			final_page;
    CL_ERR_DESC		release_err;
    DI_OP		diop;

    /* Start from a clean error descriptor. */
    CL_CLEAR_ERR( err_code );

    /*
    ** Report zero pages written until the write is known to have
    ** succeeded; the caller's count is restored only on success.
    */
    pages_requested = *n;
    *n = 0;
    if (pages_requested <= 0)
	return (DI_BADPARAM);

    final_page = page + pages_requested - 1;

    diop.di_flags = 0;

    /* Reject a file control block that is not a valid DI_IO. */
    if (f->io_type != DI_IO_ASCII_ID)
	return (DI_BADFILE);

    /* The file must have been opened for writing. */
    if (f->io_mode != DI_IO_WRITE)
	return (DI_BADWRITE);

    /* One more write request against this file. */
    f->io_stat.write++;

    /* Obtain an open file descriptor for the file. */
    io_status = DIlru_open(f, FALSE, &diop, err_code);
    if (io_status)
	return (io_status);

    /*
    ** If the request reaches beyond our cached allocated EOF, re-sense
    ** the file before giving up. Per the original note, DI_sense itself
    ** refreshes f->io_alloc_eof, so only the re-test is needed here.
    */
    if (final_page > f->io_alloc_eof)
    {
	i4	sensed_eof;

	io_status = DI_sense(f, &diop, &sensed_eof, err_code);

	if (io_status == OK && final_page > f->io_alloc_eof)
	{
	    range_status = DI_ENDFILE;
	    SETCLERR(err_code, 0, ER_write);

	    /*
	    ** SETCLERR records whatever errno was left over from an
	    ** earlier call; clear it so no unrelated errno is logged.
	    */
	    err_code->errnum = 0;
	}
    }

    if (io_status == OK && range_status == OK)
    {
#ifdef xOUT_OF_DISK_SPACE_TEST
	/* Debug aid: fake a one-shot out-of-disk-space failure. */
	if ((f->io_open_flags & DI_O_NODISKSPACE_DEBUG) &&
	    (final_page > f->io_logical_eof) &&
	    (final_page <= f->io_alloc_eof))
	{
	    f->io_open_flags &= ~DI_O_NODISKSPACE_DEBUG;

	    range_status = DI_NODISKSPACE;
	    SETCLERR(err_code, 0, ER_write);
	    err_code->errnum = ENOSPC;

	    TRdisplay(
		"DIwrite(): Returning false DI_NODISKSPACE, page %d\n", page);
	}
	else
#endif /* xOUT_OF_DISK_SPACE_TEST */
	{
	    /* Dispatch to the slave, async or in-process writer. */
	    if (Di_slave)
	    {
		io_status = DI_slave_write( f, &diop, buf, page,
					    pages_requested, err_code );
	    }
	    else
# if defined(OS_THREADS_USED) || defined(xCL_ASYNC_IO)
	    if (Di_async_io)
	    {
		io_status = DI_async_write( f, &diop, buf, page,
					    pages_requested, err_code );
	    }
	    else
# endif /* OS_THREADS_USED || xCL_ASYNC_IO */
	    {
		io_status = DI_inproc_write( f, &diop, buf, page,
					     pages_requested, err_code );
	    }

# if defined(xCL_010_FSYNC_EXISTS) && !defined(O_SYNC)
	    /*
	    ** fsync() exists but O_SYNC does not: force the pages out.
	    ** Because of lru activity this assumes a force on any file
	    ** descriptor forces pages for all open files; otherwise
	    ** fsync() logic would have to live in the slave.
	    */
	    if (io_status == OK && range_status == OK)
		io_status = DIforce( f, err_code );
# endif

	    /* Everything worked: report the full page count. */
	    if (io_status == OK && range_status == OK)
		*n = pages_requested;
	}
    }

    /* Always hand the descriptor back; its error detail is discarded. */
    release_status = DIlru_release(&diop, &release_err);

    if (io_status)
	return (io_status);
    if (range_status)
	return (range_status);

    return (release_status);
}
Пример #2
0
/*{
** Name: DIgalloc - Allocates N pages to a direct access file.
**
** Description:
**	The DIgalloc routine is used to add pages to a direct access
**	file, the disc space for these pages is guaranteed to exist
**	once the routine returns. The contents of the pages allocated
**	are undefined until a DIwrite to the page has happened.
**
**      This routine can add more than one page at a time by accepting 
**	a count of the number of pages to add.
**   
** Inputs:
**      f                Pointer to the DI file
**                       context needed to do I/O.
**      n                The number of pages to allocate.
**
** Outputs:
**      page             Pointer to variable used to 
**                       return the page number of the
**                       first page allocated.
**      err_code         Pointer to a variable used
**                       to return operating system 
**                       errors.
**    Returns:
**        OK
**        DI_BADEXTEND      	Can't allocate disk space
**        DI_BADFILE        	Bad file context.
**        DI_EXCEED_LIMIT   	Too many open files.
**	  DI_BADLRU_RELEASE	Problem releasing open file.
**
**        If running with slaves  :
**	  DI_GENERAL_ERR	More info in the err_code.intern field.
**	  
**	  If running without slaves :
**	  DI_BADINFO	    	Error finding current end-of-file.
**				
**    Exceptions:
**        none
**
** Side Effects:
**        none
**
** History:
**    30-October-1992 (rmuth)
**	Created.
**    30-nov-1992
**	    Call IIdio_get_file_eof instead of lseek as this can
**	    deal with both raw and ordinary files. Change some
**	    types accordingly.
**      10-mar-1993 (mikem)
**          Changed the type of the first parameter to DI_send_ev_to_slave() and
**          the 2nd parameter to DI_slave_send(), so that DI_send_ev_to_slave()
**          could access the slave control block's status.
**          This routine will now initialize the status to DI_INPROGRESS, before
**          making the request and the slave will change the status once the
**          operation is complete.
**	30-feb-1993 (rmuth)
**	    Use the global DIzero_buffer as opposed to the local
**	    zero_alloc.
**	23-jul-1996 (canor01)
**	    Semaphore protect the lseek/write combination when used with
**	    operating-system threads.
**	06-Jan-2005 (jenjo02)
**	    Fix inproc code to use proper version of write/pwrite/pwrite64
**	    instead of always IIdio_write. View bytes-to-be-written in
**	    page terms rather than bytes, expand DI_ZERO_BUFFER_SIZE.
**	30-Sep-2005 (jenjo02)
**	    Ok to use pwrite if FD_PER_THREAD.
**	14-Oct-2005 (jenjo02)
**	    Chris's file descriptor properties now cached in io_fprop
**	    (file properties) and established on the first open, 
**	    not every open.
**
** Design Details:
**
**	UNIX DESIGN:
**
**	Since the only way that UNIX allows one to allocate space to a
**	file is to write to the file. This routine will write zero
**	filled data to file for the required number of pages.
**
**	If we are running with slaves we will request the slave process
**	to do the actual I/O otherwise it is done inline.
**
**	For more information on DI see di.h
*/
STATUS
DIgalloc(
    DI_IO         	*f,
    i4	        n,
    i4	        *page,
    CL_ERR_DESC		*err_code )
{
    i4		eof_page;
    STATUS	status;
    DI_OP	diop;

    CL_CLEAR_ERR( err_code );

    /* Reject a file control block that is not a valid DI_IO. */
    if (f->io_type != DI_IO_ASCII_ID)
	return (DI_BADFILE);

    /* One more galloc request against this file. */
    f->io_stat.galloc++;

    /* Obtain a file descriptor for this file. */
    status = DIlru_open(f, FALSE, &diop, err_code);
    if (status != OK)
	return (status);

    status = DI_galloc(f, n, &diop, &eof_page, err_code);
    if (status != OK)
    {
	/*
	** The extend failed: still release the descriptor, but keep
	** the caller's err_code describing the allocation failure.
	*/
	CL_ERR_DESC	discard_err;

	(VOID) DIlru_release(&diop, &discard_err);
	return (status);
    }

    /* A release failure is reported and *page is left untouched. */
    status = DIlru_release(&diop, err_code);
    if (status != OK)
	return (status);

    /* Hand back the page number of the first page allocated. */
    *page = eof_page + 1;

#ifdef xDEV_TST
    TRdisplay(
	    "DIgalloc: file: %t/%t, alloc_eof: %d, first: %d, count: %d\n",
	      f->io_l_pathname, f->io_pathname,
	      f->io_l_filename, f->io_filename,
	      f->io_alloc_eof, *page, n);
#endif

    return (status);
}
Пример #3
0
/*{
** Name: DIforce - Forces all pages to the disk.
**
** Description:
**      The DIforce routine is used to force all pages held by an operating 
**	system to disk.  This is not necessary on VMS so this routine will just
**	return.  This routine should wait for completion of all I/O to insure 
**	all pages are correctly on disk.  If an error occurs it should return 
**	DI_BADWRITE.
**   
** Inputs:
**      f                    Pointer to the DI file
**                           context needed to do I/O.
**      
** Outputs:
**      err_code             Pointer to a variable used
**                           to return operating system 
**                           errors.
**    Returns:
**          OK
**          DI_BADFILE       Bad file context.
**          DI_BADWRITE      Error forcing pages to disk.
**    Exceptions:
**        none
**
** Side Effects:
**        none
**
** History:
**    26-mar-87 (mmm)    
**          Created new for 6.0.
**    21-jan-89 (mikem)
**	    DI_SYNC_MASK support (O_SYNC, fsync()).
**	06-feb-89 (mikem)
**	    Added better support for DI CL_ERR_DESC, including initializing to 
**	    zero and passing back DIlru_open() err_code info.  And ifdef'd
**	    variables only used by "FSYNC" case to shut up lint.
**	21-Apr-89 (GordonW)
**	    change "#ifdef FSYNC_EXISTS" -> it's correct xCL_xx define.
**	2-Feb-90 (anton)
**	    Don't always copy CL_ERR_DESC
**	25-sep-1991 (mikem) integrated following change: 27-jul-91 (mikem)
**	    DIopen() now sets the flag DI_O_FSYNC_MASK if fsync() should be
**	    used to sync a DIforce of a file.  Change the code to use 
**	    DI_O_FSYNC_MASK rather than DI_SYNC_MASK.
**	30-nov-1992 (rmuth)
**	    - Prototype.
**	    - Add error checking.
**      10-mar-1993 (mikem)
**          Changed the type of the first parameter to DI_send_ev_to_slave() and
**          the 2nd parameter to DI_slave_send(), so that DI_send_ev_to_slave()
**          could access the slave control block's status.
**          This routine will now initialize the status to DI_INPROGRESS, before
**          making the request and the slave will change the status once the
**          operation is complete.
**	18-apr-1994 (jnash)
**   	    fsync project.  DIforce() now calls fsync unconditionally
**	    (assuming that it exists).
**	14-Oct-2005 (jenjo02)
**	    Chris's file descriptor properties now cached in io_fprop
**	    (file properties) and established on the first open, 
**	    not every open.
**	11-Jan-2008 (kschendel) b122122
**	    Force has long been a no-op on unix, but that's incorrect.
**	    It should fsync or fdatasync the file unless the file is
**	    already open in sync mode.
*/
STATUS
DIforce(
    DI_IO          *f,
    CL_ERR_DESC     *err_code)
{
    STATUS		status = OK;

#ifdef xCL_010_FSYNC_EXISTS
    DI_OP		diop;
#endif /* FSYNC_EXISTS */

    /* Start from a clean error descriptor. */
    CL_CLEAR_ERR( err_code );

    /* Reject a file control block that is not a valid DI_IO. */
    if (f->io_type != DI_IO_ASCII_ID)
	return (DI_BADFILE);

    /* One more force request against this file. */
    f->io_stat.force++;

    /*
    ** A file opened O_SYNC or with direct I/O needs no explicit force;
    ** in every other case the caller wants the file synced.
    */
    if ( (f->io_open_flags & DI_O_OSYNC_MASK) != 0
      || (f->io_fprop & FPROP_DIRECT) != 0)
    {
	return (status);
    }

#if ! defined(xCL_010_FSYNC_EXISTS)
    /*
    ** No fsync() and the file is not O_SYNC: an obsolete or badly
    ** ported platform. Fall back to a global sync.
    */
    sync();

#else

    /* Obtain a file descriptor for this file. */
    status = DIlru_open(f, FALSE, &diop, err_code);
    if ( status != OK )
	return (status);

    status = DI_force( f, &diop, err_code );
    if ( status == OK )
    {
	status = DIlru_release(&diop, err_code);
    }
    else
    {
	/* Keep the force error in err_code; drop any release error. */
	CL_ERR_DESC	discard_err;

	(VOID) DIlru_release(&diop, &discard_err);
    }
#endif /* FSYNC_EXISTS */

    return (status);
}
Пример #4
0
/*{
** Name: do_writev -  Perform writev() call.
**
** Description:
**	This function collects the queued write requests, 
**	chooses the optimum function to perform the write(s),
**	and invokes the completion handler for each request.
**
** Inputs:
**	DI_TGIO * tgio  - Control block for current thread. 
**      
** Outputs:
**    None.
**
** Returns:
**    OK
**    FAIL - One or more of the write requests failed.
**
**    Exceptions:
**        none
**
** Side Effects:
**	  The completion handler for each I/O request is invoked.
**
** History:
**	19-May-1999 (jenjo02)
**	    Created.
**	09-Jul-1999 (jenjo02)
**	    If queued list is ordered, skip the quicksort.
**	09-Apr-2001 (jenjo02)
**	    Increment first gio's io_count stat for each physical I/O,
**	    gw_pages for multi-page writes.
**	05-Nov-2002 (jenjo02)
**	    Cleaned up use of io_sem: only write() and writev() need
**	    the mutex to protect the (separate) seek. pwrite(64)
**	    atomically seeks and does not need the mutex.
**	25-Aug-2005 (schka24)
**	    Don't bother with IO timing, too slow on some platforms (Linux)
**	    and the results aren't all that interesting.
**	14-Oct-2005 (jenjo02)
**	    Chris's file descriptor properties now cached in io_fprop
**	    (file properties) and established on the first open, 
**	    not every open.
**	24-Jan-2006 (jenjo02)
**	    Break on change in file ("f"), then lru-open to get an
**	    FD, do the write(v), and lru_release the FD. This keeps
**	    gather_write from hogging FDs while waiting for the
**	    signal to actually do something.
**	15-Mar-2006 (jenjo02)
**	    f->io_sem not needed if running with thread affinity,
**	    the fd is not shared by multiple threads.
*/
static STATUS
do_writev( DI_TGIO * tgio, CL_ERR_DESC *err_code )
{
    CL_ERR_DESC lerr_code;
    /*
    ** big_status   - first failure seen across all batches (returned).
    ** small_status - outcome of the current batch (given to handlers).
    ** open_status  - result of the per-batch DIlru_open; kept separate
    **		      so a successful open cannot wipe out an earlier
    **		      batch's failure recorded in big_status.
    */
    STATUS 	big_status = OK, small_status = OK, open_status;
    i4 		i, j, k;
    DI_GIO 	*gio, *first_gio;
    DI_IO	*f;
    DI_OP	*diop;
    OFFSET_TYPE next_offset, lseek_offset;
    i4		bytes_to_write, bytes_written;
    i4		saved_state;

    i4		num_writev = 0, num_write = 0;
    i4		num_writev_gio = 0, num_write_gio = 0;

#if defined(sgi_us5)
    if( iov_max == 0 )
    {
	iov_max = sysconf(_SC_IOV_MAX);
	if( iov_max <= 0 )
        {
	    iov_max = 16;	/* arbitrary minimum value */
#ifdef DEBUG_THIS_PUPPY
	    /* No file is selected yet, so "f" must not be referenced here. */
	    TRdisplay("%@ %x do_writev: ERROR sysconf failed with %d\n",
		    tgio->tgio_scb->cs_self, 
		    iov_max);
#endif /* DEBUG_THIS_PUPPY */
        }
        else if( iov_max > 2048 )
        {
	    iov_max = 2048;	/* arbitrary maximum value */
        }
    }
#else
    iov_max = IOV_MAX;
#endif

    /* If unordered, sort the queued list into file,offset order */
    if ( tgio->tgio_state & TGIO_UNORDERED )
    {
	gio_sort( tgio->tgio_queue, 0, tgio->tgio_queued-1 );
	tgio->tgio_state &= ~(TGIO_UNORDERED);
    }


    /*
    ** Method:
    **
    **	Collect requests by file/offset into an iovec until
    **	the next file offset becomes discontiguous. Additionally, if
    **	the buffer addresses are contiguous, coalesce those requests.
    **
    **  Up to IOV_MAX iovecs can be written by a single writev().
    **
    **	If but a single iovec results, the probably-more-efficient
    **	function (p)write() is called instead of writev().
    */
    k = 0;

    while ( (j = k) < tgio->tgio_queued )
    {
	/* Preprocessor directives kept in column 1 for older compilers. */
#if defined(sgi_us5)
	struct iovec iov[iov_max];
#else
	struct iovec iov[IOV_MAX];
#endif

	/*
	** "i" indexes the current iovec element
	** "j" is the first GIO used in this iovec array
	** "k" is the current GIO in the queue
	*/
	i = 0;
	
	gio = first_gio = tgio->tgio_queue[j];
	f = gio->gio_f;
	lseek_offset = next_offset = gio->gio_offset;
	small_status = OK;

	/*
	** Seed iovec 0 as an empty span at the first buffer so that the
	** first pass of the loop below always takes the coalesce branch.
	*/
	iov[0].iov_base = gio->gio_buf;
	iov[0].iov_len  = 0;

	do
	{
	    /* If this buffer is contiguous with previous, coalesce it */
	    if ( (char *)iov[i].iov_base + iov[i].iov_len == gio->gio_buf )
	    {
		iov[i].iov_len += gio->gio_len;
	    }
	    /* Initialize next iovec if any remain */
	    else if ( i < iov_max - 1 )
	    {
		i++;
		iov[i].iov_base = gio->gio_buf;
		iov[i].iov_len  = gio->gio_len;
	    }
	    else
		break;	/* iovec full; "k" stays on this unprocessed GIO */

	    next_offset += gio->gio_len;

	} while ( ++k < tgio->tgio_queued
		    && (gio = tgio->tgio_queue[k])
		    && gio->gio_f == f
		    && gio->gio_offset == next_offset );

	/* "k" indexes the next, unprocessed GIO */

	bytes_to_write = next_offset - lseek_offset;
	
	/* Mark the session as waiting on disk write I/O */
	saved_state = tgio->tgio_scb->cs_state;
	tgio->tgio_scb->cs_state = CS_EVENT_WAIT;
	tgio->tgio_scb->cs_memory = CS_DIOW_MASK;

	/* Accumulate multi-page write stats */
	if ( k - j > 1 )
	{
	    /*
	    ** Using the first gio, count
	    ** the number of multi-pages written (k-j)
	    ** and a single I/O.
	    */
	    if ( first_gio->gio_io_count )
		++*first_gio->gio_io_count;
	    if ( first_gio->gio_gw_pages )
		*first_gio->gio_gw_pages += k - j;
	}

	/* Count a single I/O write for server */
	tgio->tgio_scb->cs_diow++;
	Cs_srv_block.cs_wtstatistics.cs_diow_done++;

	/* Count a single I/O wait for server */
	Cs_srv_block.cs_wtstatistics.cs_diow_waits++;

	/* Accumulate number of KB written by this I/O */
	Cs_srv_block.cs_wtstatistics.cs_diow_kbytes
	    += bytes_to_write / 1024;
	
	/* Now get an FD to do the write(v) */
	diop = (DI_OP*)&first_gio->gio_diop;
	open_status = DIlru_open(f, FALSE, diop, err_code);
	if ( open_status )
	{
	    /* Undo the I/O-wait marking before bailing out */
	    tgio->tgio_scb->cs_memory &= ~(CS_DIOW_MASK);
	    tgio->tgio_scb->cs_state = saved_state;

	    /*
	    ** NOTE(review): as in the original code, GIOs from "j"
	    ** onward get no completion callback and the queued counts
	    ** are not cleared on this path - confirm callers cope.
	    */
	    return(open_status);
	}

#ifdef DEBUG_THIS_PUPPY
	{
	    i4	x;
	    i8	offset = lseek_offset;

	    TRdisplay("%@ %p do_writev: %~t doing %d todo %d fd %d lseek from %ld\n",
		    tgio->tgio_scb->cs_self, 
		    f->io_l_filename, f->io_filename,
		    i+1, tgio->tgio_queued - j,
		    diop->di_fd, offset);
	    for (x = 0; x <= i; x++)
	    {
	TRdisplay("%@ do_writev: iovec[%d] base %p bytes %d (page %d for %d)\n",
			x,
			iov[x].iov_base, iov[x].iov_len,
			(i4)(offset/f->io_bytes_per_page),
			iov[x].iov_len/f->io_bytes_per_page);
		offset += iov[x].iov_len;
	    }
	}
#endif /* DEBUG_THIS_PUPPY */

	/*
	** If more than one iovec, seek and use writev.
	** The post-increment turns "i" from last index into iovec count.
	*/
	if ( i++ )
	{
	    /* writev needs seek mutex protection */
	    if ( !Di_thread_affinity )
		CS_synch_lock( &f->io_sem );
	    
	    num_writev++;
	    num_writev_gio += k - j;

	    bytes_written = 
		IIdio_writev( diop->di_fd, 
				(char *)iov,
				i,
				lseek_offset, 0, 
				f->io_fprop,
				err_code);
	    if ( !Di_thread_affinity )
		CS_synch_unlock( &f->io_sem );
	}
	else
	{
	    num_write++;
	    num_write_gio += k - j;

# if  !defined(xCL_NO_ATOMIC_READ_WRITE_IO)
	    /* pwrite(64) needs no seek mutex protection */
	    bytes_written =
#ifdef LARGEFILE64
	     pwrite64( diop->di_fd, 
			iov[0].iov_base, 
			bytes_to_write, 
			lseek_offset );
#else /*  LARGEFILE64 */
	     pwrite( diop->di_fd,
			iov[0].iov_base, 
			bytes_to_write, 
			lseek_offset );
#endif /* LARGEFILE64 */
	    if (bytes_written != bytes_to_write)
		SETCLERR(err_code, 0, ER_write);
# else /* !xCL_NO_ATOMIC_READ_WRITE_IO */
	    /* write() needs seek mutex protection */
	    if ( !Di_thread_affinity )
		CS_synch_lock( &f->io_sem );

	    bytes_written =
	     IIdio_write( diop->di_fd,
			    iov[0].iov_base, 
			    bytes_to_write, 
			    lseek_offset, 0, 
			    f->io_fprop,
			    err_code );
	    if ( !Di_thread_affinity )
		CS_synch_unlock( &f->io_sem );

# endif /* !xCL_NO_ATOMIC_READ_WRITE_IO */
	}

	/* Release the FD */
	(VOID)DIlru_release( diop, &lerr_code );
	    
	tgio->tgio_scb->cs_memory &= ~(CS_DIOW_MASK);
	tgio->tgio_scb->cs_state = saved_state;

	/* A short or failed write: map errno to a DI status */
	if (bytes_written != bytes_to_write)
	{
	    switch ( err_code->errnum )
	    {
		case EFBIG:
		    small_status = DI_BADEXTEND;
		    break;
		case ENOSPC:
		    small_status = DI_EXCEED_LIMIT;
		    break;
#ifdef EDQUOT
		/* Guard must name EDQUOT itself, or this case is never built */
		case EDQUOT:
		    small_status = DI_EXCEED_LIMIT;
		    break;
#endif
		default:
		    if (err_code->errnum == 0)
			small_status = DI_ENDFILE;
		    else
			small_status = DI_BADWRITE;
		    break;
	    }
	    /* Preserve the first failure seen across all the writes */
	    big_status = (big_status) ? big_status : small_status;
	}

	/* Invoke completion handler for each GIO written */
	do 
	{
	    gio = tgio->tgio_queue[j];
	    (gio->gio_evcomp)( gio->gio_data, small_status, err_code );

	} while ( ++j < k );
    }

#ifdef DEBUG_THIS_PUPPY
    TRdisplay("%@ %p do_writev: %d write requests completed using %d(%d) writev, %d(%d) write\n",
		tgio->tgio_scb->cs_self, 
		tgio->tgio_queued, 
		num_writev, num_writev_gio,
		num_write, num_write_gio);
#endif /* DEBUG_THIS_PUPPY */

    /* Clear the queued count(s) */
    tgio->tgio_queued = *tgio->tgio_uqueued = 0;

    return( big_status );
}
Пример #5
0
/*{
** Name: DIread - Read a page of a file.
**
** Description:
**      The DIread routine is used to read pages of a direct access 
**      file.   For the large block read option, the number of pages
**      to read is an input parameter to this routine.  It will
**      return the number of pages it read, since at
**      end of file it may read less pages than requested.
**      If multiple page reads are requested, the buffer is assumed
**      to be large enough to hold n pages.  The size of a page is 
**      determined at create.
**      
**	BUG FIX WORKAROUND (b4854):
**	
**	The current mainline code (in the case of reading from a 
**	temporary file) expects that a DIread past logical
**	end of file, but within allocated end of file will return with
**	no error.  The value of the data retrieved is undefined.  To
**	make the current code work, DIread on unix has been changed to
**	meet these requirements, but it is hoped that mainline code in the
**	future will be changed to not rely on this behaviour.
**
**	The buffer address into which the data is to be read is examined to
**	see if it is in shared memory. If so, we then instruct the slave to
**	read the page(s) directly into the target buffer. Otherwise, we read
**	the page(s) into the buffer in the server segment, and then copy the
**	data from the server segment to the target address.
**      
** Inputs:
**      f                    Pointer to the DI file
**                           context needed to do I/O.
**      n                    Pointer to value of number of pages to read.
**      page                 Value indicating page to begin reading.
**      buf                  Pointer to the area to hold
**                           page(s) being read.
**      
** Outputs:
**      n                    Number of pages read.
**      f                    Updates the file control block.
**      buf                  Pointer to the page read.
**      err_code             Pointer to a variable used
**                           to return operating system 
**                           errors.
**    Returns:
**          OK
**          DI_BADFILE       	Bad file context.
**          DI_BADREAD       	Error reading file.
**          DI_BADPARAM      	Parameter(s) in error.
**          DI_ENDFILE       	Not all blocks read.
**	    DI_BADLRU_RELEASE	Error releasing file.
**    Exceptions:
**        none
**
** Side Effects:
**        none
**
** History:
**    26-mar-87 (mmm)    
**          Created new for 6.0.
**    23-mar-89 (mmm)
**	    bug fix for b4854 (see bug fix workaround comments in header and
**	    in code.)
**	25-Apr-89 (GordonW)
**	    Don't use "bcopy" but use MECOPY macro call. bcopy is unportable.
**	07-may-89 (russ)
**	    Add missing semicolon to MECOPY_VAR_MACRO.
**	2-Feb-90 (anton)
**	    Don't always copy CL_ERR_DESC
**	6-Feb-90 (jkb)
**	    Add IIdio_read so direct io is available for Sequent.
**	5-aug-1991 (bryanp)
**	    Added support for I/O directly to server shared memory, bypassing
**	    the copy through the server segment if possible.
**	12-dec-1991 (bryanp)
**	    Added support for DIread on a raw log file for LG's use. In this
**	    case, all that had to happen was to replace the direct "lseek"
**	    call with a call to the IIdio code which supports file size
**	    determination for raw log files.
**      03-mar-1992 (jnash)
**          Fix LG slave problem noted when Sun mmap() support
**          introduced, change slave logic to send to the slave the
**          segment id "key" rather than "segid" (segid value not
**          the same in the slave).
**	30-October-1992 (rmuth)
**	    Prototype.
**	30-nov-1992 (rmuth)
**	    - Use DI_sense to find out the size of a file.
**	    - DIlru error checking
**      10-dec-1993 (rmuth)
**          If fail the past io_allocated_eof test then make sure that we
**          unset the errno value in CL_ERR_DESC set by SETCLERR. This was
**          causing confusion as we were logging random errno's to the
**          errlog.log
**      31-jan-94 (mikem)
**          sir #57671
**          The transfer size of slave I/O is now stored in
**          Cs_srv_block.cs_size_io_buf, rather than a constant
**          DI_FILE_BUF_SIZE.
**	20-jun-1995 (amo ICL)
**	    Added call on DI_async_read for async io
**	14-Oct-2005 (jenjo02)
**	    Chris's file descriptor properties now cached in io_fprop
**	    (file properties) and established on the first open, 
**	    not every open.
*/
STATUS
DIread(
    DI_IO	   *f,
    i4        *n,
    i4        page,
    char           *buf,
    CL_ERR_DESC     *err_code )
{
    /*
    ** io_status    - failure reported by an underlying DI call.
    ** range_status - request rejected locally (past end of file).
    ** io_status takes precedence over range_status on return.
    */
    STATUS		range_status = OK;
    STATUS		io_status = OK;
    STATUS		release_status;
    i4			pages_requested;
    i4			final_page;
    DI_OP		diop;
    CL_ERR_DESC		release_err;

    /* Start from a clean error descriptor. */
    CL_CLEAR_ERR( err_code );

    pages_requested = *n;
    final_page = page + pages_requested - 1;

    /* Report zero pages read until a reader fills in the real count. */
    *n = 0;
    diop.di_flags = 0;
    if (pages_requested <= 0)
	return (DI_BADPARAM);

    /* Reject a file control block that is not a valid DI_IO. */
    if (f->io_type != DI_IO_ASCII_ID)
	return (DI_BADFILE);

    /* One more read request against this file. */
    f->io_stat.read++;

    /* Obtain an open file descriptor for the file. */
    io_status = DIlru_open(f, FALSE, &diop, err_code);
    if (io_status)
	return (io_status);

    /*
    ** Bounds check against the allocated EOF. Pages inside that bound
    ** may still hold garbage; the upper layers must guard against that.
    ** If the request looks out of range, re-sense the file first. Per
    ** the original note, DI_sense itself refreshes f->io_alloc_eof.
    */
    if (final_page > f->io_alloc_eof)
    {
	i4	sensed_eof;

	io_status = DI_sense(f, &diop, &sensed_eof, err_code);

	if (io_status == OK && final_page > f->io_alloc_eof)
	{
	    range_status = DI_ENDFILE;
	    SETCLERR(err_code, 0, ER_read);

	    /*
	    ** SETCLERR records whatever errno was left over from an
	    ** earlier call; clear it so no unrelated errno is logged.
	    */
	    err_code->errnum = 0;
	}
    }

    if (io_status == OK && range_status == OK)
    {
	/* Dispatch to the slave, async or in-process reader. */
	if (Di_slave)
	{
	    io_status = DI_slave_read( f, &diop, buf, page, pages_requested,
				       n, err_code );
	}
	else
# if defined(OS_THREADS_USED) || defined(xCL_ASYNC_IO)
	if (Di_async_io)
	{
	    io_status = DI_async_read( f, &diop, buf, page, pages_requested,
				       n, err_code );
	}
	else
# endif /* OS_THREADS_USED || xCL_ASYNC_IO */
	{
	    io_status = DI_inproc_read( f, &diop, buf, page, pages_requested,
					n, err_code );
	}
    }

    /* Always hand the descriptor back; its error detail is discarded. */
    release_status = DIlru_release(&diop, &release_err);

    if (io_status)
	return (io_status);
    if (range_status)
	return (range_status);

    return (release_status);
}