コード例 #1
0
ファイル: diwrite.c プロジェクト: saqibjamil/Ingres
/*{
** Name: DI_slave_write -  Request a slave to write page(s) to a file on disk.
**
** Description:
**	This routine was created to make DIwrite more readable once
**	error checking had been added. See DIwrite for comments.
**
** Inputs:
**      f                    Pointer to the DI file
**                           context needed to do I/O.
**	diop		     Pointer to dilru file context.
**      buf                  Pointer to page(s) to write.
**      page                 Value indicating page(s) to write.
**	num_of_pages	     number of pages to write
**      
** Outputs:
**      err_code             Pointer to a variable used
**                           to return operating system 
**                           errors.
**    Returns:
**          OK
**	    other errors.
**    Exceptions:
**        none
**
** Side Effects:
**        none
**
** History:
**    30-nov-1992 (rmuth)
**	    Created.
**      10-oct-1993 (mikem)
**          bug #47624
**          Bug 47624 resulted in CSsuspend()'s from the DI system being woken
**          up early.  Mainline DI would then proceed while the slave would
**          actually be processing the requested asynchronous action.  Various
**          bad things could happen after this depending on timing: (mainline
**          DI would change the slave control block before slave read it,
**          mainline DI would call DIlru_release() and ignore it failing which
**          would cause the control block to never be freed eventually leading
**          to the server hanging when it ran out of slave control blocks, ...
**
**          Fixes were made to scf to hopefully eliminate the unwanted
**          CSresume()'s.  In addition defensive code has been added to DI
**          to catch cases of returning from CSresume while the slave is
**          operating, and to check for errors from DIlru_release().  Before
**          causing a slave to take action the master will set the slave
**          control block status to DI_INPROGRESS, the slave in turn will not
**          change this status until it has completed the operation.
**
**          The off by one error was caused by the CSsuspend called by 
**	    DI_slave_send() returning early in the case of a DIwrite() of one 
**	    page.  The old write loop would increment disl->pre_seek before the
**	    slave had actually read the control block so the slave would write 
**	    the data from page N to the N+1 location in the file.  The 
**	    DI_INPROGRESS flag should stop this, and at least in the one page 
**	    write case we no longer increment disl->pre_seek.
**	23-aug-1993 (bryanp)
**	    If segment isn't yet mapped, map it!
**	01-oct-1998 (somsa01)
**	    Return DI_NODISKSPACE when we are out of disk space.
*/
/*
** Implementation notes for DI_slave_write:
**	- bytes to write = f->io_bytes_per_page * num_of_pages, starting at
**	  byte offset f->io_bytes_per_page * page.
**	- If buf lies in a memory segment known to ME_find_seg() that carries
**	  ME_SLAVEMAPPED_MASK (DI_lru_slmapmem() is called first when the flag
**	  is not yet set), the slave is given the segment key and offset of
**	  buf itself and the full remaining byte count is requested per send.
**	- Otherwise the data is staged through the control block's own buffer
**	  (disl->buf) in chunks of at most Cs_srv_block.cs_size_io_buf bytes.
**	- A non-OK return from DI_lru_slmapmem() is always reported to the
**	  caller, even if that routine did not fill in the send status.
**	- Any non-OK result is passed through DIlru_set_di_error() before
**	  being returned.
*/
static STATUS
DI_slave_write(
    DI_IO	*f,
    DI_OP	*diop,
    char        *buf,
    i4	page,
    i4	num_of_pages,
    CL_ERR_DESC *err_code)
{
    register DI_SLAVE_CB	*disl;
    ME_SEG_INFO			*seginfo;
    bool			direct_write;
    STATUS			big_status = OK, small_status = OK;
    STATUS			intern_status = OK, status;

    /* unix variables */
    int		bytes_to_write;

    /* Single-pass loop: "break" jumps to the common error handling below. */
    do
    {
	disl = diop->di_evcb;
	bytes_to_write = (f->io_bytes_per_page * (num_of_pages));

	/*
	** Determine whether we're writing from shared memory, and set
	** up the segment ID and offset correctly.
	*/
	seginfo = ME_find_seg( buf, (char *)buf + bytes_to_write,
			       &ME_segpool);

	if (seginfo != 0 && (seginfo->flags & ME_SLAVEMAPPED_MASK) == 0)
	{
	    status = DI_lru_slmapmem(seginfo, &intern_status, &small_status);
	    if (status != OK)
	    {
		/*
		** Nothing has been written at this point, so the caller
		** must not be told OK.  Keep whatever status the mapping
		** routine stored in small_status; fall back to its return
		** value when it stored none.
		*/
		if (small_status == OK)
		    small_status = status;
		break;
	    }
	}

	if (seginfo != 0 && (seginfo->flags & ME_SLAVEMAPPED_MASK) != 0)
	{
	    /* Slave can address the caller's buffer: describe it directly. */
	    direct_write = TRUE;
	    MEcopy( (PTR)seginfo->key, sizeof(disl->seg_key),
		    (PTR)disl->seg_key);

	    disl->seg_offset = (char *)buf - (char *)seginfo->addr;
	}
	else
	{
	    /*
	    ** Caller's buffer is not usable by the slave: describe the
	    ** control block's own buffer instead and copy data into it
	    ** chunk by chunk in the write loop.
	    */
	    direct_write = FALSE;
	    seginfo = ME_find_seg(disl->buf, disl->buf, &ME_segpool);
	    if (seginfo)
	    {
		MEcopy( (PTR)seginfo->key, sizeof(disl->seg_key),
			(PTR)disl->seg_key);

		disl->seg_offset= (char *)disl->buf - (char *)seginfo->addr;
	    }
	    else
	    {
		/* No segment found even for the staging buffer. */
		small_status = DI_BADWRITE;
		break;
	    }
	}

	/* Send file properties to slave */
	FPROP_COPY(f->io_fprop,disl->io_fprop);

	disl->pre_seek =
	    (OFFSET_TYPE)(f->io_bytes_per_page) * (OFFSET_TYPE)(page);
	disl->file_op = DI_SL_WRITE;

	/*
	** Write the data
	*/
	do
	{
	    if (direct_write)
		disl->length = bytes_to_write;
	    else
	    {
		disl->length = min(bytes_to_write, Cs_srv_block.cs_size_io_buf);
		MEcopy((PTR)buf, disl->length, (PTR)disl->buf);
	    }

	    DI_slave_send( disl->dest_slave_no, diop,
			   &big_status, &small_status, &intern_status);

	    if (( big_status != OK ) || (small_status != OK ))
		break;

	    /* Pick up the slave's own result and, on error, its OS error. */
	    if ((small_status = disl->status) != OK )
	    {
		STRUCT_ASSIGN_MACRO(disl->errcode, *err_code);
	    }

	    if ((small_status != OK) || (disl->length == 0))
	    {
		/*
		** Translate the OS error number into a DI status.
		** NOTE(review): when the slave reports OK with a zero
		** length, err_code has not been refreshed from
		** disl->errcode, so errnum is whatever the caller passed
		** in -- confirm that this is intended.
		*/
		switch( err_code->errnum )
		{
		case EFBIG:
		    small_status = DI_BADEXTEND;
		    break;
		case ENOSPC:
		    small_status = DI_NODISKSPACE;
		    break;
#ifdef EDQUOT
		case EDQUOT:
		    small_status = DI_EXCEED_LIMIT;
		    break;
#endif
		default:
		    small_status = DI_BADWRITE;
		    break;
		}

		break;
	    }

	    bytes_to_write -= disl->length;
	    buf += disl->length;

	    /*
	    ** Only move the seek position when another send will follow;
	    ** see the off-by-one discussion in the function history.
	    */
	    if (bytes_to_write > 0)
		disl->pre_seek += (OFFSET_TYPE)disl->length;

	} while ( bytes_to_write > 0);

    } while (FALSE);


    /* big_status takes precedence over small_status. */
    if (big_status != OK )
	small_status = big_status;

    if (small_status != OK )
	DIlru_set_di_error( &small_status, err_code, intern_status,
			    DI_GENERAL_ERR);

    return( small_status );
}
コード例 #2
0
/*
** Name: DI_force - Sync a file's outstanding writes to disk.
**
** Description:
**	With slaves (Di_slave non-zero) a DI_SL_SYNC request is sent to the
**	slave named in the control block and the slave's status/errcode are
**	picked up afterwards.
**	Without slaves the sync is issued from this process: when built
**	with xCL_ASYNC_IO and Di_async_io is set, via aio_fsync()/
**	aio_fsync64() (plain fsync() on any_aix) followed by CSsuspend() and
**	an aio_error() check; otherwise via FSYNC() on diop->di_fd.
**
** Inputs:
**	f		DI file context; only io_fprop is read here.
**	diop		dilru file context (di_evcb, di_fd are used).
**
** Outputs:
**	err_code	Filled with the slave's errcode or with an
**			ER_fsync CL error on failure.
**
** Returns:
**	OK, or a non-OK status after it has been passed through
**	DIlru_set_di_error().
*/
static STATUS
DI_force(
    DI_IO	*f,
    DI_OP	*diop,
    CL_ERR_DESC	*err_code)
{

    STATUS	big_status = OK, small_status = OK, intern_status = OK;
    register DI_SLAVE_CB	*disl;

    /* Single-pass loop: "break" jumps to the common error handling below. */
    do
    {
        if (Di_slave)
	{
	    disl = diop->di_evcb;
	    disl->file_op = DI_SL_SYNC;
	    /* Send file properties to slave */
	    FPROP_COPY(f->io_fprop,disl->io_fprop);

	    DI_slave_send( disl->dest_slave_no, diop,
			   &big_status, &small_status, &intern_status);

	    if (big_status != OK )
	        break;

	    /* Send itself worked: take over the slave's own result. */
	    if ( small_status == OK )
	    {
	        if ((small_status = disl->status) != OK )
	        {
		    STRUCT_ASSIGN_MACRO(disl->errcode, *err_code);
		}
	    }
	}
	else
	{
	    /* 
	    ** put code in here for fsync issues 
	    */
#ifdef xCL_ASYNC_IO
            if( Di_async_io)
	    {
                /*
                ** NOTE(review): aio is dereferenced without a NULL check;
                ** confirm DI_get_aiocb() cannot fail.
                */
                DI_AIOCB *aio;
                aio=DI_get_aiocb();
                /* The descriptor field is spelled differently on dr6_us5. */
#ifdef dr6_us5
                aio->aio.aio_filedes=diop->di_fd;
#else
                aio->aio.aio_fildes=diop->di_fd;
#endif /* dr6_us5 */
                /* Issue the sync; a non-zero return is treated as failure. */
#ifdef LARGEFILE64
                if(aio_fsync64( O_SYNC, &aio->aio))
#elif defined(any_aix)
                if(fsync( aio->aio.aio_fildes ))
#else
                if(aio_fsync( O_SYNC, &aio->aio))
#endif /* LARGEFILE64 */
                {
	        SETCLERR(err_code, 0, ER_fsync);
                    small_status = FAIL;
                    break;
                }
                else
                {
                    /*
                    ** Suspend until resumed, then check the request's
                    ** final error state.
                    ** NOTE(review): on CSsuspend failure
                    ** DIlru_set_di_error() is called here and again after
                    ** the loop -- confirm the second call is harmless.
                    */
                    if( (small_status=CSsuspend( CS_DIOW_MASK, 0, 0) ) != OK)
                    {
                        DIlru_set_di_error( &small_status, err_code, 
				DI_LRU_CSSUSPEND_ERR, DI_GENERAL_ERR);
		        break;
                    }
#if defined(axp_osf) 
                    if (  (aio_error(&aio->aio)) != 0 )
#else
#ifdef LARGEFILE64
                    if (  (aio_error64(&aio->aio)) != 0 )
#else /* LARGEFILE64 */
                    if (  (aio_error(&aio->aio)) != 0 )
#endif /* LARGEFILE64 */
#endif
                    {
	                SETCLERR(err_code, 0, ER_fsync);
	                small_status = FAIL;
                        break;
                    }
                }
            }
            else 
#endif /* xCL_ASYNC_IO */
	    /*
	    ** Synchronous path: taken when async I/O is not compiled in,
	    ** or as the "else" of the Di_async_io test above.
	    */
	    if (FSYNC(diop->di_fd) < 0)
	    {
#ifdef xCL_092_NO_RAW_FSYNC
		/* AIX returns EINVAL on character special files */
		if (errno != EINVAL) 
#endif /* xCL_092_NO_RAW_FSYNC */
		{
	            SETCLERR(err_code, 0, ER_fsync);
	            small_status = FAIL;
		}
	    }
	}

    } while (FALSE);

    /* big_status takes precedence over small_status. */
    if ( big_status != OK )
	small_status = big_status;

    if ( small_status != OK  )
        DIlru_set_di_error( &small_status, err_code, intern_status,
			    DI_GENERAL_ERR);

    return( small_status );
}
コード例 #3
0
/*
** Name: DI_galloc - Extend a file by n pages.
**
** Description:
**	With slaves (Di_slave non-zero) a DI_SL_ZALLOC request for
**	n * f->io_bytes_per_page bytes is sent; on success disl->length is
**	used as the byte offset from which end_of_file is computed.
**	Without slaves the current end-of-file is obtained with
**	IIdio_get_file_eof(), space is optionally reserved when the file's
**	allocation strategy is FPROP_ALLOCSTRATEGY_RESV, and the new pages
**	are written from Di_zero_buffer in chunks of at most
**	Di_zero_bufsize bytes.
**	Under OS_THREADS_USED the whole operation runs with f->io_sem held
**	unless the file is marked FPROP_PRIVATE.
**
** Inputs:
**	f		DI file context.
**	n		Number of pages to add.
**	diop		dilru file context (di_evcb, di_fd are used).
**
** Outputs:
**	end_of_file	Set to (offset / page size) - 1, where offset is the
**			end-of-file byte offset found before extending.
**			Not written on the error paths that break out early.
**	err_code	Operating system error details on failure.
**
** Returns:
**	OK, or a non-OK status after it has been passed through
**	DIlru_set_di_error().
**
** Side Effects:
**	On success f->io_alloc_eof is raised to *end_of_file + n if that is
**	larger; f->io_reserved_bytes and the allocation strategy in
**	f->io_fprop may be updated by the reservation logic.
*/
static STATUS
DI_galloc(
    DI_IO	*f,
    i4	n,
    DI_OP	*diop,
    i4		*end_of_file,
    CL_ERR_DESC *err_code)
{
    STATUS                      big_status = OK, small_status =OK;
    STATUS                      intern_status = OK;
    register DI_SLAVE_CB        *disl;
    i4				last_page;
    OFFSET_TYPE			lseek_ret;

    /* Single-pass loop: "break" jumps to the common exit handling below. */
    do
    {
# ifdef OS_THREADS_USED
	/* Seek/write must be semaphore protected */
	if ((f->io_fprop & FPROP_PRIVATE) == 0)
	    CS_synch_lock( &f->io_sem );
# endif /* OS_THREADS_USED */

	if (Di_slave)
	{
	    disl = diop->di_evcb;

	    disl->file_op = DI_SL_ZALLOC;
	    disl->length = n * f->io_bytes_per_page;
	    /* Pass file properties to slave */
	    FPROP_COPY(f->io_fprop,disl->io_fprop);

	    DI_slave_send( disl->dest_slave_no, diop,
			   &big_status, &small_status, &intern_status );

	    if (( big_status != OK ) || ( small_status != OK ))
		break;

	    if ( disl->status != OK )
	    {
		STRUCT_ASSIGN_MACRO(disl->errcode, *err_code);
		small_status = DI_BADEXTEND;
		break;
	    }
	    else
	    {
		/* Value left in length by the slave feeds end_of_file. */
		lseek_ret = disl->length;
	    }

	}
	else
	{
	    /* 
	    ** Running without slaves 
	    */
	    OFFSET_TYPE	lseek_offset;
	    i8		reservation;
	    i4		buf_size;
	    i4		bytes_written;
	    i4		pages_remaining = n;
	    i4		pages_at_a_time = Di_zero_bufsize /
					  f->io_bytes_per_page;

	    /* find current end-of-file */

	    lseek_ret = IIdio_get_file_eof(diop->di_fd, f->io_fprop);
	    if ( lseek_ret == (OFFSET_TYPE)-1L )
	    {
		SETCLERR(err_code, 0, ER_lseek);
		small_status = DI_BADINFO;
		break;
	    }
	    else
	    {
		lseek_offset = lseek_ret;
		/* If this filesystem can do reservations, see if we
		** should reserve more space.
		** Even though we have to write the zeros anyway, the
		** reservation may well be larger than the zeroing
		** buffer, and this way helps maintain contiguity.
		** Not worth it for tiny writes.
		*/
		if (pages_remaining > 2
		  && FPROP_ALLOCSTRATEGY_GET(f->io_fprop) == FPROP_ALLOCSTRATEGY_RESV)
		{
		    /*
		    ** Widen before multiplying: the page count times the
		    ** page size can exceed the range of a 32-bit integer
		    ** for large extensions.
		    */
		    reservation = lseek_offset +
			((i8)pages_remaining * f->io_bytes_per_page);
		    if (reservation > f->io_reserved_bytes)
		    {
			/* Re-check in case some other server reserved */
			small_status = IIdio_get_reserved(diop->di_fd,
				&f->io_reserved_bytes, err_code);
			if (small_status == OK && reservation > f->io_reserved_bytes)
			{
			    small_status = IIdio_reserve(diop->di_fd,
					f->io_reserved_bytes,
					reservation - f->io_reserved_bytes,
					err_code);
			    if (small_status == OK)
			    {
				f->io_reserved_bytes = reservation;
			    }
			    else
			    {
				if (small_status != DI_BADFILE)
				    break;
				/* Fallocate not supported, turn off
				** "reserve" strategy, continue without.
				*/
				small_status = OK;
				FPROP_ALLOCSTRATEGY_SET(f->io_fprop, FPROP_ALLOCSTRATEGY_VIRT);
			    }
			}
		    }
		} /* end reservations */

		/*
		** Write the new pages from the zero buffer, a full buffer
		** at a time except for a shorter final chunk.
		*/
		while ( pages_remaining > 0 )
		{
		    if ( pages_remaining < pages_at_a_time )
			buf_size = pages_remaining *
				    f->io_bytes_per_page;
		    else
			buf_size = Di_zero_bufsize;

# if  defined(OS_THREADS_USED) && !defined(xCL_NO_ATOMIC_READ_WRITE_IO)
		    bytes_written =
#ifdef LARGEFILE64
			pwrite64( diop->di_fd, Di_zero_buffer, 
				    buf_size, lseek_offset );
#else /*  LARGEFILE64 */
			pwrite( diop->di_fd, Di_zero_buffer, 
				    buf_size, lseek_offset );
#endif /* LARGEFILE64 */
# else /* OS_THREADS_USED  !xCL_NO_ATOMIC_READ_WRITE_IO */
		    /*
		    ** NOTE(review): lseek_offset is passed both by value
		    ** and by address; if IIdio_write() advances the value
		    ** behind the pointer, the increment below would count
		    ** the chunk twice -- confirm against IIdio_write().
		    */
		    bytes_written =
			IIdio_write( diop->di_fd, Di_zero_buffer, 
				    buf_size, 
				    lseek_offset, 
				    &lseek_offset, 
				    f->io_fprop,
				    err_code );
# endif /* OS_THREADS_USED */

		    /* A short write is treated as a failed extend. */
		    if ( bytes_written != buf_size )
		    {
			SETCLERR(err_code, 0, ER_write);
			small_status = DI_BADEXTEND;
			break;
		    }

		    lseek_offset += buf_size;
		    /* May go negative on the final chunk; that ends the loop. */
		    pages_remaining -= pages_at_a_time;
		}

		if ( small_status != OK )
		    break;
	    }
	}

	*end_of_file = ( lseek_ret / f->io_bytes_per_page) - 1;

    } while (FALSE);

    if (big_status == OK && small_status == OK)
    {
	/*
	** Update the current allocated end-of-file under mutex protection
	*/
	last_page = *end_of_file + n;
	if (last_page > f->io_alloc_eof)
	    f->io_alloc_eof = last_page;
    }

# ifdef OS_THREADS_USED
    /* Matches the lock taken at the top of the loop body. */
    if ((f->io_fprop & FPROP_PRIVATE) == 0)
	CS_synch_unlock( &f->io_sem );
# endif /* OS_THREADS_USED */

    /* big_status takes precedence over small_status. */
    if ( big_status != OK )
	small_status = big_status;

    if ( small_status != OK )
	DIlru_set_di_error( &small_status, err_code, intern_status,
			    DI_GENERAL_ERR);

    return(small_status);

}
コード例 #4
0
/*{
** Name: DI_slave_read -  Request a slave to read page(s) from a file on disk.
**
** Description:
**	This routine was created to make DIread more readable once
**	error checking had been added. See DIread for comments.
**
** Inputs:
**      f                    Pointer to the DI file
**                           context needed to do I/O.
**	diop		     Pointer to dilru file context.
**      buf                  Pointer to page(s) to read.
**      page                 Value indicating page(s) to read.
**	num_of_pages	     number of pages to read.
**      
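** Outputs:
**      n                    Set to the number of whole pages read
**                           (bytes read / page size); left untouched
**                           when no bytes were read.
**      err_code             Pointer to a variable used
**                           to return operating system 
**                           errors.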
**    Returns:
**          OK
**	    other errors.
**    Exceptions:
**        none
**
** Side Effects:
**        none
**
** History:
**    30-nov-1992 (rmuth)
**	    Created.
**      10-mar-1993 (mikem)
**          Changed the type of the first parameter to DI_send_ev_to_slave() and
**          the 2nd parameter to DI_slave_send(), so that DI_send_ev_to_slave()
**          could access the slave control block's status.
**          This routine will now initialize the status to DI_INPROGRESS, before
**          making the request and the slave will change the status once the
**          operation is complete.
**	23-aug-1993 (bryanp)
**	    If memory segment isn't yet mapped, map it.
*/
/*
** Implementation notes for DI_slave_read:
**	- bytes to read = f->io_bytes_per_page * num_of_pages, starting at
**	  byte offset f->io_bytes_per_page * page.
**	- If buf lies in a memory segment known to ME_find_seg() that carries
**	  ME_SLAVEMAPPED_MASK (DI_lru_slmapmem() is called first when the flag
**	  is not yet set), the slave is given the segment key and offset of
**	  buf itself.
**	- Otherwise the data is read into the control block's own buffer
**	  (disl->buf) in chunks of at most Cs_srv_block.cs_size_io_buf bytes
**	  and copied out to buf after each send.
**	- *n is set to bytes read / page size, but only when at least one
**	  byte was read.
**	- A non-OK return from DI_lru_slmapmem() is always reported to the
**	  caller, even if that routine did not fill in the send status.
**	- Any non-OK result is passed through DIlru_set_di_error() before
**	  being returned.
*/
static STATUS
DI_slave_read(
    DI_IO	*f,
    DI_OP	*diop,
    char        *buf,
    i4	page,
    i4	num_of_pages,
    i4	*n,
    CL_ERR_DESC *err_code)
{
    register DI_SLAVE_CB        *disl;
    ME_SEG_INFO                 *seginfo;
    bool                        direct_read;
    STATUS			small_status = OK, big_status = OK, 
				intern_status = OK, status;

    /* unix variables */
    int		bytes_to_read;
    int		bytes_read = 0;

    /* Single-pass loop: "break" jumps to the common exit handling below. */
    do
    {
	disl = diop->di_evcb;

	disl->pre_seek = 
	    (OFFSET_TYPE)(f->io_bytes_per_page) * (OFFSET_TYPE)(page);
	bytes_to_read	= f->io_bytes_per_page * num_of_pages;

	/*
	** determine whether we're reading into shared memory, and set
	** up the segment ID and offset correctly
	*/
	seginfo = ME_find_seg( buf, (char *)buf + bytes_to_read,
			       &ME_segpool);

	if (seginfo != 0 && (seginfo->flags & ME_SLAVEMAPPED_MASK) == 0)
	{
	    status = DI_lru_slmapmem(seginfo, &intern_status, &small_status);
	    if (status != OK)
	    {
		/*
		** Nothing has been read at this point, so the caller must
		** not be told OK.  Keep whatever status the mapping
		** routine stored in small_status; fall back to its return
		** value when it stored none.
		*/
		if (small_status == OK)
		    small_status = status;
		break;
	    }
	}

	if (seginfo != 0 && (seginfo->flags & ME_SLAVEMAPPED_MASK) != 0)
	{
	    /* Slave can address the caller's buffer: describe it directly. */
	    MEcopy( (PTR)seginfo->key, sizeof(disl->seg_key),
		    (PTR)disl->seg_key);

	    disl->seg_offset = (char *)buf - (char *)seginfo->addr;
	    direct_read = TRUE;
	}
	else
	{
	    /*
	    ** Caller's buffer is not usable by the slave: describe the
	    ** control block's own buffer and copy out after each send.
	    */
	    direct_read = FALSE;
	    seginfo = ME_find_seg(disl->buf, disl->buf, &ME_segpool);
	    if (seginfo)
	    {
		MEcopy( (PTR)seginfo->key, sizeof(disl->seg_key),
			(PTR)disl->seg_key);

		disl->seg_offset= (char *)disl->buf - (char *)seginfo->addr;
	    }
	    else
	    {
		/* No segment found even for the staging buffer. */
		small_status = DI_BADREAD;
		break;
	    }
	}


	/* 
	** Issue read requests until the whole range has been read or an
	** error / end-of-file is reported.
	*/
	do 
	{
	    disl->file_op = DI_SL_READ;

	    /* Send file properties to slave */
	    FPROP_COPY(f->io_fprop,disl->io_fprop);

	    if (direct_read)
		disl->length = bytes_to_read;
	    else
		disl->length = min(bytes_to_read, Cs_srv_block.cs_size_io_buf);

	    DI_slave_send( disl->dest_slave_no, diop,
			   &big_status, &small_status, &intern_status);
	    if (( big_status != OK ) || ( small_status != OK ))
		break;

	    if ((small_status = disl->status) != OK ) 
	    {
		/* Keep the slave's OS error; report a generic bad read. */
		STRUCT_ASSIGN_MACRO(disl->errcode, *err_code);
		small_status = DI_BADREAD;
		break;
	    }
	    else
	    {
		/* OK status with zero length is reported as end of file. */
		if ( disl->length == 0 )
		{
		    small_status = DI_ENDFILE;
#ifdef	xDEV_TST

		    TRdisplay("num_pages %d\n, read_op = %x", 
				  num_of_pages, 0x70000000);
		    DIlru_dump();
#endif	/* xDev_TST */
		    break;

		}
	    }

	    /*
	    ** Read data ok 
	    */
	    if (! direct_read)
	    {
		MEcopy((PTR)disl->buf, disl->length, (PTR)buf);
		buf += disl->length;
	    }

	    bytes_to_read -= disl->length;
	    disl->pre_seek += (OFFSET_TYPE)disl->length;
	    bytes_read	   += disl->length;

	} while ( bytes_to_read > 0);
    } while (FALSE);

    /* Report pages read so far, even if a later send failed. */
    if ( bytes_read > 0 )
	*n = bytes_read / f->io_bytes_per_page;

    /* big_status takes precedence over small_status. */
    if ( big_status != OK )
	small_status = big_status;

    if (small_status != OK )
	DIlru_set_di_error( &small_status, err_code, intern_status,
			    DI_GENERAL_ERR);

    return(small_status);
  }