/*{ ** Name: DI_slave_write - Request a slave to writes page(s) to a file on disk. ** ** Description: ** This routine was created to make DIwrite more readable once ** error checking had been added. See DIwrite for comments. ** ** Inputs: ** f Pointer to the DI file ** context needed to do I/O. ** diop Pointer to dilru file context. ** buf Pointer to page(s) to write. ** page Value indicating page(s) to write. ** num_of_pages number of pages to write ** ** Outputs: ** err_code Pointer to a variable used ** to return operating system ** errors. ** Returns: ** OK ** other errors. ** Exceptions: ** none ** ** Side Effects: ** none ** ** History: ** 30-nov-1992 (rmuth) ** Created. ** 10-oct-1993 (mikem) ** bug #47624 ** Bug 47624 resulted in CSsuspend()'s from the DI system being woken ** up early. Mainline DI would then procede while the slave would ** actually be processing the requested asynchronous action. Various ** bad things could happen after this depending on timing: (mainline ** DI would change the slave control block before slave read it, ** mainline DI would call DIlru_release() and ignore it failing which ** would cause the control block to never be freed eventually leading ** to the server hanging when it ran out of slave control blocks, ... ** ** Fixes were made to scf to hopefully eliminate the unwanted ** CSresume()'s. In addition defensive code has been added to DI ** to catch cases of returning from CSresume while the slave is ** operating, and to check for errors from DIlru_release(). Before ** causing a slave to take action the master will set the slave ** control block status to DI_INPROGRESS, the slave in turn will not ** change this status until it has completed the operation. ** ** The off by one error was caused by the CSsuspend called by ** DI_slave_send() returning early in the case of a DIwrite() of one ** page. 
The old write loop would increment disl->pre_seek before the ** slave had actually read the control block so the slave would write ** the data from page N to the N+1 location in the file. The ** DI_INPROGRESS flag should stop this, and at least in the one page ** write case we no longer increment disl->pre_seek. ** 23-aug-1993 (bryanp) ** If segment isn't yet mapped, map it! ** 01-oct-1998 (somsa01) ** Return DI_NODISKSPACE when we are out of disk space. */ static STATUS DI_slave_write( DI_IO *f, DI_OP *diop, char *buf, i4 page, i4 num_of_pages, CL_ERR_DESC *err_code) { register DI_SLAVE_CB *disl; ME_SEG_INFO *seginfo; bool direct_write; STATUS big_status = OK, small_status = OK; STATUS intern_status = OK, status; /* unix variables */ int bytes_to_write; do { disl = diop->di_evcb; bytes_to_write = (f->io_bytes_per_page * (num_of_pages)); /* ** Determine whether we're writing from shared memory, and set ** up the segment ID and offset correctly. */ seginfo = ME_find_seg( buf, (char *)buf + bytes_to_write, &ME_segpool); if (seginfo != 0 && (seginfo->flags & ME_SLAVEMAPPED_MASK) == 0) { status = DI_lru_slmapmem(seginfo, &intern_status, &small_status); if (status) break; } if (seginfo != 0 && (seginfo->flags & ME_SLAVEMAPPED_MASK) != 0) { direct_write = TRUE; MEcopy( (PTR)seginfo->key, sizeof(disl->seg_key), (PTR)disl->seg_key); disl->seg_offset = (char *)buf - (char *)seginfo->addr; } else { direct_write = FALSE; seginfo = ME_find_seg(disl->buf, disl->buf, &ME_segpool); if (seginfo) { MEcopy( (PTR)seginfo->key, sizeof(disl->seg_key), (PTR)disl->seg_key); disl->seg_offset= (char *)disl->buf - (char *)seginfo->addr; } else { small_status = DI_BADWRITE; break; } } /* Send file properties to slave */ FPROP_COPY(f->io_fprop,disl->io_fprop); disl->pre_seek = (OFFSET_TYPE)(f->io_bytes_per_page) * (OFFSET_TYPE)(page); disl->file_op = DI_SL_WRITE; /* ** Write the data */ do { if (direct_write) disl->length = bytes_to_write; else { disl->length = min(bytes_to_write, 
Cs_srv_block.cs_size_io_buf); MEcopy((PTR)buf, disl->length, (PTR)disl->buf); } DI_slave_send( disl->dest_slave_no, diop, &big_status, &small_status, &intern_status); if (( big_status != OK ) || (small_status != OK )) break; if ((small_status = disl->status) != OK ) { STRUCT_ASSIGN_MACRO(disl->errcode, *err_code); } if ((small_status != OK) || (disl->length == 0)) { switch( err_code->errnum ) { case EFBIG: small_status = DI_BADEXTEND; break; case ENOSPC: small_status = DI_NODISKSPACE; break; #ifdef EDQUOT case EDQUOT: small_status = DI_EXCEED_LIMIT; break; #endif default: small_status = DI_BADWRITE; break; } break; } bytes_to_write -= disl->length; buf += disl->length; if (bytes_to_write > 0) disl->pre_seek += (OFFSET_TYPE)disl->length; } while ( bytes_to_write > 0); } while (FALSE); if (big_status != OK ) small_status = big_status; if (small_status != OK ) DIlru_set_di_error( &small_status, err_code, intern_status, DI_GENERAL_ERR); return( small_status ); }
/*
** Name: DI_force - ask for a file's data to be synced to disk.
**
** Description:
**	When Di_slave is set, a DI_SL_SYNC request carrying the file's
**	properties is sent to the slave named in the event control block,
**	and the status the slave left in that control block is picked up.
**
**	Otherwise the sync is issued from this process: when xCL_ASYNC_IO
**	is compiled in and Di_async_io is set, an asynchronous fsync is
**	queued and the session suspends with CSsuspend() before checking
**	the request's result; in all other cases FSYNC() is called.
**
** Inputs:
**	f		Pointer to the DI file context (io_fprop is used).
**	diop		Pointer to dilru file context (di_evcb, di_fd).
**
** Outputs:
**	err_code	Filled in when a failure is detected.
**
** Returns:
**	OK on success; otherwise the failing status after it has been
**	passed through DIlru_set_di_error().
*/
static STATUS
DI_force(
    DI_IO	*f,
    DI_OP	*diop,
    CL_ERR_DESC	*err_code)
{
    STATUS			send_failure = OK;
    STATUS			result = OK;
    STATUS			lru_detail = OK;
    register DI_SLAVE_CB	*slave_cb;

    if (Di_slave)
    {
        slave_cb = diop->di_evcb;
        slave_cb->file_op = DI_SL_SYNC;

        /* The slave needs the file properties as well */
        FPROP_COPY(f->io_fprop,slave_cb->io_fprop);

        DI_slave_send( slave_cb->dest_slave_no, diop,
                       &send_failure, &result, &lru_detail);

        /* Only look at the slave's own verdict if the send went through */
        if (send_failure == OK && result == OK)
        {
            result = slave_cb->status;
            if (result != OK)
            {
                STRUCT_ASSIGN_MACRO(slave_cb->errcode, *err_code);
            }
        }
    }
    else
    {
        /* No slaves: sync the descriptor from this process */
#ifdef xCL_ASYNC_IO
        if (Di_async_io)
        {
            DI_AIOCB	*aio;

            aio = DI_get_aiocb();
#ifdef dr6_us5
            aio->aio.aio_filedes = diop->di_fd;
#else
            aio->aio.aio_fildes = diop->di_fd;
#endif /* dr6_us5 */

#ifdef LARGEFILE64
            if (aio_fsync64( O_SYNC, &aio->aio))
#elif defined(any_aix)
            if (fsync( aio->aio.aio_fildes ))
#else
            if (aio_fsync( O_SYNC, &aio->aio))
#endif /* LARGEFILE64 */
            {
                /* Could not even issue the sync request */
                SETCLERR(err_code, 0, ER_fsync);
                result = FAIL;
            }
            else
            {
                /* Request issued: wait to be resumed, then check it */
                result = CSsuspend( CS_DIOW_MASK, 0, 0);
                if (result != OK)
                {
                    DIlru_set_di_error( &result, err_code,
                                        DI_LRU_CSSUSPEND_ERR,
                                        DI_GENERAL_ERR);
                }
#if defined(LARGEFILE64) && !defined(axp_osf)
                else if (aio_error64(&aio->aio) != 0)
#else
                else if (aio_error(&aio->aio) != 0)
#endif
                {
                    SETCLERR(err_code, 0, ER_fsync);
                    result = FAIL;
                }
            }
        }
        else
#endif /* xCL_ASYNC_IO */
        if (FSYNC(diop->di_fd) < 0)
        {
#ifdef xCL_092_NO_RAW_FSYNC
            /* AIX returns EINVAL on character special files */
            if (errno != EINVAL)
#endif /* xCL_092_NO_RAW_FSYNC */
            {
                SETCLERR(err_code, 0, ER_fsync);
                result = FAIL;
            }
        }
    }

    /* A failure to reach the slave outranks any other status */
    if (send_failure != OK)
        result = send_failure;

    if (result != OK)
        DIlru_set_di_error( &result, err_code, lru_detail,
                            DI_GENERAL_ERR);

    return( result );
}
static STATUS DI_galloc( DI_IO *f, i4 n, DI_OP *diop, i4 *end_of_file, CL_ERR_DESC *err_code) { STATUS big_status = OK, small_status =OK; STATUS intern_status = OK; register DI_SLAVE_CB *disl; i4 last_page; OFFSET_TYPE lseek_ret; do { # ifdef OS_THREADS_USED /* Seek/write must be semaphore protected */ if ((f->io_fprop & FPROP_PRIVATE) == 0) CS_synch_lock( &f->io_sem ); # endif /* OS_THREADS_USED */ if (Di_slave) { disl = diop->di_evcb; disl->file_op = DI_SL_ZALLOC; disl->length = n * f->io_bytes_per_page; /* Pass file properties to slave */ FPROP_COPY(f->io_fprop,disl->io_fprop); DI_slave_send( disl->dest_slave_no, diop, &big_status, &small_status, &intern_status ); if (( big_status != OK ) || ( small_status != OK )) break; if ( disl->status != OK ) { STRUCT_ASSIGN_MACRO(disl->errcode, *err_code); small_status = DI_BADEXTEND; break; } else { lseek_ret = disl->length; } } else { /* ** Running without slaves */ OFFSET_TYPE lseek_offset; i8 reservation; i4 buf_size; i4 bytes_written; i4 pages_remaining = n; i4 pages_at_a_time = Di_zero_bufsize / f->io_bytes_per_page; /* find current end-of-file */ lseek_ret = IIdio_get_file_eof(diop->di_fd, f->io_fprop); if ( lseek_ret == (OFFSET_TYPE)-1L ) { SETCLERR(err_code, 0, ER_lseek); small_status = DI_BADINFO; break; } else { lseek_offset = lseek_ret; /* If this filesystem can do reservations, see if we ** should reserve more space. ** Even though we have to write the zeros anyway, the ** reservation may well be larger than the zeroing ** buffer, and this way helps maintain contiguity. ** Not worth it for tiny writes. 
*/ if (pages_remaining > 2 && FPROP_ALLOCSTRATEGY_GET(f->io_fprop) == FPROP_ALLOCSTRATEGY_RESV) { reservation = lseek_offset + (pages_remaining * f->io_bytes_per_page); if (reservation > f->io_reserved_bytes) { /* Re-check in case some other server reserved */ small_status = IIdio_get_reserved(diop->di_fd, &f->io_reserved_bytes, err_code); if (small_status == OK && reservation > f->io_reserved_bytes) { small_status = IIdio_reserve(diop->di_fd, f->io_reserved_bytes, reservation - f->io_reserved_bytes, err_code); if (small_status == OK) { f->io_reserved_bytes = reservation; } else { if (small_status != DI_BADFILE) break; /* Fallocate not supported, turn off ** "reserve" strategy, continue without. */ small_status = OK; FPROP_ALLOCSTRATEGY_SET(f->io_fprop, FPROP_ALLOCSTRATEGY_VIRT); } } } } /* end reservations */ while ( pages_remaining > 0 ) { if ( pages_remaining < pages_at_a_time ) buf_size = pages_remaining * f->io_bytes_per_page; else buf_size = Di_zero_bufsize; # if defined(OS_THREADS_USED) && !defined(xCL_NO_ATOMIC_READ_WRITE_IO) bytes_written = #ifdef LARGEFILE64 pwrite64( diop->di_fd, Di_zero_buffer, buf_size, lseek_offset ); #else /* LARGEFILE64 */ pwrite( diop->di_fd, Di_zero_buffer, buf_size, lseek_offset ); #endif /* LARGEFILE64 */ # else /* OS_THREADS_USED !xCL_NO_ATOMIC_READ_WRITE_IO */ bytes_written = IIdio_write( diop->di_fd, Di_zero_buffer, buf_size, lseek_offset, &lseek_offset, f->io_fprop, err_code ); # endif /* OS_THREADS_USED */ if ( bytes_written != buf_size ) { SETCLERR(err_code, 0, ER_write); small_status = DI_BADEXTEND; break; } lseek_offset += buf_size; pages_remaining -= pages_at_a_time; } if ( small_status != OK ) break; } } *end_of_file = ( lseek_ret / f->io_bytes_per_page) - 1; } while (FALSE); if (big_status == OK && small_status == OK) { /* ** Update the current allocated end-of-file under mutex protection */ last_page = *end_of_file + n; if (last_page > f->io_alloc_eof) f->io_alloc_eof = last_page; } # ifdef OS_THREADS_USED if 
((f->io_fprop & FPROP_PRIVATE) == 0) CS_synch_unlock( &f->io_sem ); # endif /* OS_THREADS_USED */ if ( big_status != OK ) small_status = big_status; if ( small_status != OK ) DIlru_set_di_error( &small_status, err_code, intern_status, DI_GENERAL_ERR); return(small_status); }
/*{ ** Name: DI_slave_read - Request a slave to read page(s) from a file on disk. ** ** Description: ** This routine was created to make DIread more readable once ** error checking had been added. See DIread for comments. ** ** Inputs: ** f Pointer to the DI file ** context needed to do I/O. ** diop Pointer to dilru file context. ** buf Pointer to page(s) to read. ** page Value indicating page(s) to read. ** num_of_pages number of pages to read. ** ** Outputs: ** err_code Pointer to a variable used ** to return operating system ** errors. ** Returns: ** OK ** other errors. ** Exceptions: ** none ** ** Side Effects: ** none ** ** History: ** 30-nov-1992 (rmuth) ** Created. ** 10-mar-1993 (mikem) ** Changed the type of the first parameter to DI_send_ev_to_slave() and ** the 2nd parameter to DI_slave_send(), so that DI_send_ev_to_slave() ** could access the slave control block's status. ** This routine will now initialize the status to DI_INPROGRESS, before ** making the request and the slave will change the status once the ** operation is complete. ** 23-aug-1993 (bryanp) ** If memory segment isn't yet mapped, map it. 
*/ static STATUS DI_slave_read( DI_IO *f, DI_OP *diop, char *buf, i4 page, i4 num_of_pages, i4 *n, CL_ERR_DESC *err_code) { register DI_SLAVE_CB *disl; ME_SEG_INFO *seginfo; bool direct_read; STATUS small_status = OK, big_status = OK, intern_status = OK, status; /* unix variables */ int bytes_to_read; int bytes_read = 0; do { disl = diop->di_evcb; disl->pre_seek = (OFFSET_TYPE)(f->io_bytes_per_page) * (OFFSET_TYPE)(page); bytes_to_read = f->io_bytes_per_page * num_of_pages; /* ** determine whether we're reading into shared memory, and set ** up the segment ID and offset correctly */ seginfo = ME_find_seg( buf, (char *)buf + bytes_to_read, &ME_segpool); if (seginfo != 0 && (seginfo->flags & ME_SLAVEMAPPED_MASK) == 0) { status = DI_lru_slmapmem(seginfo, &intern_status, &small_status); if (status) break; } if (seginfo != 0 && (seginfo->flags & ME_SLAVEMAPPED_MASK) != 0) { MEcopy( (PTR)seginfo->key, sizeof(disl->seg_key), (PTR)disl->seg_key); disl->seg_offset = (char *)buf - (char *)seginfo->addr; direct_read = TRUE; } else { direct_read = FALSE; seginfo = ME_find_seg(disl->buf, disl->buf, &ME_segpool); if (seginfo) { MEcopy( (PTR)seginfo->key, sizeof(disl->seg_key), (PTR)disl->seg_key); disl->seg_offset= (char *)disl->buf - (char *)seginfo->addr; } else { small_status = DI_BADREAD; break; } } /* ** seek to place to read */ do { disl->file_op = DI_SL_READ; /* Send file properties to slave */ FPROP_COPY(f->io_fprop,disl->io_fprop); if (direct_read) disl->length = bytes_to_read; else disl->length = min(bytes_to_read, Cs_srv_block.cs_size_io_buf); DI_slave_send( disl->dest_slave_no, diop, &big_status, &small_status, &intern_status); if (( big_status != OK ) || ( small_status != OK )) break; if ((small_status = disl->status) != OK ) { STRUCT_ASSIGN_MACRO(disl->errcode, *err_code); small_status = DI_BADREAD; break; } else { if ( disl->length == 0 ) { small_status = DI_ENDFILE; #ifdef xDEV_TST TRdisplay("num_pages %d\n, read_op = %x", num_of_pages, 0x70000000); DIlru_dump(); 
#endif /* xDev_TST */ break; } } /* ** Read data ok */ if (! direct_read) { MEcopy((PTR)disl->buf, disl->length, (PTR)buf); buf += disl->length; } bytes_to_read -= disl->length; disl->pre_seek += (OFFSET_TYPE)disl->length; bytes_read += disl->length; } while ( bytes_to_read > 0); } while (FALSE); if ( bytes_read > 0 ) *n = bytes_read / f->io_bytes_per_page; if ( big_status != OK ) small_status = big_status; if (small_status != OK ) DIlru_set_di_error( &small_status, err_code, intern_status, DI_GENERAL_ERR); return(small_status); }