asmlinkage int sunos_shmsys(int op, unsigned long arg1, unsigned long arg2, unsigned long arg3)
{
	unsigned long raddr;
	int rval;

	switch (op) {
	case 0: /* do_shmat(): attach a shared memory area */
		rval = do_shmat((int)arg1, (char __user *)arg2, (int)arg3, &raddr);
		if (!rval)
			rval = (int) raddr;
		break;
	case 1: /* sys_shmctl(): modify shared memory area attr. */
		rval = sys_shmctl((int)arg1, (int)arg2, (struct shmid_ds __user *)arg3);
		break;
	case 2: /* sys_shmdt(): detach a shared memory area */
		rval = sys_shmdt((char __user *)arg1);
		break;
	case 3: /* sys_shmget(): get a shared memory area */
		rval = sys_shmget((key_t)arg1, (int)arg2, (int)arg3);
		break;
	default:
		rval = -EINVAL;
		break;
	}
	return rval;
}
asmlinkage long xtensa_shmat(int shmid, char __user *shmaddr, int shmflg)
{
	unsigned long ret;
	long err;

	err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA);
	if (err)
		return err;
	return (long)ret;
}
long compat_sys_shmat(int shmid, compat_uptr_t shmaddr, int shmflg)
{
	unsigned long ret;
	long err;

	err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret);
	if (err)
		return err;
	force_successful_syscall_return();
	return (long)ret;
}
/*
 * Native ABI, i.e. the O32 or N64 version.
 */
asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr)
{
	unsigned long raddr;
	int err;

	err = do_shmat(shmid, shmaddr, shmflg, &raddr);
	if (err)
		return err;
	return put_user(raddr, addr);
}
asmlinkage unsigned long ia64_shmat(int shmid, void __user *shmaddr, int shmflg)
{
	unsigned long raddr;
	int retval;

	retval = do_shmat(shmid, shmaddr, shmflg, &raddr);
	if (retval < 0)
		return retval;
	force_successful_syscall_return();
	return raddr;
}
long compat_sys_shmat(int first, int second, compat_uptr_t third, int version, void __user *uptr)
{
	int err;
	unsigned long raddr;
	compat_ulong_t __user *uaddr;

	if (version == 1)
		return -EINVAL;
	err = do_shmat(first, uptr, second, &raddr);
	if (err < 0)
		return err;
	uaddr = compat_ptr(third);
	return put_user(raddr, uaddr);
}
asmlinkage int sunos_shmsys(int op, u32 arg1, u32 arg2, u32 arg3)
{
	struct shmid_ds ksds;
	unsigned long raddr;
	mm_segment_t old_fs = get_fs();
	int rval;

	switch (op) {
	case 0: /* do_shmat(): attach a shared memory area */
		rval = do_shmat((int)arg1, (char __user *)(unsigned long)arg2, (int)arg3, &raddr);
		if (!rval)
			rval = (int) raddr;
		break;
	case 1: /* sys_shmctl(): modify shared memory area attr. */
		if (!sunos_shmid_get((struct shmid_ds32 __user *)(unsigned long)arg3, &ksds)) {
			set_fs(KERNEL_DS);
			rval = sys_shmctl((int)arg1, (int)arg2, (struct shmid_ds __user *)&ksds);
			set_fs(old_fs);
			if (!rval)
				rval = sunos_shmid_put((struct shmid_ds32 __user *)(unsigned long)arg3, &ksds);
		} else
			rval = -EFAULT;
		break;
	case 2: /* sys_shmdt(): detach a shared memory area */
		rval = sys_shmdt((char __user *)(unsigned long)arg1);
		break;
	case 3: /* sys_shmget(): get a shared memory area */
		rval = sys_shmget((key_t)arg1, (int)arg2, (int)arg3);
		break;
	default:
		rval = -EINVAL;
		break;
	}
	return rval;
}
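/* All of the kernel entry points above ultimately service the same userspace lifecycle:
 * shmget() to create or look up a segment, shmat() to map it (do_shmat in the kernel),
 * shmctl() to inspect or modify it, and shmdt() to unmap it. A minimal userspace sketch
 * of that lifecycle, for illustration only (the key, size, and permissions are arbitrary):
 */
#include <stdio.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/shm.h>

int main(void)
{
	struct shmid_ds ds;
	/* Create a private 4 KiB segment; the kernel picks a unique id. */
	int shmid = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
	if (shmid == -1) {
		perror("shmget");
		return 1;
	}
	/* Attach at a kernel-chosen address; this is what do_shmat() services. */
	char *p = shmat(shmid, NULL, 0);
	if (p == (char *)-1) {
		perror("shmat");
		return 1;
	}
	strcpy(p, "hello");
	/* IPC_STAT fills shm_nattch, shm_ctime, etc., as used heavily below. */
	if (shmctl(shmid, IPC_STAT, &ds) == 0)
		printf("nattch = %lu\n", (unsigned long)ds.shm_nattch);
	shmdt(p);			/* detach: sys_shmdt */
	shmctl(shmid, IPC_RMID, NULL);	/* mark the segment for removal */
	return 0;
}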
int mu_rndwn_replpool(replpool_identifier *replpool_id, repl_inst_hdr_ptr_t repl_inst_filehdr, int shm_id, boolean_t *ipc_rmvd)
{
	int			semval, status, save_errno, nattch;
	char			*instfilename, pool_type;
	sm_uc_ptr_t		start_addr;
	struct shmid_ds		shm_buf;
	unix_db_info		*udi;
	sgmnt_addrs		*csa;
	boolean_t		anticipatory_freeze_available, force_attach;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	assert(INVALID_SHMID != shm_id);
	instfilename = replpool_id->instfilename;
	pool_type = replpool_id->pool_type;
	assert((JNLPOOL_SEGMENT == pool_type) || (RECVPOOL_SEGMENT == pool_type));
	anticipatory_freeze_available = ANTICIPATORY_FREEZE_AVAILABLE;
	force_attach = (jgbl.onlnrlbk || (!jgbl.mur_rollback && !argumentless_rundown && anticipatory_freeze_available));
	if (-1 == shmctl(shm_id, IPC_STAT, &shm_buf))
	{
		save_errno = errno;
		ISSUE_REPLPOOLINST_AND_RETURN(save_errno, shm_id, instfilename, "shmctl()");
	}
	nattch = shm_buf.shm_nattch;
	if ((0 != nattch) && !force_attach)
	{
		util_out_print("Replpool segment (id = !UL) for replication instance !AD is in use by another process.",
				TRUE, shm_id, LEN_AND_STR(instfilename));
		return -1;
	}
	if (-1 == (sm_long_t)(start_addr = (sm_uc_ptr_t)do_shmat(shm_id, 0, 0)))
	{
		save_errno = errno;
		ISSUE_REPLPOOLINST_AND_RETURN(save_errno, shm_id, instfilename, "shmat()");
	}
	ESTABLISH_RET(mu_rndwn_replpool_ch, -1);
	/* assert that the identifiers are at the top of the replpool control structure */
	assert(0 == offsetof(jnlpool_ctl_struct, jnlpool_id));
	assert(0 == offsetof(recvpool_ctl_struct, recvpool_id));
	memcpy((void *)replpool_id, (void *)start_addr, SIZEOF(replpool_identifier));
	if (memcmp(replpool_id->label, GDS_RPL_LABEL, GDS_LABEL_SZ - 1))
	{
		if (!memcmp(replpool_id->label, GDS_RPL_LABEL, GDS_LABEL_SZ - 3))
			util_out_print("Incorrect version for the replpool segment (id = !UL) belonging to replication instance !AD",
					TRUE, shm_id, LEN_AND_STR(instfilename));
		else
			util_out_print("Incorrect replpool format for the segment (id = !UL) belonging to replication instance !AD",
					TRUE, shm_id, LEN_AND_STR(instfilename));
		DETACH_AND_RETURN(start_addr, shm_id, instfilename);
	}
	if (memcmp(replpool_id->now_running, gtm_release_name, gtm_release_name_len + 1))
	{
		util_out_print("Attempt to access with version !AD, while already using !AD for replpool segment (id = !UL)"
				" belonging to replication instance !AD.", TRUE, gtm_release_name_len, gtm_release_name,
				LEN_AND_STR(replpool_id->now_running), shm_id, LEN_AND_STR(instfilename));
		DETACH_AND_RETURN(start_addr, shm_id, instfilename);
	}
	/* Assert that if we haven't yet attached to the journal pool, we have the corresponding global vars set to NULL */
	assert((JNLPOOL_SEGMENT != pool_type) || ((NULL == jnlpool.jnlpool_ctl) && (NULL == jnlpool_ctl)));
	if (JNLPOOL_SEGMENT == pool_type)
	{	/* Initialize variables to simulate a "jnlpool_init". This is required by "repl_inst_flush_jnlpool" called below */
		jnlpool_ctl = jnlpool.jnlpool_ctl = (jnlpool_ctl_ptr_t)start_addr;
		assert(NULL != jnlpool.jnlpool_dummy_reg);
		udi = FILE_INFO(jnlpool.jnlpool_dummy_reg);
		csa = &udi->s_addrs;
		csa->critical = (mutex_struct_ptr_t)((sm_uc_ptr_t)jnlpool.jnlpool_ctl + JNLPOOL_CTL_SIZE);
		csa->nl = (node_local_ptr_t)((sm_uc_ptr_t)csa->critical + CRIT_SPACE + SIZEOF(mutex_spin_parms_struct));
		/* secshr_db_clnup uses this relationship */
		assert(jnlpool.jnlpool_ctl->filehdr_off);
		assert(jnlpool.jnlpool_ctl->srclcl_array_off > jnlpool.jnlpool_ctl->filehdr_off);
		assert(jnlpool.jnlpool_ctl->sourcelocal_array_off > jnlpool.jnlpool_ctl->srclcl_array_off);
		/* Initialize "jnlpool.repl_inst_filehdr" and related fields as "repl_inst_flush_jnlpool" relies on that */
		jnlpool.repl_inst_filehdr = (repl_inst_hdr_ptr_t)((sm_uc_ptr_t)jnlpool.jnlpool_ctl
									+ jnlpool.jnlpool_ctl->filehdr_off);
		jnlpool.gtmsrc_lcl_array = (gtmsrc_lcl_ptr_t)((sm_uc_ptr_t)jnlpool.jnlpool_ctl
									+ jnlpool.jnlpool_ctl->srclcl_array_off);
		jnlpool.gtmsource_local_array = (gtmsource_local_ptr_t)((sm_uc_ptr_t)jnlpool.jnlpool_ctl
									+ jnlpool.jnlpool_ctl->sourcelocal_array_off);
		if (0 == nattch)
		{	/* No one attached. So, we can safely flush the journal pool so that the gtmsrc_lcl structures in the
			 * jnlpool and disk are in sync with each other. More importantly we are about to remove the jnlpool
			 * so we better get things in sync before that. If the anticipatory freeze scheme is in effect, then we
			 * need to keep the journal pool up and running. So, don't reset the crash field in the instance file
			 * header (dictated by the second parameter to repl_inst_flush_jnlpool below).
			 * Note:
			 * If mu_rndwn_repl_instance created new semaphores (in mu_replpool_remove_sem), we need to flush those
			 * to the instance file as well. So, override the jnlpool_semid and jnlpool_semid_ctime with the new
			 * values.
			 */
			assert((INVALID_SEMID != repl_inst_filehdr->jnlpool_semid)
				&& (0 != repl_inst_filehdr->jnlpool_semid_ctime));
			jnlpool.repl_inst_filehdr->jnlpool_semid = repl_inst_filehdr->jnlpool_semid;
			jnlpool.repl_inst_filehdr->jnlpool_semid_ctime = repl_inst_filehdr->jnlpool_semid_ctime;
			repl_inst_flush_jnlpool(FALSE, !anticipatory_freeze_available);
			assert(!jnlpool.repl_inst_filehdr->crash || anticipatory_freeze_available);
			/* Refresh local copy (repl_inst_filehdr) with the copy that was just flushed (jnlpool.repl_inst_filehdr) */
			memcpy(repl_inst_filehdr, jnlpool.repl_inst_filehdr, SIZEOF(repl_inst_hdr));
			if (!anticipatory_freeze_available)
			{	/* Now that the jnlpool has been flushed and there is going to be no journal pool, reset
				 * "jnlpool.repl_inst_filehdr" as otherwise other routines (e.g. "repl_inst_recvpool_reset") are
				 * affected by whether this is NULL or not.
				 */
				jnlpool.jnlpool_ctl = NULL;
				jnlpool_ctl = NULL;
				jnlpool.gtmsrc_lcl_array = NULL;
				jnlpool.gtmsource_local_array = NULL;
				jnlpool.jnldata_base = NULL;
				jnlpool.repl_inst_filehdr = NULL;
			}
		}
		/* else we are ONLINE ROLLBACK. repl_inst_flush_jnlpool will be done later after gvcst_init in mur_open_files */
	}
	if ((0 == nattch) && (!anticipatory_freeze_available || (RECVPOOL_SEGMENT == pool_type)))
	{
		if (-1 == shmdt((caddr_t)start_addr))
		{
			save_errno = errno;
			ISSUE_REPLPOOLINST_AND_RETURN(save_errno, shm_id, instfilename, "shmdt()");
		}
		if (0 != shm_rmid(shm_id))
		{
			save_errno = errno;
			ISSUE_REPLPOOLINST_AND_RETURN(save_errno, shm_id, instfilename, "shm_rmid()");
		}
		if (JNLPOOL_SEGMENT == pool_type)
		{
			repl_inst_filehdr->jnlpool_shmid = INVALID_SHMID;
			repl_inst_filehdr->jnlpool_shmid_ctime = 0;
			assert((NULL == jnlpool.jnlpool_ctl) && (NULL == jnlpool_ctl));
			*ipc_rmvd = TRUE;
		} else
		{
			repl_inst_filehdr->recvpool_shmid = INVALID_SHMID;
			repl_inst_filehdr->recvpool_shmid_ctime = 0;
			*ipc_rmvd = TRUE;
		}
	} else
	{	/* Else we are ONLINE ROLLBACK or anticipatory freeze is in effect and so we want to keep the journal pool
		 * available for the duration of the rollback. Do not remove and/or reset the fields in the file header.
		 */
		assert((JNLPOOL_SEGMENT != pool_type) || ((NULL != jnlpool.jnlpool_ctl) && (NULL != jnlpool_ctl)));
		if (JNLPOOL_SEGMENT == pool_type)
			*ipc_rmvd = FALSE;
		if (RECVPOOL_SEGMENT == pool_type)
			*ipc_rmvd = FALSE;
	}
	REVERT;
	return 0;
}
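/* The rundown above follows a pattern that recurs in the validation routines later in this
 * section: IPC_STAT the segment, bail out if shm_nattch shows other attachers, otherwise
 * attach, validate the identifying header, detach, and remove. A condensed sketch of that
 * pattern using only portable SysV calls; rundown_segment() is a hypothetical helper and
 * omits the label/version checks and instance-file bookkeeping the real code performs:
 */
#include <errno.h>
#include <sys/ipc.h>
#include <sys/shm.h>

int rundown_segment(int shmid)
{
	struct shmid_ds ds;
	void *addr;

	if (shmctl(shmid, IPC_STAT, &ds) == -1)
		return errno;
	if (ds.shm_nattch != 0)		/* in use by another process; leave it alone */
		return EBUSY;
	addr = shmat(shmid, NULL, 0);
	if (addr == (void *)-1)
		return errno;
	/* ... validate identifying fields at the start of the segment here ... */
	if (shmdt(addr) == -1)
		return errno;
	if (shmctl(shmid, IPC_RMID, NULL) == -1)
		return errno;
	return 0;
}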
SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second,
		unsigned long, third, void __user *, ptr, long, fifth)
{
	long err;

	/* No need for backward compatibility. We can start fresh... */
	if (call <= SEMCTL) {
		switch (call) {
		case SEMOP:
			err = sys_semtimedop(first, ptr, (unsigned)second, NULL);
			goto out;
		case SEMTIMEDOP:
			err = sys_semtimedop(first, ptr, (unsigned)second,
				(const struct timespec __user *)(unsigned long)fifth);
			goto out;
		case SEMGET:
			err = sys_semget(first, (int)second, (int)third);
			goto out;
		case SEMCTL: {
			err = sys_semctl(first, second, (int)third | IPC_64, (union semun) ptr);
			goto out;
		}
		default:
			err = -ENOSYS;
			goto out;
		}
	}
	if (call <= MSGCTL) {
		switch (call) {
		case MSGSND:
			err = sys_msgsnd(first, ptr, (size_t)second, (int)third);
			goto out;
		case MSGRCV:
			err = sys_msgrcv(first, ptr, (size_t)second, fifth, (int)third);
			goto out;
		case MSGGET:
			err = sys_msgget((key_t)first, (int)second);
			goto out;
		case MSGCTL:
			err = sys_msgctl(first, (int)second | IPC_64, ptr);
			goto out;
		default:
			err = -ENOSYS;
			goto out;
		}
	}
	if (call <= SHMCTL) {
		switch (call) {
		case SHMAT: {
			ulong raddr;
			err = do_shmat(first, ptr, (int)second, &raddr);
			if (!err) {
				if (put_user(raddr, (ulong __user *) third))
					err = -EFAULT;
			}
			goto out;
		}
		case SHMDT:
			err = sys_shmdt(ptr);
			goto out;
		case SHMGET:
			err = sys_shmget(first, (size_t)second, (int)third);
			goto out;
		case SHMCTL:
			err = sys_shmctl(first, (int)second | IPC_64, ptr);
			goto out;
		default:
			err = -ENOSYS;
			goto out;
		}
	} else {
		err = -ENOSYS;
	}
out:
	return err;
}
/*
 * sys_ipc() is the de-multiplexer for the SysV IPC calls.
 *
 * This is really horribly ugly. This will be removed with the new toolchain.
 */
asmlinkage long sys_ipc(uint call, int first, int second, int third, void *ptr, long fifth)
{
	int version, ret;

	version = call >> 16; /* hack for backward compatibility */
	call &= 0xffff;

	ret = -EINVAL;
	switch (call) {
	case SEMOP:
		ret = sys_semop(first, (struct sembuf *)ptr, second);
		break;
	case SEMGET:
		ret = sys_semget(first, second, third);
		break;
	case SEMCTL: {
		union semun fourth;
		if (!ptr)
			break;
		ret = (access_ok(VERIFY_READ, ptr, sizeof(long)) ? 0 : -EFAULT)
			|| (get_user(fourth.__pad, (void **)ptr));
		if (ret)
			break;
		ret = sys_semctl(first, second, third, fourth);
		break;
	}
	case MSGSND:
		ret = sys_msgsnd(first, (struct msgbuf *) ptr, second, third);
		break;
	case MSGRCV:
		switch (version) {
		case 0: {
			struct ipc_kludge tmp;
			if (!ptr)
				break;
			ret = (access_ok(VERIFY_READ, ptr, sizeof(tmp)) ? 0 : -EFAULT)
				|| copy_from_user(&tmp, (struct ipc_kludge *) ptr, sizeof(tmp));
			if (ret)
				break;
			ret = sys_msgrcv(first, tmp.msgp, second, tmp.msgtyp, third);
			break;
		}
		default:
			ret = sys_msgrcv(first, (struct msgbuf *) ptr, second, fifth, third);
			break;
		}
		break;
	case MSGGET:
		ret = sys_msgget((key_t) first, second);
		break;
	case MSGCTL:
		ret = sys_msgctl(first, second, (struct msqid_ds *) ptr);
		break;
	case SHMAT:
		switch (version) {
		default: {
			ulong raddr;
			ret = access_ok(VERIFY_WRITE, (ulong *) third, sizeof(ulong)) ? 0 : -EFAULT;
			if (ret)
				break;
			ret = do_shmat(first, (char *) ptr, second, &raddr);
			if (ret)
				break;
			ret = put_user(raddr, (ulong *) third);
			break;
		}
		case 1: /* iBCS2 emulator entry point */
			if (!segment_eq(get_fs(), get_ds()))
				break;
			ret = do_shmat(first, (char *) ptr, second, (ulong *) third);
			break;
		}
		break;
	case SHMDT:
		ret = sys_shmdt((char *)ptr);
		break;
	case SHMGET:
		ret = sys_shmget(first, second, third);
		break;
	case SHMCTL:
		ret = sys_shmctl(first, second, (struct shmid_ds *) ptr);
		break;
	}
	return ret;
}
void db_init(gd_region *reg, sgmnt_data_ptr_t tsd)
{
	static boolean_t	mutex_init_done = FALSE;
	boolean_t		is_bg, read_only;
	char			machine_name[MAX_MCNAMELEN];
	file_control		*fc;
	int			gethostname_res, stat_res, mm_prot, errno_save;	/* errno_save added: used below but missing from the original declarations */
	int4			status, semval, dblksize, fbwsize;
	sm_long_t		status_l;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	struct sembuf		sop[3];
	struct stat		stat_buf;
	union semun		semarg;
	struct semid_ds		semstat;
	struct shmid_ds		shmstat;
	struct statvfs		dbvfs;
	uint4			sopcnt;
	unix_db_info		*udi;
#ifdef periodic_timer_removed
	void			periodic_flush_check();
#endif

	error_def(ERR_CLSTCONFLICT);
	error_def(ERR_CRITSEMFAIL);
	error_def(ERR_DBNAMEMISMATCH);
	error_def(ERR_DBIDMISMATCH);
	error_def(ERR_NLMISMATCHCALC);
	error_def(ERR_REQRUNDOWN);
	error_def(ERR_SYSCALL);

	assert(tsd->acc_meth == dba_bg || tsd->acc_meth == dba_mm);
	is_bg = (dba_bg == tsd->acc_meth);
	read_only = reg->read_only;
	new_dbinit_ipc = FALSE;	/* we did not create a new ipc resource */
	udi = FILE_INFO(reg);
	memset(machine_name, 0, sizeof(machine_name));
	if (GETHOSTNAME(machine_name, MAX_MCNAMELEN, gethostname_res))
		rts_error(VARLSTCNT(5) ERR_TEXT, 2, LEN_AND_LIT("Unable to get the hostname"), errno);
	assert(strlen(machine_name) < MAX_MCNAMELEN);
	csa = &udi->s_addrs;
	csa->db_addrs[0] = csa->db_addrs[1] = csa->lock_addrs[0] = NULL;	/* to help in dbinit_ch and gds_rundown */
	reg->opening = TRUE;
	/* Create the ftok semaphore for this region.
	 * We do not want the ftok counter semaphore to become 2 for a mupip journal recover process.
	 */
	if (!ftok_sem_get(reg, !mupip_jnl_recover, GTM_ID, FALSE))
		rts_error(VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg));
	/* At this point we have the ftok_semid semaphore based on the ftok key.
	 * Any ftok-conflicted region will block at this point.
	 * Say, a.dat and b.dat both have the same ftok and we have process A accessing a.dat and
	 * process B accessing b.dat. In this case only one can continue to do db_init().
	 */
	fc = reg->dyn.addr->file_cntl;
	fc->file_type = reg->dyn.addr->acc_meth;
	fc->op = FC_READ;
	fc->op_buff = (sm_uc_ptr_t)tsd;
	fc->op_len = sizeof(*tsd);
	fc->op_pos = 1;
	dbfilop(fc);		/* Read file header */
	udi->shmid = tsd->shmid;
	udi->semid = tsd->semid;
	udi->sem_ctime = tsd->sem_ctime.ctime;
	udi->shm_ctime = tsd->shm_ctime.ctime;
	dbsecspc(reg, tsd);	/* Find db segment size */
	if (!mupip_jnl_recover)
	{
		if (INVALID_SEMID == udi->semid)
		{
			if (0 != udi->sem_ctime || INVALID_SHMID != udi->shmid || 0 != udi->shm_ctime)
				/* We must have something wrong in the protocol or the code, if this happens */
				GTMASSERT;
			/* Create a new semaphore using IPC_PRIVATE. The system guarantees a unique id. */
			if (-1 == (udi->semid = semget(IPC_PRIVATE, FTOK_SEM_PER_ID, RWDALL | IPC_CREAT)))
			{
				udi->semid = INVALID_SEMID;
				rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
					ERR_TEXT, 2, LEN_AND_LIT("Error with database control semget"), errno);
			}
			udi->shmid = INVALID_SHMID;	/* reset shmid so dbinit_ch does not get confused in case we go there */
			new_dbinit_ipc = TRUE;
			tsd->semid = udi->semid;
			semarg.val = GTM_ID;
			/* The following sets semaphore number 2 (=FTOK_SEM_PER_ID - 1) to the value GTM_ID.
			 * In case we have an orphaned semaphore for some reason, mupip rundown will be
			 * able to identify GTM semaphores from the value and can remove them.
			 */
			if (-1 == semctl(udi->semid, FTOK_SEM_PER_ID - 1, SETVAL, semarg))
				rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
					ERR_TEXT, 2, LEN_AND_LIT("Error with database control semctl SETVAL"), errno);
			/* Warning: We must read the sem_ctime using IPC_STAT after SETVAL, which changes it.
			 * We must NOT do any more SETVALs after this. Our design is to use
			 * sem_ctime as the creation time of the semaphore.
			 */
			semarg.buf = &semstat;
			if (-1 == semctl(udi->semid, FTOK_SEM_PER_ID - 1, IPC_STAT, semarg))
				rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
					ERR_TEXT, 2, LEN_AND_LIT("Error with database control semctl IPC_STAT"), errno);
			tsd->sem_ctime.ctime = udi->sem_ctime = semarg.buf->sem_ctime;
		} else
		{
			if (INVALID_SHMID == udi->shmid)
				/* if mu_rndwn_file gets standalone access of this region and somehow the mupip
				 * process crashes, we can have semid != -1 but shmid == -1 */
				rts_error(VARLSTCNT(10) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(tsd->machine_name),
					ERR_TEXT, 2, LEN_AND_LIT("semid is valid but shmid is invalid"));
			semarg.buf = &semstat;
			if (-1 == semctl(udi->semid, 0, IPC_STAT, semarg))
				/* file header has a valid semid but the semaphore does not exist */
				rts_error(VARLSTCNT(6) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(tsd->machine_name));
			else if (semarg.buf->sem_ctime != tsd->sem_ctime.ctime)
				rts_error(VARLSTCNT(10) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(tsd->machine_name),
					ERR_TEXT, 2, LEN_AND_LIT("sem_ctime does not match"));
			if (-1 == shmctl(udi->shmid, IPC_STAT, &shmstat))
				rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
					ERR_TEXT, 2, LEN_AND_LIT("Error with database control shmctl"), errno);
			else if (shmstat.shm_ctime != tsd->shm_ctime.ctime)
				rts_error(VARLSTCNT(10) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(tsd->machine_name),
					ERR_TEXT, 2, LEN_AND_LIT("shm_ctime does not match"));
		}
		/* We already have the ftok semaphore of this region, so just plainly do the semaphore operation */
		/* This is the database access control semaphore for any region */
		sop[0].sem_num = 0; sop[0].sem_op = 0;	/* Wait for 0 */
		sop[1].sem_num = 0; sop[1].sem_op = 1;	/* Lock */
		sopcnt = 2;
		if (!read_only)
		{
			sop[2].sem_num = 1; sop[2].sem_op = 1;	/* increment r/w access counter */
			sopcnt = 3;
		}
		sop[0].sem_flg = sop[1].sem_flg = sop[2].sem_flg = SEM_UNDO | IPC_NOWAIT;
		SEMOP(udi->semid, sop, sopcnt, status);
		if (-1 == status)
		{
			errno_save = errno;
			gtm_putmsg(VARLSTCNT(4) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg));
			rts_error(VARLSTCNT(8) ERR_SYSCALL, 5, RTS_ERROR_LITERAL("semop()"), CALLFROM, errno_save);
		}
	} else	/* for mupip_jnl_recover we were already in mu_rndwn_file and got the "semid" semaphore */
	{
		if (INVALID_SEMID == udi->semid || 0 == udi->sem_ctime)
			/* make sure mu_rndwn_file() has reset the created semaphore for standalone access */
			GTMASSERT;
		if (INVALID_SHMID != udi->shmid || 0 != udi->shm_ctime)
			/* make sure mu_rndwn_file() has reset the shared memory */
			GTMASSERT;
		udi->shmid = INVALID_SHMID;	/* reset shmid so dbinit_ch does not get confused in case we go there */
		new_dbinit_ipc = TRUE;
	}
	sem_incremented = TRUE;
	if (new_dbinit_ipc)
	{	/* Create new shared memory using IPC_PRIVATE. The system guarantees a unique id. */
#ifdef __MVS__
		if (-1 == (status_l = udi->shmid = shmget(IPC_PRIVATE, ROUND_UP(reg->sec_size, MEGA_BOUND),
								__IPC_MEGA | IPC_CREAT | RWDALL)))
#else
		if (-1 == (status_l = udi->shmid = shmget(IPC_PRIVATE, reg->sec_size, RWDALL | IPC_CREAT)))
#endif
		{
			udi->shmid = status_l = INVALID_SHMID;
			rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
				ERR_TEXT, 2, LEN_AND_LIT("Error with database shmget"), errno);
		}
		tsd->shmid = udi->shmid;
		if (-1 == shmctl(udi->shmid, IPC_STAT, &shmstat))
			rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
				ERR_TEXT, 2, LEN_AND_LIT("Error with database control shmctl"), errno);
		tsd->shm_ctime.ctime = udi->shm_ctime = shmstat.shm_ctime;
	}
#ifdef DEBUG_DB64
	status_l = (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)do_shmat(udi->shmid, next_smseg, SHM_RND));
	next_smseg = (sm_uc_ptr_t)ROUND_UP((sm_long_t)(next_smseg + reg->sec_size), SHMAT_ADDR_INCS);
#else
	status_l = (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)do_shmat(udi->shmid, 0, SHM_RND));
#endif
	if (-1 == status_l)
	{
		rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
			ERR_TEXT, 2, LEN_AND_LIT("Error attaching to database shared memory"), errno);
	}
	csa->nl = (node_local_ptr_t)csa->db_addrs[0];
	csa->critical = (mutex_struct_ptr_t)(csa->db_addrs[0] + NODE_LOCAL_SIZE);
	assert(((int)csa->critical & 0xf) == 0);	/* critical should be 16-byte aligned */
#ifdef CACHELINE_SIZE
	assert(0 == ((int)csa->critical & (CACHELINE_SIZE - 1)));
#endif
	/* Note: Here we check jnl_state from the database file and its value cannot change without standalone access.
	 * The jnl_buff buffer should be initialized irrespective of read/write process.
	 */
	JNL_INIT(csa, reg, tsd);
	csa->backup_buffer = (backup_buff_ptr_t)(csa->db_addrs[0] + NODE_LOCAL_SPACE + JNL_SHARE_SIZE(tsd));
	csa->lock_addrs[0] = (sm_uc_ptr_t)csa->backup_buffer + BACKUP_BUFFER_SIZE + 1;
	csa->lock_addrs[1] = csa->lock_addrs[0] + LOCK_SPACE_SIZE(tsd) - 1;
	csa->total_blks = tsd->trans_hist.total_blks;	/* For test to see if file has extended */
	if (new_dbinit_ipc)
	{
		memset(csa->nl, 0, sizeof(*csa->nl));	/* We allocated shared storage -- we have to init it */
		if (JNL_ALLOWED(csa))
		{	/* initialize jb->cycle to a value different from the initial value of jpc->cycle (0). although this is
			 * not necessary right now, in the future, the plan is to change jnl_ensure_open() to only do a cycle
			 * mismatch check in order to determine whether to call jnl_file_open() or not. this is in preparation
			 * for that.
			 */
			csa->jnl->jnl_buff->cycle = 1;
		}
	}
	if (is_bg)
		csd = csa->hdr = (sgmnt_data_ptr_t)(csa->lock_addrs[1] + 1 + CACHE_CONTROL_SIZE(tsd));
	else
	{
		csa->acc_meth.mm.mmblk_state = (mmblk_que_heads_ptr_t)(csa->lock_addrs[1] + 1);
		FSTAT_FILE(udi->fd, &stat_buf, stat_res);
		if (-1 == stat_res)
			rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), errno);
		mm_prot = read_only ? PROT_READ : (PROT_READ | PROT_WRITE);
#ifdef DEBUG_DB64
		if (-1 == (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)mmap((caddr_t)get_mmseg((size_t)stat_buf.st_size),
								(size_t)stat_buf.st_size, mm_prot,
								GTM_MM_FLAGS, udi->fd, (off_t)0)))
			rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), errno);
		put_mmseg((caddr_t)(csa->db_addrs[0]), (size_t)stat_buf.st_size);
#else
		if (-1 == (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)mmap((caddr_t)NULL, (size_t)stat_buf.st_size,
								mm_prot, GTM_MM_FLAGS, udi->fd, (off_t)0)))
			rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), errno);
#endif
		csa->db_addrs[1] = csa->db_addrs[0] + stat_buf.st_size - 1;
		csd = csa->hdr = (sgmnt_data_ptr_t)csa->db_addrs[0];
	}
	if (!csa->nl->glob_sec_init)
	{
		assert(new_dbinit_ipc);
		if (is_bg)
			*csd = *tsd;
		if (csd->machine_name[0])	/* crash occurred */
		{
			if (0 != memcmp(csd->machine_name, machine_name, MAX_MCNAMELEN))	/* crashed on some other node */
				rts_error(VARLSTCNT(6) ERR_CLSTCONFLICT, 4, DB_LEN_STR(reg), LEN_AND_STR(csd->machine_name));
			else
				rts_error(VARLSTCNT(6) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(csd->machine_name));
		}
		if (is_bg)
		{
			bt_malloc(csa);
			csa->nl->cache_off = -CACHE_CONTROL_SIZE(tsd);
			db_csh_ini(csa);
		}
		db_csh_ref(csa);
		strcpy(csa->nl->machine_name, machine_name);					/* machine name */
		assert(MAX_REL_NAME > gtm_release_name_len);
		memcpy(csa->nl->now_running, gtm_release_name, gtm_release_name_len + 1);	/* GT.M release name */
		memcpy(csa->nl->label, GDS_LABEL, GDS_LABEL_SZ - 1);				/* GDS label */
		memcpy(csa->nl->fname, reg->dyn.addr->fname, reg->dyn.addr->fname_len);		/* database filename */
		csa->nl->creation_date_time = csd->creation.date_time;
		csa->nl->highest_lbm_blk_changed = -1;
		csa->nl->wcs_timers = -1;
		csa->nl->nbb = BACKUP_NOT_IN_PROGRESS;
		csa->nl->unique_id.uid = FILE_INFO(reg)->fileid;	/* save what file we initialized this storage for */
		/* save pointers in csa to access shared memory */
		csa->nl->critical = (sm_off_t)((sm_uc_ptr_t)csa->critical - (sm_uc_ptr_t)csa->nl);
		if (JNL_ALLOWED(csa))
			csa->nl->jnl_buff = (sm_off_t)((sm_uc_ptr_t)csa->jnl->jnl_buff - (sm_uc_ptr_t)csa->nl);
		csa->nl->backup_buffer = (sm_off_t)((sm_uc_ptr_t)csa->backup_buffer - (sm_uc_ptr_t)csa->nl);
		csa->nl->hdr = (sm_off_t)((sm_uc_ptr_t)csd - (sm_uc_ptr_t)csa->nl);
		csa->nl->lock_addrs = (sm_off_t)((sm_uc_ptr_t)csa->lock_addrs[0] - (sm_uc_ptr_t)csa->nl);
		if (!read_only || is_bg)
		{
			csd->trans_hist.early_tn = csd->trans_hist.curr_tn;
			csd->max_update_array_size = csd->max_non_bm_update_array_size
				= ROUND_UP2(MAX_NON_BITMAP_UPDATE_ARRAY_SIZE(csd), UPDATE_ARRAY_ALIGN_SIZE);
			csd->max_update_array_size += ROUND_UP2(MAX_BITMAP_UPDATE_ARRAY_SIZE, UPDATE_ARRAY_ALIGN_SIZE);
			/* add current db_csh counters into the cumulative counters and reset the current counters */
#define TAB_DB_CSH_ACCT_REC(COUNTER, DUMMY1, DUMMY2)				\
			csd->COUNTER.cumul_count += csd->COUNTER.curr_count;	\
			csd->COUNTER.curr_count = 0;
#include "tab_db_csh_acct_rec.h"
#undef TAB_DB_CSH_ACCT_REC
		}
		if (!read_only)
		{
			if (is_bg)
			{
				assert(memcmp(csd, GDS_LABEL, GDS_LABEL_SZ - 1) == 0);
				LSEEKWRITE(udi->fd, (off_t)0, (sm_uc_ptr_t)csd, sizeof(sgmnt_data), errno_save);
				if (0 != errno_save)
				{
					rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
						ERR_TEXT, 2, LEN_AND_LIT("Error with database write"), errno_save);
				}
			}
		}
		reg->dyn.addr->ext_blk_count = csd->extension_size;
		mlk_shr_init(csa->lock_addrs[0], csd->lock_space_size, csa, (FALSE == read_only));
		DEBUG_ONLY(locknl = csa->nl;)	/* for DEBUG_ONLY LOCK_HIST macro */
int mur_forward_multi_proc(reg_ctl_list *rctl)
{
	boolean_t		multi_proc, this_reg_stuck, release_latch, ok_to_play;
	boolean_t		cancelled_dbsync_timer, cancelled_timer;
	reg_ctl_list		*rctl_top, *prev_rctl;
	jnl_ctl_list		*jctl;
	gd_region		*reg;
	sgmnt_addrs		*csa;
	seq_num			rec_token_seq;
	jnl_tm_t		rec_time;
	enum broken_type	recstat;
	jnl_record		*rec;
	enum jnl_record_type	rectype;
	char			errstr[256];
	int			i, rctl_index, save_errno, num_procs_stuck, num_reg_stuck;
	uint4			status, regcnt_stuck, num_partners, start_hrtbt_cntr;
	forw_multi_struct	*forw_multi;
	shm_forw_multi_t	*sfm;
	multi_struct		*multi;
	jnl_tm_t		adjusted_resolve_time;
	shm_reg_ctl_t		*shm_rctl_start, *shm_rctl, *first_shm_rctl;
	size_t			shm_size, reccnt, copy_size;
	int4			*size_ptr;
	char			*shmPtr;	/* not using "shm_ptr" since it is already used in an AIX include file */
	int			shmid;
	multi_proc_shm_hdr_t	*mp_hdr;	/* Pointer to "multi_proc_shm_hdr_t" structure in shared memory */

	status = 0;
	/* Although we made sure the # of tasks is the same as the # of processes forked off (in the "gtm_multi_proc"
	 * invocation in "mur_forward"), it is possible one of the forked processes finishes one invocation of
	 * "mur_forward_multi_proc" before another forked process even gets assigned one task in "gtm_multi_proc_helper".
	 * In this case, we would be invoked more than once. But the first invocation would have done all the needed stuff
	 * so return for later invocations.
	 */
	if (mur_forward_multi_proc_done)
		return 0;
	mur_forward_multi_proc_done = TRUE;
	/* Note: "rctl" is unused. But cannot avoid passing it since "gtm_multi_proc" expects something */
	prev_rctl = NULL;
	rctl_start = NULL;
	adjusted_resolve_time = murgbl.adjusted_resolve_time;
	assert(0 == murgbl.regcnt_remaining);
	multi_proc = multi_proc_in_use;	/* cache value in "local" to speed up access inside loops below */
	if (multi_proc)
	{
		mp_hdr = multi_proc_shm_hdr;
		shm_rctl_start = mur_shm_hdr->shm_rctl_start;
		if (jgbl.onlnrlbk)
		{
			for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++)
			{
				assert(rctl->csa->hold_onto_crit);	/* would have been set in parent process */
				rctl->csa->hold_onto_crit = FALSE;	/* reset since we don't own this region */
				assert(rctl->csa->now_crit);		/* would have been set in parent process */
				rctl->csa->now_crit = FALSE;		/* reset since we don't own this region */
			}
		}
		START_HEARTBEAT_IF_NEEDED;	/* heartbeat timer needed later (in case not already started by "gtm_multi_proc") */
	}
	first_shm_rctl = NULL;
	/* Phase1 of forward recovery starts */
	for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++)
	{	/* Check if "rctl" is available for us or if some other concurrent process has taken it */
		if (multi_proc)
		{
			rctl_index = rctl - &mur_ctl[0];
			shm_rctl = &shm_rctl_start[rctl_index];
			if (shm_rctl->owning_pid)
			{
				assert(process_id != shm_rctl->owning_pid);
				continue;
			}
			GRAB_MULTI_PROC_LATCH_IF_NEEDED(release_latch);
			assert(release_latch);
			for ( ; rctl < rctl_top; rctl++, shm_rctl++)
			{
				if (shm_rctl->owning_pid)
				{
					assert(process_id != shm_rctl->owning_pid);
					continue;
				}
				shm_rctl->owning_pid = process_id;	/* Declare ownership */
				rctl->this_pid_is_owner = TRUE;
				if (jgbl.onlnrlbk)
				{	/* This is an online rollback and crit was grabbed on all regions by the parent rollback
					 * process. But this child process now owns this region and does the actual rollback on
					 * this region so borrow crit for the duration of this child process.
					 */
					csa = rctl->csa;
					csa->hold_onto_crit = TRUE;
					csa->now_crit = TRUE;
					assert(csa->nl->in_crit == mp_hdr->parent_pid);
					csa->nl->in_crit = process_id;
					assert(csa->nl->onln_rlbk_pid == mp_hdr->parent_pid);
					csa->nl->onln_rlbk_pid = process_id;
				}
				if (NULL == first_shm_rctl)
					first_shm_rctl = shm_rctl;
				break;
			}
			REL_MULTI_PROC_LATCH_IF_NEEDED(release_latch);
			if (rctl >= rctl_top)
			{
				assert(rctl == rctl_top);
				break;
			}
			/* Set key to print this rctl's region-name as prefix in case this forked off process prints any output */
			MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key);
#			ifdef MUR_DEBUG
			fprintf(stderr, "pid = %d : Owns region %s\n", process_id, multi_proc_key);
#			endif
		} else
			rctl->this_pid_is_owner = TRUE;
		if (mur_options.forward)
		{
			assert(NULL == rctl->jctl_turn_around);
			jctl = rctl->jctl = rctl->jctl_head;
			assert(jctl->reg_ctl == rctl);
			jctl->rec_offset = JNL_HDR_LEN;
			jnl_fence_ctl.fence_list = JNL_FENCE_LIST_END;	/* initialized to reflect journaling is not enabled */
			if (mur_options.rollback)
				jgbl.mur_jrec_seqno = jctl->jfh->start_seqno;
		} else
		{
			jctl = rctl->jctl = (NULL == rctl->jctl_turn_around) ? rctl->jctl_head : rctl->jctl_turn_around;
			assert(jctl->reg_ctl == rctl);
			jctl->rec_offset = jctl->turn_around_offset;
			jgbl.mur_jrec_seqno = jctl->turn_around_seqno;
			assert((NULL != rctl->jctl_turn_around) || (0 == jctl->rec_offset));
		}
		if (mur_options.rollback)
		{
			if (murgbl.consist_jnl_seqno < jgbl.mur_jrec_seqno)
			{
				/* Assert that murgbl.losttn_seqno is never less than jgbl.mur_jrec_seqno (the turnaround
				 * point seqno) as this is what murgbl.consist_jnl_seqno is going to be set to and will
				 * eventually be the post-rollback seqno. If this condition is violated, the result of the
				 * recovery is a compromised database (the file header will indicate a Region Seqno which
				 * is not necessarily correct since seqnos prior to it might be absent in the database).
				 * Therefore, this is an out-of-design situation with respect to rollback and so stop it.
				 */
				assert(murgbl.losttn_seqno >= jgbl.mur_jrec_seqno);
				murgbl.consist_jnl_seqno = jgbl.mur_jrec_seqno;
			}
			assert(murgbl.consist_jnl_seqno <= murgbl.losttn_seqno);
		}
		if (mur_options.update || mur_options.extr[GOOD_TN])
		{
			reg = rctl->gd;
			gv_cur_region = reg;
			tp_change_reg();	/* note: sets cs_addrs to a non-NULL value even if gv_cur_region->open is FALSE
						 * (cs_data could still be NULL). */
			rctl->csa = cs_addrs;
			cs_addrs->miscptr = (void *)rctl;
			rctl->csd = cs_data;
			rctl->sgm_info_ptr = cs_addrs->sgm_info_ptr;
			assert(!reg->open || (NULL != cs_addrs->dir_tree));
			gv_target = cs_addrs->dir_tree;
		}
		jctl->after_end_of_data = FALSE;
		status = mur_next(jctl, jctl->rec_offset);
		assert(ERR_JNLREADEOF != status);	/* cannot get EOF at start of forward processing */
		if (SS_NORMAL != status)
			goto finish;
		PRINT_VERBOSE_STAT(jctl, "mur_forward:at the start");
		rctl->process_losttn = FALSE;
		/* Any multi-region TP transaction will be processed as multiple single-region TP transactions up
		 * until the tp-resolve-time is reached. From then on, they will be treated as one multi-region TP
		 * transaction. This is needed for proper lost-tn determination (any multi-region transaction that
		 * gets played in a region AFTER it has already encountered a broken tn should treat this as a lost tn).
		 */
		do
		{
			if (multi_proc && IS_FORCED_MULTI_PROC_EXIT(mp_hdr))
			{	/* We are at a logical point. So exit if signaled by parent */
				status = ERR_FORCEDHALT;
				goto finish;
			}
			assert(jctl == rctl->jctl);
			rec = rctl->mur_desc->jnlrec;
			rec_time = rec->prefix.time;
			if (rec_time > mur_options.before_time)
				break;	/* Records after -BEFORE_TIME do not go to extract or losttrans or brkntrans files */
			if (rec_time < mur_options.after_time)
			{
				status = mur_next_rec(&jctl);
				continue;	/* Records before -AFTER_TIME do not go to extract or losttrans or brkntrans files */
			}
			if (rec_time >= adjusted_resolve_time)
				break;	/* Records after this adjusted resolve_time will be processed below in phase2 */
			/* Note: Since we do hashtable token processing only for records from tp_resolve_time onwards,
			 * it is possible that if we encounter any broken transactions here we won't know they are broken
			 * but will play them as is. That is unavoidable. Specify -SINCE_TIME (for -BACKWARD rollback/recover)
			 * and -VERIFY (for -FORWARD rollback/recover) to control tp_resolve_time (and in turn more
			 * effective broken tn determination).
			 */
			status = mur_forward_play_cur_jrec(rctl);
			if (SS_NORMAL != status)
				break;
			status = mur_next_rec(&jctl);
		} while (SS_NORMAL == status);
		CHECK_IF_EOF_REACHED(rctl, status);	/* sets rctl->forw_eof_seen if needed; resets "status" to SS_NORMAL */
		if (SS_NORMAL != status)
		{	/* ERR_FILENOTCREATE is possible from "mur_cre_file_extfmt" OR ERR_FORCEDHALT is possible
			 * from "mur_forward_play_cur_jrec". No other errors are known to occur here. Assert accordingly.
			 */
			assert((ERR_FILENOTCREATE == status) || (ERR_FORCEDHALT == status));
			goto finish;
		}
		if (rctl->forw_eof_seen)
		{
			PRINT_VERBOSE_STAT(jctl, "mur_forward:Reached EOF before tp_resolve_time");
			continue;	/* Reached EOF before even getting to tp_resolve_time.
					 * Do not even consider region for next processing loop */
		}
		rctl->last_tn = 0;
		murgbl.regcnt_remaining++;	/* # of regions participating in recovery at this point */
		if (NULL == rctl_start)
			rctl_start = rctl;
		if (NULL != prev_rctl)
		{
			prev_rctl->next_rctl = rctl;
			rctl->prev_rctl = prev_rctl;
		}
		prev_rctl = rctl;
		assert(murgbl.ok_to_update_db || !rctl->db_updated);
		PRINT_VERBOSE_STAT(jctl, "mur_forward:at tp_resolve_time");
	}
	if (multi_proc)
		multi_proc_key = NULL;	/* reset key until it can be set to rctl's region-name again */
	/* Note that it is possible for rctl_start to be NULL at this point. That is, there is no journal record in any region
	 * AFTER the calculated tp-resolve-time. This is possible if for example -AFTER_TIME was used and has a time later
	 * than any journal record in all journal files. If rctl_start is NULL, prev_rctl should also be NULL and vice versa.
	 */
	if (NULL != rctl_start)
	{
		assert(NULL != prev_rctl);
		prev_rctl->next_rctl = rctl_start;
		rctl_start->prev_rctl = prev_rctl;
	}
	rctl = rctl_start;
	regcnt_stuck = 0;	/* # of regions we are stuck in waiting for other regions to resolve a multi-region TP transaction */
	assert((NULL == rctl) || (NULL == rctl->forw_multi));
	gv_cur_region = NULL;	/* clear out any previous value to ensure gv_cur_region/cs_addrs/cs_data
				 * all get set in sync by the MUR_CHANGE_REG macro below.
				 */
	/* Phase2 of forward recovery starts */
	while (NULL != rctl)
	{	/* while there is at least one region remaining with unprocessed journal records */
		assert(NULL != rctl_start);
		assert(0 < murgbl.regcnt_remaining);
		if (NULL != rctl->forw_multi)
		{	/* This region's current journal record is part of a TP transaction waiting for other regions */
			regcnt_stuck++;
			assert(regcnt_stuck <= murgbl.regcnt_remaining);
			if (regcnt_stuck == murgbl.regcnt_remaining)
			{
				assertpro(multi_proc_in_use);	/* Else: out-of-design situation. Stuck in ALL regions. */
				/* Check one last time if all regions are stuck waiting for another process to resolve the
				 * multi-region TP transaction. If so, wait in a sleep loop. If not, we can proceed.
				 */
				rctl = rctl_start;
				start_hrtbt_cntr = heartbeat_counter;
				do
				{
					if (IS_FORCED_MULTI_PROC_EXIT(mp_hdr))
					{	/* We are at a logical point. So exit if signaled by parent */
						status = ERR_FORCEDHALT;
						goto finish;
					}
					forw_multi = rctl->forw_multi;
					assert(NULL != forw_multi);
					sfm = forw_multi->shm_forw_multi;
					assert(NULL != sfm);
					assert(sfm->num_reg_seen_forward <= sfm->num_reg_seen_backward);
#					ifdef MUR_DEBUG
					fprintf(stderr, "Pid = %d : Line %d : token = %llu : forward = %d : backward = %d\n",
						process_id, __LINE__, (long long int)sfm->token,
						sfm->num_reg_seen_forward, sfm->num_reg_seen_backward);
#					endif
					if (sfm->num_reg_seen_forward == sfm->num_reg_seen_backward)
					{	/* We are no longer stuck in this region */
						assert(!forw_multi->no_longer_stuck);
						forw_multi->no_longer_stuck = TRUE;
						break;
					}
					rctl = rctl->next_rctl;	/* Move on to the next available region */
					assert(NULL != rctl);
					if (rctl == rctl_start)
					{	/* We went through all regions once and are still stuck.
						 * Sleep until at least TWO heartbeats have elapsed after which check for deadlock.
						 * Do this only in the child process that owns the FIRST region in the region list.
						 * This way we don't have contention for the GRAB_MULTI_PROC_LATCH from
						 * all children at more or less the same time.
						 */
						if ((rctl == mur_ctl) && (heartbeat_counter > (start_hrtbt_cntr + 2)))
						{	/* Check if all processes are stuck for a while. If so assertpro */
							GRAB_MULTI_PROC_LATCH_IF_NEEDED(release_latch);
							assert(release_latch);
							shm_rctl_start = mur_shm_hdr->shm_rctl_start;
							num_reg_stuck = 0;
							for (i = 0; i < murgbl.reg_total; i++)
							{
								shm_rctl = &shm_rctl_start[i];
								sfm = shm_rctl->shm_forw_multi;
								if (NULL != sfm)
								{
									if (sfm->num_reg_seen_forward != sfm->num_reg_seen_backward)
										num_reg_stuck++;
								}
							}
							REL_MULTI_PROC_LATCH_IF_NEEDED(release_latch);
							/* If everyone is stuck at this point, it is an out-of-design situation */
							assertpro(num_reg_stuck < murgbl.reg_total);
							start_hrtbt_cntr = heartbeat_counter;
						} else
						{	/* Sleep and recheck if any region we are stuck in got resolved.
							 * To minimize time spent sleeping, we just yield our timeslice.
							 */
							rel_quant();
							continue;
						}
					}
				} while (TRUE);
			} else
			{
				rctl = rctl->next_rctl;	/* Move on to the next available region */
				assert(NULL != rctl);
				continue;
			}
		}
		regcnt_stuck = 0;	/* restart the counter now that we found at least one non-stuck region */
		MUR_CHANGE_REG(rctl);
		jctl = rctl->jctl;
		this_reg_stuck = FALSE;
		for (status = SS_NORMAL; SS_NORMAL == status; )
		{
			if (multi_proc && IS_FORCED_MULTI_PROC_EXIT(mp_hdr))
			{	/* We are at a logical point. So exit if signaled by parent */
				status = ERR_FORCEDHALT;
				goto finish;
			}
			assert(jctl == rctl->jctl);
			rec = rctl->mur_desc->jnlrec;
			rec_time = rec->prefix.time;
			if (rec_time > mur_options.before_time)
				break;	/* Records after -BEFORE_TIME do not go to extract or losttrans or brkntrans files */
			assert((rec_time >= adjusted_resolve_time) || (mur_options.notncheck && !mur_options.verify));
			assert((0 == mur_options.after_time) || (mur_options.forward && !rctl->db_updated));
			if (rec_time < mur_options.after_time)
			{
				status = mur_next_rec(&jctl);
				continue;	/* Records before -AFTER_TIME do not go to extract or losttrans or brkntrans files */
			}
			/* Check if the current journal record can be played right away or needs to wait for corresponding
			 * journal records from other participating TP regions to be reached. A non-TP or ZTP transaction
			 * can be played without issues (i.e. has no dependencies with any other regions). A single-region
			 * TP transaction too falls in the same category. A multi-region TP transaction needs to wait until
			 * all participating regions have played all journal records BEFORE this TP in order to ensure
			 * recover plays records in the exact same order that GT.M performed them in.
			 */
			/* If FENCE_NONE is specified, we would not have maintained any multi hashtable in mur_back_process
			 * for broken transaction processing. So we process multi-region TP transactions as multiple
			 * single-region TP transactions in the forward phase.
			 */
			if (FENCE_NONE != mur_options.fences)
			{
				rectype = (enum jnl_record_type)rec->prefix.jrec_type;
				if (IS_TP(rectype) && IS_TUPD(rectype))
				{
					assert(IS_SET_KILL_ZKILL_ZTWORM_LGTRIG_ZTRIG(rectype));
					assert(&rec->jrec_set_kill.num_participants == &rec->jrec_ztworm.num_participants);
					assert(&rec->jrec_set_kill.num_participants == &rec->jrec_lgtrig.num_participants);
					num_partners = rec->jrec_set_kill.num_participants;
					assert(0 < num_partners);
					if (1 < num_partners)
					{
						this_reg_stuck = TRUE;
						assert(&rec->jrec_set_kill.update_num == &rec->jrec_ztworm.update_num);
						assert(&rec->jrec_set_kill.update_num == &rec->jrec_lgtrig.update_num);
					}
				}
			}
			if (this_reg_stuck)
			{
				rec_token_seq = GET_JNL_SEQNO(rec);
				MUR_FORW_TOKEN_LOOKUP(forw_multi, rec_token_seq, rec_time);
				if (NULL != forw_multi)
				{	/* This token has already been seen in another region in forward processing.
					 * Add the current region as well. If all regions have been resolved, then play
					 * the entire transaction maintaining the exact same order of updates within.
					 */
					if (!forw_multi->no_longer_stuck)
						MUR_FORW_TOKEN_ONE_MORE_REG(forw_multi, rctl);
				} else
				{	/* First time we are seeing this token in forward processing. Check if this
					 * has already been determined to be a broken transaction.
					 */
					recstat = GOOD_TN;
					multi = NULL;
					if (IS_REC_POSSIBLY_BROKEN(rec_time, rec_token_seq))
					{
						multi = MUR_TOKEN_LOOKUP(rec_token_seq, rec_time, TPFENCE);
						if ((NULL != multi) && (0 < multi->partner))
							recstat = BROKEN_TN;
					}
					MUR_FORW_TOKEN_ADD(forw_multi, rec_token_seq, rec_time, rctl, num_partners,
								recstat, multi);
				}
				/* Check that the "tabent" field has been initialized above (by either the MUR_FORW_TOKEN_LOOKUP
				 * or MUR_FORW_TOKEN_ADD macros). This is relied upon by "mur_forward_play_multireg_tp" below.
				 */
				assert(NULL != forw_multi->u.tabent);
				assert(forw_multi->num_reg_seen_forward <= forw_multi->num_reg_seen_backward);
				if (multi_proc)
				{
					sfm = forw_multi->shm_forw_multi;
					ok_to_play = (NULL == sfm)
						|| (sfm->num_reg_seen_forward == sfm->num_reg_seen_backward);
				} else
					ok_to_play = (forw_multi->num_reg_seen_forward == forw_multi->num_reg_seen_backward);
				assert(ok_to_play || !forw_multi->no_longer_stuck);
				if (ok_to_play)
				{	/* We have enough information to proceed with playing this multi-region TP in
					 * forward processing (even if we might not have seen all needed regions). Now play it.
					 * Note that the TP could be BROKEN_TN or GOOD_TN. The callee handles it.
					 */
					assert(forw_multi == rctl->forw_multi);
					status = mur_forward_play_multireg_tp(forw_multi, rctl);
					this_reg_stuck = FALSE;
					/* Note that as part of playing the TP transaction, we could have reached
					 * the EOF of rctl. In this case, we need to break out of the loop.
					 */
					if ((SS_NORMAL != status) || rctl->forw_eof_seen)
						break;
					assert(NULL == rctl->forw_multi);
					assert(!dollar_tlevel);
					jctl = rctl->jctl;	/* In case the first record after the most recently processed
								 * TP transaction is in the next generation journal file */
					continue;
				}
				break;
			} else
			{
				status = mur_forward_play_cur_jrec(rctl);
				if (SS_NORMAL != status)
					break;
			}
			assert(!this_reg_stuck);
			status = mur_next_rec(&jctl);
		}
		assert((NULL == rctl->forw_multi) || this_reg_stuck);
		assert((NULL != rctl->forw_multi) || !this_reg_stuck);
		if (!this_reg_stuck)
		{	/* We are not stuck in this region (to resolve a multi-region TP).
			 * This means we are done processing all the records of this region.
			 */
			assert(NULL == rctl->forw_multi);
			if (!rctl->forw_eof_seen)
			{
				CHECK_IF_EOF_REACHED(rctl, status);	/* sets rctl->forw_eof_seen if needed;
									 * resets "status" to SS_NORMAL */
				if (SS_NORMAL != status)
				{
					assert(ERR_FILENOTCREATE == status);
					goto finish;
				}
				assert(!dollar_tlevel);
				DELETE_RCTL_FROM_UNPROCESSED_LIST(rctl);	/* since all of its records should have
										 * been processed */
			} else
			{	/* EOF was seen in rctl inside "mur_forward_play_multireg_tp" and it was removed
				 * from the unprocessed list of rctls. At the time rctl was removed, its "next_rctl"
				 * field could have been pointing to another <rctl> that has since then also been
				 * removed inside the same function. Therefore the "next_rctl" field is not reliable
				 * in this case but instead we should rely on the global variable "rctl_start" which
				 * points to the list of unprocessed rctls. Set "next_rctl" accordingly.
				 */
				rctl->next_rctl = rctl_start;
				if (ERR_JNLREADEOF == status)
					status = SS_NORMAL;
			}
			assert(rctl->deleted_from_unprocessed_list);
		}
		assert(SS_NORMAL == status);
		assert(!this_reg_stuck || !rctl->forw_eof_seen);
		assert((NULL == rctl->next_rctl) || (NULL != rctl_start));
		assert((NULL == rctl->next_rctl) || (0 < murgbl.regcnt_remaining));
		rctl = rctl->next_rctl;	/* Note: even though "rctl" could have been deleted from the doubly linked list above,
					 * rctl->next_rctl is not touched so we can still use it to get to the next element.
					 */
	}
	assert(0 == murgbl.regcnt_remaining);
	jgbl.mur_pini_addr_reset_fnptr = NULL;	/* No more simulation of GT.M activity for any region */
	prc_vec = murgbl.prc_vec;	/* Use process-vector of MUPIP RECOVER (not any simulating GT.M process) now onwards */
	assert(0 == dollar_tlevel);
	for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++)
	{
		if (!rctl->this_pid_is_owner)
		{
			assert(multi_proc_in_use);
			continue;	/* in a parallel processing environment, process only regions we own */
		}
		if (multi_proc)
		{	/* Set key to print this rctl's region-name as prefix in case this forked off process prints any output */
			MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key);
		}
		PRINT_VERBOSE_STAT(rctl->jctl, "mur_forward:at the end");
		assert(!mur_options.rollback || (0 != murgbl.consist_jnl_seqno));
		assert(mur_options.rollback || (0 == murgbl.consist_jnl_seqno));
		assert(!dollar_tlevel);	/* In case it applied a broken TUPD */
		assert(murgbl.ok_to_update_db || !rctl->db_updated);
		rctl->mur_plst = NULL;	/* reset now that simulation of GT.M updates is done */
		/* Ensure mur_block_count_correct is called if updates allowed */
		if (murgbl.ok_to_update_db && (SS_NORMAL != mur_block_count_correct(rctl)))
		{
			gtm_putmsg_csa(CSA_ARG(rctl->csa) VARLSTCNT(4) ERR_BLKCNTEDITFAIL, 2, DB_LEN_STR(rctl->gd));
			murgbl.wrn_count++;
		}
	}
finish:
	if (multi_proc)
		multi_proc_key = NULL;	/* reset key until it can be set to rctl's region-name again */
	if ((SS_NORMAL == status) && mur_options.show)
		mur_output_show();
	if (NULL != first_shm_rctl)
	{	/* Transfer needed process-private information to shared memory so parent process can later inherit this. */
		first_shm_rctl->err_cnt = murgbl.err_cnt;
		first_shm_rctl->wrn_count = murgbl.wrn_count;
		first_shm_rctl->consist_jnl_seqno = murgbl.consist_jnl_seqno;
		/* If extract files were created by this process for one or more regions, then copy that information to
		 * shared memory so parent process can use this information to do a merge sort.
		 */
		shm_rctl = mur_shm_hdr->shm_rctl_start;
		for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++, shm_rctl++)
		{
			assert(multi_proc_in_use);
			if (!rctl->this_pid_is_owner)
				continue;	/* in a parallel processing environment, process only regions we own */
			/* Cancel any flush/dbsync timers by this child process for this region. This is because the
			 * child is not going to go through exit handling code (no gds_rundown etc.). And we need to
			 * clear up csa->nl->wcs_timers (normally done by gds_rundown).
			 */
			if (NULL != rctl->csa)	/* rctl->csa can be NULL in case of "mupip journal -extract" etc. */
				CANCEL_DB_TIMERS(rctl->gd, rctl->csa, cancelled_timer, cancelled_dbsync_timer);
			reccnt = 0;
			for (size_ptr = &rctl->jnlext_multi_list_size[0], recstat = 0;
					recstat < TOT_EXTR_TYPES;
					recstat++, size_ptr++)
			{	/* Assert "extr_file_created" information is in sync between rctl and shm_rctl.
				 * This was done at the end of "mur_cre_file_extfmt".
				 */
				assert(shm_rctl->extr_file_created[recstat] == rctl->extr_file_created[recstat]);
				/* Assert that if *size_ptr is non-zero, then we better have created an extract file.
				 * Note that the converse is not true. It is possible we created a file for example to
				 * write an INCTN record but decided to not write anything because it was not a -detail
				 * type of extract. So *size_ptr could be 0 even though we created the extract file.
				 */
				assert(!*size_ptr || rctl->extr_file_created[recstat]);
				shm_rctl->jnlext_list_size[recstat] = *size_ptr;
				reccnt += *size_ptr;
			}
			assert(INVALID_SHMID == shm_rctl->jnlext_shmid);
			shm_size = reccnt * SIZEOF(jnlext_multi_t);
			/* If we are quitting because of an abnormal status OR a forced signal to terminate
			 * OR if the parent is dead (kill -9), don't bother creating a shmid to communicate back with
			 * the parent.
			 */
			if (mp_hdr->parent_pid != getppid())
			{
				SET_FORCED_MULTI_PROC_EXIT;	/* Also signal sibling children to stop processing */
				if (SS_NORMAL != status)
					status = ERR_FORCEDHALT;
			}
			if ((SS_NORMAL == status) && shm_size)
			{
				shmid = shmget(IPC_PRIVATE, shm_size, 0600 | IPC_CREAT);
				if (-1 == shmid)
				{
					save_errno = errno;
					SNPRINTF(errstr, SIZEOF(errstr), "shmget() : shmsize=0x%llx", shm_size);
					MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key);	/* to print region name prefix */
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_SYSCALL, 5,
							LEN_AND_STR(errstr), CALLFROM, save_errno);
				}
				shmPtr = (char *)do_shmat(shmid, 0, 0);
				if (-1 == (sm_long_t)shmPtr)
				{
					save_errno = errno;
					SNPRINTF(errstr, SIZEOF(errstr), "shmat() : shmid=%d shmsize=0x%llx",
							shmid, shm_size);
					MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key);	/* to print region name prefix */
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_SYSCALL, 5,
							LEN_AND_STR(errstr), CALLFROM, save_errno);
				}
				shm_rctl->jnlext_shmid = shmid;
				shm_rctl->jnlext_shm_size = shm_size;
				for (size_ptr = &rctl->jnlext_multi_list_size[0], recstat = 0;
						recstat < TOT_EXTR_TYPES;
						recstat++, size_ptr++)
				{
					shm_size = *size_ptr;
					if (shm_size)
					{
						copy_size = copy_list_to_buf(rctl->jnlext_multi_list[recstat],
										(int4)shm_size, shmPtr);
						assert(copy_size == (shm_size * SIZEOF(jnlext_multi_t)));
						shmPtr += copy_size;
					}
				}
			}
		}
	}
	mur_close_file_extfmt(IN_MUR_CLOSE_FILES_FALSE);	/* Need to flush buffered extract/losttrans/brokentrans files */
	return (int)status;
}
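/* The tail of mur_forward_multi_proc hands results from a forked child back to the parent
 * through a freshly created IPC_PRIVATE segment (the child publishes the shmid in
 * shm_rctl->jnlext_shmid for the parent to attach to later). A minimal standalone sketch of
 * such a child-to-parent handoff; here, as a simplifying assumption, the parent creates the
 * segment up front instead of having the child publish its id:
 */
#include <stdio.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	int shmid = shmget(IPC_PRIVATE, 128, IPC_CREAT | 0600);	/* survives the fork by id */
	if (shmid == -1) {
		perror("shmget");
		return 1;
	}
	pid_t pid = fork();
	if (pid == 0) {			/* child: attach, write result, detach */
		char *p = shmat(shmid, NULL, 0);
		if (p == (char *)-1)
			_exit(1);
		strcpy(p, "child result");
		shmdt(p);
		_exit(0);
	}
	waitpid(pid, NULL, 0);
	char *p = shmat(shmid, NULL, 0);	/* parent: read the result back */
	if (p != (char *)-1) {
		printf("parent read: %s\n", p);
		shmdt(p);
	}
	shmctl(shmid, IPC_RMID, NULL);		/* segment persists until explicitly removed */
	return 0;
}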
/* Takes an entry from 'ipcs -am' and checks for its validity to be a GT.M replication segment.
 * Returns TRUE if the shared memory segment is a valid GT.M replication segment
 * (based on a check on some fields in the shared memory) else FALSE.
 * If the segment belongs to GT.M, it returns the replication id of the segment
 * by the second argument.
 * Sets exit_stat to ERR_MUNOTALLSEC if appropriate.
 */
boolean_t validate_replpool_shm_entry(shm_parms *parm_buff, replpool_id_ptr_t replpool_id, int *exit_stat)
{
	boolean_t		remove_shmid, jnlpool_segment;
	int			fd;
	repl_inst_hdr		repl_instance;
	sm_uc_ptr_t		start_addr;
	int			save_errno, status, shmid;
	struct shmid_ds		shmstat;
	char			msgbuff[OUT_BUFF_SIZE], *instfilename;

	if (NULL == parm_buff)
		return FALSE;
	/* Check for the bare minimum size of the replic shared segment that we expect */
	/* if (parm_buff->sgmnt_siz < (SIZEOF(replpool_identifier) + MIN(MIN_JNLPOOL_SIZE, MIN_RECVPOOL_SIZE))) */
	if (parm_buff->sgmnt_siz < MIN(MIN_JNLPOOL_SIZE, MIN_RECVPOOL_SIZE))
		return FALSE;
	if (IPC_PRIVATE != parm_buff->key)
		return FALSE;
	shmid = parm_buff->shmid;
	/* we do not need to lock the shm for reading the rundown information as
	 * the other rundowns (if any) can also be allowed to share reading the
	 * same info concurrently.
	 */
	if (-1 == (sm_long_t)(start_addr = (sm_uc_ptr_t)do_shmat(shmid, 0, SHM_RND)))
		return FALSE;
	memcpy((void *)replpool_id, (void *)start_addr, SIZEOF(replpool_identifier));
	instfilename = replpool_id->instfilename;
	/* Even though we could be looking at a replication pool structure that has been created by an older version
	 * or newer version of GT.M, the format of the "replpool_identifier" structure is expected to be the same
	 * across all versions so we can safely dereference the "label" and "instfilename" fields in order to generate
	 * user-friendly error messages. Asserts for the layout are in "mu_rndwn_repl_instance" (not here) with a
	 * comment there as to why that location was chosen.
	 */
	if (memcmp(replpool_id->label, GDS_RPL_LABEL, GDS_LABEL_SZ - 1))
	{
		if (!memcmp(replpool_id->label, GDS_RPL_LABEL, GDS_LABEL_SZ - 3))
		{	/* Note: an "!AD" was missing after "version" in the original format string even though the
			 * corresponding arguments (gtm_release_name_len, gtm_release_name) were passed; fixed here.
			 */
			util_out_print("Cannot rundown replpool shmid = !UL as it has format !AD "
				"created by !AD but this mupip is version !AD and uses format !AD", TRUE, shmid,
				GDS_LABEL_SZ - 1, replpool_id->label, LEN_AND_STR(replpool_id->now_running),
				gtm_release_name_len, gtm_release_name, GDS_LABEL_SZ - 1, GDS_RPL_LABEL);
			*exit_stat = ERR_MUNOTALLSEC;
		}
		shmdt((void *)start_addr);
		return FALSE;
	}
	assert(JNLPOOL_SEGMENT == replpool_id->pool_type || RECVPOOL_SEGMENT == replpool_id->pool_type);
	if ((JNLPOOL_SEGMENT != replpool_id->pool_type) && (RECVPOOL_SEGMENT != replpool_id->pool_type))
	{
		shmdt((void *)start_addr);
		return FALSE;
	}
	jnlpool_segment = (JNLPOOL_SEGMENT == replpool_id->pool_type);
	if (-1 == shmctl(shmid, IPC_STAT, &shmstat))
	{
		save_errno = errno;
		assert(FALSE);	/* we were able to attach to this shmid before so should be able to get stats on it */
		util_out_print("!AD -> Error with shmctl for shmid = !UL", TRUE, LEN_AND_STR(instfilename), shmid);
		gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) save_errno);
		*exit_stat = ERR_MUNOTALLSEC;
		shmdt((void *)start_addr);
		return FALSE;
	}
	/* Check if the instance filename reported in shared memory still exists. If not, clean this
	 * shared memory section without even invoking "mu_rndwn_repl_instance" as that expects
	 * the instance file to exist. Same case if shared memory points back to an instance file
	 * whose file header does not have this shmid.
	 */
	OPENFILE(instfilename, O_RDONLY, fd);	/* check if we can open it */
	msgbuff[0] = '\0';
	remove_shmid = FALSE;
	if (FD_INVALID == fd)
	{
		if (ENOENT == errno)
		{
			SNPRINTF(msgbuff, OUT_BUFF_SIZE, "File %s does not exist", instfilename);
			if (1 < shmstat.shm_nattch)
			{
				PRINT_AND_SEND_REPLPOOL_FAILURE_MSG(msgbuff, replpool_id, shmid);
				*exit_stat = ERR_MUNOTALLSEC;
				shmdt((void *)start_addr);
				return FALSE;
			}
			remove_shmid = TRUE;
		} else
		{	/* open() errored out e.g. due to file permissions. Log that */
			save_errno = errno;
			util_out_print("Cannot rundown replpool shmid !UL for instance file"
				" !AD as open() on the file returned the following error",
				TRUE, shmid, LEN_AND_STR(instfilename));
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) save_errno);
			*exit_stat = ERR_MUNOTALLSEC;
			shmdt((void *)start_addr);
			return FALSE;
		}
	} else
	{
		LSEEKREAD(fd, 0, &repl_instance, SIZEOF(repl_inst_hdr), status);
		if (0 != status)
		{
			save_errno = errno;
			util_out_print("!AD -> Error with LSEEKREAD for shmid = !UL", TRUE,
				LEN_AND_STR(instfilename), shmid);
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) save_errno);
			*exit_stat = ERR_MUNOTALLSEC;
			shmdt((void *)start_addr);
			return FALSE;
		}
		if ((jnlpool_segment && (repl_instance.jnlpool_shmid != shmid))
			|| (!jnlpool_segment && (repl_instance.recvpool_shmid != shmid)))
		{
			SNPRINTF(msgbuff, OUT_BUFF_SIZE, "%s SHMID (%d) in the instance file header does not match with the"
				" one reported by \"ipcs\" command (%d)", jnlpool_segment ? "Journal Pool" : "Receive Pool",
				jnlpool_segment ? repl_instance.jnlpool_shmid : repl_instance.recvpool_shmid, shmid);
			if (1 < shmstat.shm_nattch)
			{
				PRINT_AND_SEND_REPLPOOL_FAILURE_MSG(msgbuff, replpool_id, shmid);
				*exit_stat = ERR_MUNOTALLSEC;
				shmdt((void *)start_addr);
				return FALSE;
			}
			remove_shmid = TRUE;
		}
		CLOSEFILE_RESET(fd, status);	/* resets "fd" to FD_INVALID */
	}
	shmdt((void *)start_addr);
	if (remove_shmid)
	{
		assert('\0' != msgbuff[0]);
		if (0 != shm_rmid(shmid))
		{
			save_errno = errno;
			util_out_print("!AD -> Error removing shared memory for shmid = !UL", TRUE,
				LEN_AND_STR(instfilename), shmid);
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) save_errno);
			*exit_stat = ERR_MUNOTALLSEC;
			return FALSE;
		}
		PRINT_AND_SEND_SHMREMOVED_MSG(msgbuff, STRLEN(instfilename), instfilename, shmid);
		*exit_stat = ERR_SHMREMOVED;
	} else
		*exit_stat = SS_NORMAL;
	return TRUE;
}
/* Takes an entry from 'ipcs -m' and checks its validity as a GT.M db segment.
 * Returns TRUE if the shared memory segment is a valid GT.M db segment
 * (based on a check of some fields in the shared memory), else FALSE.
 * If the segment belongs to GT.M, the database file name is returned in the second argument.
 * Sets exit_stat to ERR_MUNOTALLSEC if appropriate.
 */
boolean_t validate_db_shm_entry(shm_parms *parm_buff, char *fname, int *exit_stat)
{
	boolean_t		remove_shmid;
	file_control		*fc;
	int			fname_len, save_errno, status, shmid;
	node_local_ptr_t	nl_addr;
	sm_uc_ptr_t		start_addr;
	struct stat		st_buff;
	struct shmid_ds		shmstat;
	sgmnt_data		tsd;
	unix_db_info		*udi;
	char			msgbuff[OUT_BUFF_SIZE];

	if (NULL == parm_buff)
		return FALSE;
	/* Check for the bare minimum size of the shared memory segment that we expect
	 * (with no fileheader related information at hand).
	 */
	if (MIN_NODE_LOCAL_SPACE + SHMPOOL_SECTION_SIZE > parm_buff->sgmnt_siz)
		return FALSE;
	if (IPC_PRIVATE != parm_buff->key)
		return FALSE;
	shmid = parm_buff->shmid;
	/* We do not need to lock the shm for reading the rundown information, as other
	 * rundowns (if any) can safely read the same info concurrently.
	 */
	if (-1 == (sm_long_t)(start_addr = (sm_uc_ptr_t)do_shmat(shmid, 0, SHM_RND)))
		return FALSE;
	nl_addr = (node_local_ptr_t)start_addr;
	memcpy(fname, nl_addr->fname, MAX_FN_LEN + 1);
	fname[MAX_FN_LEN] = '\0';	/* make sure the fname is null terminated */
	fname_len = STRLEN(fname);
	msgbuff[0] = '\0';
	if (memcmp(nl_addr->label, GDS_LABEL, GDS_LABEL_SZ - 1))
	{
		if (!memcmp(nl_addr->label, GDS_LABEL, GDS_LABEL_SZ - 3))
		{
			util_out_print("Cannot rundown shmid = !UL for database !AD as it has format !AD "
					"but this mupip uses format !AD", TRUE, shmid, fname_len, fname,
					GDS_LABEL_SZ - 1, nl_addr->label, GDS_LABEL_SZ - 1, GDS_LABEL);
			*exit_stat = ERR_MUNOTALLSEC;
		}
		shmdt((void *)start_addr);
		return FALSE;
	}
	if (-1 == shmctl(shmid, IPC_STAT, &shmstat))
	{
		save_errno = errno;
		assert(FALSE);	/* we were able to attach to this shmid before, so we should be able to get stats on it */
		util_out_print("!AD -> Error with shmctl for shmid = !UL", TRUE, fname_len, fname, shmid);
		gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) save_errno);
		*exit_stat = ERR_MUNOTALLSEC;
		shmdt((void *)start_addr);
		return FALSE;
	}
	remove_shmid = FALSE;
	/* Check if the db filename reported in shared memory still exists. If not, clean up this shared memory
	 * section without even invoking "mu_rndwn_file", as that expects the db file to exist. Same case if the
	 * shared memory points back to a database whose file header does not have this shmid.
	 */
	if (-1 == Stat(fname, &st_buff))
	{
		if (ENOENT == errno)
		{
			SNPRINTF(msgbuff, OUT_BUFF_SIZE, "File %s does not exist", fname);
			if (1 < shmstat.shm_nattch)
			{
				PRINT_AND_SEND_DBRNDWN_FAILURE_MSG(msgbuff, fname, shmid);
				*exit_stat = ERR_MUNOTALLSEC;
				shmdt((void *)start_addr);
				return FALSE;
			}
			remove_shmid = TRUE;
		} else
		{	/* Stat errored out, e.g. due to file permissions. Log that. */
			save_errno = errno;
			util_out_print("Cannot rundown shmid !UL for database file !AD as stat() on the file"
					" returned the following error", TRUE, shmid, fname_len, fname);
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) save_errno);
			*exit_stat = ERR_MUNOTALLSEC;
			shmdt((void *)start_addr);
			return FALSE;
		}
	} else
	{
		mu_gv_cur_reg_init();
		gv_cur_region->dyn.addr->fname_len = strlen(fname);
		STRNCPY_STR(gv_cur_region->dyn.addr->fname, fname, gv_cur_region->dyn.addr->fname_len);
		fc = gv_cur_region->dyn.addr->file_cntl;
		fc->op = FC_OPEN;
		status = dbfilop(fc);
		if (SS_NORMAL != status)
		{
			util_out_print("!AD -> Error with dbfilop for shmid = !UL", TRUE, fname_len, fname, shmid);
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(5) status, 2, DB_LEN_STR(gv_cur_region), errno);
			*exit_stat = ERR_MUNOTALLSEC;
			shmdt((void *)start_addr);
			return FALSE;
		}
		udi = FILE_INFO(gv_cur_region);
		LSEEKREAD(udi->fd, 0, &tsd, SIZEOF(sgmnt_data), status);
		if (0 != status)
		{
			save_errno = errno;
			util_out_print("!AD -> Error with LSEEKREAD for shmid = !UL", TRUE, fname_len, fname, shmid);
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) save_errno);
			*exit_stat = ERR_MUNOTALLSEC;
			shmdt((void *)start_addr);
			return FALSE;
		}
		mu_gv_cur_reg_free();
		if (tsd.shmid != shmid)
		{
			SNPRINTF(msgbuff, OUT_BUFF_SIZE, "Shared memory ID (%d) in the DB file header does not match with the one"
					" reported by \"ipcs\" command (%d)", tsd.shmid, shmid);
			if (1 < shmstat.shm_nattch)
			{
				PRINT_AND_SEND_DBRNDWN_FAILURE_MSG(msgbuff, fname, shmid);
				*exit_stat = ERR_MUNOTALLSEC;
				shmdt((void *)start_addr);
				return FALSE;
			}
			remove_shmid = TRUE;
		} else if (tsd.gt_shm_ctime.ctime != shmstat.shm_ctime)
		{
			SNPRINTF(msgbuff, OUT_BUFF_SIZE, "Shared memory creation time in the DB file header does not match with"
					" the one reported by shmctl");
			if (1 < shmstat.shm_nattch)
			{
				PRINT_AND_SEND_DBRNDWN_FAILURE_MSG(msgbuff, fname, shmid);
				*exit_stat = ERR_MUNOTALLSEC;
				shmdt((void *)start_addr);
				return FALSE;
			}
			remove_shmid = TRUE;
		}
	}
	shmdt((void *)start_addr);
	if (remove_shmid)
	{
		assert('\0' != msgbuff[0]);
		if (0 != shm_rmid(shmid))
		{
			save_errno = errno;
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_DBFILERR, 2, fname_len, fname,
					ERR_TEXT, 2, RTS_ERROR_TEXT("Error removing shared memory"));
			util_out_print("!AD -> Error removing shared memory for shmid = !UL", TRUE, fname_len, fname, shmid);
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) save_errno);
			*exit_stat = ERR_MUNOTALLSEC;
			return FALSE;
		}
		PRINT_AND_SEND_SHMREMOVED_MSG(msgbuff, fname_len, fname, shmid);
		*exit_stat = ERR_SHMREMOVED;
	} else
		*exit_stat = SS_NORMAL;
	return TRUE;
}
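Both rundown routines treat a segment as stale when the ipcs-visible id no longer matches what the on-disk header records, and validate_db_shm_entry additionally cross-checks shm_ctime because shmids can be recycled by the kernel. Below is a compressed sketch of that staleness test; the file_hdr layout is invented for the example and stands in for GT.M's sgmnt_data / repl_inst_hdr headers.

#include <stdbool.h>
#include <time.h>
#include <sys/ipc.h>
#include <sys/shm.h>

struct file_hdr {		/* hypothetical on-disk header for the example */
	int	shmid;		/* segment id recorded when the shm was created */
	time_t	shm_ctime;	/* creation time recorded alongside it */
};

/* Mirrors the tsd.shmid and tsd.gt_shm_ctime checks above: a mismatch on
 * either field means the live segment does not belong to this file. */
static bool segment_is_stale(const struct file_hdr *hdr, int live_shmid)
{
	struct shmid_ds stat_buf;

	if (hdr->shmid != live_shmid)
		return true;	/* header points at a different segment */
	if (-1 == shmctl(live_shmid, IPC_STAT, &stat_buf))
		return true;	/* segment is gone; treat as stale */
	/* shmids can be recycled, so also compare creation times */
	return hdr->shm_ctime != stat_buf.shm_ctime;
}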
SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second,
		unsigned long, third, void __user *, ptr, long, fifth)
{
	int version, ret;

	version = call >> 16; /* hack for backward compatibility */
	call &= 0xffff;

	switch (call) {
	case SEMOP:
		return sys_semtimedop(first, (struct sembuf __user *)ptr,
				      second, NULL);
	case SEMTIMEDOP:
		return sys_semtimedop(first, (struct sembuf __user *)ptr,
				      second,
				      (const struct timespec __user *)fifth);
	case SEMGET:
		return sys_semget(first, second, third);
	case SEMCTL: {
		unsigned long arg;

		if (!ptr)
			return -EINVAL;
		if (get_user(arg, (unsigned long __user *) ptr))
			return -EFAULT;
		return sys_semctl(first, second, third, arg);
	}
	case MSGSND:
		return sys_msgsnd(first, (struct msgbuf __user *) ptr,
				  second, third);
	case MSGRCV:
		switch (version) {
		case 0: {
			struct ipc_kludge tmp;

			if (!ptr)
				return -EINVAL;
			if (copy_from_user(&tmp,
					   (struct ipc_kludge __user *) ptr,
					   sizeof(tmp)))
				return -EFAULT;
			return sys_msgrcv(first, tmp.msgp, second,
					  tmp.msgtyp, third);
		}
		default:
			return sys_msgrcv(first,
					  (struct msgbuf __user *) ptr,
					  second, fifth, third);
		}
	case MSGGET:
		return sys_msgget((key_t) first, second);
	case MSGCTL:
		return sys_msgctl(first, second, (struct msqid_ds __user *)ptr);
	case SHMAT:
		switch (version) {
		default: {
			unsigned long raddr;

			ret = do_shmat(first, (char __user *)ptr,
				       second, &raddr, SHMLBA);
			if (ret)
				return ret;
			return put_user(raddr, (unsigned long __user *) third);
		}
		case 1:
			/*
			 * This was the entry point for kernel-originating
			 * calls from iBCS2 in 2.2 days.
			 */
			return -EINVAL;
		}
	case SHMDT:
		return sys_shmdt((char __user *)ptr);
	case SHMGET:
		return sys_shmget(first, second, third);
	case SHMCTL:
		return sys_shmctl(first, second,
				  (struct shmid_ds __user *) ptr);
	default:
		return -ENOSYS;
	}
}
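Seen from user space, this multiplexer is what the legacy ipc(2) system call dispatches into on architectures that have it (32-bit x86, among others). Purely as an illustration — real code should call shmat(2) and let libc pick the entry point — the SHMAT subcall can be driven directly; note how the attach address comes back through the pointer passed as the "third" argument, matching the put_user() in the default SHMAT branch above. The subcall number 21 is taken from the kernel's <linux/ipc.h> and redefined locally to avoid clashing with the libc headers; the program only builds where SYS_ipc exists.

#include <stdio.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/syscall.h>
#include <unistd.h>

#define IPCCALL_SHMAT	21	/* SHMAT subcall number, per <linux/ipc.h> */

int main(void)
{
	unsigned long raddr = 0;
	long ret;
	int shmid = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);

	if (-1 == shmid)
		return 1;
	/* ipc(SHMAT, shmid, shmflg, &result, shmaddr): on success the kernel
	 * writes the attach address through the fourth argument, which is
	 * exactly the put_user(raddr, ...) in the SHMAT branch above. */
	ret = syscall(SYS_ipc, IPCCALL_SHMAT, shmid, 0,
		      (unsigned long)&raddr, NULL);
	if (0 == ret)
		printf("attached at %#lx\n", raddr);
	shmctl(shmid, IPC_RMID, NULL);	/* segment goes away after last detach */
	return ret ? 1 : 0;
}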
/*
 * sys_ipc() is the de-multiplexer for the SysV IPC calls.
 *
 * This is really horribly ugly.
 */
asmlinkage int sys_ipc(uint call, int first, int second, unsigned long third,
		       void *ptr, long fifth)
{
	int version, ret;

	version = call >> 16; /* hack for backward compatibility */
	call &= 0xffff;

	switch (call) {
	case SEMOP:
		return sys_semtimedop(first, (struct sembuf *)ptr, second,
				      NULL);
	case SEMTIMEDOP:
		return sys_semtimedop(first, (struct sembuf *)ptr, second,
				      (const struct timespec __user *)fifth);
	case SEMGET:
		return sys_semget(first, second, third);
	case SEMCTL: {
		union semun fourth;

		if (!ptr)
			return -EINVAL;
		if (get_user(fourth.__pad, (void **)ptr))
			return -EFAULT;
		return sys_semctl(first, second, third, fourth);
	}
	case MSGSND:
		return sys_msgsnd(first, (struct msgbuf *)ptr, second, third);
	case MSGRCV:
		switch (version) {
		case 0: {
			struct ipc_kludge tmp;

			if (!ptr)
				return -EINVAL;
			if (copy_from_user(&tmp, (struct ipc_kludge *)ptr,
					   sizeof(tmp)))
				return -EFAULT;
			return sys_msgrcv(first, tmp.msgp, second, tmp.msgtyp,
					  third);
		}
		default:
			return sys_msgrcv(first, (struct msgbuf *)ptr, second,
					  fifth, third);
		}
	case MSGGET:
		return sys_msgget((key_t)first, second);
	case MSGCTL:
		return sys_msgctl(first, second, (struct msqid_ds *)ptr);
	case SHMAT:
		switch (version) {
		default: {
			ulong raddr;

			ret = do_shmat(first, (char *)ptr, second, &raddr);
			if (ret)
				return ret;
			return put_user(raddr, (ulong *)third);
		}
		case 1: /* iBCS2 emulator entry point */
			if (!segment_eq(get_fs(), get_ds()))
				return -EINVAL;
			return do_shmat(first, (char *)ptr, second,
					(ulong *)third);
		}
	case SHMDT:
		return sys_shmdt((char *)ptr);
	case SHMGET:
		return sys_shmget(first, second, third);
	case SHMCTL:
		return sys_shmctl(first, second, (struct shmid_ds *)ptr);
	default:
		return -ENOSYS;
	}
}
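The SEMCTL case in this older variant explains a quirk that survives in the semctl(2) man page: the kernel get_user()s the pointer-sized __pad member out of the user-supplied union, so the caller must define union semun itself (glibc does not provide it on Linux). A minimal, self-contained usage example follows; the permissions and clean-up policy are arbitrary choices for the demo.

#include <stdio.h>
#include <sys/ipc.h>
#include <sys/sem.h>

union semun {			/* caller-defined, as semctl(2) requires on Linux */
	int		 val;
	struct semid_ds	*buf;
	unsigned short	*array;
};

int main(void)
{
	union semun arg;
	int semid = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);

	if (-1 == semid)
		return 1;
	arg.val = 1;				/* initialise the semaphore to 1 */
	if (-1 == semctl(semid, 0, SETVAL, arg))
		perror("semctl(SETVAL)");
	printf("semaphore value: %d\n", semctl(semid, 0, GETVAL));
	semctl(semid, 0, IPC_RMID);		/* clean up the demo set */
	return 0;
}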