STATICFNDEF int extend_wait_for_write(unix_db_info *udi, off_t new_eof, char *buff) { int to_wait, to_msg, wait_period, save_errno; /* Attempt to write every second, and send message to operator every 1/20 of cs_data->wait_disk_space */ wait_period = to_wait = DIVIDE_ROUND_UP(cs_data->wait_disk_space, CDB_STAGNATE + 1); to_msg = (to_wait / 8) ? (to_wait / 8) : 1; /* send around 8 messages during 1 wait_period */ do { if ((to_wait == cs_data->wait_disk_space) || (to_wait % to_msg == 0)) ISSUE_WAITDSKSPACE(to_wait, wait_period, send_msg_csa); hiber_start(1000); to_wait--; LSEEKWRITE(udi->fd, new_eof, buff, DISK_BLOCK_SIZE, save_errno); } while ((to_wait > 0) && (ENOSPC == save_errno)); return save_errno; }
int dsk_write (gd_region *reg, block_id blk, sm_uc_ptr_t buff) { unix_db_info *udi; int4 size, save_errno; sgmnt_addrs *csa; udi = (unix_db_info *)(reg->dyn.addr->file_cntl->file_info); csa = &udi->s_addrs; assert(csa->hdr); size = (((blk_hdr_ptr_t)buff)->bsiz + 1) & ~1; if (csa->do_fullblockwrites) { /* round size up to next full logical filesys block. */ size = ROUND_UP(size, csa->fullblockwrite_len); assert(size <= csa->hdr->blk_size); } assert(FALSE == reg->read_only); assert(dba_bg == reg->dyn.addr->acc_meth || non_buffer_write); assert(!csa->acc_meth.bg.cache_state->cache_array || buff != (sm_uc_ptr_t)csa->hdr); assert(non_buffer_write || !csa->acc_meth.bg.cache_state->cache_array || (buff >= (sm_uc_ptr_t)csa->acc_meth.bg.cache_state->cache_array + (sizeof(cache_rec) * (csa->hdr->bt_buckets + csa->hdr->n_bts)))); assert(non_buffer_write || !csa->critical || buff < (sm_uc_ptr_t)csa->critical); /* assumes critical follows immediately after the buffer pool */ assert(sizeof(blk_hdr) <= size); assert(size <= csa->hdr->blk_size); if (udi->raw) size = ROUND_UP(size, DISK_BLOCK_SIZE); /* raw I/O must be a multiple of DISK_BLOCK_SIZE */ LSEEKWRITE(udi->fd, (DISK_BLOCK_SIZE * (csa->hdr->start_vbn - 1) + (off_t)blk * csa->hdr->blk_size), buff, size, save_errno); if (0 != save_errno) /* If it didn't work for whatever reason.. */ return -1; return SS_NORMAL; }
void mupip_restore(void) { static readonly char label[] = GDS_LABEL; char db_name[MAX_FN_LEN + 1], *inbuf, *p; inc_list_struct *ptr; inc_header *inhead; sgmnt_data *old_data; short iosb[4]; unsigned short n_len; int4 status, vbn, rsize, temp, save_errno; uint4 rest_blks, totblks; trans_num curr_tn; uint4 ii; block_id blk_num; bool extend; uint4 cli_status; BFILE *in; int i, db_fd; uint4 old_blk_size, old_tot_blks, bplmap; short old_start_vbn; off_t new_eof; char buff[DISK_BLOCK_SIZE]; char msg_buffer[1024], *newmap, *newmap_bptr; mstr msg_string; char addr[SA_MAXLEN+1]; unsigned char tcp[5]; backup_type type; unsigned short port; int4 timeout, cut, match; char debug_info[256]; void (*common_read)(); char *errptr; pid_t waitpid_res; error_def(ERR_MUPRESTERR); error_def(ERR_MUPCLIERR); error_def(ERR_IOEOF); extend = TRUE; if (CLI_NEGATED == (cli_status = cli_present("EXTEND"))) extend = FALSE; mu_outofband_setup(); mu_gv_cur_reg_init(); n_len = sizeof(db_name); if (cli_get_str("DATABASE", db_name, &n_len) == FALSE) mupip_exit(ERR_MUPCLIERR); strcpy((char *)gv_cur_region->dyn.addr->fname, db_name); gv_cur_region->dyn.addr->fname_len = n_len; if (!mu_rndwn_file(gv_cur_region, TRUE)) { util_out_print("Error securing stand alone access to output file !AD. Aborting restore.", TRUE, n_len, db_name); mupip_exit(ERR_MUPRESTERR); } OPENFILE(db_name, O_RDWR, db_fd); if (-1 == db_fd) { save_errno = errno; util_out_print("Error accessing output file !AD. Aborting restore.", TRUE, n_len, db_name); errptr = (char *)STRERROR(save_errno); util_out_print("open : !AZ", TRUE, errptr); mupip_exit(save_errno); } murgetlst(); inbuf = (char*)malloc(INC_BACKUP_CHUNK_SIZE); old_data = (sgmnt_data*)malloc(sizeof(sgmnt_data)); LSEEKREAD(db_fd, 0, old_data, sizeof(sgmnt_data), save_errno); if (0 != save_errno) { util_out_print("Error accessing output file !AD. Aborting restore.", TRUE, n_len, db_name); if (-1 != save_errno) { errptr = (char *)STRERROR(save_errno); util_out_print("read : !AZ", TRUE, errptr); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(save_errno); } else { db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_IOEOF); } } if (memcmp(&old_data->label[0], &label[0], GDS_LABEL_SZ)) { util_out_print("Output file !AD has an unrecognizable format", TRUE, n_len, db_name); free(old_data); free(inbuf); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_MUPRESTERR); } curr_tn = old_data->trans_hist.curr_tn; old_blk_size = old_data->blk_size; old_tot_blks = old_data->trans_hist.total_blks; old_start_vbn = old_data->start_vbn; bplmap = old_data->bplmap; free(old_data); msg_string.addr = msg_buffer; msg_string.len = sizeof(msg_buffer); inhead = (inc_header *)malloc(sizeof(inc_header) + 8); inhead = (inc_header *)((((int4)inhead) + 7) & -8); rest_blks = 0; for (ptr = in_files.next; ptr; ptr = ptr->next) { /* --- determine source type --- */ type = backup_to_file; if (0 == ptr->input_file.len) continue; else if ('|' == *(ptr->input_file.addr + ptr->input_file.len - 1)) { type = backup_to_exec; ptr->input_file.len--; *(ptr->input_file.addr + ptr->input_file.len) = '\0'; } else if (ptr->input_file.len > 5) { lower_to_upper(tcp, (uchar_ptr_t)ptr->input_file.addr, 5); if (0 == memcmp(tcp, "TCP:/", 5)) { type = backup_to_tcp; cut = 5; while ('/' == *(ptr->input_file.addr + cut)) cut++; ptr->input_file.len -= cut; p = ptr->input_file.addr; while (p < ptr->input_file.addr + ptr->input_file.len) { *p = *(p + cut); p++; } *p = '\0'; } } /* --- open the input stream --- */ restore_read_errno = 0; switch(type) { case backup_to_file: common_read = iob_read; if ((in = iob_open_rd(ptr->input_file.addr, DISK_BLOCK_SIZE, BLOCKING_FACTOR)) == NULL) { save_errno = errno; util_out_print("Error accessing input file !AD. Aborting restore.", TRUE, ptr->input_file.len, ptr->input_file.addr); errptr = (char *)STRERROR(save_errno); util_out_print("open : !AZ", TRUE, errptr); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(save_errno); } ESTABLISH(iob_io_error); break; case backup_to_exec: pipe_child = 0; common_read = exec_read; in = (BFILE *)malloc(sizeof(BFILE)); if (0 > (in->fd = gtm_pipe(ptr->input_file.addr, input_from_comm))) { util_out_print("Error creating input pipe from !AD.", TRUE, ptr->input_file.len, ptr->input_file.addr); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_MUPRESTERR); } #ifdef DEBUG_ONLINE PRINTF("file descriptor for the openned pipe is %d.\n", in->fd); PRINTF("the command passed to gtm_pipe is %s.\n", ptr->input_file.addr); #endif break; case backup_to_tcp: common_read = tcp_read; /* parse the input */ switch (match = SSCANF(ptr->input_file.addr, "%[^:]:%hu", addr, &port)) { case 1 : port = DEFAULT_BKRS_PORT; case 2 : break; default : util_out_print("Error : A hostname has to be specified.", TRUE); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_MUPRESTERR); } if ((0 == cli_get_int("NETTIMEOUT", &timeout)) || (0 > timeout)) timeout = DEFAULT_BKRS_TIMEOUT; in = (BFILE *)malloc(sizeof(BFILE)); iotcp_fillroutine(); if (0 > (in->fd = tcp_open(addr, port, timeout, TRUE))) { util_out_print("Error establishing TCP connection to !AD.", TRUE, ptr->input_file.len, ptr->input_file.addr); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_MUPRESTERR); } break; default: util_out_print("Aborting restore!/", TRUE); util_out_print("Unrecognized input format !AD", TRUE, ptr->input_file.len, ptr->input_file.addr); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_MUPRESTERR); } COMMON_READ(in, inhead, sizeof(inc_header)); if (memcmp(&inhead->label[0], INC_HEADER_LABEL, INC_HDR_LABEL_SZ)) { util_out_print("Input file !AD has an unrecognizable format", TRUE, ptr->input_file.len, ptr->input_file.addr); free(inbuf); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_MUPRESTERR); } if (curr_tn != inhead->start_tn) { util_out_print("Transaction in input file !AD does not align with database TN.!/DB: !XL!_Input file: !XL", TRUE, ptr->input_file.len, ptr->input_file.addr, curr_tn, inhead->start_tn); free(inbuf); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_MUPRESTERR); } if (old_blk_size != inhead->blk_size) { util_out_print("Incompatable block size. Output file !AD has block size !XL,", TRUE, n_len, db_name); util_out_print("while input file !AD is from a database with block size !XL,", TRUE, ptr->input_file.len, ptr->input_file.addr, inhead->blk_size); free(inbuf); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_MUPRESTERR); } if (old_tot_blks != inhead->db_total_blks) { if (old_tot_blks > inhead->db_total_blks || !extend) { totblks = old_tot_blks - DIVIDE_ROUND_UP(old_tot_blks, DISK_BLOCK_SIZE); util_out_print("Incompatable database sizes. Output file !AD has!/ !UL (!XL hex) total blocks,", TRUE, n_len, db_name, totblks, totblks); totblks = inhead->db_total_blks - DIVIDE_ROUND_UP(inhead->db_total_blks, DISK_BLOCK_SIZE); util_out_print("while input file !AD is from a database with!/ !UL (!XL hex) total blocks", TRUE, ptr->input_file.len, ptr->input_file.addr, totblks, totblks); free(inbuf); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_MUPRESTERR); } else { /* this part of the code is similar to gdsfilext except that you don't need to do * most of the work that gdsfilext does. However, for situations where the database * extended since the last backup (the beginning of this incremental backup), and * there are new bitmaps that are never touched later on by GT.M, these bitmaps * will have tn == 0, which prevents the backup process to pick up these blocks, * so, we need to initialize these bitmaps here */ new_eof = ((off_t)(old_start_vbn - 1) * DISK_BLOCK_SIZE) + ((off_t)inhead->db_total_blks * old_blk_size); memset(buff, 0, DISK_BLOCK_SIZE); LSEEKWRITE(db_fd, new_eof, buff, DISK_BLOCK_SIZE, status); if (0 != status) { util_out_print("Aborting restore!/", TRUE); util_out_print("lseek or write error : Unable to extend output file !AD!/", TRUE, n_len, db_name); util_out_print(" from !UL (!XL hex) total blocks to !UL (!XL hex) total blocks.!/", TRUE, old_tot_blks, old_tot_blks, inhead->db_total_blks, inhead->db_total_blks); util_out_print(" Current input file is !AD with !UL (!XL hex) total blocks!/", TRUE, ptr->input_file.len, ptr->input_file.addr, inhead->db_total_blks, inhead->db_total_blks); gtm_putmsg(VARLSTCNT(1) status); free(inbuf); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_MUPRESTERR); } /* --- initialize all new bitmaps, just in case they are not touched later --- */ if (DIVIDE_ROUND_DOWN(inhead->db_total_blks, bplmap) > DIVIDE_ROUND_DOWN(old_tot_blks, bplmap)) { /* -- similar logic exist in bml_newmap.c, which need to pick up any new updates here -- */ newmap = (char *)malloc(old_blk_size); ((blk_hdr *)newmap)->bsiz = BM_SIZE(bplmap); ((blk_hdr *)newmap)->levl = LCL_MAP_LEVL; ((blk_hdr *)newmap)->tn = curr_tn; newmap_bptr = newmap + sizeof(blk_hdr); *newmap_bptr++ = THREE_BLKS_FREE; memset(newmap_bptr, FOUR_BLKS_FREE, BM_SIZE(bplmap) - sizeof(blk_hdr) - 1); for (ii = ROUND_UP(old_tot_blks, bplmap); ii <= inhead->db_total_blks; ii += bplmap) { new_eof = (off_t)(old_start_vbn - 1) * DISK_BLOCK_SIZE + (off_t)ii * old_blk_size; LSEEKWRITE(db_fd, new_eof, newmap, old_blk_size, status); if (0 != status) { util_out_print("Aborting restore!/", TRUE); util_out_print("Bitmap 0x!XL initialization error!", TRUE, ii); gtm_putmsg(VARLSTCNT(1) status); free(inbuf); free(newmap); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_MUPRESTERR); } } free(newmap); } old_tot_blks = inhead->db_total_blks; } } COMMON_READ(in, &rsize, sizeof(int4)); for ( ; ;) { /* rsize is the size of the record, including the size, but, since the size has already been read in, this will read in the current record and the size for the next record */ /* ensure we have a reasonable record size, at least */ if (rsize - sizeof(int4) - sizeof(block_id) > old_blk_size) { util_out_print("Invalid information in restore file !AD. Aborting restore.", TRUE, ptr->input_file.len, ptr->input_file.addr); iob_close(in); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_MUPRESTERR); } COMMON_READ(in, inbuf, rsize); if (!memcmp(inbuf, &end_msg[0], sizeof end_msg - 1)) break; rest_blks++; blk_num = *(block_id*)inbuf; vbn = old_start_vbn - 1 + (old_blk_size / DISK_BLOCK_SIZE * blk_num); LSEEKWRITE(db_fd, (off_t)vbn * DISK_BLOCK_SIZE, inbuf + sizeof(block_id), rsize - sizeof(block_id) - sizeof(int4), save_errno); if (0 != save_errno) { util_out_print("Error accessing output file !AD. Aborting restore.", TRUE, n_len, db_name); errptr = (char *)STRERROR(save_errno); util_out_print("write : !AZ", TRUE, errptr); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(save_errno); } GET_LONG(temp, (inbuf + rsize - sizeof(int4))); rsize = temp; } GET_LONG(temp, (inbuf + rsize - sizeof(int4))); rsize = temp; vbn = 0; for (i = 0; ; i++) /* Restore file header */ { COMMON_READ(in, inbuf, rsize); if (!memcmp(inbuf, &hdr_msg[0], sizeof hdr_msg - 1)) break; LSEEKWRITE(db_fd, vbn, inbuf, rsize - sizeof(int4), save_errno); if (0 != save_errno) { util_out_print("Error accessing output file !AD. Aborting restore.", TRUE, n_len, db_name); errptr = (char *)STRERROR(save_errno); util_out_print("write : !AZ", TRUE, errptr); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(save_errno); } vbn += rsize - sizeof(int4); GET_LONG(temp, (inbuf + rsize - sizeof(int4))); rsize = temp; } curr_tn = inhead->end_tn; switch (type) { case backup_to_file: REVERT; iob_close(in); break; case backup_to_exec: close(in->fd); if ((pipe_child > 0) && (FALSE != is_proc_alive(pipe_child, 0))) WAITPID(pipe_child, (int *)&status, 0, waitpid_res); break; case backup_to_tcp: break; } } util_out_print("!/RESTORE COMPLETED", TRUE); util_out_print("!UL blocks restored", TRUE, rest_blks); free(inbuf); db_ipcs_reset(gv_cur_region, FALSE); mu_gv_cur_reg_free(); mupip_exit(SS_NORMAL); }
void db_init(gd_region *reg, sgmnt_data_ptr_t tsd) { static boolean_t mutex_init_done = FALSE; boolean_t is_bg, read_only; char machine_name[MAX_MCNAMELEN]; file_control *fc; int gethostname_res, stat_res, mm_prot; int4 status, semval, dblksize, fbwsize; sm_long_t status_l; sgmnt_addrs *csa; sgmnt_data_ptr_t csd; struct sembuf sop[3]; struct stat stat_buf; union semun semarg; struct semid_ds semstat; struct shmid_ds shmstat; struct statvfs dbvfs; uint4 sopcnt; unix_db_info *udi; #ifdef periodic_timer_removed void periodic_flush_check(); #endif error_def(ERR_CLSTCONFLICT); error_def(ERR_CRITSEMFAIL); error_def(ERR_DBNAMEMISMATCH); error_def(ERR_DBIDMISMATCH); error_def(ERR_NLMISMATCHCALC); error_def(ERR_REQRUNDOWN); error_def(ERR_SYSCALL); assert(tsd->acc_meth == dba_bg || tsd->acc_meth == dba_mm); is_bg = (dba_bg == tsd->acc_meth); read_only = reg->read_only; new_dbinit_ipc = FALSE; /* we did not create a new ipc resource */ udi = FILE_INFO(reg); memset(machine_name, 0, sizeof(machine_name)); if (GETHOSTNAME(machine_name, MAX_MCNAMELEN, gethostname_res)) rts_error(VARLSTCNT(5) ERR_TEXT, 2, LEN_AND_LIT("Unable to get the hostname"), errno); assert(strlen(machine_name) < MAX_MCNAMELEN); csa = &udi->s_addrs; csa->db_addrs[0] = csa->db_addrs[1] = csa->lock_addrs[0] = NULL; /* to help in dbinit_ch and gds_rundown */ reg->opening = TRUE; /* * Create ftok semaphore for this region. * We do not want to make ftok counter semaphore to be 2 for on mupip journal recover process. */ if (!ftok_sem_get(reg, !mupip_jnl_recover, GTM_ID, FALSE)) rts_error(VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg)); /* * At this point we have ftok_semid sempahore based on ftok key. * Any ftok conflicted region will block at this point. * Say, a.dat and b.dat both has same ftok and we have process A to access a.dat and * process B to access b.dat. In this case only one can continue to do db_init() */ fc = reg->dyn.addr->file_cntl; fc->file_type = reg->dyn.addr->acc_meth; fc->op = FC_READ; fc->op_buff = (sm_uc_ptr_t)tsd; fc->op_len = sizeof(*tsd); fc->op_pos = 1; dbfilop(fc); /* Read file header */ udi->shmid = tsd->shmid; udi->semid = tsd->semid; udi->sem_ctime = tsd->sem_ctime.ctime; udi->shm_ctime = tsd->shm_ctime.ctime; dbsecspc(reg, tsd); /* Find db segment size */ if (!mupip_jnl_recover) { if (INVALID_SEMID == udi->semid) { if (0 != udi->sem_ctime || INVALID_SHMID != udi->shmid || 0 != udi->shm_ctime) /* We must have somthing wrong in protocol or, code, if this happens */ GTMASSERT; /* * Create new semaphore using IPC_PRIVATE. System guarantees a unique id. */ if (-1 == (udi->semid = semget(IPC_PRIVATE, FTOK_SEM_PER_ID, RWDALL | IPC_CREAT))) { udi->semid = INVALID_SEMID; rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error with database control semget"), errno); } udi->shmid = INVALID_SHMID; /* reset shmid so dbinit_ch does not get confused in case we go there */ new_dbinit_ipc = TRUE; tsd->semid = udi->semid; semarg.val = GTM_ID; /* * Following will set semaphore number 2 (=FTOK_SEM_PER_ID - 1) value as GTM_ID. * In case we have orphaned semaphore for some reason, mupip rundown will be * able to identify GTM semaphores from the value and can remove. */ if (-1 == semctl(udi->semid, FTOK_SEM_PER_ID - 1, SETVAL, semarg)) rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error with database control semctl SETVAL"), errno); /* * Warning: We must read the sem_ctime using IPC_STAT after SETVAL, which changes it. * We must NOT do any more SETVAL after this. Our design is to use * sem_ctime as creation time of semaphore. */ semarg.buf = &semstat; if (-1 == semctl(udi->semid, FTOK_SEM_PER_ID - 1, IPC_STAT, semarg)) rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error with database control semctl IPC_STAT"), errno); tsd->sem_ctime.ctime = udi->sem_ctime = semarg.buf->sem_ctime; } else { if (INVALID_SHMID == udi->shmid) /* if mu_rndwn_file gets standalone access of this region and * somehow mupip process crashes, we can have semid != -1 but shmid == -1 */ rts_error(VARLSTCNT(10) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(tsd->machine_name), ERR_TEXT, 2, LEN_AND_LIT("semid is valid but shmid is invalid")); semarg.buf = &semstat; if (-1 == semctl(udi->semid, 0, IPC_STAT, semarg)) /* file header has valid semid but semaphore does not exists */ rts_error(VARLSTCNT(6) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(tsd->machine_name)); else if (semarg.buf->sem_ctime != tsd->sem_ctime.ctime) rts_error(VARLSTCNT(10) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(tsd->machine_name), ERR_TEXT, 2, LEN_AND_LIT("sem_ctime does not match")); if (-1 == shmctl(udi->shmid, IPC_STAT, &shmstat)) rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error with database control shmctl"), errno); else if (shmstat.shm_ctime != tsd->shm_ctime.ctime) rts_error(VARLSTCNT(10) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(tsd->machine_name), ERR_TEXT, 2, LEN_AND_LIT("shm_ctime does not match")); } /* We already have ftok semaphore of this region, so just plainly do semaphore operation */ /* This is the database access control semaphore for any region */ sop[0].sem_num = 0; sop[0].sem_op = 0; /* Wait for 0 */ sop[1].sem_num = 0; sop[1].sem_op = 1; /* Lock */ sopcnt = 2; if (!read_only) { sop[2].sem_num = 1; sop[2].sem_op = 1; /* increment r/w access counter */ sopcnt = 3; } sop[0].sem_flg = sop[1].sem_flg = sop[2].sem_flg = SEM_UNDO | IPC_NOWAIT; SEMOP(udi->semid, sop, sopcnt, status); if (-1 == status) { errno_save = errno; gtm_putmsg(VARLSTCNT(4) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg)); rts_error(VARLSTCNT(8) ERR_SYSCALL, 5, RTS_ERROR_LITERAL("semop()"), CALLFROM, errno_save); } } else /* for mupip_jnl_recover we were already in mu_rndwn_file and got "semid" semaphore */ { if (INVALID_SEMID == udi->semid || 0 == udi->sem_ctime) /* make sure mu_rndwn_file() has reset created semaphore for standalone access */ GTMASSERT; if (INVALID_SHMID != udi->shmid || 0 != udi->shm_ctime) /* make sure mu_rndwn_file() has reset shared memory */ GTMASSERT; udi->shmid = INVALID_SHMID; /* reset shmid so dbinit_ch does not get confused in case we go there */ new_dbinit_ipc = TRUE; } sem_incremented = TRUE; if (new_dbinit_ipc) { /* Create new shared memory using IPC_PRIVATE. System guarantees a unique id */ #ifdef __MVS__ if (-1 == (status_l = udi->shmid = shmget(IPC_PRIVATE, ROUND_UP(reg->sec_size, MEGA_BOUND), __IPC_MEGA | IPC_CREAT | RWDALL))) #else if (-1 == (status_l = udi->shmid = shmget(IPC_PRIVATE, reg->sec_size, RWDALL | IPC_CREAT))) #endif { udi->shmid = status_l = INVALID_SHMID; rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error with database shmget"), errno); } tsd->shmid = udi->shmid; if (-1 == shmctl(udi->shmid, IPC_STAT, &shmstat)) rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error with database control shmctl"), errno); tsd->shm_ctime.ctime = udi->shm_ctime = shmstat.shm_ctime; } #ifdef DEBUG_DB64 status_l = (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)do_shmat(udi->shmid, next_smseg, SHM_RND)); next_smseg = (sm_uc_ptr_t)ROUND_UP((sm_long_t)(next_smseg + reg->sec_size), SHMAT_ADDR_INCS); #else status_l = (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)do_shmat(udi->shmid, 0, SHM_RND)); #endif if (-1 == status_l) { rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error attaching to database shared memory"), errno); } csa->nl = (node_local_ptr_t)csa->db_addrs[0]; csa->critical = (mutex_struct_ptr_t)(csa->db_addrs[0] + NODE_LOCAL_SIZE); assert(((int)csa->critical & 0xf) == 0); /* critical should be 16-byte aligned */ #ifdef CACHELINE_SIZE assert(0 == ((int)csa->critical & (CACHELINE_SIZE - 1))); #endif /* Note: Here we check jnl_sate from database file and its value cannot change without standalone access. * The jnl_buff buffer should be initialized irrespective of read/write process */ JNL_INIT(csa, reg, tsd); csa->backup_buffer = (backup_buff_ptr_t)(csa->db_addrs[0] + NODE_LOCAL_SPACE + JNL_SHARE_SIZE(tsd)); csa->lock_addrs[0] = (sm_uc_ptr_t)csa->backup_buffer + BACKUP_BUFFER_SIZE + 1; csa->lock_addrs[1] = csa->lock_addrs[0] + LOCK_SPACE_SIZE(tsd) - 1; csa->total_blks = tsd->trans_hist.total_blks; /* For test to see if file has extended */ if (new_dbinit_ipc) { memset(csa->nl, 0, sizeof(*csa->nl)); /* We allocated shared storage -- we have to init it */ if (JNL_ALLOWED(csa)) { /* initialize jb->cycle to a value different from initial value of jpc->cycle (0). although this is not * necessary right now, in the future, the plan is to change jnl_ensure_open() to only do a cycle mismatch * check in order to determine whether to call jnl_file_open() or not. this is in preparation for that. */ csa->jnl->jnl_buff->cycle = 1; } } if (is_bg) csd = csa->hdr = (sgmnt_data_ptr_t)(csa->lock_addrs[1] + 1 + CACHE_CONTROL_SIZE(tsd)); else { csa->acc_meth.mm.mmblk_state = (mmblk_que_heads_ptr_t)(csa->lock_addrs[1] + 1); FSTAT_FILE(udi->fd, &stat_buf, stat_res); if (-1 == stat_res) rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), errno); mm_prot = read_only ? PROT_READ : (PROT_READ | PROT_WRITE); #ifdef DEBUG_DB64 if (-1 == (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)mmap((caddr_t)get_mmseg((size_t)stat_buf.st_size), (size_t)stat_buf.st_size, mm_prot, GTM_MM_FLAGS, udi->fd, (off_t)0))) rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), errno); put_mmseg((caddr_t)(csa->db_addrs[0]), (size_t)stat_buf.st_size); #else if (-1 == (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)mmap((caddr_t)NULL, (size_t)stat_buf.st_size, mm_prot, GTM_MM_FLAGS, udi->fd, (off_t)0))) rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), errno); #endif csa->db_addrs[1] = csa->db_addrs[0] + stat_buf.st_size - 1; csd = csa->hdr = (sgmnt_data_ptr_t)csa->db_addrs[0]; } if (!csa->nl->glob_sec_init) { assert(new_dbinit_ipc); if (is_bg) *csd = *tsd; if (csd->machine_name[0]) /* crash occured */ { if (0 != memcmp(csd->machine_name, machine_name, MAX_MCNAMELEN)) /* crashed on some other node */ rts_error(VARLSTCNT(6) ERR_CLSTCONFLICT, 4, DB_LEN_STR(reg), LEN_AND_STR(csd->machine_name)); else rts_error(VARLSTCNT(6) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(csd->machine_name)); } if (is_bg) { bt_malloc(csa); csa->nl->cache_off = -CACHE_CONTROL_SIZE(tsd); db_csh_ini(csa); } db_csh_ref(csa); strcpy(csa->nl->machine_name, machine_name); /* machine name */ assert(MAX_REL_NAME > gtm_release_name_len); memcpy(csa->nl->now_running, gtm_release_name, gtm_release_name_len + 1); /* GT.M release name */ memcpy(csa->nl->label, GDS_LABEL, GDS_LABEL_SZ - 1); /* GDS label */ memcpy(csa->nl->fname, reg->dyn.addr->fname, reg->dyn.addr->fname_len); /* database filename */ csa->nl->creation_date_time = csd->creation.date_time; csa->nl->highest_lbm_blk_changed = -1; csa->nl->wcs_timers = -1; csa->nl->nbb = BACKUP_NOT_IN_PROGRESS; csa->nl->unique_id.uid = FILE_INFO(reg)->fileid; /* save what file we initialized this storage for */ /* save pointers in csa to access shared memory */ csa->nl->critical = (sm_off_t)((sm_uc_ptr_t)csa->critical - (sm_uc_ptr_t)csa->nl); if (JNL_ALLOWED(csa)) csa->nl->jnl_buff = (sm_off_t)((sm_uc_ptr_t)csa->jnl->jnl_buff - (sm_uc_ptr_t)csa->nl); csa->nl->backup_buffer = (sm_off_t)((sm_uc_ptr_t)csa->backup_buffer - (sm_uc_ptr_t)csa->nl); csa->nl->hdr = (sm_off_t)((sm_uc_ptr_t)csd - (sm_uc_ptr_t)csa->nl); csa->nl->lock_addrs = (sm_off_t)((sm_uc_ptr_t)csa->lock_addrs[0] - (sm_uc_ptr_t)csa->nl); if (!read_only || is_bg) { csd->trans_hist.early_tn = csd->trans_hist.curr_tn; csd->max_update_array_size = csd->max_non_bm_update_array_size = ROUND_UP2(MAX_NON_BITMAP_UPDATE_ARRAY_SIZE(csd), UPDATE_ARRAY_ALIGN_SIZE); csd->max_update_array_size += ROUND_UP2(MAX_BITMAP_UPDATE_ARRAY_SIZE, UPDATE_ARRAY_ALIGN_SIZE); /* add current db_csh counters into the cumulative counters and reset the current counters */ #define TAB_DB_CSH_ACCT_REC(COUNTER, DUMMY1, DUMMY2) \ csd->COUNTER.cumul_count += csd->COUNTER.curr_count; \ csd->COUNTER.curr_count = 0; #include "tab_db_csh_acct_rec.h" #undef TAB_DB_CSH_ACCT_REC } if (!read_only) { if (is_bg) { assert(memcmp(csd, GDS_LABEL, GDS_LABEL_SZ - 1) == 0); LSEEKWRITE(udi->fd, (off_t)0, (sm_uc_ptr_t)csd, sizeof(sgmnt_data), errno_save); if (0 != errno_save) { rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error with database write"), errno_save); } } } reg->dyn.addr->ext_blk_count = csd->extension_size; mlk_shr_init(csa->lock_addrs[0], csd->lock_space_size, csa, (FALSE == read_only)); DEBUG_ONLY(locknl = csa->nl;) /* for DEBUG_ONLY LOCK_HIST macro */
int4 mupip_set_file(int db_fn_len, char *db_fn) { bool got_standalone; boolean_t bypass_partial_recov, need_standalone = FALSE; char acc_spec[MAX_ACC_METH_LEN], ver_spec[MAX_DB_VER_LEN], exit_stat, *fn; unsigned short acc_spec_len = MAX_ACC_METH_LEN, ver_spec_len = MAX_DB_VER_LEN; int fd, fn_len; int4 status; int4 status1; int glbl_buff_status, defer_status, rsrvd_bytes_status, extn_count_status, lock_space_status, disk_wait_status; int4 new_disk_wait, new_cache_size, new_extn_count, new_lock_space, reserved_bytes, defer_time; sgmnt_data_ptr_t csd; tp_region *rptr, single; enum db_acc_method access, access_new; enum db_ver desired_dbver; gd_region *temp_cur_region; char *errptr, *command = "MUPIP SET VERSION"; int save_errno; error_def(ERR_DBPREMATEOF); error_def(ERR_DBRDERR); error_def(ERR_DBRDONLY); error_def(ERR_INVACCMETHOD); error_def(ERR_MUNOACTION); error_def(ERR_RBWRNNOTCHG); error_def(ERR_WCERRNOTCHG); error_def(ERR_WCWRNNOTCHG); error_def(ERR_MMNODYNDWNGRD); exit_stat = EXIT_NRM; defer_status = cli_present("DEFER_TIME"); if (defer_status) need_standalone = TRUE; bypass_partial_recov = cli_present("PARTIAL_RECOV_BYPASS") == CLI_PRESENT; if (bypass_partial_recov) need_standalone = TRUE; if (disk_wait_status = cli_present("WAIT_DISK")) { if (cli_get_int("WAIT_DISK", &new_disk_wait)) { if (new_disk_wait < 0) { util_out_print("!UL negative, minimum WAIT_DISK allowed is 0.", TRUE, new_disk_wait); return (int4)ERR_WCWRNNOTCHG; } need_standalone = TRUE; } else { util_out_print("Error getting WAIT_DISK qualifier value", TRUE); return (int4)ERR_WCWRNNOTCHG; } } if (glbl_buff_status = cli_present("GLOBAL_BUFFERS")) { if (cli_get_int("GLOBAL_BUFFERS", &new_cache_size)) { if (new_cache_size > WC_MAX_BUFFS) { util_out_print("!UL too large, maximum write cache buffers allowed is !UL", TRUE, new_cache_size, WC_MAX_BUFFS); return (int4)ERR_WCWRNNOTCHG; } if (new_cache_size < WC_MIN_BUFFS) { util_out_print("!UL too small, minimum cache buffers allowed is !UL", TRUE, new_cache_size, WC_MIN_BUFFS); return (int4)ERR_WCWRNNOTCHG; } } else { util_out_print("Error getting GLOBAL BUFFER qualifier value", TRUE); return (int4)ERR_WCWRNNOTCHG; } need_standalone = TRUE; } /* EXTENSION_COUNT does not require standalone access and hence need_standalone will not be set to TRUE for this. */ if (extn_count_status = cli_present("EXTENSION_COUNT")) { if (cli_get_int("EXTENSION_COUNT", &new_extn_count)) { if (new_extn_count > MAX_EXTN_COUNT) { util_out_print("!UL too large, maximum extension count allowed is !UL", TRUE, new_extn_count, MAX_EXTN_COUNT); return (int4)ERR_WCWRNNOTCHG; } if (new_extn_count < MIN_EXTN_COUNT) { util_out_print("!UL too small, minimum extension count allowed is !UL", TRUE, new_extn_count, MIN_EXTN_COUNT); return (int4)ERR_WCWRNNOTCHG; } } else { util_out_print("Error getting EXTENSION COUNT qualifier value", TRUE); return (int4)ERR_WCWRNNOTCHG; } } if (lock_space_status = cli_present("LOCK_SPACE")) { if (cli_get_int("LOCK_SPACE", &new_lock_space)) { if (new_lock_space > MAX_LOCK_SPACE) { util_out_print("!UL too large, maximum lock space allowed is !UL", TRUE, new_lock_space, MAX_LOCK_SPACE); return (int4)ERR_WCWRNNOTCHG; } else if (new_lock_space < MIN_LOCK_SPACE) { util_out_print("!UL too small, minimum lock space allowed is !UL", TRUE, new_lock_space, MIN_LOCK_SPACE); return (int4)ERR_WCWRNNOTCHG; } } else { util_out_print("Error getting LOCK_SPACE qualifier value", TRUE); return (int4)ERR_WCWRNNOTCHG; } need_standalone = TRUE; } if (rsrvd_bytes_status = cli_present("RESERVED_BYTES")) { if (!cli_get_int("RESERVED_BYTES", &reserved_bytes)) { util_out_print("Error getting RESERVED BYTES qualifier value", TRUE); return (int4)ERR_RBWRNNOTCHG; } need_standalone = TRUE; } if (cli_present("ACCESS_METHOD")) { cli_get_str("ACCESS_METHOD", acc_spec, &acc_spec_len); cli_strupper(acc_spec); if (0 == memcmp(acc_spec, "MM", acc_spec_len)) access = dba_mm; else if (0 == memcmp(acc_spec, "BG", acc_spec_len)) access = dba_bg; else mupip_exit(ERR_INVACCMETHOD); need_standalone = TRUE; } else access = n_dba; /* really want to keep current method, which has not yet been read */ if (cli_present("VERSION")) { assert(!need_standalone); cli_get_str("VERSION", ver_spec, &ver_spec_len); cli_strupper(ver_spec); if (0 == memcmp(ver_spec, "V4", ver_spec_len)) desired_dbver = GDSV4; else if (0 == memcmp(ver_spec, "V5", ver_spec_len)) desired_dbver = GDSV5; else GTMASSERT; /* CLI should prevent us ever getting here */ } else desired_dbver = GDSVLAST; /* really want to keep version, which has not yet been read */ if (region) rptr = grlist; else { rptr = &single; memset(&single, 0, sizeof(single)); } csd = (sgmnt_data *)malloc(ROUND_UP(sizeof(sgmnt_data), DISK_BLOCK_SIZE)); in_backup = FALSE; /* Only want yes/no from mupfndfil, not an address */ for (; rptr != NULL; rptr = rptr->fPtr) { if (region) { if (dba_usr == rptr->reg->dyn.addr->acc_meth) { util_out_print("!/Region !AD is not a GDS access type", TRUE, REG_LEN_STR(rptr->reg)); exit_stat |= EXIT_WRN; continue; } if (!mupfndfil(rptr->reg, NULL)) continue; fn = (char *)rptr->reg->dyn.addr->fname; fn_len = rptr->reg->dyn.addr->fname_len; } else { fn = db_fn; fn_len = db_fn_len; } mu_gv_cur_reg_init(); strcpy((char *)gv_cur_region->dyn.addr->fname, fn); gv_cur_region->dyn.addr->fname_len = fn_len; if (!need_standalone) { gvcst_init(gv_cur_region); change_reg(); /* sets cs_addrs and cs_data */ if (gv_cur_region->read_only) { gtm_putmsg(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region)); exit_stat |= EXIT_ERR; gds_rundown(); mu_gv_cur_reg_free(); continue; } grab_crit(gv_cur_region); status = EXIT_NRM; access_new = (n_dba == access ? cs_data->acc_meth : access); /* recalculate; n_dba is a proxy for no change */ change_fhead_timer("FLUSH_TIME", cs_data->flush_time, (dba_bg == access_new ? TIM_FLU_MOD_BG : TIM_FLU_MOD_MM), FALSE); if (GDSVLAST != desired_dbver) { if ((dba_mm != access_new) || (GDSV4 != desired_dbver)) status1 = desired_db_format_set(gv_cur_region, desired_dbver, command); else { status1 = ERR_MMNODYNDWNGRD; gtm_putmsg(VARLSTCNT(4) status1, 2, REG_LEN_STR(gv_cur_region)); } if (SS_NORMAL != status1) { /* "desired_db_format_set" would have printed appropriate error messages */ if (ERR_MUNOACTION != status1) { /* real error occurred while setting the db format. skip to next region */ status = EXIT_ERR; } } } if (EXIT_NRM == status) { if (extn_count_status) cs_data->extension_size = (uint4)new_extn_count; wcs_flu(WCSFLU_FLUSH_HDR); if (extn_count_status) util_out_print("Database file !AD now has extension count !UL", TRUE, fn_len, fn, cs_data->extension_size); if (GDSVLAST != desired_dbver) util_out_print("Database file !AD now has desired DB format !AD", TRUE, fn_len, fn, LEN_AND_STR(gtm_dbversion_table[cs_data->desired_db_format])); } else exit_stat |= status; rel_crit(gv_cur_region); gds_rundown(); } else { /* Following part needs standalone access */ assert(GDSVLAST == desired_dbver); got_standalone = mu_rndwn_file(gv_cur_region, TRUE); if (FALSE == got_standalone) return (int4)ERR_WCERRNOTCHG; /* we should open it (for changing) after mu_rndwn_file, since mu_rndwn_file changes the file header too */ if (-1 == (fd = OPEN(fn, O_RDWR))) { save_errno = errno; errptr = (char *)STRERROR(save_errno); util_out_print("open : !AZ", TRUE, errptr); exit_stat |= EXIT_ERR; db_ipcs_reset(gv_cur_region, FALSE); mu_gv_cur_reg_free(); continue; } LSEEKREAD(fd, 0, csd, sizeof(sgmnt_data), status); if (0 != status) { save_errno = errno; PERROR("Error reading header of file"); errptr = (char *)STRERROR(save_errno); util_out_print("read : !AZ", TRUE, errptr); util_out_print("Error reading header of file", TRUE); util_out_print("Database file !AD not changed: ", TRUE, fn_len, fn); if (-1 != status) rts_error(VARLSTCNT(4) ERR_DBRDERR, 2, fn_len, fn); else rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn); } if (rsrvd_bytes_status) { if (reserved_bytes > MAX_RESERVE_B(csd)) { util_out_print("!UL too large, maximum reserved bytes allowed is !UL for database file !AD", TRUE, reserved_bytes, MAX_RESERVE_B(csd), fn_len, fn); close(fd); db_ipcs_reset(gv_cur_region, FALSE); return (int4)ERR_RBWRNNOTCHG; } csd->reserved_bytes = reserved_bytes; } access_new = (n_dba == access ? csd->acc_meth : access); /* recalculate; n_dba is a proxy for no change */ change_fhead_timer("FLUSH_TIME", csd->flush_time, (dba_bg == access_new ? TIM_FLU_MOD_BG : TIM_FLU_MOD_MM), FALSE); if ((n_dba != access) && (csd->acc_meth != access)) /* n_dba is a proxy for no change */ { if (dba_mm == access) csd->defer_time = 1; /* defer defaults to 1 */ csd->acc_meth = access; if (0 == csd->n_bts) { csd->n_bts = WC_DEF_BUFFS; csd->bt_buckets = getprime(csd->n_bts); } } if (glbl_buff_status) { csd->n_bts = BT_FACTOR(new_cache_size); csd->bt_buckets = getprime(csd->n_bts); csd->n_wrt_per_flu = 7; csd->flush_trigger = FLUSH_FACTOR(csd->n_bts); } if (disk_wait_status) csd->wait_disk_space = new_disk_wait; if (extn_count_status) csd->extension_size = (uint4)new_extn_count; if (lock_space_status) csd->lock_space_size = (uint4)new_lock_space * OS_PAGELET_SIZE; if (bypass_partial_recov) { csd->file_corrupt = FALSE; util_out_print("Database file !AD now has partial recovery flag set to !UL(FALSE) ", TRUE, fn_len, fn, csd->file_corrupt); } if (dba_mm == access_new) { if (CLI_NEGATED == defer_status) csd->defer_time = 0; else if (CLI_PRESENT == defer_status) { if (!cli_get_num("DEFER_TIME", &defer_time)) { util_out_print("Error getting DEFER_TIME qualifier value", TRUE); db_ipcs_reset(gv_cur_region, FALSE); return (int4)ERR_RBWRNNOTCHG; } if (-1 > defer_time) { util_out_print("DEFER_TIME cannot take negative values less than -1", TRUE); util_out_print("Database file !AD not changed", TRUE, fn_len, fn); exit_stat |= EXIT_WRN; db_ipcs_reset(gv_cur_region, FALSE); mu_gv_cur_reg_free(); continue; } csd->defer_time = defer_time; } if (csd->blks_to_upgrd) { util_out_print("MM access method cannot be set if there are blocks to upgrade", TRUE); util_out_print("Database file !AD not changed", TRUE, fn_len, fn); exit_stat |= EXIT_WRN; db_ipcs_reset(gv_cur_region, FALSE); mu_gv_cur_reg_free(); continue; } if (GDSVCURR != csd->desired_db_format) { util_out_print("MM access method cannot be set in DB compatibility mode", TRUE); util_out_print("Database file !AD not changed", TRUE, fn_len, fn); exit_stat |= EXIT_WRN; db_ipcs_reset(gv_cur_region, FALSE); mu_gv_cur_reg_free(); continue; } if (JNL_ENABLED(csd) && csd->jnl_before_image) { util_out_print("MM access method cannot be set with BEFORE image journaling", TRUE); util_out_print("Database file !AD not changed", TRUE, fn_len, fn); exit_stat |= EXIT_WRN; db_ipcs_reset(gv_cur_region, FALSE); mu_gv_cur_reg_free(); continue; } csd->jnl_before_image = FALSE; } else { if (defer_status) { util_out_print("DEFER cannot be specified with BG access method.", TRUE); util_out_print("Database file !AD not changed", TRUE, fn_len, fn); exit_stat |= EXIT_WRN; db_ipcs_reset(gv_cur_region, FALSE); mu_gv_cur_reg_free(); continue; } } LSEEKWRITE(fd, 0, csd, sizeof(sgmnt_data), status); if (0 != status) { save_errno = errno; errptr = (char *)STRERROR(save_errno); util_out_print("write : !AZ", TRUE, errptr); util_out_print("Error writing header of file", TRUE); util_out_print("Database file !AD not changed: ", TRUE, fn_len, fn); rts_error(VARLSTCNT(4) ERR_DBRDERR, 2, fn_len, fn); } close(fd); /* --------------------- report results ------------------------- */ if (glbl_buff_status) util_out_print("Database file !AD now has !UL global buffers", TRUE, fn_len, fn, csd->n_bts); if (defer_status && (dba_mm == csd->acc_meth)) util_out_print("Database file !AD now has defer_time set to !SL", TRUE, fn_len, fn, csd->defer_time); if (rsrvd_bytes_status) util_out_print("Database file !AD now has !UL reserved bytes", TRUE, fn_len, fn, csd->reserved_bytes); if (extn_count_status) util_out_print("Database file !AD now has extension count !UL", TRUE, fn_len, fn, csd->extension_size); if (lock_space_status) util_out_print("Database file !AD now has lock space !UL pages", TRUE, fn_len, fn, csd->lock_space_size/OS_PAGELET_SIZE); if (disk_wait_status) util_out_print("Database file !AD now has wait disk set to !UL seconds", TRUE, fn_len, fn, csd->wait_disk_space); db_ipcs_reset(gv_cur_region, FALSE); } /* end of else part if (!need_standalone) */ mu_gv_cur_reg_free(); } free(csd); assert(!(exit_stat & EXIT_INF)); return (exit_stat & EXIT_ERR ? (int4)ERR_WCERRNOTCHG : (exit_stat & EXIT_WRN ? (int4)ERR_WCWRNNOTCHG : SS_NORMAL)); }
void mupip_upgrade(void) { bool rbno; unsigned char *upgrd_buff[2], upgrd_label[GDS_LABEL_SZ]="UPGRADE0304"; char fn[256]; char answer[4]; unsigned short fn_len; int4 fd, save_errno, old_hdr_size, new_hdr_size, status, bufsize, dsize, datasize[2]; int4 old_hdr_size_vbn, new_hdr_size_vbn; int fstat_res; off_t last_full_grp_startoff, old_file_len, old_file_len2, read_off, write_off, old_start_vbn_off; block_id last_full_grp_startblk; v3_sgmnt_data old_head_data, *old_head; sgmnt_data new_head_data, *new_head; struct stat stat_buf; error_def(ERR_MUNODBNAME); error_def(ERR_MUNOUPGRD); error_def(ERR_DBOPNERR); error_def(ERR_DBRDONLY); error_def(ERR_DBFILOPERR); error_def(ERR_DBPREMATEOF); ESTABLISH(mupip_upgrade_ch); fn_len = sizeof(fn); if (!cli_get_str("FILE", fn, &fn_len)) rts_error(VARLSTCNT(1) ERR_MUNODBNAME); if (!(mupip_upgrade_standalone(fn, &upgrade_standalone_sems))) rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); if (-1 == (fd = OPEN(fn, O_RDWR))) { save_errno = errno; if (-1 != (fd = OPEN(fn, O_RDONLY))) { util_out_print("Cannot update read-only database.", FLUSH); rts_error(VARLSTCNT(5) ERR_DBRDONLY, 2, fn_len, fn, errno); } rts_error(VARLSTCNT(5) ERR_DBRDONLY, 2, fn_len, fn, save_errno); } /* Confirm before proceed */ if (!mu_upgrd_confirmed(TRUE)) { util_out_print("Upgrade canceled by user", FLUSH); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } util_out_print("Do not interrupt to avoid damage in database!!", FLUSH); util_out_print("Mupip upgrade started ...!/", FLUSH); mu_upgrd_sig_init(); /* get file status */ FSTAT_FILE(fd, &stat_buf, fstat_res); if (-1 == fstat_res) rts_error(VARLSTCNT(5) ERR_DBOPNERR, 2, fn_len, fn, errno); old_file_len = stat_buf.st_size; /* Prepare v3.x file header buffer */ old_hdr_size = sizeof(*old_head); old_head = &old_head_data; /* Prepare v4.x file header buffer */ new_hdr_size = sizeof(*new_head); new_head = &new_head_data; memset(new_head, 0, new_hdr_size); old_hdr_size_vbn = DIVIDE_ROUND_UP(old_hdr_size, DISK_BLOCK_SIZE); new_hdr_size_vbn = DIVIDE_ROUND_UP(new_hdr_size, DISK_BLOCK_SIZE); /* READ header from V3.x file */ LSEEKREAD(fd, 0, old_head, old_hdr_size, status); if (0 != status) if (-1 == status) rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn); else rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); /* Check version */ if (memcmp(&old_head->label[0], GDS_LABEL, GDS_LABEL_SZ - 1)) { if (memcmp(&old_head->label[0], GDS_LABEL, GDS_LABEL_SZ - 3)) { /* it is not a GTM database */ close(fd); util_out_print("File !AD is not a GT.M database.!/", FLUSH, fn_len, fn); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); }else { /* it is GTM database */ /* is it not v3.x database? */ if (memcmp(&old_head->label[GDS_LABEL_SZ - 3],GDS_V30,2) !=0 && memcmp(&old_head->label[GDS_LABEL_SZ - 3],GDS_ALT_V30,2) != 0) { close(fd); util_out_print("File !AD has an unrecognized database version!/", FLUSH, fn_len, fn); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } } } else { /* Note: We assume that if the V4.x header and current GT.M file header * has same field names, they are at same offset */ /* READ the header from file again as V4.x header */ LSEEKREAD(fd, 0, new_head, new_hdr_size, status); if (0 != status) if (-1 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); else rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn); if (QWNE(new_head->reg_seqno, seq_num_zero) || QWNE(new_head->resync_seqno, seq_num_zero) || (new_head->resync_tn != 0) || new_head->repl_state != repl_closed) { util_out_print("!AD might already have been upgraded", FLUSH, fn_len, fn); util_out_print("Do you wish to continue with the upgrade? [y/n] ", FLUSH); SCANF("%s", answer); if (answer[0] != 'y' && answer[0] != 'Y') { close(fd); util_out_print("Upgrade canceled by user", FLUSH); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } } init_replication(new_head); new_head->max_update_array_size = new_head->max_non_bm_update_array_size = ROUND_UP2(MAX_NON_BITMAP_UPDATE_ARRAY_SIZE(new_head), UPDATE_ARRAY_ALIGN_SIZE); new_head->max_update_array_size += ROUND_UP2(MAX_BITMAP_UPDATE_ARRAY_SIZE, UPDATE_ARRAY_ALIGN_SIZE); new_head->mutex_spin_parms.mutex_hard_spin_count = MUTEX_HARD_SPIN_COUNT; new_head->mutex_spin_parms.mutex_sleep_spin_count = MUTEX_SLEEP_SPIN_COUNT; new_head->mutex_spin_parms.mutex_spin_sleep_mask = MUTEX_SPIN_SLEEP_MASK; new_head->semid = INVALID_SEMID; new_head->shmid = INVALID_SHMID; if (JNL_ALLOWED(new_head)) { /* Following 3 are new fields starting from V43001. * Initialize them appropriately. */ new_head->epoch_interval = DEFAULT_EPOCH_INTERVAL; new_head->alignsize = DISK_BLOCK_SIZE * JNL_DEF_ALIGNSIZE; if (!new_head->jnl_alq) new_head->jnl_alq = JNL_ALLOC_DEF; /* note new_head->jnl_deq is carried over without any change even if it is zero since a zero * jnl file extension size is supported starting V43001 */ new_head->autoswitchlimit = ALIGNED_ROUND_DOWN(JNL_ALLOC_MAX, new_head->jnl_alq, new_head->jnl_deq); /* following field is assumed as non-zero by set_jnl_info starting V43001A */ if (JNL_ALLOWED(new_head) && !new_head->jnl_buffer_size) new_head->jnl_buffer_size = JNL_BUFFER_DEF; } else { new_head->epoch_interval = 0; new_head->alignsize = 0; new_head->autoswitchlimit = 0; } new_head->yield_lmt = DEFAULT_YIELD_LIMIT; /* writing header */ LSEEKWRITE(fd, 0, new_head, new_hdr_size, status); if (0 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); close(fd); util_out_print("File !AD successfully upgraded.!/", FLUSH, fn_len, fn); if (0 != sem_rmid(upgrade_standalone_sems)) { util_out_print("Error with sem_rmid : %d [0x%x]", TRUE, upgrade_standalone_sems, upgrade_standalone_sems); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } mupip_exit(SS_NORMAL); } util_out_print("Old header size: !SL", FLUSH, old_hdr_size); util_out_print("New header size: !SL", FLUSH, new_hdr_size); if (old_head->createinprogress) { close(fd); util_out_print("Database creation in progress on file !AD.!/", FLUSH, fn_len, fn); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } if (old_head->file_corrupt) { close(fd); util_out_print("Database !AD is corrupted.!/", FLUSH, fn_len, fn); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } if ((((off_t)old_head->start_vbn - 1) * DISK_BLOCK_SIZE + (off_t)old_head->trans_hist.total_blks * old_head->blk_size + (off_t)DISK_BLOCK_SIZE != old_file_len) && (((off_t)old_head->start_vbn - 1) * DISK_BLOCK_SIZE + (off_t)old_head->trans_hist.total_blks * old_head->blk_size + (off_t)old_head->blk_size != old_file_len)) { util_out_print("Incorrect start_vbn !SL or, block size !SL or, total blocks !SL", FLUSH, old_head->start_vbn, old_head->blk_size, old_head->trans_hist.total_blks); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } if (ROUND_DOWN(old_head->blk_size, DISK_BLOCK_SIZE) != old_head->blk_size) { util_out_print("Database block size !SL is not divisible by DISK_BLOCK_SIZE", FLUSH, old_head->blk_size); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } mu_upgrd_header(old_head, new_head); /* Update header from v3.x to v4.x */ new_head->start_vbn = new_hdr_size_vbn + 1; new_head->free_space = 0; new_head->wc_blocked_t_end_hist.evnt_cnt = old_head->wc_blocked_t_end_hist2.evnt_cnt; new_head->wc_blocked_t_end_hist.evnt_tn = old_head->wc_blocked_t_end_hist2.evnt_tn; init_replication(new_head); /* A simple way of doing mupip upgrade is to move all the data after file header towards the eof to make space and write down the header. This does not need any computation or, change in data/index blocks. This is a slow process because it has mainly I/O, though no manipulation of database structures. or index blocks. This is okay for small database. A time efficient way is to physically move second group of BLKS_PER_LMAP number of blocks towards the eof and move first group of BLKS_PER_LMAP number of blocks in place of 2nd group. Finally adjust all indices to point to the blocks correctly. Also adjust master bit map. (note: we cannot move first group from the beginning). Detail algorithm as follows: --------------------------- // Allocate two buffers each to hold one group of data. Read v3.x header and upgrade to v4.x if file is big enough read group 1 in buff[0] read_off = offset of starting block of 2nd group. read group 2 in buff[1] write buff[0] at offset read_off last_full_grp_startblk = points to the block where 2nd group of 512 blocks of old file will be written back. //Instead of searching for a free group we will write at the last full group //Say, we have 3000 blocks. last_full_grp_startblk = 2048 // (not 2560, because it is not full) //All data from that point upto eof will be read and saved in buffer read all remaining data from the point last_full_grp_startblk upto eof in buff[0] write buff[1] at the point of last_full_grp_startblk Now write buff[0] at the end of last write //Graphical Example: Each letter corresponds to a group of 512 blocks where first block // is local bit map. Last group U may be a group of less than 512 blocks. // Extend towards right -------------------------------------------------------> // old permutation: [v3 head] A B C D E F G H I J K L M N O P Q R S T U // new permutation: [v4 head ] A C D E F G H I J K L M N O P Q R S T B U Finally traverse the tree and adjust block pointers Adjust master map write new v4.x header at bof else bufsize = size of data for a group rbno = 0 // read buffer no. This switches between 0 and 1 read_off = 0 write_off = 0 upgrd_buff[rbno] = new header data_size[rbno] = new header size rbno = INVERT(rbno); do while not eof data_size[rbno] = MIN(bufsize, remaining_data_size) Read data of size data_size[rbno] in upgrd_buff[rbno] and adjust read_off rbno = INVERT(rbno); Write upgrd_buff[rbno] of datasize[rbno] at write_off and increase write_off Enddo rbno = INVERT(rbno) Write upgrd_buff[rbno] of datasize[rbno] at write_off endif */ bufsize = old_head->blk_size * BLKS_PER_LMAP; upgrd_buff[0] = (unsigned char*) malloc(bufsize); upgrd_buff[1] = (unsigned char*) malloc(bufsize); read_off = old_start_vbn_off = (off_t)(old_head->start_vbn - 1) * DISK_BLOCK_SIZE; /* start vbn offset in bytes */ last_full_grp_startblk = ROUND_DOWN(new_head->trans_hist.total_blks, BLKS_PER_LMAP); /* in block_id */ last_full_grp_startoff = old_start_vbn_off + (off_t)last_full_grp_startblk * new_head->blk_size; /* offset in bytes */ /* this calculation is used because some 3.2x database has GDS blk_size bytes at the end instead of DISK_BLOCK_SIZE bytes. */ old_file_len2 = old_head->start_vbn * DISK_BLOCK_SIZE + (off_t)old_head->blk_size * old_head->trans_hist.total_blks; /* Change Label to a temporary dummy value, so that other GTM process does not come while doing upgrade and corrupts database */ LSEEKWRITE(fd, 0, upgrd_label, GDS_LABEL_SZ - 1, status); if (0 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); if (old_head->trans_hist.total_blks > BLKS_PER_LMAP * 2) { /* recalculate start_vbn and free space, because there will be a gap after header */ new_head->start_vbn = old_head->start_vbn + bufsize / DISK_BLOCK_SIZE; new_head->free_space = bufsize - (new_hdr_size_vbn - old_hdr_size_vbn) * DISK_BLOCK_SIZE; util_out_print("New starting VBN is: !SL !/", FLUSH, new_head->start_vbn); /* read 1st group of blocks */ LSEEKREAD(fd, read_off, upgrd_buff[0], bufsize, status); if (0 != status) if (-1 == status) rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn); else rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); read_off = read_off + bufsize; /* read 2nd group of blocks */ LSEEKREAD(fd, read_off, upgrd_buff[1], bufsize, status); if (0 != status) if (-1 == status) rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn); else rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); /* write 1st group of blocks in place of 2nd group */ write_off = old_start_vbn_off + bufsize; LSEEKWRITE(fd, write_off, upgrd_buff[0], bufsize, status); if (0 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); /* read last group (# of blks <= BLKS_PER_LMAP) */ dsize = old_file_len2 - last_full_grp_startoff; assert (dsize <= bufsize); LSEEKREAD(fd, last_full_grp_startoff, upgrd_buff[0], dsize, status); if (0 != status) if (-1 == status) rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn); else rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); /* write 2nd group of blocks */ LSEEKWRITE(fd, last_full_grp_startoff, upgrd_buff[1], bufsize, status); if (0 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); /* write last group read from old file */ LSEEKWRITE(fd, last_full_grp_startoff + bufsize, upgrd_buff[0], dsize, status); if (0 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); util_out_print("Please wait while index is being adjusted...!/", FLUSH); mu_upgrd_adjust_blkptr(1L, TRUE, new_head, fd, fn, fn_len); mu_upgrd_adjust_mm(new_head->master_map, DIVIDE_ROUND_UP(new_head->trans_hist.total_blks+1,BLKS_PER_LMAP)); /* writing header */ LSEEKWRITE(fd, 0, new_head, new_hdr_size, status); if (0 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); } else /* very small database */ { rbno = 0; write_off = 0; datasize[rbno] = new_hdr_size; memcpy(upgrd_buff[0], new_head, new_hdr_size); rbno = INVERT(rbno); while(read_off < old_file_len2) { datasize[rbno] = MIN (old_file_len2 - read_off, bufsize); LSEEKREAD(fd, read_off, upgrd_buff[rbno], datasize[rbno], status); if (0 != status) if (-1 == status) rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn); else rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); read_off += datasize[rbno]; rbno = INVERT(rbno); LSEEKWRITE(fd, write_off, upgrd_buff[rbno], datasize[rbno], status); if (0 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); write_off+= datasize[rbno]; } rbno = INVERT(rbno); LSEEKWRITE(fd, write_off, upgrd_buff[rbno], datasize[rbno], status); if (0 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); } /* end if small database */ free(upgrd_buff[0]); free(upgrd_buff[1]); close(fd); util_out_print("File !AD successfully upgraded.!/", FLUSH, fn_len, fn); REVERT; if (0 != sem_rmid(upgrade_standalone_sems)) { util_out_print("Error with sem_rmid : %d [0x%x]", TRUE, upgrade_standalone_sems, upgrade_standalone_sems); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } mupip_exit(SS_NORMAL); }
uint4 gdsfilext (uint4 blocks, uint4 filesize) { sm_uc_ptr_t old_base[2]; boolean_t was_crit, need_to_restore_mask = FALSE; char *buff; int mm_prot, result, save_errno, status; uint4 new_bit_maps, bplmap, map, new_blocks, new_total, max_tot_blks; uint4 jnl_status, to_wait, to_msg, wait_period; GTM_BAVAIL_TYPE avail_blocks; sgmnt_data_ptr_t tmp_csd; off_t new_eof; trans_num curr_tn; unix_db_info *udi; sigset_t savemask; inctn_opcode_t save_inctn_opcode; int4 prev_extend_blks_to_upgrd; jnl_private_control *jpc; jnl_buffer_ptr_t jbp; error_def(ERR_DBFILERR); error_def(ERR_DBFILEXT); error_def(ERR_DSKSPACEFLOW); error_def(ERR_JNLFLUSH); error_def(ERR_TEXT); error_def(ERR_TOTALBLKMAX); error_def(ERR_WAITDSKSPACE); #ifdef __hppa if (dba_mm == cs_addrs->hdr->acc_meth) return (uint4)(NO_FREE_SPACE); /* should this be changed to show extension not allowed ? */ #endif /* Both blocks and total blocks are unsigned ints so make sure we aren't asking for huge numbers that will overflow and end up doing silly things. */ assert(blocks <= (MAXTOTALBLKS(cs_data) - cs_data->trans_hist.total_blks)); if (!blocks) return (uint4)(NO_FREE_SPACE); /* should this be changed to show extension not enabled ? */ bplmap = cs_data->bplmap; /* new total of non-bitmap blocks will be number of current, non-bitmap blocks, plus new blocks desired There are (bplmap - 1) non-bitmap blocks per bitmap, so add (bplmap - 2) to number of non-bitmap blocks and divide by (bplmap - 1) to get total number of bitmaps for expanded database. (must round up in this manner as every non-bitmap block must have an associated bitmap) Current number of bitmaps is (total number of current blocks + bplmap - 1) / bplmap. Subtract current number of bitmaps from number needed for expanded database to get number of new bitmaps needed. */ new_bit_maps = DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks - DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, bplmap) + blocks, bplmap - 1) - DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, bplmap); new_blocks = blocks + new_bit_maps; assert(0 < (int)new_blocks); udi = FILE_INFO(gv_cur_region); if (0 != (save_errno = disk_block_available(udi->fd, &avail_blocks, FALSE))) { send_msg(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno); rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno); } else { avail_blocks = avail_blocks / (cs_data->blk_size / DISK_BLOCK_SIZE); if ((blocks * EXTEND_WARNING_FACTOR) > avail_blocks) { send_msg(VARLSTCNT(5) ERR_DSKSPACEFLOW, 3, DB_LEN_STR(gv_cur_region), (uint4)(avail_blocks - ((new_blocks <= avail_blocks) ? new_blocks : 0))); #ifndef __MVS__ if (blocks > (uint4)avail_blocks) return (uint4)(NO_FREE_SPACE); #endif } } cs_addrs->extending = TRUE; was_crit = cs_addrs->now_crit; /* If we are coming from mupip_extend (which gets crit itself) we better have waited for any unfreezes to occur */ assert(!was_crit || CDB_STAGNATE == t_tries || FALSE == cs_data->freeze); for ( ; ; ) { /* If we are in the final retry and already hold crit, it is possible that csd->wc_blocked is also set to TRUE * (by a concurrent process in phase2 which encountered an error in the midst of commit and secshr_db_clnup * finished the job for it). In this case we do NOT want to invoke wcs_recover as that will update the "bt" * transaction numbers without correspondingly updating the history transaction numbers (effectively causing * a cdb_sc_blkmod type of restart). Therefore do NOT call grab_crit (which unconditionally invokes wcs_recover) * if we already hold crit. */ if (!was_crit) grab_crit(gv_cur_region); if (FALSE == cs_data->freeze) break; rel_crit(gv_cur_region); if (was_crit) { /* Two cases. * (i) Final retry and in TP. We might be holding crit in other regions too. * We can't do a grab_crit() on this region again unless it is deadlock-safe. * To be on the safer side, we do a restart. The tp_restart() logic will wait * for this region's freeze to be removed before grabbing crit. * (ii) Final retry and not in TP. In that case too, it is better to restart in case there is * some validation code that shortcuts the checking for the final retry assuming we were * in crit from t_begin() to t_end(). t_retry() has logic that will wait for unfreeze. * In either case, we need to restart. Returning EXTEND_UNFREEZECRIT will cause one in t_end/tp_tend. */ return (uint4)(EXTEND_UNFREEZECRIT); } while (cs_data->freeze) hiber_start(1000); } assert(cs_addrs->ti->total_blks == cs_data->trans_hist.total_blks); if (cs_data->trans_hist.total_blks != filesize) { /* somebody else has already extended it, since we are in crit, this is trust-worthy * however, in case of MM, we still need to remap the database */ assert(cs_data->trans_hist.total_blks > filesize); GDSFILEXT_CLNUP; return (SS_NORMAL); } if (run_time && (2 * ((0 < dollar_tlevel) ? sgm_info_ptr->cw_set_depth : cw_set_depth) < cs_addrs->ti->free_blocks)) { if (FALSE == was_crit) { rel_crit(gv_cur_region); return (uint4)(EXTEND_SUSPECT); } /* If free_blocks counter is not ok, then correct it. Do the check again. If still fails, then GTMASSERT. */ if (is_free_blks_ctr_ok() || (2 * ((0 < dollar_tlevel) ? sgm_info_ptr->cw_set_depth : cw_set_depth) < cs_addrs->ti->free_blocks)) GTMASSERT; /* held crit through bm_getfree into gdsfilext and still didn't get it right */ } if (JNL_ENABLED(cs_data)) { if (!jgbl.dont_reset_gbl_jrec_time) SET_GBL_JREC_TIME; /* needed before jnl_ensure_open as that can write jnl records */ jpc = cs_addrs->jnl; jbp = jpc->jnl_buff; /* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order * of jnl records. This needs to be done BEFORE the jnl_ensure_open as that could write * journal records (if it decides to switch to a new journal file). */ ADJUST_GBL_JREC_TIME(jgbl, jbp); jnl_status = jnl_ensure_open(); if (jnl_status) { GDSFILEXT_CLNUP; send_msg(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(cs_data), DB_LEN_STR(gv_cur_region)); return (uint4)(NO_FREE_SPACE); /* should have better return status */ } } if (dba_mm == cs_addrs->hdr->acc_meth) { #if defined(UNTARGETED_MSYNC) status = msync((caddr_t)cs_addrs->db_addrs[0], (size_t)(cs_addrs->db_addrs[1] - cs_addrs->db_addrs[0]), MS_SYNC); #else cs_addrs->nl->mm_extender_pid = process_id; status = wcs_wtstart(gv_cur_region, 0); cs_addrs->nl->mm_extender_pid = 0; if (0 != cs_addrs->acc_meth.mm.mmblk_state->mmblkq_active.fl) GTMASSERT; status = 0; #endif if (0 == status) { /* Block SIGALRM for the duration when cs_data and cs_addrs are out of sync */ sigprocmask(SIG_BLOCK, &blockalrm, &savemask); need_to_restore_mask = TRUE; tmp_csd = cs_data; cs_data = (sgmnt_data_ptr_t)malloc(sizeof(*cs_data)); memcpy((sm_uc_ptr_t)cs_data, (uchar_ptr_t)tmp_csd, sizeof(*cs_data)); status = munmap((caddr_t)cs_addrs->db_addrs[0], (size_t)(cs_addrs->db_addrs[1] - cs_addrs->db_addrs[0])); #ifdef DEBUG_DB64 if (-1 != status) rel_mmseg((caddr_t)cs_addrs->db_addrs[0]); #endif } else tmp_csd = NULL; if (0 != status) { if (tmp_csd) { free(cs_data); cs_data = tmp_csd; } GDSFILEXT_CLNUP; send_msg(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), status); return (uint4)(NO_FREE_SPACE); } cs_addrs->hdr = cs_data; cs_addrs->ti = &cs_data->trans_hist; } if (new_blocks + cs_data->trans_hist.total_blks > MAXTOTALBLKS(cs_data)) { GDSFILEXT_CLNUP; send_msg(VARLSTCNT(1) ERR_TOTALBLKMAX); return (uint4)(NO_FREE_SPACE); } CHECK_TN(cs_addrs, cs_data, cs_data->trans_hist.curr_tn); /* can issue rts_error TNTOOLARGE */ assert(0 < (int)new_blocks); new_total = cs_data->trans_hist.total_blks + new_blocks; new_eof = ((off_t)(cs_data->start_vbn - 1) * DISK_BLOCK_SIZE) + ((off_t)new_total * cs_data->blk_size); buff = (char *)malloc(DISK_BLOCK_SIZE); memset(buff, 0, DISK_BLOCK_SIZE); LSEEKWRITE(udi->fd, new_eof, buff, DISK_BLOCK_SIZE, save_errno); if ((ENOSPC == save_errno) && run_time) { /* try to write it every second, and send message to operator * log every 1/20 of cs_data->wait_disk_space */ wait_period = to_wait = DIVIDE_ROUND_UP(cs_data->wait_disk_space, CDB_STAGNATE + 1); to_msg = (to_wait / 8) ? (to_wait / 8) : 1; /* send around 8 messages during 1 wait_period */ while ((to_wait > 0) && (ENOSPC == save_errno)) { if ((to_wait == cs_data->wait_disk_space) || (to_wait % to_msg == 0)) { send_msg(VARLSTCNT(11) ERR_WAITDSKSPACE, 4, process_id, to_wait + (CDB_STAGNATE - t_tries) * wait_period, DB_LEN_STR(gv_cur_region), ERR_TEXT, 2, RTS_ERROR_TEXT("Please make more disk space available or shutdown GT.M to avoid data loss"), save_errno); gtm_putmsg(VARLSTCNT(11) ERR_WAITDSKSPACE, 4, process_id, to_wait + (CDB_STAGNATE - t_tries) * wait_period, DB_LEN_STR(gv_cur_region), ERR_TEXT, 2, RTS_ERROR_TEXT("Please make more disk space available or shutdown GT.M to avoid data loss"), save_errno); } if (!was_crit) rel_crit(gv_cur_region); hiber_start(1000); to_wait--; if (!was_crit) grab_crit(gv_cur_region); LSEEKWRITE(udi->fd, new_eof, buff, DISK_BLOCK_SIZE, save_errno); } } free(buff); if (0 != save_errno) { GDSFILEXT_CLNUP; if (ENOSPC == save_errno) return (uint4)(NO_FREE_SPACE); send_msg(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno); return (uint4)(NO_FREE_SPACE); } DEBUG_ONLY(prev_extend_blks_to_upgrd = cs_data->blks_to_upgrd;)
unsigned char mu_cre_file(void) { char *cc = NULL, path[MAX_FBUFF + 1], errbuff[512]; unsigned char buff[DISK_BLOCK_SIZE]; int fd = -1, i, lower, upper, status, padded_len, padded_vbn, norm_vbn; uint4 raw_dev_size; /* size of a raw device, in bytes */ int4 blocks_for_create, blocks_for_extension, save_errno; GTM_BAVAIL_TYPE avail_blocks; file_control fc; mstr file; parse_blk pblk; unix_db_info udi_struct, *udi; char *fgets_res; gd_segment *seg; error_def(ERR_NOSPACECRE); error_def(ERR_LOWSPACECRE); assert((-(sizeof(uint4) * 2) & sizeof(sgmnt_data)) == sizeof(sgmnt_data)); cs_addrs = &udi_struct.s_addrs; cs_data = (sgmnt_data_ptr_t)NULL; /* for CLEANUP */ memset(&pblk, 0, sizeof(pblk)); pblk.fop = (F_SYNTAXO | F_PARNODE); pblk.buffer = path; pblk.buff_size = MAX_FBUFF; file.addr = (char*)gv_cur_region->dyn.addr->fname; file.len = gv_cur_region->dyn.addr->fname_len; strncpy(path,file.addr,file.len); *(path+file.len) = '\0'; if (is_raw_dev(path)) { /* do not use a default extension for raw device files */ pblk.def1_buf = DEF_NODBEXT; pblk.def1_size = sizeof(DEF_NODBEXT) - 1; } else { pblk.def1_buf = DEF_DBEXT; pblk.def1_size = sizeof(DEF_DBEXT) - 1; } if (1 != (parse_file(&file, &pblk) & 1)) { PRINTF("Error translating filename %s.\n", gv_cur_region->dyn.addr->fname); return EXIT_ERR; } path[pblk.b_esl] = 0; if (pblk.fnb & F_HAS_NODE) { /* Remote node specification given */ assert(pblk.b_node); PRINTF("Database file for region %s not created; cannot create across network.\n", path); return EXIT_WRN; } udi = &udi_struct; udi->raw = is_raw_dev(pblk.l_dir); if (udi->raw) { fd = OPEN(pblk.l_dir,O_EXCL | O_RDWR); if (-1 == fd) { SPRINTF_AND_PERROR("Error opening file %s\n"); return EXIT_ERR; } if (-1 != (status = (ssize_t)lseek(fd, 0, SEEK_SET))) { DOREADRC(fd, buff, sizeof(buff), status); } else status = errno; if (0 != status) { SPRINTF_AND_PERROR("Error reading header for file %s\n"); return EXIT_ERR; } if (!memcmp(buff, GDS_LABEL, STR_LIT_LEN(GDS_LABEL))) { char rsp[80]; PRINTF("Database already exists on device %s\n", path); PRINTF("Do you wish to re-initialize (all current data will be lost) [y/n] ? "); FGETS(rsp, 79, stdin, fgets_res); if ('y' != *rsp) return EXIT_NRM; } PRINTF("Determining size of raw device...\n"); for(i = 1; read(fd, buff, sizeof(buff)) == sizeof(buff);) { i *= 2; lseek(fd, (off_t)i * BUFSIZ, SEEK_SET); } lower = i / 2; upper = i; while ((lower + upper) / 2 != lower) { i = (lower + upper) / 2; lseek(fd, (off_t)i * BUFSIZ, SEEK_SET); if (read(fd, buff, sizeof(buff)) == sizeof(buff)) lower = i; else upper = i; } raw_dev_size = i * BUFSIZ; } else { fd = OPEN3(pblk.l_dir, O_CREAT | O_EXCL | O_RDWR, 0600); if (-1 == fd) { SPRINTF_AND_PERROR("Error opening file %s\n"); return EXIT_ERR; } if (0 != (save_errno = disk_block_available(fd, &avail_blocks, FALSE))) { errno = save_errno; SPRINTF_AND_PERROR("Error checking available disk space for %s\n"); CLEANUP(EXIT_ERR); return EXIT_ERR; } seg = gv_cur_region->dyn.addr; /* blocks_for_create is in the unit of DISK_BLOCK_SIZE */ blocks_for_create = DIVIDE_ROUND_UP(sizeof(sgmnt_data), DISK_BLOCK_SIZE) + 1 + (seg->blk_size / DISK_BLOCK_SIZE * ((DIVIDE_ROUND_UP(seg->allocation, BLKS_PER_LMAP - 1)) + seg->allocation)); if ((uint4)avail_blocks < blocks_for_create) { gtm_putmsg(VARLSTCNT(6) ERR_NOSPACECRE, 4, LEN_AND_STR(path), blocks_for_create, (uint4)avail_blocks); send_msg(VARLSTCNT(6) ERR_NOSPACECRE, 4, LEN_AND_STR(path), blocks_for_create, (uint4)avail_blocks); CLEANUP(EXIT_ERR); return EXIT_ERR; } blocks_for_extension = (seg->blk_size / DISK_BLOCK_SIZE * ((DIVIDE_ROUND_UP(EXTEND_WARNING_FACTOR * seg->ext_blk_count, BLKS_PER_LMAP - 1)) + EXTEND_WARNING_FACTOR * seg->ext_blk_count)); if ((uint4)(avail_blocks - blocks_for_create) < blocks_for_extension) { gtm_putmsg(VARLSTCNT(8) ERR_LOWSPACECRE, 6, LEN_AND_STR(path), EXTEND_WARNING_FACTOR, blocks_for_extension, DISK_BLOCK_SIZE, (uint4)(avail_blocks - blocks_for_create)); send_msg(VARLSTCNT(8) ERR_LOWSPACECRE, 6, LEN_AND_STR(path), EXTEND_WARNING_FACTOR, blocks_for_extension, DISK_BLOCK_SIZE, (uint4)(avail_blocks - blocks_for_create)); } } gv_cur_region->dyn.addr->file_cntl = &fc; fc.file_info = (void*)&udi_struct; udi->fd = fd; cs_data = (sgmnt_data_ptr_t)malloc(sizeof(sgmnt_data)); memset(cs_data, 0, sizeof(*cs_data)); cs_data->createinprogress = TRUE; cs_data->semid = INVALID_SEMID; cs_data->shmid = INVALID_SHMID; /* We want our datablocks to start on what would be a block boundary within the file so pad the fileheader if necessary to make this happen. */ padded_len = ROUND_UP(sizeof(sgmnt_data), BLK_SIZE); padded_vbn = DIVIDE_ROUND_UP(padded_len, DISK_BLOCK_SIZE) + 1; norm_vbn = DIVIDE_ROUND_UP(sizeof(sgmnt_data), DISK_BLOCK_SIZE) + 1; cs_data->start_vbn = padded_vbn; cs_data->free_space += (padded_vbn - norm_vbn) * DISK_BLOCK_SIZE; cs_data->acc_meth = gv_cur_region->dyn.addr->acc_meth; if (udi->raw) { /* calculate total blocks, reduce to make room for the * database header (size rounded up to a block), then * make into a multiple of BLKS_PER_LMAP to have a complete bitmap * for each set of blocks. */ cs_data->trans_hist.total_blks = raw_dev_size - ROUND_UP(sizeof(sgmnt_data), DISK_BLOCK_SIZE); cs_data->trans_hist.total_blks /= (uint4)(((gd_segment *)gv_cur_region->dyn.addr)->blk_size); if (0 == (cs_data->trans_hist.total_blks - DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, BLKS_PER_LMAP - 1) % (BLKS_PER_LMAP - 1))) cs_data->trans_hist.total_blks -= 1; /* don't create a bitmap with no data blocks */ cs_data->extension_size = 0; PRINTF("Raw device size is %dK, %d GDS blocks\n", raw_dev_size / 1000, cs_data->trans_hist.total_blks); } else { cs_data->trans_hist.total_blks = gv_cur_region->dyn.addr->allocation; /* There are (bplmap - 1) non-bitmap blocks per bitmap, so add (bplmap - 2) to number of non-bitmap blocks * and divide by (bplmap - 1) to get total number of bitmaps for expanded database. (must round up in this * manner as every non-bitmap block must have an associated bitmap) */ cs_data->trans_hist.total_blks += DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, BLKS_PER_LMAP - 1); cs_data->extension_size = gv_cur_region->dyn.addr->ext_blk_count; } mucregini(cs_data->trans_hist.total_blks); cs_data->createinprogress = FALSE; LSEEKWRITE(udi->fd, 0, cs_data, sizeof(sgmnt_data), status); if (0 != status) { SPRINTF_AND_PERROR("Error writing out header for file %s\n"); CLEANUP(EXIT_ERR); return EXIT_ERR; } cc = (char*)malloc(DISK_BLOCK_SIZE); memset(cc, 0, DISK_BLOCK_SIZE); LSEEKWRITE(udi->fd, (cs_data->start_vbn - 1) * DISK_BLOCK_SIZE + ((off_t)(cs_data->trans_hist.total_blks) * cs_data->blk_size), cc, DISK_BLOCK_SIZE, status); if (0 != status) { SPRINTF_AND_PERROR("Error writing out end of file %s\n"); CLEANUP(EXIT_ERR); return EXIT_ERR; } if ((!udi->raw) && (-1 == CHMOD(pblk.l_dir, 0666))) { SPRINTF_AND_PERROR("Error changing file mode on file %s\n"); CLEANUP(EXIT_WRN); return EXIT_WRN; } CLEANUP(EXIT_NRM); PRINTF("Created file %s\n", path); return EXIT_NRM; }