Ejemplo n.º 1
0
int jnl_file_extend(jnl_private_control *jpc, uint4 total_jnl_rec_size)
{
	file_control		*fc;
	boolean_t		need_extend;
	jnl_buffer_ptr_t     	jb;
	jnl_create_info 	jnl_info;
	jnl_file_header		header;
	uint4			new_alq;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	char			prev_jnl_fn[JNL_NAME_SIZE];
	uint4			jnl_status = 0, status;
	int			new_blocks, result;
	GTM_BAVAIL_TYPE		avail_blocks;
	uint4			aligned_tot_jrec_size, count;

	switch(jpc->region->dyn.addr->acc_meth)
	{
	case dba_mm:
	case dba_bg:
		csa = &FILE_INFO(jpc->region)->s_addrs;
		break;
	default:
		GTMASSERT;
	}
	csd = csa->hdr;
	assert(csa == cs_addrs && csd == cs_data);
	assert(csa->now_crit || (csd->clustered && (CCST_CLOSED == csa->nl->ccp_state)));
	assert(jpc->region == gv_cur_region);
	assert(csa->jnl_state == csd->jnl_state);
	if (!JNL_ENABLED(csa) || (NOJNL == jpc->channel) || (JNL_FILE_SWITCHED(jpc)))
		GTMASSERT;	/* crit and messing with the journal file - how could it have vanished? */
	if (!csd->jnl_deq)
	{
		assert(DIVIDE_ROUND_UP(total_jnl_rec_size, DISK_BLOCK_SIZE) <= csd->jnl_alq);
		assert(csd->jnl_alq == csd->autoswitchlimit);
		new_blocks = csd->jnl_alq;
	} else
		/* May cause extension of  csd->jnl_deq * n blocks where n > 0 */
		new_blocks = ROUND_UP(DIVIDE_ROUND_UP(total_jnl_rec_size, DISK_BLOCK_SIZE), csd->jnl_deq);
	jpc->status = SS_NORMAL;
	jb = jpc->jnl_buff;
	assert(0 <= new_blocks);
	DEBUG_ONLY(count = 0);
	for (need_extend = (0 != new_blocks); need_extend; )
	{
		DEBUG_ONLY(count++);
		/* usually we will do the loop just once where we do the file extension.
		 * rarely we might need to do an autoswitch instead after which again rarely
		 * 	we might need to do an extension on the new journal to fit in the transaction's	journal requirements.
		 * therefore we should do this loop a maximum of twice. hence the assert below.
		 */
		assert(count <= 2);
		need_extend = FALSE;
		if (SS_NORMAL == (status = disk_block_available(jpc->channel, &avail_blocks, TRUE)))
		{
			if ((new_blocks * EXTEND_WARNING_FACTOR) > avail_blocks)
			{
				if (new_blocks > avail_blocks)
				{	/* if we cannot satisfy the request, it is an error */
					send_msg(VARLSTCNT(6) ERR_NOSPACEEXT, 4, JNL_LEN_STR(csd),
						new_blocks, avail_blocks);
					new_blocks = 0;
					jpc->status = SS_NORMAL;
					break;
				} else
					send_msg(VARLSTCNT(5) ERR_DSKSPACEFLOW, 3, JNL_LEN_STR(csd),
						(avail_blocks - new_blocks));
			}
		} else
			send_msg(VARLSTCNT(5) ERR_JNLFILEXTERR, 2, JNL_LEN_STR(csd), status);
		new_alq = jb->filesize + new_blocks;
		/* ensure current journal file size is well within autoswitchlimit --> design constraint */
		assert(csd->autoswitchlimit >= jb->filesize);
		if (csd->autoswitchlimit < (jb->filesize + (EXTEND_WARNING_FACTOR * new_blocks)))	/* close to max */
			send_msg(VARLSTCNT(5) ERR_JNLSPACELOW, 3, JNL_LEN_STR(csd), csd->autoswitchlimit - jb->filesize);
		if (csd->autoswitchlimit < new_alq)
		{	/* Reached max, need to autoswitch */
			/* Ensure new journal file can hold the entire current transaction's journal record requirements */
			assert(csd->autoswitchlimit >= MAX_REQD_JNL_FILE_SIZE(total_jnl_rec_size));
			memset(&jnl_info, 0, sizeof(jnl_info));
			jnl_info.prev_jnl = &prev_jnl_fn[0];
			set_jnl_info(gv_cur_region, &jnl_info);
			assert(JNL_ENABLED(csa) && (NOJNL != jpc->channel) && !(JNL_FILE_SWITCHED(jpc)));
			jnl_status = jnl_ensure_open();
			if (0 == jnl_status)
			{	/* flush the cache and jnl-buffer-contents to current journal file before
				 * switching to a new journal. */
				wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH);
				jnl_file_close(gv_cur_region, TRUE, TRUE);
			} else
				rts_error(VARLSTCNT(7) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region), jpc->status);
			assert(!jgbl.forw_phase_recovery || (NULL != jgbl.mur_pini_addr_reset_fnptr));
			if (jgbl.forw_phase_recovery && (NULL != jgbl.mur_pini_addr_reset_fnptr))
				(*jgbl.mur_pini_addr_reset_fnptr)();
			assert(!jnl_info.no_rename);
			assert(!jnl_info.no_prev_link);
			if (EXIT_NRM == cre_jnl_file(&jnl_info))
			{
				assert(0 == memcmp(csd->jnl_file_name, jnl_info.jnl, jnl_info.jnl_len));
				assert(csd->jnl_file_name[jnl_info.jnl_len] == '\0');
				assert(csd->jnl_file_len == jnl_info.jnl_len);
				assert(csd->jnl_buffer_size == jnl_info.buffer);
				assert(csd->jnl_alq == jnl_info.alloc);
				assert(csd->jnl_deq == jnl_info.extend);
				assert(csd->jnl_before_image == jnl_info.before_images);
				csd->trans_hist.header_open_tn = jnl_info.tn;	/* needed for successful jnl_file_open() */
				send_msg(VARLSTCNT(4) ERR_NEWJNLFILECREATE, 2, JNL_LEN_STR(csd));
				fc = gv_cur_region->dyn.addr->file_cntl;
				fc->op = FC_WRITE;
				fc->op_buff = (sm_uc_ptr_t)csd;
				status = dbfilop(fc);
				if (SS_NORMAL != status)
					send_msg(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), status);
				assert(JNL_ENABLED(csa));
				/* call jnl_ensure_open instead of jnl_file_open to make sure jpc->pini_addr is set to 0 */
				jnl_status = jnl_ensure_open();	/* sets jpc->status */
				if (0 != jnl_status)
					rts_error(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region));
				assert(jb->filesize == csd->jnl_alq);
				aligned_tot_jrec_size = ALIGNED_ROUND_UP(MAX_REQD_JNL_FILE_SIZE(total_jnl_rec_size),
										csd->jnl_alq, csd->jnl_deq);
				if (aligned_tot_jrec_size > csd->jnl_alq)
				{	/* need to extend more than initial allocation in the new journal file
					 * to accommodate the current transaction.
					 */
					new_blocks = aligned_tot_jrec_size - csd->jnl_alq;
					assert(new_blocks);
					assert(0 == new_blocks % csd->jnl_deq);
					need_extend = TRUE;
				}
			} else
			{
				send_msg(VARLSTCNT(4) ERR_JNLCREATERR, 2, JNL_LEN_STR(csd));
				jpc->status = ERR_JNLNOCREATE;
				new_blocks = -1;
			}
		} else
		{
			assert(!need_extend);	/* ensure we won't go through the for loop again */
			/* Virtually extend currently used journal file */
			jb->filesize = new_alq;	/* Actually this is virtual file size blocks */
			DO_FILE_READ(jpc->channel, 0, &header, JNL_HDR_LEN, jpc->status, jpc->status2);
			if (SS_NORMAL != jpc->status)
				rts_error(VARLSTCNT(5) ERR_JNLRDERR, 2, JNL_LEN_STR(csd), jpc->status);
			assert((header.virtual_size + new_blocks) == new_alq);
			header.virtual_size = new_alq;
			DO_FILE_WRITE(jpc->channel, 0, &header, JNL_HDR_LEN, jpc->status, jpc->status2);
			if (SS_NORMAL != jpc->status)
				rts_error(VARLSTCNT(5) ERR_JNLWRERR, 2, JNL_LEN_STR(csd), jpc->status);
		}
		if (0 >= new_blocks)
			break;
	}
	if (0 >= new_blocks)
	{
		jpc->status = ERR_JNLREADEOF;
		jnl_file_lost(jpc, ERR_JNLEXTEND);
       		new_blocks = -1;
	}
	return new_blocks;
}
Ejemplo n.º 2
0
uint4	 gdsfilext(uint4 blocks, uint4 filesize, boolean_t trans_in_prog)
{
	sm_uc_ptr_t		old_base[2], mmap_retaddr;
	boolean_t		was_crit, is_mm;
	int			result, save_errno, status;
	DEBUG_ONLY(int		first_save_errno);
	uint4			new_bit_maps, bplmap, map, new_blocks, new_total, max_tot_blks, old_total;
	uint4			jnl_status;
	gtm_uint64_t		avail_blocks, mmap_sz;
	off_t			new_eof, new_size;
	trans_num		curr_tn;
	unix_db_info		*udi;
	inctn_opcode_t		save_inctn_opcode;
	int4			prev_extend_blks_to_upgrd;
	jnl_private_control	*jpc;
	jnl_buffer_ptr_t	jbp;
	cache_rec_ptr_t         cr;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	assert(!IS_DSE_IMAGE);
	assert((cs_addrs->nl == NULL) || (process_id != cs_addrs->nl->trunc_pid)); /* mu_truncate shouldn't extend file... */
	assert(!process_exiting);
	DEBUG_ONLY(old_base[0] = old_base[1] = NULL);
	assert(!gv_cur_region->read_only);
	udi = FILE_INFO(gv_cur_region);
	is_mm = (dba_mm == cs_addrs->hdr->acc_meth);
#	if !defined(MM_FILE_EXT_OK)
	if (!udi->grabbed_access_sem && is_mm)
		return (uint4)(NO_FREE_SPACE); /* should this be changed to show extension not allowed ? */
#	endif
	/* Both blocks and total blocks are unsigned ints so make sure we aren't asking for huge numbers that will
	   overflow and end up doing silly things.
	*/
	assert((blocks <= (MAXTOTALBLKS(cs_data) - cs_data->trans_hist.total_blks)) || WBTEST_ENABLED(WBTEST_FILE_EXTEND_ERROR));
#	if defined(__sun) || defined(__hpux)
	cs_data->defer_allocate = TRUE;
#	endif
	if (!blocks && (cs_data->defer_allocate || (TRANS_IN_PROG_TRUE == trans_in_prog)))
		return (uint4)(NO_FREE_SPACE); /* should this be changed to show extension not enabled ? */
	bplmap = cs_data->bplmap;
	/* New total of non-bitmap blocks will be number of current, non-bitmap blocks, plus new blocks desired
	 * There are (bplmap - 1) non-bitmap blocks per bitmap, so add (bplmap - 2) to number of non-bitmap blocks
	 *      and divide by (bplmap - 1) to get total number of bitmaps for expanded database. (must round up in this
	 *      manner as every non-bitmap block must have an associated bitmap)
	 * Current number of bitmaps is (total number of current blocks + bplmap - 1) / bplmap.
	 * Subtract current number of bitmaps from number needed for expanded database to get number of new bitmaps needed.
	 */
	new_bit_maps = DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks
			- DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, bplmap) + blocks, bplmap - 1)
			- DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, bplmap);
	new_blocks = blocks + new_bit_maps;
	assert((0 < (int)new_blocks) || (!cs_data->defer_allocate && (0 == new_blocks)));
	if (new_blocks + cs_data->trans_hist.total_blks > MAXTOTALBLKS(cs_data))
	{
		assert(WBTEST_ENABLED(WBTEST_FILE_EXTEND_ERROR));
		send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(1) ERR_TOTALBLKMAX);
		return (uint4)(NO_FREE_SPACE);
	}
	if (0 != (save_errno = disk_block_available(udi->fd, &avail_blocks, FALSE)))
	{
		send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno);
		rts_error_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno);
	} else
	{
		if (!(gtmDebugLevel & GDL_IgnoreAvailSpace))
		{	/* Bypass this space check if debug flag above is on. Allows us to create a large sparce DB
			 * in space it could never fit it if wasn't sparse. Needed for some tests.
			 */
			avail_blocks = avail_blocks / (cs_data->blk_size / DISK_BLOCK_SIZE);
			if ((blocks * EXTEND_WARNING_FACTOR) > avail_blocks)
			{
				if (blocks > (uint4)avail_blocks)
				{
					if (!INST_FREEZE_ON_NOSPC_ENABLED(cs_addrs))
						return (uint4)(NO_FREE_SPACE);
					else
						send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(6) MAKE_MSG_WARNING(ERR_NOSPACEEXT), 4,
							DB_LEN_STR(gv_cur_region), new_blocks, (uint4)avail_blocks);
				} else
					send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DSKSPACEFLOW, 3, DB_LEN_STR(gv_cur_region),
						 (uint4)(avail_blocks - ((new_blocks <= avail_blocks) ? new_blocks : 0)));
			}
		}
	}
#	ifdef DEBUG
	if (WBTEST_ENABLED(WBTEST_MM_CONCURRENT_FILE_EXTEND) && dollar_tlevel && !MEMCMP_LIT(gv_cur_region->rname, "DEFAULT"))
	{
		SYSTEM("$gtm_dist/mumps -run $gtm_wbox_mrtn");
		assert(1 == cs_addrs->nl->wbox_test_seq_num);	/* should have been set by mubfilcpy */
		cs_addrs->nl->wbox_test_seq_num = 2;	/* signal mupip backup to stop sleeping in mubfilcpy */
	}
#	endif
	/* From here on, we need to use GDSFILEXT_CLNUP before returning to the caller */
	was_crit = cs_addrs->now_crit;
	assert(!cs_addrs->hold_onto_crit || was_crit);
	/* If we are coming from mupip_extend (which gets crit itself) we better have waited for any unfreezes to occur.
	 * If we are coming from online rollback (when that feature is available), we will come in holding crit and in
	 * 	the final retry. In that case too, we expect to have waited for unfreezes to occur in the caller itself.
	 * Therefore if we are coming in holding crit from MUPIP, we expect the db to be unfrozen so no need to wait for
	 * freeze.
	 * If we are coming from GT.M and final retry (in which case we come in holding crit) we expect to have waited
	 * 	for any unfreezes (by invoking tp_crit_all_regions) to occur (TP or non-TP) before coming into this
	 *	function. However, there is one exception. In the final retry, if tp_crit_all_regions notices that
	 *	at least one of the participating regions did ONLY READs, it will not wait for any freeze on THAT region
	 *	to complete before grabbing crit. Later, in the final retry, if THAT region did an update which caused
	 *	op_tcommit to invoke bm_getfree->gdsfilext, then we would have come here with a frozen region on which
	 *	we hold crit.
	 */
	assert(!was_crit || !FROZEN_HARD(cs_data) || (dollar_tlevel && (CDB_STAGNATE <= t_tries)));
	/*
	 * If we are in the final retry and already hold crit, it is possible that csa->nl->wc_blocked is also set to TRUE
	 * (by a concurrent process in phase2 which encountered an error in the midst of commit and secshr_db_clnup
	 * finished the job for it). In this case we do NOT want to invoke wcs_recover as that will update the "bt"
	 * transaction numbers without correspondingly updating the history transaction numbers (effectively causing
	 * a cdb_sc_blkmod type of restart). Therefore do NOT call grab_crit (which unconditionally invokes wcs_recover)
	 * if we already hold crit.
	 */
	if (!was_crit)
	{
		for ( ; ; )
		{
			grab_crit(gv_cur_region);
			if (FROZEN_CHILLED(cs_data))
				DO_CHILLED_AUTORELEASE(cs_addrs, cs_data);
			if (!FROZEN(cs_data) && !IS_REPL_INST_FROZEN)
				break;
			rel_crit(gv_cur_region);
			while (FROZEN(cs_data) || IS_REPL_INST_FROZEN)
			{
				hiber_start(1000);
				if (FROZEN_CHILLED(cs_data) && CHILLED_AUTORELEASE(cs_data))
					break;
			}
		}
	} else if (FROZEN_HARD(cs_data) && dollar_tlevel)
	{	/* We don't want to continue with file extension as explained above. Hence return with an error code which
		 * op_tcommit will recognize (as a cdb_sc_needcrit/cdb_sc_instancefreeze type of restart) and restart accordingly.
		 */
		assert(CDB_STAGNATE <= t_tries);
		GDSFILEXT_CLNUP;
		return (uint4)FINAL_RETRY_FREEZE_PROG;
	} else
		WAIT_FOR_REGION_TO_UNCHILL(cs_addrs, cs_data);
	if (IS_REPL_INST_FROZEN && trans_in_prog)
	{
		assert(CDB_STAGNATE <= t_tries);
		GDSFILEXT_CLNUP;
		return (uint4)FINAL_RETRY_INST_FREEZE;
	}
	assert(cs_addrs->ti->total_blks == cs_data->trans_hist.total_blks);
	old_total = cs_data->trans_hist.total_blks;
	if (old_total != filesize)
	{	/* Somebody else has already extended it, since we are in crit, this is trust-worthy. However, in case of MM,
		 * we still need to remap the database
		 */
		assert((old_total > filesize) || !is_mm);
		/* For BG, someone else could have truncated or extended - we have no idea */
		GDSFILEXT_CLNUP;
		return (SS_NORMAL);
	}
	if (trans_in_prog && SUSPICIOUS_EXTEND)
	{
		if (!was_crit)
		{
			GDSFILEXT_CLNUP;
			return (uint4)(EXTEND_SUSPECT);
		}
		/* If free_blocks counter is not ok, then correct it. Do the check again. If still fails, then it means we held
		 * crit through bm_getfree into gdsfilext and still didn't get it right.
		 */
		assertpro(!is_free_blks_ctr_ok() && !SUSPICIOUS_EXTEND);
	}
	if (JNL_ENABLED(cs_data))
	{
		if (!jgbl.dont_reset_gbl_jrec_time)
			SET_GBL_JREC_TIME;	/* needed before jnl_ensure_open as that can write jnl records */
		jpc = cs_addrs->jnl;
		jbp = jpc->jnl_buff;
		/* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order
		 * of jnl records. This needs to be done BEFORE the jnl_ensure_open as that could write
		 * journal records (if it decides to switch to a new journal file).
		 */
		ADJUST_GBL_JREC_TIME(jgbl, jbp);
		jnl_status = jnl_ensure_open(gv_cur_region, cs_addrs);
		if (jnl_status)
		{
			GDSFILEXT_CLNUP;
			send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(cs_data), DB_LEN_STR(gv_cur_region));
			return (uint4)(NO_FREE_SPACE);	/* should have better return status */
		}
	}
	if (is_mm)
	{
		cs_addrs->nl->mm_extender_pid = process_id;
		status = wcs_wtstart(gv_cur_region, 0, NULL, NULL);
		cs_addrs->nl->mm_extender_pid = 0;
		assertpro(SS_NORMAL == status);
		old_base[0] = cs_addrs->db_addrs[0];
		old_base[1] = cs_addrs->db_addrs[1];
		cs_addrs->db_addrs[0] = NULL; /* don't rely on it until the mmap below */
#		ifdef _AIX
		status = shmdt(old_base[0] - BLK_ZERO_OFF(cs_data->start_vbn));
#		else
		status = munmap((caddr_t)old_base[0], (size_t)(old_base[1] - old_base[0]));
#		endif
		if (0 != status)
		{
			save_errno = errno;
			GDSFILEXT_CLNUP;
			send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(12) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region),
					ERR_SYSCALL, 5, LEN_AND_STR(MEM_UNMAP_SYSCALL), CALLFROM, save_errno);
			return (uint4)(NO_FREE_SPACE);
		}
	} else
	{	/* Due to concurrency issues, it is possible some process had issued a disk read of the GDS block# corresponding
		 * to "old_total" right after a truncate wrote a GDS-block of zeros on disk (to signal end of the db file).
		 * If so, the global buffer containing this block needs to be invalidated now as part of the extend. If not, it is
		 * possible the EOF block on disk is now going to be overwritten by a properly initialized bitmap block (as part
		 * of the gdsfilext below) while the global buffer continues to have an incorrect copy of that bitmap block and
		 * this in turn would cause XXXX failures due to a bad bitmap block in shared memory. (GTM-7519)
		 */
		cr = db_csh_get((block_id)old_total);
		if ((NULL != cr) && ((cache_rec_ptr_t)CR_NOTVALID != cr))
		{
			assert((0 == cr->dirty) && (0 == cr->bt_index) && !cr->stopped);
			cr->cycle++;
			cr->blk = CR_BLKEMPTY;
		}
	}
	CHECK_TN(cs_addrs, cs_data, cs_data->trans_hist.curr_tn);	/* can issue rts_error TNTOOLARGE */
	new_total = old_total + new_blocks;
	new_eof = BLK_ZERO_OFF(cs_data->start_vbn) + ((off_t)new_total * cs_data->blk_size);
#	if !defined(__sun) && !defined(__hpux)
	if (!cs_data->defer_allocate)
	{
		new_size = new_eof + cs_data->blk_size;
		save_errno = posix_fallocate(udi->fd, 0, new_size);
		DEBUG_ONLY(first_save_errno = save_errno);
		if ((ENOSPC == save_errno) && IS_GTM_IMAGE)
			save_errno = extend_wait_for_fallocate(udi, new_size);
		if (0 != save_errno)
		{
			GDSFILEXT_CLNUP;
			assert(ENOSPC == save_errno);
			if (ENOSPC != save_errno)
				send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_PREALLOCATEFAIL, 2, DB_LEN_STR(gv_cur_region),
					     save_errno);
			return (uint4)(NO_FREE_SPACE);
		}
	}
#	endif
	save_errno = db_write_eof_block(udi, udi->fd, cs_data->blk_size, new_eof, &(TREF(dio_buff)));
	if ((ENOSPC == save_errno) && IS_GTM_IMAGE)
		save_errno = extend_wait_for_write(udi, cs_data->blk_size, new_eof);
	if (0 != save_errno)
	{
		GDSFILEXT_CLNUP;
		if (ENOSPC != save_errno)
			send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno);
		return (uint4)(NO_FREE_SPACE);
	}
	if (WBTEST_ENABLED(WBTEST_FILE_EXTEND_INTERRUPT_1))
	{
		LONG_SLEEP(600);
		assert(FALSE);
	}
	/* Ensure the EOF and metadata get to disk BEFORE any bitmap writes. Otherwise, the file size could no longer reflect
	 * a proper extent and subsequent invocations of gdsfilext could corrupt the database.
	 */
	if (!IS_STATSDB_CSA(cs_addrs))
	{
		GTM_DB_FSYNC(cs_addrs, udi->fd, status);
		assert(0 == status);
		if (0 != status)
		{
			GDSFILEXT_CLNUP;
			send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(8) ERR_DBFILERR, 5,
						RTS_ERROR_LITERAL("fsync1()"), CALLFROM, status);
			return (uint4)(NO_FREE_SPACE);
		}
	}
	if (WBTEST_ENABLED(WBTEST_FILE_EXTEND_INTERRUPT_2))
	{
		LONG_SLEEP(600);
		assert(FALSE); /* Should be killed before that */
	}
	DEBUG_ONLY(prev_extend_blks_to_upgrd = cs_data->blks_to_upgrd;)
Ejemplo n.º 3
0
unsigned char mu_cre_file(void)
{
	unsigned char		*inadr[2], *c, exit_stat;
	enum db_acc_method	temp_acc_meth;
	uint4			lcnt, retadr[2];
	int4			blk_init_size, initial_alq, free_blocks;
	gtm_uint64_t		free_blocks_ll, blocks_for_extension;
	char			buff[GLO_NAME_MAXLEN], fn_buff[MAX_FN_LEN];
	unsigned int		status;
	int			free_space;
	struct FAB		*fcb;
	struct NAM		nam;
	gds_file_id		new_id;
	io_status_block_disk	iosb;
	char			node[16];
	short			len;
	struct {
		short	blen;
		short	code;
		char	*buf;
		short	*len;
		int4	terminator;
	} item = {15, SYI$_NODENAME, &node, &len, 0};
	$DESCRIPTOR(desc, buff);

	exit_stat = EXIT_NRM;
/* The following calculations should duplicate the BT_SIZE macro from GDSBT and the LOCK_BLOCK macro from GDSFHEAD.H,
 * but without using a sgmnt_data which is not yet set up at this point
 */

#ifdef GT_CX_DEF
	/* This section needs serious chnages for the fileheader changes in V5 if it is ever resurrected */
	over_head = DIVIDE_ROUND_UP(SIZEOF_FILE_HDR_DFLT
			+ (WC_MAX_BUFFS + getprime(WC_MAX_BUFFS) + 1) * SIZEOF(bt_rec), DISK_BLOCK_SIZE);
	if (gv_cur_region->dyn.addr->acc_meth == dba_bg)
	{
		free_space = over_head - DIVIDE_ROUND_UP(SIZEOF_FILE_HDR_DFLT
			+ (gv_cur_region->dyn.addr->global_buffers + getprime(gv_cur_region->dyn.addr->global_buffers) + 1)
				* SIZEOF(bt_rec), DISK_BLOCK_SIZE);
		over_head += gv_cur_region->dyn.addr->lock_space ? gv_cur_region->dyn.addr->lock_space
								 : DEF_LOCK_SIZE / OS_PAGELET_SIZE;
	} else if (gv_cur_region->dyn.addr->acc_meth == dba_mm)
	{
		free_space = over_head - DIVIDE_ROUND_UP(SIZEOF_FILE_HDR_DFLT, DISK_BLOCK_SIZE);
		if (gv_cur_region->dyn.addr->lock_space)
		{
			over_head += gv_cur_region->dyn.addr->lock_space;
			free_space += gv_cur_region->dyn.addr->lock_space;
		} else
		{
			over_head += DEF_LOCK_SIZE / OS_PAGELET_SIZE;
			free_space += DEF_LOCK_SIZE / OS_PAGELET_SIZE;
		}
	}
	free_space *= DISK_BLOCK_SIZE;
#else
	assert(START_VBN_CURRENT > DIVIDE_ROUND_UP(SIZEOF_FILE_HDR_DFLT, DISK_BLOCK_SIZE));
	free_space = ((START_VBN_CURRENT - 1) * DISK_BLOCK_SIZE) - SIZEOF_FILE_HDR_DFLT;
#endif
	switch (gv_cur_region->dyn.addr->acc_meth)
	{
		case dba_bg:
		case dba_mm:
			mu_cre_vms_structs(gv_cur_region);
			fcb = ((vms_gds_info *)(gv_cur_region->dyn.addr->file_cntl->file_info))->fab;
			cs_addrs = &((vms_gds_info *)(gv_cur_region->dyn.addr->file_cntl->file_info))->s_addrs;

			fcb->fab$b_shr &= FAB$M_NIL;	/* No access to this file while it is created */
			fcb->fab$l_nam = &nam;
			nam = cc$rms_nam;
			/* There are (bplmap - 1) non-bitmap blocks per bitmap, so add (bplmap - 2) to number of non-bitmap blocks
			 * and divide by (bplmap - 1) to get total number of bitmaps for expanded database. (must round up in this
			 * manner as every non-bitmap block must have an associated bitmap)
			*/
			fcb->fab$l_alq += DIVIDE_ROUND_UP(fcb->fab$l_alq, BLKS_PER_LMAP - 1);	/* Bitmaps */
			blk_init_size = fcb->fab$l_alq;
			fcb->fab$l_alq *= BLK_SIZE / DISK_BLOCK_SIZE;
			fcb->fab$l_alq += START_VBN_CURRENT - 1;
			initial_alq = fcb->fab$l_alq;
			fcb->fab$w_mrs = 512;				/* no longer a relevent field to us */
			break;
		case dba_usr:
			util_out_print("Database file for region !AD not created; access method is not GDS.", TRUE,
				REG_LEN_STR(gv_cur_region));
			return EXIT_WRN;
		default:
			gtm_putmsg(VARLSTCNT(1) ERR_BADACCMTHD);
			return EXIT_ERR;
	}
	nam.nam$b_ess = SIZEOF(fn_buff);
	nam.nam$l_esa = fn_buff;
	nam.nam$b_nop |= NAM$M_SYNCHK;
	status = sys$parse(fcb, 0, 0);
	if (RMS$_NORMAL != status)
	{
		gtm_putmsg(VARLSTCNT(8) ERR_DBFILERR, 2, fcb->fab$b_fns, fcb->fab$l_fna, status, 0, fcb->fab$l_stv, 0);
		return EXIT_ERR;
	}
	if (nam.nam$b_node != 0)
	{
		status = sys$getsyiw(EFN$C_ENF, 0, 0, &item, &iosb, 0, 0);
		if (SS$_NORMAL == status)
			status = iosb.cond;
		if (SS$_NORMAL == status)
		{
			if (len == nam.nam$b_node-2 && !memcmp(nam.nam$l_esa, node, len))
			{
				fcb->fab$l_fna = nam.nam$l_esa + nam.nam$b_node;
				fcb->fab$b_fns = nam.nam$b_esl - nam.nam$b_node;
			}
		} else
		{
			util_out_print("Could not get node for !AD.", TRUE, REG_LEN_STR(gv_cur_region));
			exit_stat = EXIT_WRN;
		}
	}
	assert(gv_cur_region->dyn.addr->acc_meth == dba_bg || gv_cur_region->dyn.addr->acc_meth == dba_mm);
	nam.nam$l_esa = NULL;
	nam.nam$b_esl = 0;
	status = sys$create(fcb);
	if (status != RMS$_CREATED && status != RMS$_FILEPURGED)
	{
		switch(status)
		{
			case RMS$_FLK:
		 		util_out_print("Database file for region !AD not created; currently locked by another user.", TRUE,
					REG_LEN_STR(gv_cur_region));
				exit_stat = EXIT_INF;
				break;
			case RMS$_NORMAL:
		 		util_out_print("Database file for region !AD not created; already exists.", TRUE,
					REG_LEN_STR(gv_cur_region));
				exit_stat = EXIT_INF;
				break;
			case RMS$_SUPPORT:
				util_out_print("Database file for region !AD not created; cannot create across network.", TRUE,
					REG_LEN_STR(gv_cur_region));
				exit_stat = EXIT_WRN;
				break;
			case RMS$_FUL:
				send_msg(VARLSTCNT(8) ERR_DBFILERR, 2, fcb->fab$b_fns, fcb->fab$l_fna,
					status, 0, fcb->fab$l_stv, 0);
				/* intentionally falling through */
			default:
				gtm_putmsg(VARLSTCNT(8) ERR_DBFILERR, 2, fcb->fab$b_fns, fcb->fab$l_fna,
					status, 0, fcb->fab$l_stv, 0);
				exit_stat = EXIT_ERR;
		}
		sys$dassgn(fcb->fab$l_stv);
		return exit_stat;
	}

	memcpy(new_id.dvi, nam.nam$t_dvi, SIZEOF(nam.nam$t_dvi));
	memcpy(new_id.did, nam.nam$w_did, SIZEOF(nam.nam$w_did));
	memcpy(new_id.fid, nam.nam$w_fid, SIZEOF(nam.nam$w_fid));
	global_name("GT$S", &new_id, buff);		/* 2nd parm is actually a gds_file_id * in global_name */
	desc.dsc$w_length = buff[0];			/* By definition, a gds_file_id is dvi,fid,did from nam */
	desc.dsc$a_pointer = &buff[1];
	cs_addrs->db_addrs[0] = cs_addrs->db_addrs[1] = inadr[0] = inadr[1] = inadr;	/* used to determine p0 or p1 allocation */
	status = init_sec(cs_addrs->db_addrs, &desc, fcb->fab$l_stv, (START_VBN_CURRENT - 1),
			  SEC$M_DZRO|SEC$M_GBL|SEC$M_WRT|SEC$M_EXPREG);
	if ((SS$_CREATED != status) && (SS$_NORMAL != status))
	{
		gtm_putmsg(VARLSTCNT(8) ERR_DBFILERR, 2, fcb->fab$b_fns, fcb->fab$l_fna, status, 0, fcb->fab$l_stv, 0);
		sys$dassgn(fcb->fab$l_stv);
		return EXIT_ERR;
	}
	cs_data = (sgmnt_data *)cs_addrs->db_addrs[0];
	memset(cs_data, 0, SIZEOF_FILE_HDR_DFLT);
	cs_data->createinprogress = TRUE;
	cs_data->trans_hist.total_blks = (initial_alq - (START_VBN_CURRENT - 1)) / (BLK_SIZE / DISK_BLOCK_SIZE);
	/* assert that total_blks stored in file-header = non-bitmap blocks (initial allocation) + bitmap blocks */
	assert(cs_data->trans_hist.total_blks == gv_cur_region->dyn.addr->allocation +
				DIVIDE_ROUND_UP(gv_cur_region->dyn.addr->allocation, BLKS_PER_LMAP - 1));
	cs_data->start_vbn = START_VBN_CURRENT;
	temp_acc_meth = gv_cur_region->dyn.addr->acc_meth;
	cs_data->acc_meth = gv_cur_region->dyn.addr->acc_meth = dba_bg;
	cs_data->extension_size = gv_cur_region->dyn.addr->ext_blk_count;
	mucregini(blk_init_size);
	cs_addrs->hdr->free_space = free_space;
#ifndef GT_CX_DEF
	cs_addrs->hdr->unbacked_cache = TRUE;
#endif
	cs_data->acc_meth = gv_cur_region->dyn.addr->acc_meth = temp_acc_meth;
	cs_data->createinprogress = FALSE;
	if (SS$_NORMAL == (status = disk_block_available(fcb->fab$l_stv, &free_blocks)))
	{
		blocks_for_extension = (cs_data->blk_size / DISK_BLOCK_SIZE *
				  (DIVIDE_ROUND_UP(EXTEND_WARNING_FACTOR * (gtm_uint64_t)cs_data->extension_size, BLKS_PER_LMAP - 1)
					 + EXTEND_WARNING_FACTOR * (gtm_uint64_t)cs_data->extension_size));
		if ((gtm_uint64_t)free_blocks < blocks_for_extension)
		{
			free_blocks_ll = (gtm_uint64_t)free_blocks;
			gtm_putmsg(VARLSTCNT(8) ERR_LOWSPACECRE, 6, fcb->fab$b_fns, fcb->fab$l_fna, EXTEND_WARNING_FACTOR,
					&blocks_for_extension, DISK_BLOCK_SIZE, &free_blocks_ll);
			send_msg(VARLSTCNT(8) ERR_LOWSPACECRE, 6, fcb->fab$b_fns, fcb->fab$l_fna, EXTEND_WARNING_FACTOR,
					&blocks_for_extension, DISK_BLOCK_SIZE, &free_blocks_ll);
		}
	}
	if (SS$_NORMAL == (status = sys$updsec(((vms_gds_info *)(gv_cur_region->dyn.addr->file_cntl->file_info))->s_addrs.db_addrs,
			NULL, PSL$C_USER, 0, efn_immed_wait, &iosb, NULL, 0)))
	{
		status = sys$synch(efn_immed_wait, &iosb);
		if (SS$_NORMAL == status)
			status = iosb.cond;
	} else  if (SS$_NOTMODIFIED == status)
		status = SS$_NORMAL;
	if (SS$_NORMAL == status)
		status = del_sec(SEC$M_GBL, &desc, 0);
	if (SS$_NORMAL == status)
		status = sys$deltva(cs_addrs->db_addrs, retadr, PSL$C_USER);
	if (SS$_NORMAL == status)
		status = sys$dassgn(fcb->fab$l_stv);
	if (SS$_NORMAL == status)
	{
	 	util_out_print("Database file for region !AD created.", TRUE, REG_LEN_STR(gv_cur_region));
		/* the open and close are an attempt to ensure that the file is available, not under the control of an ACP,
		 * before MUPIP exits */
		fcb->fab$b_shr = FAB$M_SHRPUT | FAB$M_SHRGET | FAB$M_UPI;
		fcb->fab$l_fop = 0;
		for (lcnt = 1;  (60 * MAX_OPEN_RETRY) >= lcnt;  lcnt++)
		{	/* per VMS engineering a delay is expected.  We will wait up to an hour as a
			 * Delete Global Section operation is essentially and inherently asynchronous in nature
			 * and could take an arbitrary amount of time.
			 */
			if (RMS$_FLK != (status = sys$open(fcb, NULL, NULL)))
				break;
			wcs_sleep(lcnt);
		}
		assert(RMS$_NORMAL == status);
		if (RMS$_NORMAL == status)
		{
			status = sys$close(fcb);
			assert(RMS$_NORMAL == status);
		}
		if (RMS$_NORMAL != status)
			exit_stat = EXIT_WRN;
	} else
		exit_stat = EXIT_ERR;
	if (RMS$_NORMAL != status)
		gtm_putmsg(VARLSTCNT(8) ERR_DBFILERR, 2, fcb->fab$b_fns, fcb->fab$l_fna, status, 0, fcb->fab$l_stv, 0);
	if ((MAX_RMS_RECORDSIZE - SIZEOF(shmpool_blk_hdr)) < cs_data->blk_size)
		gtm_putmsg(VARLSTCNT(5) ERR_MUNOSTRMBKUP, 3, fcb->fab$b_fns, fcb->fab$l_fna, 32 * 1024 - DISK_BLOCK_SIZE);
	return exit_stat;
}
Ejemplo n.º 4
0
OS_PAGE_SIZE_DECLARE

uint4	 gdsfilext(uint4 blocks, uint4 filesize, boolean_t trans_in_prog)
{
	sm_uc_ptr_t		old_base[2], mmap_retaddr;
	boolean_t		was_crit, is_mm;
	char			buff[DISK_BLOCK_SIZE];
	int			result, save_errno, status;
	uint4			new_bit_maps, bplmap, map, new_blocks, new_total, max_tot_blks, old_total;
	uint4			jnl_status, to_wait, to_msg, wait_period;
	gtm_uint64_t		avail_blocks, mmap_sz;
	off_t			new_eof;
	trans_num		curr_tn;
	unix_db_info		*udi;
	inctn_opcode_t		save_inctn_opcode;
	int4			prev_extend_blks_to_upgrd;
	jnl_private_control	*jpc;
	jnl_buffer_ptr_t	jbp;
	cache_rec_ptr_t         cr;
	DCL_THREADGBL_ACCESS;

	assert(!IS_DSE_IMAGE);
	assert((cs_addrs->nl == NULL) || (process_id != cs_addrs->nl->trunc_pid)); /* mu_truncate shouldn't extend file... */
	assert(!process_exiting);
	DEBUG_ONLY(old_base[0] = old_base[1] = NULL);
	assert(!gv_cur_region->read_only);
	udi = FILE_INFO(gv_cur_region);
	is_mm = (dba_mm == cs_addrs->hdr->acc_meth);
#	if !defined(MM_FILE_EXT_OK)
	if (!udi->grabbed_access_sem && is_mm)
		return (uint4)(NO_FREE_SPACE); /* should this be changed to show extension not allowed ? */
#	endif
	/* Both blocks and total blocks are unsigned ints so make sure we aren't asking for huge numbers that will
	   overflow and end up doing silly things.
	*/
	assert((blocks <= (MAXTOTALBLKS(cs_data) - cs_data->trans_hist.total_blks)) || WBTEST_ENABLED(WBTEST_FILE_EXTEND_ERROR));
	if (!blocks)
		return (uint4)(NO_FREE_SPACE); /* should this be changed to show extension not enabled ? */
	bplmap = cs_data->bplmap;
	/* New total of non-bitmap blocks will be number of current, non-bitmap blocks, plus new blocks desired
	 * There are (bplmap - 1) non-bitmap blocks per bitmap, so add (bplmap - 2) to number of non-bitmap blocks
	 *      and divide by (bplmap - 1) to get total number of bitmaps for expanded database. (must round up in this
	 *      manner as every non-bitmap block must have an associated bitmap)
	 * Current number of bitmaps is (total number of current blocks + bplmap - 1) / bplmap.
	 * Subtract current number of bitmaps from number needed for expanded database to get number of new bitmaps needed.
	 */
	new_bit_maps = DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks
			- DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, bplmap) + blocks, bplmap - 1)
			- DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, bplmap);
	new_blocks = blocks + new_bit_maps;
	assert(0 < (int)new_blocks);
	if (new_blocks + cs_data->trans_hist.total_blks > MAXTOTALBLKS(cs_data))
	{
		assert(FALSE);
		send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(1) ERR_TOTALBLKMAX);
		return (uint4)(NO_FREE_SPACE);
	}
	if (0 != (save_errno = disk_block_available(udi->fd, &avail_blocks, FALSE)))
	{
		send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno);
		rts_error_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno);
	} else
	{
		if (!(gtmDebugLevel & GDL_IgnoreAvailSpace))
		{	/* Bypass this space check if debug flag above is on. Allows us to create a large sparce DB
			 * in space it could never fit it if wasn't sparse. Needed for some tests.
			 */
			avail_blocks = avail_blocks / (cs_data->blk_size / DISK_BLOCK_SIZE);
			if ((blocks * EXTEND_WARNING_FACTOR) > avail_blocks)
			{
				if (blocks > (uint4)avail_blocks)
				{
					SETUP_THREADGBL_ACCESS;
					if (!INST_FREEZE_ON_NOSPC_ENABLED(cs_addrs))
						return (uint4)(NO_FREE_SPACE);
					else
						send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(6) MAKE_MSG_WARNING(ERR_NOSPACEEXT), 4,
							DB_LEN_STR(gv_cur_region), new_blocks, (uint4)avail_blocks);
				} else
					send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DSKSPACEFLOW, 3, DB_LEN_STR(gv_cur_region),
						 (uint4)(avail_blocks - ((new_blocks <= avail_blocks) ? new_blocks : 0)));
			}
		}
	}
	/* From here on, we need to use GDSFILEXT_CLNUP before returning to the caller */
	was_crit = cs_addrs->now_crit;
	assert(!cs_addrs->hold_onto_crit || was_crit);
	/* If we are coming from mupip_extend (which gets crit itself) we better have waited for any unfreezes to occur.
	 * If we are coming from online rollback (when that feature is available), we will come in holding crit and in
	 * 	the final retry. In that case too, we expect to have waited for unfreezes to occur in the caller itself.
	 * Therefore if we are coming in holding crit from MUPIP, we expect the db to be unfrozen so no need to wait for
	 * freeze.
	 * If we are coming from GT.M and final retry (in which case we come in holding crit) we expect to have waited
	 * 	for any unfreezes (by invoking tp_crit_all_regions) to occur (TP or non-TP) before coming into this
	 *	function. However, there is one exception. In the final retry, if tp_crit_all_regions notices that
	 *	at least one of the participating regions did ONLY READs, it will not wait for any freeze on THAT region
	 *	to complete before grabbing crit. Later, in the final retry, if THAT region did an update which caused
	 *	op_tcommit to invoke bm_getfree->gdsfilext, then we would have come here with a frozen region on which
	 *	we hold crit.
	 */
	assert(!was_crit || !cs_data->freeze || (dollar_tlevel && (CDB_STAGNATE <= t_tries)));
	/*
	 * If we are in the final retry and already hold crit, it is possible that csa->nl->wc_blocked is also set to TRUE
	 * (by a concurrent process in phase2 which encountered an error in the midst of commit and secshr_db_clnup
	 * finished the job for it). In this case we do NOT want to invoke wcs_recover as that will update the "bt"
	 * transaction numbers without correspondingly updating the history transaction numbers (effectively causing
	 * a cdb_sc_blkmod type of restart). Therefore do NOT call grab_crit (which unconditionally invokes wcs_recover)
	 * if we already hold crit.
	 */
	if (!was_crit)
	{
		for ( ; ; )
		{
			grab_crit(gv_cur_region);
			if (!cs_data->freeze && !IS_REPL_INST_FROZEN)
				break;
			rel_crit(gv_cur_region);
			while (cs_data->freeze || IS_REPL_INST_FROZEN)
				hiber_start(1000);
		}
	} else if (cs_data->freeze && dollar_tlevel)
	{	/* We don't want to continue with file extension as explained above. Hence return with an error code which
		 * op_tcommit will recognize (as a cdb_sc_needcrit/cdb_sc_instancefreeze type of restart) and restart accordingly.
		 */
		assert(CDB_STAGNATE <= t_tries);
		GDSFILEXT_CLNUP;
		return (uint4)FINAL_RETRY_FREEZE_PROG;
	}
	if (IS_REPL_INST_FROZEN && trans_in_prog)
	{
		assert(CDB_STAGNATE <= t_tries);
		GDSFILEXT_CLNUP;
		return (uint4)FINAL_RETRY_INST_FREEZE;
	}
	assert(cs_addrs->ti->total_blks == cs_data->trans_hist.total_blks);
	old_total = cs_data->trans_hist.total_blks;
	if (old_total != filesize)
	{	/* Somebody else has already extended it, since we are in crit, this is trust-worthy. However, in case of MM,
		 * we still need to remap the database
		 */
		assert((old_total > filesize) GTM_TRUNCATE_ONLY( || !is_mm));
		/* For BG, someone else could have truncated or extended - we have no idea */
		GDSFILEXT_CLNUP;
		return (SS_NORMAL);
	}
Ejemplo n.º 5
0
uint4 jnl_file_extend(jnl_private_control *jpc, uint4 total_jnl_rec_size)
{
	file_control		*fc;
	boolean_t		need_extend;
	jnl_buffer_ptr_t     	jb;
	jnl_create_info 	jnl_info;
	jnl_file_header		*header;
	unsigned char		hdr_buff[REAL_JNL_HDR_LEN + MAX_IO_BLOCK_SIZE];
	uint4			new_alq;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	char			prev_jnl_fn[JNL_NAME_SIZE];
	uint4			jnl_status = 0, status;
	int			new_blocks, warn_blocks, result;
	gtm_uint64_t		avail_blocks;
	uint4			aligned_tot_jrec_size, count;
	uint4			jnl_fs_block_size, read_write_size;
	DCL_THREADGBL_ACCESS;

	switch(jpc->region->dyn.addr->acc_meth)
	{
	case dba_mm:
	case dba_bg:
		csa = &FILE_INFO(jpc->region)->s_addrs;
		break;
	default:
		GTMASSERT;
	}
	csd = csa->hdr;
	assert(csa == cs_addrs && csd == cs_data);
	assert(csa->now_crit || (csd->clustered && (CCST_CLOSED == csa->nl->ccp_state)));
	assert(&FILE_INFO(jpc->region)->s_addrs == csa);
	assert(csa->jnl_state == csd->jnl_state);
	assertpro(JNL_ENABLED(csa) && (NOJNL != jpc->channel) && (!JNL_FILE_SWITCHED(jpc)));
		/* crit and messing with the journal file - how could it have vanished? */
	if (!csd->jnl_deq || (csd->jnl_alq + csd->jnl_deq > csd->autoswitchlimit))
	{
		assert(DIVIDE_ROUND_UP(total_jnl_rec_size, DISK_BLOCK_SIZE) <= csd->jnl_alq);
		assert(csd->jnl_alq == csd->autoswitchlimit);
		new_blocks = csd->jnl_alq;
	} else
		/* May cause extension of  csd->jnl_deq * n blocks where n > 0 */
		new_blocks = ROUND_UP(DIVIDE_ROUND_UP(total_jnl_rec_size, DISK_BLOCK_SIZE), csd->jnl_deq);
	jpc->status = SS_NORMAL;
	jb = jpc->jnl_buff;
	assert(0 <= new_blocks);
	DEBUG_ONLY(count = 0);
	for (need_extend = (0 != new_blocks); need_extend; )
	{
		DEBUG_ONLY(count++);
		/* usually we will do the loop just once where we do the file extension.
		 * rarely we might need to do an autoswitch instead after which again rarely
		 * 	we might need to do an extension on the new journal to fit in the transaction's	journal requirements.
		 * therefore we should do this loop a maximum of twice. hence the assert below.
		 */
		assert(count <= 2);
		need_extend = FALSE;
		if (SS_NORMAL == (status = disk_block_available(jpc->channel, &avail_blocks, TRUE)))
		{
			warn_blocks = (csd->jnl_alq + csd->jnl_deq > csd->autoswitchlimit)
					? ((csd->jnl_deq > csd->autoswitchlimit) ? csd->jnl_deq : csd->autoswitchlimit)
					: new_blocks;

			if ((warn_blocks * EXTEND_WARNING_FACTOR) > avail_blocks)
			{
				if (new_blocks > avail_blocks)
				{	/* If we cannot satisfy the request, it is an error, unless the anticipatory freeze
					 * scheme is in effect in which case, we will assume space is available even if
					 * it is not and go ahead with writes to the disk. If the writes fail with ENOSPC
					 * we will freeze the instance and wait for space to become available and keep
					 * retrying the writes. Therefore, we make the NOSPACEEXT a warning in this case.
					 */
					SETUP_THREADGBL_ACCESS;
					if (!ANTICIPATORY_FREEZE_ENABLED(csa))
					{
						send_msg(VARLSTCNT(6) ERR_NOSPACEEXT, 4,
							JNL_LEN_STR(csd), new_blocks, avail_blocks);
						new_blocks = 0;
						jpc->status = SS_NORMAL;
						break;
					} else
						send_msg(VARLSTCNT(6) MAKE_MSG_WARNING(ERR_NOSPACEEXT), 4,
							JNL_LEN_STR(csd), new_blocks, avail_blocks);
				} else
					send_msg(VARLSTCNT(5) ERR_DSKSPACEFLOW, 3, JNL_LEN_STR(csd), (avail_blocks - warn_blocks));
			}
		} else
			send_msg(VARLSTCNT(5) ERR_JNLFILEXTERR, 2, JNL_LEN_STR(csd), status);
		new_alq = jb->filesize + new_blocks;
		/* ensure current journal file size is well within autoswitchlimit --> design constraint */
		assert(csd->autoswitchlimit >= jb->filesize);
		if (csd->autoswitchlimit < (jb->filesize + (EXTEND_WARNING_FACTOR * new_blocks)))	/* close to max */
			send_msg(VARLSTCNT(5) ERR_JNLSPACELOW, 3, JNL_LEN_STR(csd), csd->autoswitchlimit - jb->filesize);
		if (csd->autoswitchlimit < new_alq)
		{	/* Reached max, need to autoswitch */
			/* Ensure new journal file can hold the entire current transaction's journal record requirements */
			assert(csd->autoswitchlimit >= MAX_REQD_JNL_FILE_SIZE(total_jnl_rec_size));
			memset(&jnl_info, 0, SIZEOF(jnl_info));
			jnl_info.prev_jnl = &prev_jnl_fn[0];
			set_jnl_info(gv_cur_region, &jnl_info);
			assert(JNL_ENABLED(csa) && (NOJNL != jpc->channel) && !(JNL_FILE_SWITCHED(jpc)));
			jnl_status = jnl_ensure_open();
			if (0 == jnl_status)
			{	/* flush the cache and jnl-buffer-contents to current journal file before
				 * switching to a new journal. Set a global variable in_jnl_file_autoswitch
				 * so jnl_write can know not to do the padding check. But because this is a global
				 * variable, we also need to make sure it is reset in case of errors during the
				 * autoswitch (or else calls to jnl_write after we are out of the autoswitch logic
				 * will continue to incorrectly not do the padding check. Hence a condition handler.
				 */
				assert(!in_jnl_file_autoswitch);
				in_jnl_file_autoswitch = TRUE;
				/* Also make sure time is not changed. This way if "jnl_write" as part of writing a
				 * journal record invokes jnl_file_extend, when the autoswitch is done and writing
				 * of the parent jnl_write resumes, we want it to continue with the same timestamp
				 * and not have to reset its time (non-trivial task) to reflect any changes since then.
				 */
				assert(!jgbl.save_dont_reset_gbl_jrec_time);
				jgbl.save_dont_reset_gbl_jrec_time = jgbl.dont_reset_gbl_jrec_time;
				jgbl.dont_reset_gbl_jrec_time = TRUE;
				/* Establish a condition handler so we reset a few global variables that have
				 * temporarily been modified in case of errors inside wcs_flu/jnl_file_close.
				 */
				ESTABLISH_RET(jnl_file_autoswitch_ch, EXIT_ERR);
				/* It is possible we still have not written a PINI record in this journal file
				 * (e.g. mupip extend saw the need to do jnl_file_extend inside jnl_write while
				 * trying to write a PINI record). Write a PINI record in that case before closing
				 * the journal file that way the EOF record will have a non-zero pini_addr.
				 */
				if (0 == jpc->pini_addr)
					jnl_put_jrt_pini(csa);
				wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH | WCSFLU_SPEEDUP_NOBEFORE);
				jnl_file_close(gv_cur_region, TRUE, TRUE);
				REVERT;
				in_jnl_file_autoswitch = FALSE;
				jgbl.dont_reset_gbl_jrec_time = jgbl.save_dont_reset_gbl_jrec_time;
				DEBUG_ONLY(jgbl.save_dont_reset_gbl_jrec_time = FALSE);
				assert((dba_mm == cs_data->acc_meth) || (csd == cs_data));
				csd = cs_data;	/* In MM, wcs_flu() can remap an extended DB, so reset csd to be sure */
			} else
			{
				if (SS_NORMAL != jpc->status)
					rts_error(VARLSTCNT(7) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region),
						jpc->status);
				else
					rts_error(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region));
			}
			assert(!jgbl.forw_phase_recovery || (NULL != jgbl.mur_pini_addr_reset_fnptr));
			assert(jgbl.forw_phase_recovery || (NULL == jgbl.mur_pini_addr_reset_fnptr));
			if (NULL != jgbl.mur_pini_addr_reset_fnptr)
				(*jgbl.mur_pini_addr_reset_fnptr)(csa);
			assert(!jnl_info.no_rename);
			assert(!jnl_info.no_prev_link);
			if (EXIT_NRM == cre_jnl_file(&jnl_info))
			{
				assert(0 == memcmp(csd->jnl_file_name, jnl_info.jnl, jnl_info.jnl_len));
				assert(csd->jnl_file_name[jnl_info.jnl_len] == '\0');
				assert(csd->jnl_file_len == jnl_info.jnl_len);
				assert(csd->jnl_buffer_size == jnl_info.buffer);
				assert(csd->jnl_alq == jnl_info.alloc);
				assert(csd->jnl_deq == jnl_info.extend);
				assert(csd->jnl_before_image == jnl_info.before_images);
				csd->jnl_checksum = jnl_info.checksum;
				csd->jnl_eovtn = csd->trans_hist.curr_tn;
				send_msg(VARLSTCNT(4) ERR_NEWJNLFILECREAT, 2, JNL_LEN_STR(csd));
				fc = gv_cur_region->dyn.addr->file_cntl;
				fc->op = FC_WRITE;
				fc->op_buff = (sm_uc_ptr_t)csd;
				fc->op_len = SGMNT_HDR_LEN;
				fc->op_pos = 1;
				status = dbfilop(fc);
				if (SS_NORMAL != status)
					send_msg(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), status);
				assert(JNL_ENABLED(csa));
				/* call jnl_ensure_open instead of jnl_file_open to make sure jpc->pini_addr is set to 0 */
				jnl_status = jnl_ensure_open();	/* sets jpc->status */
				if (0 != jnl_status)
				{
					if (jpc->status)
						rts_error(VARLSTCNT(7) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region),
							jpc->status);
					else
						rts_error(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region));
				}
				assert(jb->filesize == csd->jnl_alq);
				if (csd->jnl_alq + csd->jnl_deq <= csd->autoswitchlimit)
				{
					aligned_tot_jrec_size = ALIGNED_ROUND_UP(MAX_REQD_JNL_FILE_SIZE(total_jnl_rec_size),
											csd->jnl_alq, csd->jnl_deq);
					if (aligned_tot_jrec_size > csd->jnl_alq)
					{	/* need to extend more than initial allocation in the new journal file
						 * to accommodate the current transaction.
						 */
						new_blocks = aligned_tot_jrec_size - csd->jnl_alq;
						assert(new_blocks);
						assert(0 == new_blocks % csd->jnl_deq);
						need_extend = TRUE;
					}
				}
			} else
			{
				send_msg(VARLSTCNT(4) ERR_JNLNOCREATE, 2, JNL_LEN_STR(csd));
				jpc->status = ERR_JNLNOCREATE;
				new_blocks = -1;
			}
		} else
		{
			assert(!need_extend);	/* ensure we won't go through the for loop again */
			/* Virtually extend currently used journal file */
			jnl_fs_block_size = jb->fs_block_size;
			header = (jnl_file_header *)(ROUND_UP2((uintszofptr_t)hdr_buff, jnl_fs_block_size));
			read_write_size = ROUND_UP2(REAL_JNL_HDR_LEN, jnl_fs_block_size);
			assert((unsigned char *)header + read_write_size <= ARRAYTOP(hdr_buff));
			DO_FILE_READ(jpc->channel, 0, header, read_write_size, jpc->status, jpc->status2);
			if (SS_NORMAL != jpc->status)
			{
				assert(FALSE);
				rts_error(VARLSTCNT(5) ERR_JNLRDERR, 2, JNL_LEN_STR(csd), jpc->status);
			}
			assert((header->virtual_size + new_blocks) == new_alq);
			jb->filesize = new_alq;	/* Actually this is virtual file size blocks */
			header->virtual_size = new_alq;
			JNL_DO_FILE_WRITE(csa, csd->jnl_file_name, jpc->channel, 0,
					header, read_write_size, jpc->status, jpc->status2);
			if (SS_NORMAL != jpc->status)
			{
				assert(FALSE);
				rts_error(VARLSTCNT(5) ERR_JNLWRERR, 2, JNL_LEN_STR(csd), jpc->status);
			}
		}
		if (0 >= new_blocks)
			break;
	}
	if (0 < new_blocks)
	{
		INCR_GVSTATS_COUNTER(csa, csa->nl, n_jnl_extends, 1);
		return EXIT_NRM;
	}
	jpc->status = ERR_JNLREADEOF;
	jnl_file_lost(jpc, ERR_JNLEXTEND);
       	return EXIT_ERR;
}
Ejemplo n.º 6
0
uint4	 gdsfilext (uint4 blocks, uint4 filesize)
{
	sm_uc_ptr_t		old_base[2];
	boolean_t		was_crit, need_to_restore_mask = FALSE;
	char			*buff;
	int			mm_prot, result, save_errno, status;
	uint4			new_bit_maps, bplmap, map, new_blocks, new_total, max_tot_blks;
	uint4			jnl_status, to_wait, to_msg, wait_period;
	GTM_BAVAIL_TYPE		avail_blocks;
	sgmnt_data_ptr_t	tmp_csd;
	off_t			new_eof;
	trans_num		curr_tn;
	unix_db_info		*udi;
	sigset_t		savemask;
	inctn_opcode_t		save_inctn_opcode;
	int4			prev_extend_blks_to_upgrd;
	jnl_private_control	*jpc;
	jnl_buffer_ptr_t	jbp;

	error_def(ERR_DBFILERR);
	error_def(ERR_DBFILEXT);
	error_def(ERR_DSKSPACEFLOW);
	error_def(ERR_JNLFLUSH);
	error_def(ERR_TEXT);
	error_def(ERR_TOTALBLKMAX);
	error_def(ERR_WAITDSKSPACE);

#ifdef __hppa
	if (dba_mm == cs_addrs->hdr->acc_meth)
		return (uint4)(NO_FREE_SPACE); /* should this be changed to show extension not allowed ? */
#endif

	/* Both blocks and total blocks are unsigned ints so make sure we aren't asking for huge numbers that will
	   overflow and end up doing silly things.
	*/
	assert(blocks <= (MAXTOTALBLKS(cs_data) - cs_data->trans_hist.total_blks));

	if (!blocks)
		return (uint4)(NO_FREE_SPACE); /* should this be changed to show extension not enabled ? */
	bplmap = cs_data->bplmap;
	/* new total of non-bitmap blocks will be number of current, non-bitmap blocks, plus new blocks desired
	   There are (bplmap - 1) non-bitmap blocks per bitmap, so add (bplmap - 2) to number of non-bitmap blocks
		and divide by (bplmap - 1) to get total number of bitmaps for expanded database. (must round up in this
		manner as every non-bitmap block must have an associated bitmap)
	   Current number of bitmaps is (total number of current blocks + bplmap - 1) / bplmap.
	   Subtract current number of bitmaps from number needed for expanded database to get number of new bitmaps needed.
	*/
	new_bit_maps = DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks
			- DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, bplmap) + blocks, bplmap - 1)
			- DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, bplmap);
	new_blocks = blocks + new_bit_maps;
	assert(0 < (int)new_blocks);
	udi = FILE_INFO(gv_cur_region);
	if (0 != (save_errno = disk_block_available(udi->fd, &avail_blocks, FALSE)))
	{
		send_msg(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno);
		rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno);
	} else
	{
		avail_blocks = avail_blocks / (cs_data->blk_size / DISK_BLOCK_SIZE);
		if ((blocks * EXTEND_WARNING_FACTOR) > avail_blocks)
		{
			send_msg(VARLSTCNT(5) ERR_DSKSPACEFLOW, 3, DB_LEN_STR(gv_cur_region),
					(uint4)(avail_blocks - ((new_blocks <= avail_blocks) ? new_blocks : 0)));
#ifndef __MVS__
			if (blocks > (uint4)avail_blocks)
				return (uint4)(NO_FREE_SPACE);
#endif
		}
	}
	cs_addrs->extending = TRUE;
	was_crit = cs_addrs->now_crit;
	/* If we are coming from mupip_extend (which gets crit itself) we better have waited for any unfreezes to occur */
	assert(!was_crit || CDB_STAGNATE == t_tries || FALSE == cs_data->freeze);
	for ( ; ; )
	{	/* If we are in the final retry and already hold crit, it is possible that csd->wc_blocked is also set to TRUE
		 * (by a concurrent process in phase2 which encountered an error in the midst of commit and secshr_db_clnup
		 * finished the job for it). In this case we do NOT want to invoke wcs_recover as that will update the "bt"
		 * transaction numbers without correspondingly updating the history transaction numbers (effectively causing
		 * a cdb_sc_blkmod type of restart). Therefore do NOT call grab_crit (which unconditionally invokes wcs_recover)
		 * if we already hold crit.
		 */
		if (!was_crit)
			grab_crit(gv_cur_region);
		if (FALSE == cs_data->freeze)
			break;
		rel_crit(gv_cur_region);
		if (was_crit)
		{	/* Two cases.
			 * (i)  Final retry and in TP. We might be holding crit in other regions too.
			 *	We can't do a grab_crit() on this region again unless it is deadlock-safe.
			 *      To be on the safer side, we do a restart. The tp_restart() logic will wait
			 *	for this region's freeze to be removed before grabbing crit.
			 * (ii) Final retry and not in TP. In that case too, it is better to restart in case there is
			 *	some validation code that shortcuts the checking for the final retry assuming we were
			 *	in crit from t_begin() to t_end(). t_retry() has logic that will wait for unfreeze.
			 * In either case, we need to restart. Returning EXTEND_UNFREEZECRIT will cause one in t_end/tp_tend.
			 */
			return (uint4)(EXTEND_UNFREEZECRIT);
		}
		while (cs_data->freeze)
			hiber_start(1000);
	}
	assert(cs_addrs->ti->total_blks == cs_data->trans_hist.total_blks);
	if (cs_data->trans_hist.total_blks != filesize)
	{
		/* somebody else has already extended it, since we are in crit, this is trust-worthy
		 * however, in case of MM, we still need to remap the database */
		assert(cs_data->trans_hist.total_blks > filesize);
		GDSFILEXT_CLNUP;
		return (SS_NORMAL);
	}
	if (run_time && (2 * ((0 < dollar_tlevel) ? sgm_info_ptr->cw_set_depth : cw_set_depth) < cs_addrs->ti->free_blocks))
	{
		if (FALSE == was_crit)
		{
			rel_crit(gv_cur_region);
			return (uint4)(EXTEND_SUSPECT);
		}
		/* If free_blocks counter is not ok, then correct it. Do the check again. If still fails, then GTMASSERT. */
		if (is_free_blks_ctr_ok() ||
				(2 * ((0 < dollar_tlevel) ? sgm_info_ptr->cw_set_depth : cw_set_depth) < cs_addrs->ti->free_blocks))
			GTMASSERT;	/* held crit through bm_getfree into gdsfilext and still didn't get it right */
	}
	if (JNL_ENABLED(cs_data))
	{
		if (!jgbl.dont_reset_gbl_jrec_time)
			SET_GBL_JREC_TIME;	/* needed before jnl_ensure_open as that can write jnl records */
		jpc = cs_addrs->jnl;
		jbp = jpc->jnl_buff;
		/* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order
		 * of jnl records. This needs to be done BEFORE the jnl_ensure_open as that could write
		 * journal records (if it decides to switch to a new journal file).
		 */
		ADJUST_GBL_JREC_TIME(jgbl, jbp);
		jnl_status = jnl_ensure_open();
		if (jnl_status)
		{
			GDSFILEXT_CLNUP;
			send_msg(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(cs_data), DB_LEN_STR(gv_cur_region));
			return (uint4)(NO_FREE_SPACE);	/* should have better return status */
		}
	}
	if (dba_mm == cs_addrs->hdr->acc_meth)
	{
#if defined(UNTARGETED_MSYNC)
		status = msync((caddr_t)cs_addrs->db_addrs[0], (size_t)(cs_addrs->db_addrs[1] - cs_addrs->db_addrs[0]), MS_SYNC);
#else
		cs_addrs->nl->mm_extender_pid = process_id;
		status = wcs_wtstart(gv_cur_region, 0);
		cs_addrs->nl->mm_extender_pid = 0;
		if (0 != cs_addrs->acc_meth.mm.mmblk_state->mmblkq_active.fl)
			GTMASSERT;
		status = 0;
#endif
		if (0 == status)
		{
			/* Block SIGALRM for the duration when cs_data and cs_addrs are out of sync */
			sigprocmask(SIG_BLOCK, &blockalrm, &savemask);
			need_to_restore_mask = TRUE;
			tmp_csd = cs_data;
			cs_data = (sgmnt_data_ptr_t)malloc(sizeof(*cs_data));
			memcpy((sm_uc_ptr_t)cs_data, (uchar_ptr_t)tmp_csd, sizeof(*cs_data));
			status = munmap((caddr_t)cs_addrs->db_addrs[0],
					     (size_t)(cs_addrs->db_addrs[1] - cs_addrs->db_addrs[0]));
#ifdef DEBUG_DB64
			if (-1 != status)
				rel_mmseg((caddr_t)cs_addrs->db_addrs[0]);
#endif
		} else
			tmp_csd = NULL;
		if (0 != status)
		{
			if (tmp_csd)
			{
				free(cs_data);
				cs_data = tmp_csd;
			}
			GDSFILEXT_CLNUP;
			send_msg(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), status);
			return (uint4)(NO_FREE_SPACE);
		}
		cs_addrs->hdr = cs_data;
		cs_addrs->ti = &cs_data->trans_hist;
	}
	if (new_blocks + cs_data->trans_hist.total_blks > MAXTOTALBLKS(cs_data))
	{
		GDSFILEXT_CLNUP;
		send_msg(VARLSTCNT(1) ERR_TOTALBLKMAX);
		return (uint4)(NO_FREE_SPACE);
	}
	CHECK_TN(cs_addrs, cs_data, cs_data->trans_hist.curr_tn);	/* can issue rts_error TNTOOLARGE */
	assert(0 < (int)new_blocks);
	new_total = cs_data->trans_hist.total_blks + new_blocks;
	new_eof = ((off_t)(cs_data->start_vbn - 1) * DISK_BLOCK_SIZE) + ((off_t)new_total * cs_data->blk_size);
	buff = (char *)malloc(DISK_BLOCK_SIZE);
	memset(buff, 0, DISK_BLOCK_SIZE);
	LSEEKWRITE(udi->fd, new_eof, buff, DISK_BLOCK_SIZE, save_errno);
	if ((ENOSPC == save_errno) && run_time)
	{
		/* try to write it every second, and send message to operator
		 * log every 1/20 of cs_data->wait_disk_space
		 */
		wait_period = to_wait = DIVIDE_ROUND_UP(cs_data->wait_disk_space, CDB_STAGNATE + 1);
		to_msg = (to_wait / 8) ? (to_wait / 8) : 1;		/* send around 8 messages during 1 wait_period */
		while ((to_wait > 0) && (ENOSPC == save_errno))
		{
			if ((to_wait == cs_data->wait_disk_space) || (to_wait % to_msg == 0))
			{
				send_msg(VARLSTCNT(11) ERR_WAITDSKSPACE, 4, process_id,
					to_wait + (CDB_STAGNATE - t_tries) * wait_period, DB_LEN_STR(gv_cur_region),
					ERR_TEXT, 2,
					RTS_ERROR_TEXT("Please make more disk space available or shutdown GT.M to avoid data loss"),
					save_errno);
				gtm_putmsg(VARLSTCNT(11) ERR_WAITDSKSPACE, 4, process_id,
					to_wait + (CDB_STAGNATE - t_tries) * wait_period, DB_LEN_STR(gv_cur_region),
					ERR_TEXT, 2,
					RTS_ERROR_TEXT("Please make more disk space available or shutdown GT.M to avoid data loss"),
					save_errno);
			}
			if (!was_crit)
				rel_crit(gv_cur_region);
			hiber_start(1000);
			to_wait--;
			if (!was_crit)
				grab_crit(gv_cur_region);
			LSEEKWRITE(udi->fd, new_eof, buff, DISK_BLOCK_SIZE, save_errno);
		}
	}
	free(buff);
	if (0 != save_errno)
	{
		GDSFILEXT_CLNUP;
		if (ENOSPC == save_errno)
			return (uint4)(NO_FREE_SPACE);
		send_msg(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno);
		return (uint4)(NO_FREE_SPACE);
	}
	DEBUG_ONLY(prev_extend_blks_to_upgrd = cs_data->blks_to_upgrd;)
Ejemplo n.º 7
0
unsigned char mu_cre_file(void)
{
	char		*cc = NULL, path[MAX_FBUFF + 1], errbuff[512];
	unsigned char	buff[DISK_BLOCK_SIZE];
	int		fd = -1, i, lower, upper, status, padded_len, padded_vbn, norm_vbn;
	uint4		raw_dev_size;		/* size of a raw device, in bytes */
	int4		blocks_for_create, blocks_for_extension, save_errno;
	GTM_BAVAIL_TYPE	avail_blocks;
	file_control	fc;
	mstr		file;
	parse_blk	pblk;
	unix_db_info	udi_struct, *udi;
	char		*fgets_res;
	gd_segment	*seg;

	error_def(ERR_NOSPACECRE);
	error_def(ERR_LOWSPACECRE);

	assert((-(sizeof(uint4) * 2) & sizeof(sgmnt_data)) == sizeof(sgmnt_data));
	cs_addrs = &udi_struct.s_addrs;
	cs_data = (sgmnt_data_ptr_t)NULL;	/* for CLEANUP */
	memset(&pblk, 0, sizeof(pblk));
	pblk.fop = (F_SYNTAXO | F_PARNODE);
	pblk.buffer = path;
	pblk.buff_size = MAX_FBUFF;
	file.addr = (char*)gv_cur_region->dyn.addr->fname;
	file.len = gv_cur_region->dyn.addr->fname_len;
	strncpy(path,file.addr,file.len);
	*(path+file.len) = '\0';
	if (is_raw_dev(path))
	{	/* do not use a default extension for raw device files */
		pblk.def1_buf = DEF_NODBEXT;
		pblk.def1_size = sizeof(DEF_NODBEXT) - 1;
	} else
	{
		pblk.def1_buf = DEF_DBEXT;
		pblk.def1_size = sizeof(DEF_DBEXT) - 1;
	}
	if (1 != (parse_file(&file, &pblk) & 1))
	{
		PRINTF("Error translating filename %s.\n", gv_cur_region->dyn.addr->fname);
		return EXIT_ERR;
	}
	path[pblk.b_esl] = 0;
	if (pblk.fnb & F_HAS_NODE)
	{	/* Remote node specification given */
		assert(pblk.b_node);
		PRINTF("Database file for region %s not created; cannot create across network.\n", path);
		return EXIT_WRN;
	}
	udi = &udi_struct;
	udi->raw = is_raw_dev(pblk.l_dir);
	if (udi->raw)
	{
		fd = OPEN(pblk.l_dir,O_EXCL | O_RDWR);
		if (-1 == fd)
		{
			SPRINTF_AND_PERROR("Error opening file %s\n");
			return EXIT_ERR;
		}
		if (-1 != (status = (ssize_t)lseek(fd, 0, SEEK_SET)))
		{
			DOREADRC(fd, buff, sizeof(buff), status);
		} else
			status = errno;
		if (0 != status)
		{
			SPRINTF_AND_PERROR("Error reading header for file %s\n");
			return EXIT_ERR;
		}
		if (!memcmp(buff, GDS_LABEL, STR_LIT_LEN(GDS_LABEL)))
		{
			char rsp[80];
			PRINTF("Database already exists on device %s\n", path);
			PRINTF("Do you wish to re-initialize (all current data will be lost) [y/n] ? ");
			FGETS(rsp, 79, stdin, fgets_res);
			if ('y' != *rsp)
				return EXIT_NRM;
		}
		PRINTF("Determining size of raw device...\n");
		for(i = 1; read(fd, buff, sizeof(buff)) == sizeof(buff);)
		{
			i *= 2;
			lseek(fd, (off_t)i * BUFSIZ, SEEK_SET);
		}

		lower = i / 2;
		upper = i;
		while ((lower + upper) / 2 != lower)
		{
			i = (lower + upper) / 2;
			lseek(fd, (off_t)i * BUFSIZ, SEEK_SET);
 			if (read(fd, buff, sizeof(buff)) == sizeof(buff))
				lower = i;
			else
				upper = i;
		}
		raw_dev_size = i * BUFSIZ;
	} else
	{
		fd = OPEN3(pblk.l_dir, O_CREAT | O_EXCL | O_RDWR, 0600);
		if (-1 == fd)
		{
			SPRINTF_AND_PERROR("Error opening file %s\n");
			return EXIT_ERR;
		}
		if (0 != (save_errno = disk_block_available(fd, &avail_blocks, FALSE)))
		{
			errno = save_errno;
			SPRINTF_AND_PERROR("Error checking available disk space for %s\n");
			CLEANUP(EXIT_ERR);
			return EXIT_ERR;
		}
		seg = gv_cur_region->dyn.addr;

		/* blocks_for_create is in the unit of DISK_BLOCK_SIZE */
		blocks_for_create = DIVIDE_ROUND_UP(sizeof(sgmnt_data), DISK_BLOCK_SIZE) + 1 +
					(seg->blk_size / DISK_BLOCK_SIZE *
					 ((DIVIDE_ROUND_UP(seg->allocation, BLKS_PER_LMAP - 1)) + seg->allocation));
		if ((uint4)avail_blocks < blocks_for_create)
		{
			gtm_putmsg(VARLSTCNT(6) ERR_NOSPACECRE, 4, LEN_AND_STR(path), blocks_for_create, (uint4)avail_blocks);
			send_msg(VARLSTCNT(6) ERR_NOSPACECRE, 4, LEN_AND_STR(path), blocks_for_create, (uint4)avail_blocks);
			CLEANUP(EXIT_ERR);
			return EXIT_ERR;
		}
		blocks_for_extension = (seg->blk_size / DISK_BLOCK_SIZE *
					((DIVIDE_ROUND_UP(EXTEND_WARNING_FACTOR * seg->ext_blk_count, BLKS_PER_LMAP - 1))
				 	  + EXTEND_WARNING_FACTOR * seg->ext_blk_count));
		if ((uint4)(avail_blocks - blocks_for_create) < blocks_for_extension)
		{
			gtm_putmsg(VARLSTCNT(8) ERR_LOWSPACECRE, 6, LEN_AND_STR(path), EXTEND_WARNING_FACTOR, blocks_for_extension,
					DISK_BLOCK_SIZE, (uint4)(avail_blocks - blocks_for_create));
			send_msg(VARLSTCNT(8) ERR_LOWSPACECRE, 6, LEN_AND_STR(path), EXTEND_WARNING_FACTOR, blocks_for_extension,
					DISK_BLOCK_SIZE, (uint4)(avail_blocks - blocks_for_create));
		}
	}
	gv_cur_region->dyn.addr->file_cntl = &fc;
	fc.file_info = (void*)&udi_struct;
	udi->fd = fd;
	cs_data = (sgmnt_data_ptr_t)malloc(sizeof(sgmnt_data));
	memset(cs_data, 0, sizeof(*cs_data));
	cs_data->createinprogress = TRUE;
	cs_data->semid = INVALID_SEMID;
	cs_data->shmid = INVALID_SHMID;
	/* We want our datablocks to start on what would be a block boundary within the file so pad the fileheader
	   if necessary to make this happen.
	*/
	padded_len = ROUND_UP(sizeof(sgmnt_data), BLK_SIZE);
	padded_vbn = DIVIDE_ROUND_UP(padded_len, DISK_BLOCK_SIZE) + 1;
	norm_vbn = DIVIDE_ROUND_UP(sizeof(sgmnt_data), DISK_BLOCK_SIZE) + 1;
	cs_data->start_vbn = padded_vbn;
	cs_data->free_space += (padded_vbn - norm_vbn) * DISK_BLOCK_SIZE;
	cs_data->acc_meth = gv_cur_region->dyn.addr->acc_meth;
	if (udi->raw)
	{
		/* calculate total blocks, reduce to make room for the
		 * database header (size rounded up to a block), then
		 * make into a multiple of BLKS_PER_LMAP to have a complete bitmap
		 * for each set of blocks.
		 */
		cs_data->trans_hist.total_blks = raw_dev_size - ROUND_UP(sizeof(sgmnt_data), DISK_BLOCK_SIZE);
		cs_data->trans_hist.total_blks /= (uint4)(((gd_segment *)gv_cur_region->dyn.addr)->blk_size);
		if (0 == (cs_data->trans_hist.total_blks - DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, BLKS_PER_LMAP - 1)
			  % (BLKS_PER_LMAP - 1)))
			cs_data->trans_hist.total_blks -= 1;	/* don't create a bitmap with no data blocks */
		cs_data->extension_size = 0;
		PRINTF("Raw device size is %dK, %d GDS blocks\n",
		raw_dev_size / 1000,
		cs_data->trans_hist.total_blks);
	} else
	{
		cs_data->trans_hist.total_blks = gv_cur_region->dyn.addr->allocation;
		/* There are (bplmap - 1) non-bitmap blocks per bitmap, so add (bplmap - 2) to number of non-bitmap blocks
		 * and divide by (bplmap - 1) to get total number of bitmaps for expanded database. (must round up in this
		 * manner as every non-bitmap block must have an associated bitmap)
		 */
		cs_data->trans_hist.total_blks += DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, BLKS_PER_LMAP - 1);
		cs_data->extension_size = gv_cur_region->dyn.addr->ext_blk_count;
	}
	mucregini(cs_data->trans_hist.total_blks);
	cs_data->createinprogress = FALSE;
	LSEEKWRITE(udi->fd, 0, cs_data, sizeof(sgmnt_data), status);
	if (0 != status)
	{
		SPRINTF_AND_PERROR("Error writing out header for file %s\n");
		CLEANUP(EXIT_ERR);
		return EXIT_ERR;
	}
	cc = (char*)malloc(DISK_BLOCK_SIZE);
	memset(cc, 0, DISK_BLOCK_SIZE);
	LSEEKWRITE(udi->fd,
		   (cs_data->start_vbn - 1) * DISK_BLOCK_SIZE + ((off_t)(cs_data->trans_hist.total_blks) * cs_data->blk_size),
		   cc,
		   DISK_BLOCK_SIZE,
		   status);
	if (0 != status)
	{
		SPRINTF_AND_PERROR("Error writing out end of file %s\n");
		CLEANUP(EXIT_ERR);
		return EXIT_ERR;
	}
	if ((!udi->raw) && (-1 == CHMOD(pblk.l_dir, 0666)))
	{
		SPRINTF_AND_PERROR("Error changing file mode on file %s\n");
		CLEANUP(EXIT_WRN);
		return EXIT_WRN;
	}
	CLEANUP(EXIT_NRM);
	PRINTF("Created file %s\n", path);
	return EXIT_NRM;
}