Beispiel #1
0
static int
NodeReady(RF_DagNode_t *node)
{
	int     ready;

	switch (node->dagHdr->status) {
	case rf_enable:
	case rf_rollForward:
		if ((node->status == rf_wait) &&
		    (node->numAntecedents == node->numAntDone))
			ready = RF_TRUE;
		else
			ready = RF_FALSE;
		break;
	case rf_rollBackward:
		RF_ASSERT(node->numSuccDone <= node->numSuccedents);
		RF_ASSERT(node->numSuccFired <= node->numSuccedents);
		RF_ASSERT(node->numSuccFired <= node->numSuccDone);
		if ((node->status == rf_good) &&
		    (node->numSuccDone == node->numSuccedents))
			ready = RF_TRUE;
		else
			ready = RF_FALSE;
		break;
	default:
		printf("Execution engine found illegal DAG status in NodeReady\n");
		RF_PANIC();
		break;
	}

	return (ready);
}
Beispiel #2
0
void ArrayNode<T>::RemoveChild(ArrayNode<T>* Node)
{
  RF_ASSERT(m_Children.Count() > 0, "Invalid parameter.");
  if(m_Children.Count() > 1)  // most expected case
  {
    RF_Type::MemoryRange index = Node - &m_Children(0);
    Array<ArrayNode<T>> tmp(m_Children.Count() - 1);
    if(index > 0)
    {
      RF_Type::Size rightPart = index + 1;
      m_Children.Copy(0, tmp, 0, index);
      if(index < tmp.Count())  // if not the last one
      {
        m_Children.Copy(rightPart, tmp, index, tmp.Count() - index);
      }
    }
    else  // index==0
    {
      m_Children.Copy(1, tmp, 0, tmp.Count());
    }
    m_Children.Swap(tmp);
  }
  else  // count==1
  {
    RF_ASSERT(Node == &m_Children(0), "Invalid parameter.");
    m_Children.Resize(0);
  }
}
Beispiel #3
0
void
rf_CreateDegradedWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
			  RF_DagHeader_t *dag_h, void *bp,
			  RF_RaidAccessFlags_t flags,
			  RF_AllocListElem_t *allocList)
{

	RF_ASSERT(asmap->numDataFailed == 1);
	dag_h->creator = "DegradedWriteDAG";

	/*
	 * if the access writes only a portion of the failed unit, and also
	 * writes some portion of at least one surviving unit, we create two
	 * DAGs, one for the failed component and one for the non-failed
	 * component, and do them sequentially.  Note that the fact that we're
	 * accessing only a portion of the failed unit indicates that the
	 * access either starts or ends in the failed unit, and hence we need
	 * create only two dags.  This is inefficient in that the same data or
	 * parity can get read and written twice using this structure.  I need
	 * to fix this to do the access all at once.
	 */
	RF_ASSERT(!(asmap->numStripeUnitsAccessed != 1 &&
		    asmap->failedPDAs[0]->numSector !=
			raidPtr->Layout.sectorsPerStripeUnit));
	rf_CreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags,
	    allocList);
}
int
rf_State_ExecuteDAG(RF_RaidAccessDesc_t *desc)
{
	int     i;
	RF_DagHeader_t *dag_h;
	RF_DagList_t *dagList;

	/* next state is always rf_State_ProcessDAG important to do
	 * this before firing the first dag (it may finish before we
	 * leave this routine) */
	desc->state++;

	/* sweep dag array, a stripe at a time, firing the first dag
	 * in each stripe */
	dagList = desc->dagList;
	for (i = 0; i < desc->numStripes; i++) {
		RF_ASSERT(dagList->numDags > 0);
		RF_ASSERT(dagList->numDagsDone == 0);
		RF_ASSERT(dagList->numDagsFired == 0);
#if RF_ACC_TRACE > 0
		RF_ETIMER_START(dagList->tracerec.timer);
#endif
		/* fire first dag in this stripe */
		dag_h = dagList->dags;
		RF_ASSERT(dag_h);
		dagList->numDagsFired++;
		rf_DispatchDAG(dag_h, (void (*) (void *)) rf_ContinueDagAccess, dagList);
		dagList = dagList->next;
	}

	/* the DAG will always call the callback, even if there was no
	 * blocking, so we are always suspended in this state */
	return RF_TRUE;
}
Beispiel #5
0
RF_DiskQueueData_t *
rf_FifoDequeue(void *q_in)
{
	RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in;
	RF_DiskQueueData_t *nd;

	RF_ASSERT(q);
	if (q->hq_head) {
		RF_ASSERT(q->hq_count != 0 && q->hq_tail != NULL);
		nd = q->hq_head;
		q->hq_head = q->hq_head->next;
		if (!q->hq_head)
			q->hq_tail = NULL;
		nd->next = NULL;
		q->hq_count--;
	} else
		if (q->lq_head) {
			RF_ASSERT(q->lq_count != 0 && q->lq_tail != NULL);
			nd = q->lq_head;
			q->lq_head = q->lq_head->next;
			if (!q->lq_head)
				q->lq_tail = NULL;
			nd->next = NULL;
			q->lq_count--;
			if (rf_fifoDebug) {
				printf("raid%d: fifo: DEQ lopri %lx\n",
				    nd->raidPtr->raidid, (long) nd);
			}
		} else {
			RF_ASSERT(q->hq_count == 0 && q->lq_count == 0 &&
			    q->hq_tail == NULL && q->lq_tail == NULL);
			nd = NULL;
		}
	return (nd);
}
Beispiel #6
0
RF_DiskQueueData_t *
rf_CscanPeek(void *qptr)
{
	RF_DiskQueueData_t *req;
	RF_Sstf_t *cscanq;

	cscanq = (RF_Sstf_t *) qptr;

	RF_ASSERT(cscanq->dir == DIR_RIGHT);
	if (cscanq->right.queue) {
		req = cscanq->right.queue;
	} else {
		RF_ASSERT(cscanq->right.qlen == 0);
		if (cscanq->left.queue == NULL) {
			RF_ASSERT(cscanq->left.qlen == 0);
			if (cscanq->lopri.queue == NULL) {
				RF_ASSERT(cscanq->lopri.qlen == 0);
				return (NULL);
			}
			req = closest_to_arm(&cscanq->lopri, cscanq->last_sector,
			    &cscanq->dir, cscanq->allow_reverse);
		} else {
			/*
			 * There's I/Os to the left of the arm. We'll end
			 * up swinging on back.
			 */
			req = cscanq->left.queue;
		}
	}
	if (req == NULL) {
		RF_ASSERT(QSUM(cscanq) == 0);
	}
	return (req);
}
Beispiel #7
0
static void
do_dequeue(RF_SstfQ_t *queue, RF_DiskQueueData_t *req)
{
	RF_DiskQueueData_t *req2;

#if RF_DEBUG_QUEUE
	if (rf_sstfDebug || rf_scanDebug || rf_cscanDebug) {
		printf("raid%d: do_dequeue\n", req->raidPtr->raidid);
	}
#endif
	if (req == queue->queue) {
		DO_HEAD_DEQ(req2, queue);
		RF_ASSERT(req2 == req);
	} else
		if (req == queue->qtail) {
			DO_TAIL_DEQ(req2, queue);
			RF_ASSERT(req2 == req);
		} else {
			/* dequeue from middle of list */
			RF_ASSERT(req->next);
			RF_ASSERT(req->prev);
			queue->qlen--;
			req->next->prev = req->prev;
			req->prev->next = req->next;
			req->next = req->prev = NULL;
		}
}
Beispiel #8
0
/*
 * We use the debug_mem_mutex here because we need to lock it anyway to call
 * free. This is probably a bug somewhere else in the code, but when I call
 * malloc/free outside of any lock, I have endless trouble with malloc
 * appearing to return the same pointer twice. Since we have to lock it
 * anyway, we might as well use it as the lock around the al_free_list.
 * Note that we can't call Free with the debug_mem_mutex locked.
 */
void
rf_FreeAllocList(RF_AllocListElem_t *l)
{
	int i;
	RF_AllocListElem_t *temp, *p;

	for (p = l; p; p = p->next) {
		RF_ASSERT(p->numPointers >= 0 &&
		    p->numPointers <= RF_POINTERS_PER_ALLOC_LIST_ELEMENT);
		for (i = 0; i < p->numPointers; i++) {
			RF_ASSERT(p->pointers[i]);
			RF_Free(p->pointers[i], p->sizes[i]);
		}
	}
	while (l) {
		temp = l;
		l = l->next;
		if (al_free_list_count > RF_AL_FREELIST_MAX) {
			DO_FREE(temp, sizeof(*temp));
		} else {
			temp->next = al_free_list;
			al_free_list = temp;
			al_free_list_count++;
		}
	}
}
Beispiel #9
0
/* prototyping this inexplicably causes the compile of the layout table (rf_layout.c) to fail */
void
rf_MapParityDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
			RF_RowCol_t *col,
			RF_SectorNum_t *diskSector, int remap)
{
	RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
	RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
	RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
	RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
	RF_StripeNum_t BlockID, BlockOffset, RepIndex;
	RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
	RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
	RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0;

	rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);

	/* compute row & (possibly) spare space exactly as before */
	FullTableID = SUID / sus_per_fulltable;

	if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
		SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
		SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
	}
	/* compute BlockID and RepIndex exactly as before */
	FullTableOffset = SUID % sus_per_fulltable;
	TableID = FullTableOffset / info->SUsPerTable;
	TableOffset = FullTableOffset - TableID * info->SUsPerTable;
	/* TableOffset     = FullTableOffset % info->SUsPerTable; */
	/* BlockID         = (TableOffset / info->PUsPerBlock) %
	 * info->BlocksPerTable; */
	BlockID = TableOffset / info->PUsPerBlock;
	/* BlockOffset     = TableOffset % info->PUsPerBlock; */
	BlockOffset = TableOffset - BlockID * info->PUsPerBlock;
	BlockID %= info->BlocksPerTable;

	/* the parity block is in the position indicated by RepIndex */
	RepIndex = (raidPtr->noRotate) ? info->PUsPerBlock : info->PUsPerBlock - TableID;
	*col = info->LayoutTable[BlockID][RepIndex];

	if (remap) {
		RF_ASSERT(raidPtr->Disks[*col].status == rf_ds_reconstructing || raidPtr->Disks[*col].status == rf_ds_dist_spared ||
		    (rf_copyback_in_progress && raidPtr->Disks[*col].status == rf_ds_optimal));
		rf_remap_to_spare_space(layoutPtr, info, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU);
	} else {

		/* compute sector as before, except use RepIndex instead of
		 * BlockOffset */
		outSU = base_suid;
		outSU += FullTableID * fulltable_depth;
		outSU += SpareSpace;	/* skip rsvd spare space */
		outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;
		outSU += info->OffsetTable[BlockID][RepIndex] * layoutPtr->SUsPerPU;
	}

	outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);
	*diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit);

	RF_ASSERT(*col != -1);
}
/* Algorithm:
     1. Store the difference of old data and new data in the Rod buffer.
     2. then encode this buffer into the buffer which already have old 'E' information inside it,
	the result can be shown to be the new 'E' information.
     3. xor the Wnd buffer into the difference buffer to recover the  original old data.
   Here we have another alternative: to allocate a temporary buffer for storing the difference of
   old data and new data, then encode temp buf into old 'E' buf to form new 'E', but this approach
   take the same speed as the previous, and need more memory.
*/
int
rf_RegularONEFunc(RF_DagNode_t *node)
{
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
	int     EpdaIndex = (node->numParams - 1) / 2 - 1;	/* the parameter of node
								 * where you can find
								 * e-pda */
	int     i, k;
	int     suoffset, length;
	RF_RowCol_t scol;
	char   *srcbuf, *destbuf;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;
	RF_PhysDiskAddr_t *pda;
#ifdef RAID_DIAGNOSTIC
	RF_PhysDiskAddr_t *EPDA =
	    (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p;
	int     ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector);

	RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q);
	RF_ASSERT(ESUOffset == 0);
#endif /* RAID_DIAGNOSTIC */

	RF_ETIMER_START(timer);

	/* Xor the Wnd buffer into Rod buffer, the difference of old data and
	 * new data is stored in Rod buffer */
	for (k = 0; k < EpdaIndex; k += 2) {
		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
		rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length);
	}
	/* Start to encoding the buffer storing the difference of old data and
	 * new data into 'E' buffer  */
	for (i = 0; i < EpdaIndex; i += 2)
		if (node->params[i + 1].p != node->results[0]) {	/* results[0] is buf ptr
									 * of E */
			pda = (RF_PhysDiskAddr_t *) node->params[i].p;
			srcbuf = (char *) node->params[i + 1].p;
			scol = rf_EUCol(layoutPtr, pda->raidAddress);
			suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
			destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset);
			rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
		}
	/* Recover the original old data to be used by parity encoding
	 * function in XorNode */
	for (k = 0; k < EpdaIndex; k += 2) {
		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
		rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length);
	}
	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	tracerec->q_us += RF_ETIMER_VAL_US(timer);
	rf_GenericWakeupFunc(node, 0);
#if 1
	return (0);		/* XXX this was missing.. GO */
#endif
}
Beispiel #11
0
void
rf_FreePSStatus(RF_Raid_t *raidPtr, RF_ReconParityStripeStatus_t *p)
{
	RF_ASSERT(p->procWaitList == NULL);
	RF_ASSERT(p->blockWaitList == NULL);
	RF_ASSERT(p->bufWaitList == NULL);

	pool_put(&rf_pools.pss, p);
}
Beispiel #12
0
void
rf_MapSectorDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
			RF_RowCol_t *col,
			RF_SectorNum_t *diskSector, int remap)
{
	RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
	RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
	RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
	RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
	RF_StripeNum_t BlockID, BlockOffset, RepIndex;
	RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
	RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
	RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0;

	rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);

	FullTableID = SUID / sus_per_fulltable;	/* fulltable ID within array
						 * (across rows) */

	if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
		SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
		SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
	}
	FullTableOffset = SUID % sus_per_fulltable;
	TableID = FullTableOffset / info->SUsPerTable;
	TableOffset = FullTableOffset - TableID * info->SUsPerTable;
	BlockID = TableOffset / info->PUsPerBlock;
	BlockOffset = TableOffset - BlockID * info->PUsPerBlock;
	BlockID %= info->BlocksPerTable;
	RepIndex = info->PUsPerBlock - TableID;
	if (!raidPtr->noRotate)
		BlockOffset += ((BlockOffset >= RepIndex) ? 1 : 0);
	*col = info->LayoutTable[BlockID][BlockOffset];

	/* remap to distributed spare space if indicated */
	if (remap) {
		RF_ASSERT(raidPtr->Disks[*col].status == rf_ds_reconstructing || raidPtr->Disks[*col].status == rf_ds_dist_spared ||
		    (rf_copyback_in_progress && raidPtr->Disks[*col].status == rf_ds_optimal));
		rf_remap_to_spare_space(layoutPtr, info, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU);
	} else {

		outSU = base_suid;
		outSU += FullTableID * fulltable_depth;	/* offs to strt of FT */
		outSU += SpareSpace;	/* skip rsvd spare space */
		outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;	/* offs to strt of tble */
		outSU += info->OffsetTable[BlockID][BlockOffset] * layoutPtr->SUsPerPU;	/* offs to the PU */
	}
	outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);	/* offs to the SU within
										 * a PU */

	/* convert SUs to sectors, and, if not aligned to SU boundary, add in
	 * offset to sector.  */
	*diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit);

	RF_ASSERT(*col != -1);
}
Beispiel #13
0
bool ast_iterable_range_end_get(const struct ast_node *n, int64_t *ret)
{
    RF_ASSERT(
        n->type == AST_ITERABLE && n->iterable.type == ITERABLE_RANGE,
        "Illegal ast node type. Expected a range iterable"
    );
    RF_ASSERT(n->iterable.range.end_node, "An end node should exist");

    if (ast_is_constant_integer(n->iterable.range.end_node)) {
        *ret = n->iterable.range.end;
        return true;
    }
    return false;
}
Beispiel #14
0
RF_DiskQueueData_t *
rf_ScanDequeue(void *qptr)
{
	RF_DiskQueueData_t *req = NULL;
	RF_Sstf_t *scanq;

	scanq = (RF_Sstf_t *) qptr;

#if RF_DEBUG_QUEUE
	if (rf_scanDebug) {
		RF_DiskQueue_t *dq;
		dq = (RF_DiskQueue_t *) req->queue;
		RF_ASSERT(QSUM(scanq) == dq->queueLength);
		printf("raid%d: scan: Dequeue %d queues are %d,%d,%d\n",
		       req->raidPtr->raidid, dq->col,
		       scanq->left.qlen, scanq->right.qlen, scanq->lopri.qlen);
	}
#endif
	if (scanq->left.queue == NULL) {
		RF_ASSERT(scanq->left.qlen == 0);
		if (scanq->right.queue == NULL) {
			RF_ASSERT(scanq->right.qlen == 0);
			if (scanq->lopri.queue == NULL) {
				RF_ASSERT(scanq->lopri.qlen == 0);
				return (NULL);
			}
			req = closest_to_arm(&scanq->lopri, scanq->last_sector,
			    &scanq->dir, scanq->allow_reverse);
			if (req == NULL)
				return (NULL);
			do_dequeue(&scanq->lopri, req);
		} else {
			scanq->dir = DIR_RIGHT;
			DO_HEAD_DEQ(req, &scanq->right);
		}
	} else
		if (scanq->right.queue == NULL) {
			RF_ASSERT(scanq->right.qlen == 0);
			RF_ASSERT(scanq->left.queue);
			scanq->dir = DIR_LEFT;
			DO_TAIL_DEQ(req, &scanq->left);
		} else {
			RF_ASSERT(scanq->right.queue);
			RF_ASSERT(scanq->left.queue);
			if (scanq->dir == DIR_RIGHT) {
				DO_HEAD_DEQ(req, &scanq->right);
			} else {
				DO_TAIL_DEQ(req, &scanq->left);
			}
		}
	RF_ASSERT(req);
	scanq->last_sector = req->sectorOffset;
	return (req);
}
Beispiel #15
0
RF_DiskQueueData_t *
rf_SstfPeek(void *qptr)
{
	RF_DiskQueueData_t *req;
	RF_Sstf_t *sstfq;

	sstfq = (RF_Sstf_t *) qptr;

	if ((sstfq->left.queue == NULL) && (sstfq->right.queue == NULL)) {
		req = closest_to_arm(&sstfq->lopri, sstfq->last_sector, &sstfq->dir,
		    sstfq->allow_reverse);
	} else {
		if (sstfq->left.queue == NULL)
			req = sstfq->right.queue;
		else {
			if (sstfq->right.queue == NULL)
				req = sstfq->left.queue;
			else {
				if (SNUM_DIFF(sstfq->last_sector, sstfq->right.queue->sectorOffset)
				    < SNUM_DIFF(sstfq->last_sector, sstfq->left.qtail->sectorOffset)) {
					req = sstfq->right.queue;
				} else {
					req = sstfq->left.qtail;
				}
			}
		}
	}
	if (req == NULL) {
		RF_ASSERT(QSUM(sstfq) == 0);
	}
	return (req);
}
Beispiel #16
0
/* Invoked when the copyback has completed. */
void
rf_CopybackComplete(RF_CopybackDesc_t *desc, int status)
{
	RF_Raid_t *raidPtr = desc->raidPtr;
	struct timeval t, diff;

	if (!status) {
		RF_LOCK_MUTEX(raidPtr->mutex);
		if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
			RF_ASSERT(raidPtr->Layout.map->parityConfig == 'D');
			rf_FreeSpareTable(raidPtr);
		} else {
			raidPtr->Disks[desc->spRow][desc->spCol].status =
			    rf_ds_spare;
		}
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		RF_GETTIME(t);
		RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff);
		printf("Copyback time was %d.%06d seconds.\n",
		    (int) diff.tv_sec, (int) diff.tv_usec);
	} else
		printf("COPYBACK: Failure.\n");

	RF_Free(desc->databuf, rf_RaidAddressToByte(raidPtr, desc->sectPerSU));
	rf_FreeMCPair(desc->mcpair);
	RF_Free(desc, sizeof(*desc));

	rf_copyback_in_progress = 0;
	rf_ResumeNewRequests(raidPtr);
}
/*******************************************************************************************
 * This degraded function allow only two case:
 *  1. when write access the full failed stripe unit, then the access can be more than
 *     one tripe units.
 *  2. when write access only part of the failed SU, we assume accesses of more than
 *     one stripe unit is not allowed so that the write can be dealt with like a
 *     large write.
 *  The following function is based on these assumptions. So except in the second case,
 *  it looks the same as a large write encodeing function. But this is not exactly the
 *  normal way for doing a degraded write, since raidframe have to break cases of access
 *  other than the above two into smaller accesses. We may have to change
 *  DegrESubroutin in the future.
 *******************************************************************************************/
void
rf_DegrESubroutine(RF_DagNode_t *node, char *ebuf)
{
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
	RF_PhysDiskAddr_t *pda;
	int     i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
	RF_RowCol_t scol;
	char   *srcbuf, *destbuf;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;

	RF_ETIMER_START(timer);
	for (i = 0; i < node->numParams - 2; i += 2) {
		RF_ASSERT(node->params[i + 1].p != ebuf);
		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
		scol = rf_EUCol(layoutPtr, pda->raidAddress);
		srcbuf = (char *) node->params[i + 1].p;
		destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
	}

	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	tracerec->q_us += RF_ETIMER_VAL_US(timer);
}
Beispiel #18
0
/* We sometimes need to promote a low priority access to a regular priority access.
 * Currently, this is only used when the user wants to write a stripe which is currently
 * under reconstruction.
 * This routine will promote all accesses tagged with the indicated parityStripeID from
 * the low priority queue to the end of the normal priority queue.
 * We assume the queue is locked upon entry.
 */
int
rf_FifoPromote(void *q_in, RF_StripeNum_t parityStripeID,
	       RF_ReconUnitNum_t which_ru)
{
	RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in;
	RF_DiskQueueData_t *lp = q->lq_head, *pt = NULL;	/* lp = lo-pri queue
								 * pointer, pt = trailer */
	int     retval = 0;

	while (lp) {

		/* search for the indicated parity stripe in the low-pri queue */
		if (lp->parityStripeID == parityStripeID && lp->which_ru == which_ru) {
			/* printf("FifoPromote:  promoting access for psid
			 * %ld\n",parityStripeID); */
			if (pt)
				pt->next = lp->next;	/* delete an entry other
							 * than the first */
			else
				q->lq_head = lp->next;	/* delete the head entry */

			if (!q->lq_head)
				q->lq_tail = NULL;	/* we deleted the only
							 * entry */
			else
				if (lp == q->lq_tail)
					q->lq_tail = pt;	/* we deleted the tail
								 * entry */

			lp->next = NULL;
			q->lq_count--;

			if (q->hq_tail) {
				q->hq_tail->next = lp;
				q->hq_tail = lp;
			}
			 /* append to hi-priority queue */
			else {
				q->hq_head = q->hq_tail = lp;
			}
			q->hq_count++;

			/* UpdateShortestSeekFinishTimeForced(lp->requestPtr,
			 * lp->diskState); *//* deal with this later, if ever */

			lp = (pt) ? pt->next : q->lq_head;	/* reset low-pri pointer
								 * and continue */
			retval++;

		} else {
			pt = lp;
			lp = lp->next;
		}
	}

	/* sanity check.  delete this if you ever put more than one entry in
	 * the low-pri queue */
	RF_ASSERT(retval == 0 || retval == 1);
	return (retval);
}
Beispiel #19
0
void
rf_ReintLog(RF_Raid_t *raidPtr, int regionID, RF_ParityLog_t *log)
{
	RF_ASSERT(log);

	/*
	 * Insert an in-core parity log (log) into the disk queue of
	 * reintegration work. Set the flag (reintInProgress) for the
	 * specified region (regionID) to indicate that reintegration is in
	 * progress for this region. NON-BLOCKING
	 */

	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
	/* Cleared when reint complete. */
	raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE;

	if (rf_parityLogDebug)
		printf("[requesting reintegration of region %d]\n",
		    log->regionID);
	/* Move record to reintegration queue. */
	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	log->next = raidPtr->parityLogDiskQueue.reintQueue;
	raidPtr->parityLogDiskQueue.reintQueue = log;
	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
}
Beispiel #20
0
static void
do_sstf_ord_q(RF_DiskQueueData_t **queuep, RF_DiskQueueData_t **tailp, RF_DiskQueueData_t *req)
{
	RF_DiskQueueData_t *r, *s;

	if (*queuep == NULL) {
		*queuep = req;
		*tailp = req;
		req->next = NULL;
		req->prev = NULL;
		return;
	}
	if (req->sectorOffset <= (*queuep)->sectorOffset) {
		req->next = *queuep;
		req->prev = NULL;
		(*queuep)->prev = req;
		*queuep = req;
		return;
	}
	if (req->sectorOffset > (*tailp)->sectorOffset) {
		/* optimization */
		r = NULL;
		s = *tailp;
		goto q_at_end;
	}
	for (s = NULL, r = *queuep; r; s = r, r = r->next) {
		if (r->sectorOffset >= req->sectorOffset) {
			/* insert after s, before r */
			RF_ASSERT(s);
			req->next = r;
			r->prev = req;
			s->next = req;
			req->prev = s;
			return;
		}
	}
q_at_end:
	/* insert after s, at end of queue */
	RF_ASSERT(r == NULL);
	RF_ASSERT(s);
	RF_ASSERT(s == (*tailp));
	req->next = NULL;
	req->prev = s;
	s->next = req;
	*tailp = req;
}
Beispiel #21
0
const struct RFstring *type_callable_category_str(const struct type *t)
{
    RF_ASSERT(type_is_callable(t), "Non callable type detected");
    if (type_is_function(t)) {
        return &s_function_;
    }
    return &s_ctor_;
}
Beispiel #22
0
void
rf_memory_hash_insert(void *addr, int size, int line, char *filen)
{
	unsigned long bucket = HASHADDR(addr);
	struct mh_struct *p;

	RF_ASSERT(mh_table_initialized);

	/* Search for this address in the hash table. */
	for (p = mh_table[bucket]; p && (p->address != addr); p = p->next);
	if (!p) {
		RF_Malloc(p, sizeof(struct mh_struct), (struct mh_struct *));
		RF_ASSERT(p);
		p->next = mh_table[bucket];
		mh_table[bucket] = p;
		p->address = addr;
		p->allocated = 0;
	}
Beispiel #23
0
const struct RFstring *ast_identifier_str(const struct ast_node *n)
{
    RF_ASSERT(n->type == AST_IDENTIFIER || n->type == AST_XIDENTIFIER,
              "Unexpected ast node type");
    if (n->type == AST_IDENTIFIER) {
        return &n->identifier.string;
    }
    return ast_xidentifier_str(n);
}
Beispiel #24
0
void
rf_FlushLog(RF_Raid_t *raidPtr, RF_ParityLog_t *log)
{
	/*
	 * Insert a core log (log) into a list of logs
	 * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
	 * NON-BLOCKING
	 */

	RF_ASSERT(log);
	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
	RF_ASSERT(log->next == NULL);
	/* Move log to flush queue. */
	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	log->next = raidPtr->parityLogDiskQueue.flushQueue;
	raidPtr->parityLogDiskQueue.flushQueue = log;
	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
}
Beispiel #25
0
/*****************************************************************************
 *
 * Ensure that all node->dagHdr fields in a dag are consistent.
 *
 * IMPORTANT: This routine recursively searches all succedents of the node.
 * If a succedent is encountered whose dagHdr ptr does not require adjusting,
 * that node's succedents WILL NOT BE EXAMINED.
 *
 *****************************************************************************/
void
rf_UpdateNodeHdrPtr(RF_DagHeader_t *hdr, RF_DagNode_t *node)
{
	int i;
	RF_ASSERT(hdr != NULL && node != NULL);
	for (i = 0; i < node->numSuccedents; i++)
		if (node->succedents[i]->dagHdr != hdr)
			rf_UpdateNodeHdrPtr(hdr, node->succedents[i]);
	node->dagHdr = hdr;
}
Beispiel #26
0
RF_ParityLogData_t *
rf_DequeueParityLogData(RF_Raid_t *raidPtr, RF_ParityLogData_t **head,
    RF_ParityLogData_t **tail, int ignoreLocks)
{
	RF_ParityLogData_t *data;

	/*
	 * Remove and return an in-core parity log from the tail of a disk
	 * queue (*head, *tail). NON-BLOCKING
	 */

	/* Remove from tail, preserving FIFO order. */
	if (!ignoreLocks)
		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	data = *tail;
	if (data) {
		if (*head == *tail) {
			/* Removing last item from queue. */
			*head = NULL;
			*tail = NULL;
		} else {
			*tail = (*tail)->prev;
			(*tail)->next = NULL;
			RF_ASSERT((*head)->prev == NULL);
			RF_ASSERT((*tail)->next == NULL);
		}
		data->next = NULL;
		data->prev = NULL;
		if (rf_parityLogDebug)
			printf("[dequeueing parity log data, region %d,"
			    " raidAddress %d, numSector %d]\n", data->regionID,
			    (int) data->diskAddress.raidAddress,
			    (int) data->diskAddress.numSector);
	}
	if (*head) {
		RF_ASSERT((*head)->prev == NULL);
		RF_ASSERT((*tail)->next == NULL);
	}
	if (!ignoreLocks)
		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	return (data);
}
Beispiel #27
0
/*
 * We expect the lists to have at most one or two elements, so we're willing
 * to search for the end. If you ever observe the lists growing longer,
 * increase POINTERS_PER_ALLOC_LIST_ELEMENT.
 */
void
rf_real_AddToAllocList(RF_AllocListElem_t *l, void *p, int size, int lockflag)
{
	RF_AllocListElem_t *newelem;

	for (; l->next; l = l->next)
		RF_ASSERT(l->numPointers == RF_POINTERS_PER_ALLOC_LIST_ELEMENT);	/* Find end of list. */

	RF_ASSERT(l->numPointers >= 0 &&
	    l->numPointers <= RF_POINTERS_PER_ALLOC_LIST_ELEMENT);
	if (l->numPointers == RF_POINTERS_PER_ALLOC_LIST_ELEMENT) {
		newelem = rf_real_MakeAllocList(lockflag);
		l->next = newelem;
		l = newelem;
	}
	l->pointers[l->numPointers] = p;
	l->sizes[l->numPointers] = size;
	l->numPointers++;

}
Beispiel #28
0
/* user context:
 * Attempt to fire each node in a linked list.
 * The entire list is fired atomically.
 */
static void
FireNodeList(RF_DagNode_t *nodeList)
{
	RF_DagNode_t *node, *next;
	RF_DagStatus_t dstat;
	int     j;

	if (nodeList) {
		/* first, mark all nodes which are ready to be fired */
		for (node = nodeList; node; node = next) {
			next = node->next;
			dstat = node->dagHdr->status;
			RF_ASSERT((node->status == rf_wait) ||
				  (node->status == rf_good));
			if (NodeReady(node)) {
				if ((dstat == rf_enable) ||
				    (dstat == rf_rollForward)) {
					RF_ASSERT(node->status == rf_wait);
					if (node->commitNode)
						node->dagHdr->numCommits++;
					node->status = rf_fired;
					for (j = 0; j < node->numAntecedents; j++)
						node->antecedents[j]->numSuccFired++;
				} else {
					RF_ASSERT(dstat == rf_rollBackward);
					RF_ASSERT(node->status == rf_good);
					/* only one commit node per graph */
					RF_ASSERT(node->commitNode == RF_FALSE);
					node->status = rf_recover;
				}
			}
		}
		/* now, fire the nodes */
		for (node = nodeList; node; node = next) {
			next = node->next;
			if ((node->status == rf_fired) ||
			    (node->status == rf_recover))
				FireNode(node);
		}
	}
}
Beispiel #29
0
/*
 * The access has an array of dagLists, one dagList per parity stripe.
 * Fire the first DAG in each parity stripe (dagList).
 * DAGs within a stripe (dagList) must be executed sequentially.
 *  - This preserves atomic parity update.
 * DAGs for independents parity groups (stripes) are fired concurrently.
 */
int
rf_State_ExecuteDAG(RF_RaidAccessDesc_t *desc)
{
	int i;
	RF_DagHeader_t *dag_h;
	RF_DagList_t *dagArray = desc->dagArray;

	/*
	 * Next state is always rf_State_ProcessDAG. Important to do this
	 * before firing the first dag (it may finish before we leave this
	 * routine).
	 */
	desc->state++;

	/*
	 * Sweep dag array, a stripe at a time, firing the first dag in each
	 * stripe.
	 */
	for (i = 0; i < desc->numStripes; i++) {
		RF_ASSERT(dagArray[i].numDags > 0);
		RF_ASSERT(dagArray[i].numDagsDone == 0);
		RF_ASSERT(dagArray[i].numDagsFired == 0);
		RF_ETIMER_START(dagArray[i].tracerec.timer);
		/* Fire first dag in this stripe. */
		dag_h = dagArray[i].dags;
		RF_ASSERT(dag_h);
		dagArray[i].numDagsFired++;
		/*
		 * XXX Yet another case where we pass in a conflicting
		 * function pointer :-(  XXX  GO
		 */
		rf_DispatchDAG(dag_h, (void (*) (void *)) rf_ContinueDagAccess,
		    &dagArray[i]);
	}

	/*
	 * The DAG will always call the callback, even if there was no
	 * blocking, so we are always suspended in this state.
	 */
	return RF_TRUE;
}
Beispiel #30
0
/* enqueues a reconstruction event on the indicated queue */
void
rf_CauseReconEvent(RF_Raid_t *raidPtr, RF_RowCol_t col, void *arg,
		   RF_Revent_t type)
{
	RF_ReconCtrl_t *rctrl = raidPtr->reconControl;
	RF_ReconEvent_t *event = GetReconEventDesc(col, arg, type);

	if (type == RF_REVENT_BUFCLEAR) {
		RF_ASSERT(col != rctrl->fcol);
	}
	RF_ASSERT(col >= 0 && col <= raidPtr->numCol);
	RF_LOCK_MUTEX(rctrl->eq_mutex);
	/* q null and count==0 must be equivalent conditions */
	RF_ASSERT((rctrl->eventQueue == NULL) == (rctrl->eq_count == 0));
	event->next = rctrl->eventQueue;
	rctrl->eventQueue = event;
	rctrl->eq_count++;
	RF_UNLOCK_MUTEX(rctrl->eq_mutex);

	wakeup(&(rctrl)->eventQueue);
}