Exemple #1
0
void
viommu_dvmamap_sync(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map,
    bus_addr_t offset, bus_size_t len, int ops)
{
#ifdef DIAGNOSTIC
	struct iommu_map_state *ims = map->_dm_cookie;

	if (ims == NULL)
		panic("viommu_dvmamap_sync: null map state");
	if (ims->ims_iommu == NULL)
		panic("viommu_dvmamap_sync: null iommu");
#endif
	if (len == 0)
		return;

	if (ops & BUS_DMASYNC_PREWRITE)
		membar(MemIssue);

#if 0
	if (ops & (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_PREWRITE))
		_viommu_dvmamap_sync(t, t0, map, offset, len, ops);
#endif

	if (ops & BUS_DMASYNC_POSTREAD)
		membar(MemIssue);
}
Exemple #2
0
/*
 * CPU-specific initialization for Fujitsu Zeus CPUs
 */
void
zeus_init(u_int cpu_impl)
{
	u_long val;

	/* Ensure the TSB Extension Registers hold 0 as TSB_Base. */

	stxa(AA_DMMU_TSB_PEXT_REG, ASI_DMMU, 0);
	stxa(AA_IMMU_TSB_PEXT_REG, ASI_IMMU, 0);
	membar(Sync);

	stxa(AA_DMMU_TSB_SEXT_REG, ASI_DMMU, 0);
	/*
	 * NB: the secondary context was removed from the iMMU.
	 */
	membar(Sync);

	stxa(AA_DMMU_TSB_NEXT_REG, ASI_DMMU, 0);
	stxa(AA_IMMU_TSB_NEXT_REG, ASI_IMMU, 0);
	membar(Sync);

	val = ldxa(AA_MCNTL, ASI_MCNTL);
	/* Ensure MCNTL_JPS1_TSBP is 0. */
	val &= ~MCNTL_JPS1_TSBP;
	/*
	 * Ensure 4-Mbyte page entries are stored in the 1024-entry, 2-way set
	 * associative TLB.
	 */
	val = (val & ~MCNTL_RMD_MASK) | MCNTL_RMD_1024;
	stxa(AA_MCNTL, ASI_MCNTL, val);
}
Exemple #3
0
static void
ap_start(phandle_t node, u_int mid, u_int cpu_impl)
{
	volatile struct cpu_start_args *csa;
	struct pcpu *pc;
	register_t s;
	vm_offset_t va;
	u_int cpuid;
	uint32_t clock;

	if (cpuids > mp_maxid)
		return;

	if (OF_getprop(node, "clock-frequency", &clock, sizeof(clock)) <= 0)
		panic("%s: couldn't determine CPU frequency", __func__);
	if (clock != PCPU_GET(clock))
		tick_et_use_stick = 1;

	csa = &cpu_start_args;
	csa->csa_state = 0;
	sun4u_startcpu(node, (void *)mp_tramp, 0);
	s = intr_disable();
	while (csa->csa_state != CPU_TICKSYNC)
		;
	membar(StoreLoad);
	csa->csa_tick = rd(tick);
	if (cpu_impl == CPU_IMPL_SPARC64V ||
	    cpu_impl >= CPU_IMPL_ULTRASPARCIII) {
		while (csa->csa_state != CPU_STICKSYNC)
			;
		membar(StoreLoad);
		csa->csa_stick = rdstick();
	}
	while (csa->csa_state != CPU_INIT)
		;
	csa->csa_tick = csa->csa_stick = 0;
	intr_restore(s);

	cpuid = cpuids++;
	cpuid_to_mid[cpuid] = mid;
	cpu_identify(csa->csa_ver, clock, cpuid);

	va = kmem_malloc(kernel_arena, PCPU_PAGES * PAGE_SIZE,
	    M_WAITOK | M_ZERO);
	pc = (struct pcpu *)(va + (PCPU_PAGES * PAGE_SIZE)) - 1;
	pcpu_init(pc, cpuid, sizeof(*pc));
	dpcpu_init((void *)kmem_malloc(kernel_arena, DPCPU_SIZE,
	    M_WAITOK | M_ZERO), cpuid);
	pc->pc_addr = va;
	pc->pc_clock = clock;
	pc->pc_impl = cpu_impl;
	pc->pc_mid = mid;
	pc->pc_node = node;

	cache_init(pc);

	CPU_SET(cpuid, &all_cpus);
	intr_add_cpu(cpuid);
}
Exemple #4
0
void
tlb_page_demap(struct pmap *pm, vm_offset_t va)
{
	u_long flags;
	void *cookie;
	u_long s;

	critical_enter();
	cookie = ipi_tlb_page_demap(pm, va);
	if (pm->pm_active & PCPU_GET(cpumask)) {
		KASSERT(pm->pm_context[PCPU_GET(cpuid)] != -1,
		    ("tlb_page_demap: inactive pmap?"));
		if (pm == kernel_pmap)
			flags = TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE;
		else
			flags = TLB_DEMAP_PRIMARY | TLB_DEMAP_PAGE;
	
		s = intr_disable();
		stxa(TLB_DEMAP_VA(va) | flags, ASI_DMMU_DEMAP, 0);
		stxa(TLB_DEMAP_VA(va) | flags, ASI_IMMU_DEMAP, 0);
		membar(Sync);
		intr_restore(s);
	}
	ipi_wait(cookie);
	critical_exit();
}
Exemple #5
0
void
tlb_context_demap(struct pmap *pm)
{
	void *cookie;
	u_long s;

	/*
	 * It is important that we are not interrupted or preempted while
	 * doing the IPIs. The interrupted CPU may hold locks, and since
	 * it will wait for the CPU that sent the IPI, this can lead
	 * to a deadlock when an interrupt comes in on that CPU and it's
	 * handler tries to grab one of that locks. This will only happen for
	 * spin locks, but these IPI types are delivered even if normal
	 * interrupts are disabled, so the lock critical section will not
	 * protect the target processor from entering the IPI handler with
	 * the lock held.
	 */
	critical_enter();
	cookie = ipi_tlb_context_demap(pm);
	if (pm->pm_active & PCPU_GET(cpumask)) {
		KASSERT(pm->pm_context[PCPU_GET(cpuid)] != -1,
		    ("tlb_context_demap: inactive pmap?"));
		s = intr_disable();
		stxa(TLB_DEMAP_PRIMARY | TLB_DEMAP_CONTEXT, ASI_DMMU_DEMAP, 0);
		stxa(TLB_DEMAP_PRIMARY | TLB_DEMAP_CONTEXT, ASI_IMMU_DEMAP, 0);
		membar(Sync);
		intr_restore(s);
	}
	ipi_wait(cookie);
	critical_exit();
}
void LIR_Assembler::emit_op0(LIR_Op0* op) {
  switch (op->code()) {
    case lir_word_align: {
      while (code_offset() % BytesPerWord != 0) {
        _masm->nop();
      }
      break;
    }

    case lir_nop:
      assert(op->info() == NULL, "not supported");
      _masm->nop();
      break;

    case lir_label:
      Unimplemented();
      break;

    case lir_build_frame:
      build_frame();
      break;

    case lir_std_entry:
      _masm->align(CodeEntryAlignment);
_masm->set_code_start();
      _masm->entry(_compilation->codeprofile());
      build_frame();
      break;

    case lir_osr_entry:
      Unimplemented();
      //osr_entry();
      break;

    case lir_breakpoint:
      breakpoint();
      break;

    case lir_membar:
      membar();
      break;

    case lir_membar_acquire:
      membar_acquire();
      break;

    case lir_membar_release:
      membar_release();
      break;

    case lir_get_thread:
      get_thread(op->result_opr());
      break;

    default: 
      ShouldNotReachHere();
      break;
  }
}
Exemple #7
0
void acquire(lock_queue_t *L, qnode_t *I) {
  assert(I);

  // set this node's status to waiting and get predecessor
  I->next = NULL;
  I->locked = 1;
  membar();

  qnode_t *predecessor = __sync_lock_test_and_set(L, I); // note: this works like a fetch_and_set

  if (predecessor) {
    predecessor->next = I; // set self to next
    while (I->locked); //spin
  }

  membar();
}
Exemple #8
0
/** Process hardware interrupt.
 *
 * @param n Ignored.
 * @param istate Ignored.
 */
void interrupt(unsigned int n, istate_t *istate)
{
	uint64_t status = asi_u64_read(ASI_INTR_DISPATCH_STATUS, 0);
	if (status & (!INTR_DISPATCH_STATUS_BUSY))
		panic("Interrupt Dispatch Status busy bit not set\n");
	
	uint64_t intrcv = asi_u64_read(ASI_INTR_RECEIVE, 0);
#if defined (US)
	uint64_t data0 = asi_u64_read(ASI_INTR_R, ASI_UDB_INTR_R_DATA_0);
#elif defined (US3)
	uint64_t data0 = asi_u64_read(ASI_INTR_R, VA_INTR_R_DATA_0);
#endif
	
	irq_t *irq = irq_dispatch_and_lock(data0);
	if (irq) {
		/*
		 * The IRQ handler was found.
		 */
		irq->handler(irq);
		
		/*
		 * See if there is a clear-interrupt-routine and call it.
		 */
		if (irq->cir)
			irq->cir(irq->cir_arg, irq->inr);
		
		irq_spinlock_unlock(&irq->lock, false);
	} else if (data0 > config.base) {
		/*
		 * This is a cross-call.
		 * data0 contains address of the kernel function.
		 * We call the function only after we verify
		 * it is one of the supported ones.
		 */
#ifdef CONFIG_SMP
		if (data0 == (uintptr_t) tlb_shootdown_ipi_recv)
			tlb_shootdown_ipi_recv();
#endif
	} else {
		/*
		 * Spurious interrupt.
		 */
#ifdef CONFIG_DEBUG
		log(LF_ARCH, LVL_DEBUG,
		    "cpu%u: spurious interrupt (intrcv=%#" PRIx64 ", data0=%#"
		    PRIx64 ")", CPU->id, intrcv, data0);
#else
		(void) intrcv;
#endif
	}
	
	membar();
	asi_u64_write(ASI_INTR_RECEIVE, 0, 0);
}
Exemple #9
0
void release(lock_queue_t *L, qnode_t *I) {
  membar();

  if (I->next == NULL) { // no known successor
    if (__sync_bool_compare_and_swap(L, I, NULL)) {// are we still head?
      return;
    }

    while (I->next == NULL); // wait for new node to change next
  }

  I->next->locked = 0; // unlock the new node
}
Exemple #10
0
void
ofw_pci_dmamap_sync_stst_order_common(void)
{
    static u_char buf[VIS_BLOCKSIZE] __aligned(VIS_BLOCKSIZE);
    register_t reg, s;

    s = intr_disable();
    reg = rd(fprs);
    wr(fprs, reg | FPRS_FEF, 0);
    __asm __volatile("stda %%f0, [%0] %1"
                     : : "r" (buf), "n" (ASI_BLK_COMMIT_S));
    membar(Sync);
    wr(fprs, reg, 0);
    intr_restore(s);
}
Exemple #11
0
/*
 * Common function for DMA map synchronization.  May be called
 * by bus-specific DMA map synchronization functions.
 */
static void
nexus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
{

	/*
	 * We sync out our caches, but the bus must do the same.
	 *
	 * Actually a #Sync is expensive.  We should optimize.
	 */
	if ((op & BUS_DMASYNC_PREREAD) || (op & BUS_DMASYNC_PREWRITE)) {
		/*
		 * Don't really need to do anything, but flush any pending
		 * writes anyway.
		 */
		membar(Sync);
	}
	if (op & BUS_DMASYNC_POSTWRITE) {
		/* Nothing to do.  Handled by the bus controller. */
	}
}
Exemple #12
0
static void
nexus_bus_barrier(bus_space_tag_t t, bus_space_handle_t h, bus_size_t offset,
    bus_size_t size, int flags)
{

	/*
	 * We have lots of alternatives depending on whether we're
	 * synchronizing loads with loads, loads with stores, stores
	 * with loads, or stores with stores.  The only ones that seem
	 * generic are #Sync and #MemIssue.  I'll use #Sync for safety.
	 */
	switch(flags) {
	case BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE:
	case BUS_SPACE_BARRIER_READ:
	case BUS_SPACE_BARRIER_WRITE:
		membar(Sync);
		break;
	default:
		panic("%s: unknown flags", __func__);
	}
	return;
}
int main(int argc, char** argv)
{
	ATOMIC_TYPE var;
	VALUE_TYPE r;
	
	ATOMIC_TYPE* v;
	
	v=&var;
	
	
#ifdef MEMBAR_USES_LOCK
	__membar_lock=&dummy_membar_lock;
	if (lock_init(__membar_lock)==0){
		fprintf(stderr, "ERROR: failed to initialize membar_lock\n");
		__membar_lock=0;
		goto error;
	}
	_membar_lock; /* start with the lock "taken" so that we can safely use
					 unlock/lock sequences on it later */
#endif
#ifdef ATOMIC_OPS_USE_LOCK
	/* init the lock (emulate atomic_ops.c) */
	_atomic_lock=&dummy_atomic_lock;
	if (lock_init(_atomic_lock)==0){
		fprintf(stderr, "ERROR: failed to initialize atomic_lock\n");
		_atomic_lock=0;
		goto error;
	}
#endif
	
	printf("%s\n", flags);
	
	printf("starting memory barrier opcode tests...\n");
	membar();
	printf(" membar() .............................. ok\n");
	membar_write();
	printf(" membar_write() ........................ ok\n");
	membar_read();
	printf(" membar_read() ......................... ok\n");
	
	printf("\nstarting atomic ops basic tests...\n");
	
	VERIFY(at_set(v, 1), 1);
	printf(" atomic_set, v should be 1 ............. %2d\n", (int)at_get(v));
	VERIFY(at_inc(v), 2);
	printf(" atomic_inc, v should be 2 ............. %2d\n", (int)at_get(v));
	VERIFY(r=at_inc_and_test(v), 3);
	printf(" atomic_inc_and_test, v should be  3 ... %2d\n", (int)at_get(v));
	printf("                      r should be  0 ... %2d\n", (int)r);
	
	VERIFY(at_dec(v), 2);
	printf(" atomic_dec, v should be 2 ............. %2d\n", (int)at_get(v));
	VERIFY(r=at_dec_and_test(v), 1);
	printf(" atomic_dec_and_test, v should be  1 ... %2d\n", (int)at_get(v));
	printf("                      r should be  0 ... %2d\n", (int)r);
	VERIFY(r=at_dec_and_test(v), 0);
	printf(" atomic_dec_and_test, v should be  0 ... %2d\n", (int)at_get(v));
	printf("                      r should be  1 ... %2d\n", (int)r);
	VERIFY(r=at_dec_and_test(v), -1);
	printf(" atomic_dec_and_test, v should be -1 ... %2d\n", (int)at_get(v));
	printf("                      r should be  0 ... %2d\n", (int)r);
	
	VERIFY(at_and(v, 2), 2);
	printf(" atomic_and, v should be 2 ............. %2d\n", (int)at_get(v));
	
	VERIFY(at_or(v, 5), 7);
	VERIFY(r=at_get_and_set(v, 0), 0);
	printf(" atomic_or,  v should be 7 ............. %2d\n", (int)r);
	printf(" atomic_get_and_set, v should be 0 ..... %2d\n", (int)at_get(v));

	
	printf("\ndone.\n");
#ifdef MEMBAR_USES_LOCK
	lock_destroy(__membar_lock);
#endif
#ifdef ATOMIC_OPS_USE_LOCK
	lock_destroy(_atomic_lock);
#endif
	return 0;
error:
#ifdef MEMBAR_USES_LOCK
	if (__membar_lock)
		lock_destroy(__membar_lock);
#endif
#ifdef ATOMIC_OPS_USE_LOCK
	if (_atomic_lock)
		lock_destroy(_atomic_lock);
#endif
	return -1;
}
Exemple #14
0
int main(int argc, char** argv)
{
	int r;
	atomic_t v;
#ifdef ATOMIC_OPS_USE_LOCK
	/* init the lock (emulate atomic_ops.c) */
	_atomic_lock=&dummy_lock;
	if (lock_init(_atomic_lock)==0){
		fprintf(stderr, "ERROR: failed to initialize the lock\n");
		goto error;
	}
#endif
	
#ifdef NOSMP
	printf("no-smp mode\n");
#else
	printf("smp mode\n");
#endif
	
	printf("starting memory barrier opcode tests...\n");
	membar();
	printf(" membar() .............................. ok\n");
	membar_write();
	printf(" membar_write() ........................ ok\n");
	membar_read();
	printf(" membar_read() ......................... ok\n");
	
	printf("\nstarting atomic ops basic tests...\n");
	
	mb_atomic_set(&v, 1);
	printf(" atomic_set, v should be 1 ............. %2d\n", mb_atomic_get(&v));
	mb_atomic_inc(&v);
	printf(" atomic_inc, v should be 2 ............. %2d\n", mb_atomic_get(&v));
	r=mb_atomic_inc_and_test(&v);
	printf(" atomic_inc_and_test, v should be  3 ... %2d\n", mb_atomic_get(&v));
	printf("                      r should be  0 ... %2d\n", r);
	
	mb_atomic_dec(&v);
	printf(" atomic_dec, v should be 2 ............. %2d\n", mb_atomic_get(&v));
	r=mb_atomic_dec_and_test(&v);
	printf(" atomic_dec_and_test, v should be  1 ... %2d\n", mb_atomic_get(&v));
	printf("                      r should be  0 ... %2d\n", r);
	r=mb_atomic_dec_and_test(&v);
	printf(" atomic_dec_and_test, v should be  0 ... %2d\n", mb_atomic_get(&v));
	printf("                      r should be  1 ... %2d\n", r);
	r=mb_atomic_dec_and_test(&v);
	printf(" atomic_dec_and_test, v should be -1 ... %2d\n", mb_atomic_get(&v));
	printf("                      r should be  0 ... %2d\n", r);
	
	mb_atomic_and(&v, 2);
	printf(" atomic_and, v should be 2 ............. %2d\n", mb_atomic_get(&v));
	
	mb_atomic_or(&v, 5);
	r=mb_atomic_get_and_set(&v, 0);
	printf(" atomic_or,  v should be 7 ............. %2d\n", r);
	printf(" atomic_get_and_set, v should be 0 ..... %2d\n", mb_atomic_get(&v));

	
	printf("\ndone.\n");
#ifdef ATOMIC_OPS_USE_LOCK
	lock_destroy(_atomic_lock);
#endif
	return 0;
#ifdef ATOMIC_OPS_USE_LOCK
error:
	return -1;
#endif
}
void LIR_Assembler::emit_op0(LIR_Op0* op) {
    switch (op->code()) {
    case lir_word_align: {
        _masm->align(BytesPerWord);
        break;
    }

    case lir_nop:
        assert(op->info() == NULL, "not supported");
        _masm->nop();
        break;

    case lir_label:
        Unimplemented();
        break;

    case lir_build_frame:
        build_frame();
        break;

    case lir_std_entry:
        // init offsets
        offsets()->set_value(CodeOffsets::OSR_Entry, _masm->offset());
        _masm->align(CodeEntryAlignment);
        if (needs_icache(compilation()->method())) {
            check_icache();
        }
        offsets()->set_value(CodeOffsets::Verified_Entry, _masm->offset());
        _masm->verified_entry();
        build_frame();
        offsets()->set_value(CodeOffsets::Frame_Complete, _masm->offset());
        break;

    case lir_osr_entry:
        offsets()->set_value(CodeOffsets::OSR_Entry, _masm->offset());
        osr_entry();
        break;

    case lir_24bit_FPU:
        set_24bit_FPU();
        break;

    case lir_reset_FPU:
        reset_FPU();
        break;

    case lir_breakpoint:
        breakpoint();
        break;

    case lir_fpop_raw:
        fpop();
        break;

    case lir_membar:
        membar();
        break;

    case lir_membar_acquire:
        membar_acquire();
        break;

    case lir_membar_release:
        membar_release();
        break;

    case lir_membar_loadload:
        membar_loadload();
        break;

    case lir_membar_storestore:
        membar_storestore();
        break;

    case lir_membar_loadstore:
        membar_loadstore();
        break;

    case lir_membar_storeload:
        membar_storeload();
        break;

    case lir_get_thread:
        get_thread(op->result_opr());
        break;

    default:
        ShouldNotReachHere();
        break;
    }
}
Exemple #16
0
void
vdsk_scsi_cmd(struct scsi_xfer *xs)
{
	struct scsi_rw *rw;
	struct scsi_rw_big *rwb;
	struct scsi_rw_12 *rw12;
	struct scsi_rw_16 *rw16;
	u_int64_t lba;
	u_int32_t sector_count;
	uint8_t operation;

	switch (xs->cmd->opcode) {
	case READ_BIG:
	case READ_COMMAND:
	case READ_12:
	case READ_16:
		operation = VD_OP_BREAD;
		break;
	case WRITE_BIG:
	case WRITE_COMMAND:
	case WRITE_12:
	case WRITE_16:
		operation = VD_OP_BWRITE;
		break;

	case SYNCHRONIZE_CACHE:
		operation = VD_OP_FLUSH;
		break;

	case INQUIRY:
		vdsk_scsi_inq(xs);
		return;
	case READ_CAPACITY:
		vdsk_scsi_capacity(xs);
		return;
	case READ_CAPACITY_16:
		vdsk_scsi_capacity16(xs);
		return;

	case TEST_UNIT_READY:
	case START_STOP:
	case PREVENT_ALLOW:
		vdsk_scsi_done(xs, XS_NOERROR);
		return;

	default:
		printf("%s cmd 0x%02x\n", __func__, xs->cmd->opcode);
	case MODE_SENSE:
	case MODE_SENSE_BIG:
	case REPORT_LUNS:
	case READ_TOC:
		vdsk_scsi_done(xs, XS_DRIVER_STUFFUP);
		return;
	}

	/*
	 * READ/WRITE/SYNCHRONIZE commands. SYNCHRONIZE CACHE has same
	 * layout as 10-byte READ/WRITE commands.
	 */
	if (xs->cmdlen == 6) {
		rw = (struct scsi_rw *)xs->cmd;
		lba = _3btol(rw->addr) & (SRW_TOPADDR << 16 | 0xffff);
		sector_count = rw->length ? rw->length : 0x100;
	} else if (xs->cmdlen == 10) {
		rwb = (struct scsi_rw_big *)xs->cmd;
		lba = _4btol(rwb->addr);
		sector_count = _2btol(rwb->length);
	} else if (xs->cmdlen == 12) {
		rw12 = (struct scsi_rw_12 *)xs->cmd;
		lba = _4btol(rw12->addr);
		sector_count = _4btol(rw12->length);
	} else if (xs->cmdlen == 16) {
		rw16 = (struct scsi_rw_16 *)xs->cmd;
		lba = _8btol(rw16->addr);
		sector_count = _4btol(rw16->length);
	}

{
	struct vdsk_softc *sc = xs->sc_link->adapter_softc;
	struct ldc_map *map = sc->sc_lm;
	struct vio_dring_msg dm;
	vaddr_t va;
	paddr_t pa;
	psize_t nbytes;
	int len, ncookies;
	int desc, s;
	int timeout;

	s = splbio();
	desc = sc->sc_tx_prod;

	ncookies = 0;
	len = xs->datalen;
	va = (vaddr_t)xs->data;
	while (len > 0) {
		KASSERT(ncookies < MAXPHYS / PAGE_SIZE);
		pmap_extract(pmap_kernel(), va, &pa);
		while (map->lm_slot[map->lm_next].entry != 0) {
			map->lm_next++;
			map->lm_next &= (map->lm_nentries - 1);
		}
		map->lm_slot[map->lm_next].entry = (pa & LDC_MTE_RA_MASK);
		map->lm_slot[map->lm_next].entry |= LDC_MTE_CPR | LDC_MTE_CPW;
		map->lm_slot[map->lm_next].entry |= LDC_MTE_IOR | LDC_MTE_IOW;
		map->lm_slot[map->lm_next].entry |= LDC_MTE_R | LDC_MTE_W;
		map->lm_count++;

		nbytes = MIN(len, PAGE_SIZE - (pa & PAGE_MASK));

		sc->sc_vd->vd_desc[desc].cookie[ncookies].addr =
		    map->lm_next << PAGE_SHIFT | (pa & PAGE_MASK);
		sc->sc_vd->vd_desc[desc].cookie[ncookies].size = nbytes;

		sc->sc_vsd[desc].vsd_map_idx[ncookies] = map->lm_next;
		va += nbytes;
		len -= nbytes;
		ncookies++;
	}

	sc->sc_vd->vd_desc[desc].hdr.ack = 1;
	sc->sc_vd->vd_desc[desc].operation = operation;
	sc->sc_vd->vd_desc[desc].slice = VD_SLICE_NONE;
	sc->sc_vd->vd_desc[desc].status = 0xffffffff;
	sc->sc_vd->vd_desc[desc].offset = lba;
	sc->sc_vd->vd_desc[desc].size = xs->datalen;
	sc->sc_vd->vd_desc[desc].ncookies = ncookies;
	membar(Sync);
	sc->sc_vd->vd_desc[desc].hdr.dstate = VIO_DESC_READY;

	sc->sc_vsd[desc].vsd_xs = xs;
	sc->sc_vsd[desc].vsd_ncookies = ncookies;

	sc->sc_tx_prod++;
	sc->sc_tx_prod &= (sc->sc_vd->vd_nentries - 1);

	bzero(&dm, sizeof(dm));
	dm.tag.type = VIO_TYPE_DATA;
	dm.tag.stype = VIO_SUBTYPE_INFO;
	dm.tag.stype_env = VIO_DRING_DATA;
	dm.tag.sid = sc->sc_local_sid;
	dm.seq_no = sc->sc_seq_no++;
	dm.dring_ident = sc->sc_dring_ident;
	dm.start_idx = dm.end_idx = desc;
	vdsk_sendmsg(sc, &dm, sizeof(dm));

	if (!ISSET(xs->flags, SCSI_POLL)) {
		splx(s);
		return;
	}

	timeout = 1000;
	do {
		if (vdsk_rx_intr(sc) &&
		    sc->sc_vd->vd_desc[desc].status == VIO_DESC_FREE)
			break;

		delay(1000);
	} while(--timeout > 0);
	splx(s);
}
}
Exemple #17
0
/*
 * CPU-specific initialization - this is used for both the Sun Cheetah and
 * later as well as the Fujitsu Zeus and later CPUs.
 */
void
cheetah_init(u_int cpu_impl)
{
	u_long val;

	/* Ensure the TSB Extension Registers hold 0 as TSB_Base. */

	stxa(AA_DMMU_TSB_PEXT_REG, ASI_DMMU, 0);
	stxa(AA_IMMU_TSB_PEXT_REG, ASI_IMMU, 0);
	membar(Sync);

	stxa(AA_DMMU_TSB_SEXT_REG, ASI_DMMU, 0);
	/*
	 * NB: the secondary context was removed from the iMMU.
	 */
	membar(Sync);

	stxa(AA_DMMU_TSB_NEXT_REG, ASI_DMMU, 0);
	stxa(AA_IMMU_TSB_NEXT_REG, ASI_IMMU, 0);
	membar(Sync);

	if (cpu_impl == CPU_IMPL_SPARC64V) {
		/* Ensure MCNTL_JPS1_TSBP is 0. */
		val = ldxa(AA_MCNTL, ASI_MCNTL);
		val &= ~MCNTL_JPS1_TSBP;
		stxa(AA_MCNTL, ASI_MCNTL, val);
		return;
	}

	/*
	 * Configure the first large dTLB to hold 4MB pages (e.g. for direct
	 * mappings) for all three contexts and ensure the second one is set
	 * up to hold 8k pages for them.  Note that this is constraint by
	 * US-IV+, whose large dTLBs can only hold entries of certain page
	 * sizes each.
	 * For US-IV+, additionally ensure that the large iTLB is set up to
	 * hold 8k pages for nucleus and primary context (still no secondary
	 * iMMU context.
	 * NB: according to documentation, changing the page size of the same
	 * context requires a context demap before changing the corresponding
	 * page size, but we hardly can flush our locked pages here, so we use
	 * a demap all instead.
	 */
	stxa(TLB_DEMAP_ALL, ASI_DMMU_DEMAP, 0);
	membar(Sync);
	val = (TS_4M << TLB_PCXR_N_PGSZ0_SHIFT) |
	    (TS_8K << TLB_PCXR_N_PGSZ1_SHIFT) |
	    (TS_4M << TLB_PCXR_P_PGSZ0_SHIFT) |
	    (TS_8K << TLB_PCXR_P_PGSZ1_SHIFT);
	if (cpu_impl == CPU_IMPL_ULTRASPARCIVp)
		val |= (TS_8K << TLB_PCXR_N_PGSZ_I_SHIFT) |
		    (TS_8K << TLB_PCXR_P_PGSZ_I_SHIFT);
	stxa(AA_DMMU_PCXR, ASI_DMMU, val);
	val = (TS_4M << TLB_SCXR_S_PGSZ0_SHIFT) |
	    (TS_8K << TLB_SCXR_S_PGSZ1_SHIFT);
	stxa(AA_DMMU_SCXR, ASI_DMMU, val);
	flush(KERNBASE);

	/*
	 * Ensure DCR_IFPOE is disabled as long as we haven't implemented
	 * support for it (if ever) as most if not all firmware versions
	 * apparently turn it on.  Not making use of DCR_IFPOE should also
	 * avoid Cheetah erratum #109.
	 */
	val = rd(asr18) & ~DCR_IFPOE;
	if (cpu_impl == CPU_IMPL_ULTRASPARCIVp) {
		/*
		 * Ensure the branch prediction mode is set to PC indexing
		 * in order to work around US-IV+ erratum #2.
		 */
		val = (val & ~DCR_BPM_MASK) | DCR_BPM_PC;
		/*
		 * XXX disable dTLB parity error reporting as otherwise we
		 * get seemingly false positives when copying in the user
		 * window by simulating a fill trap on return to usermode in
		 * case single issue is disabled, which thus appears to be
		 * a CPU bug.
		 */
		val &= ~DCR_DTPE;
	}
	wr(asr18, val, 0);
}
OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {

  OopMapSet* oop_maps = NULL;
  // for better readability
  const bool must_gc_arguments = true;
  const bool dont_gc_arguments = false;

  // stub code & info for the different stubs
  switch (id) {
    case forward_exception_id:
      {
        oop_maps = generate_handle_exception(id, sasm);
      }
      break;

    case new_instance_id:
    case fast_new_instance_id:
    case fast_new_instance_init_check_id:
      {
        Register G5_klass = G5; // Incoming
        Register O0_obj   = O0; // Outgoing

        if (id == new_instance_id) {
          __ set_info("new_instance", dont_gc_arguments);
        } else if (id == fast_new_instance_id) {
          __ set_info("fast new_instance", dont_gc_arguments);
        } else {
          assert(id == fast_new_instance_init_check_id, "bad StubID");
          __ set_info("fast new_instance init check", dont_gc_arguments);
        }

        if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) &&
            UseTLAB && FastTLABRefill) {
          Label slow_path;
          Register G1_obj_size = G1;
          Register G3_t1 = G3;
          Register G4_t2 = G4;
          assert_different_registers(G5_klass, G1_obj_size, G3_t1, G4_t2);

          // Push a frame since we may do dtrace notification for the
          // allocation which requires calling out and we don't want
          // to stomp the real return address.
          __ save_frame(0);

          if (id == fast_new_instance_init_check_id) {
            // make sure the klass is initialized
            __ ldub(G5_klass, in_bytes(InstanceKlass::init_state_offset()), G3_t1);
            __ cmp(G3_t1, InstanceKlass::fully_initialized);
            __ br(Assembler::notEqual, false, Assembler::pn, slow_path);
            __ delayed()->nop();
          }
#ifdef ASSERT
          // assert object can be fast path allocated
          {
            Label ok, not_ok;
          __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size);
          // make sure it's an instance (LH > 0)
          __ cmp_and_br_short(G1_obj_size, 0, Assembler::lessEqual, Assembler::pn, not_ok);
          __ btst(Klass::_lh_instance_slow_path_bit, G1_obj_size);
          __ br(Assembler::zero, false, Assembler::pn, ok);
          __ delayed()->nop();
          __ bind(not_ok);
          __ stop("assert(can be fast path allocated)");
          __ should_not_reach_here();
          __ bind(ok);
          }
#endif // ASSERT
          // if we got here then the TLAB allocation failed, so try
          // refilling the TLAB or allocating directly from eden.
          Label retry_tlab, try_eden;
          __ tlab_refill(retry_tlab, try_eden, slow_path); // preserves G5_klass

          __ bind(retry_tlab);

          // get the instance size
          __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size);

          __ tlab_allocate(O0_obj, G1_obj_size, 0, G3_t1, slow_path);

          __ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2);
          __ verify_oop(O0_obj);
          __ mov(O0, I0);
          __ ret();
          __ delayed()->restore();

          __ bind(try_eden);
          // get the instance size
          __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size);
          __ eden_allocate(O0_obj, G1_obj_size, 0, G3_t1, G4_t2, slow_path);
          __ incr_allocated_bytes(G1_obj_size, G3_t1, G4_t2);

          __ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2);
          __ verify_oop(O0_obj);
          __ mov(O0, I0);
          __ ret();
          __ delayed()->restore();

          __ bind(slow_path);

          // pop this frame so generate_stub_call can push it's own
          __ restore();
        }

        oop_maps = generate_stub_call(sasm, I0, CAST_FROM_FN_PTR(address, new_instance), G5_klass);
        // I0->O0: new instance
      }

      break;

    case counter_overflow_id:
        // G4 contains bci, G5 contains method
      oop_maps = generate_stub_call(sasm, noreg, CAST_FROM_FN_PTR(address, counter_overflow), G4, G5);
      break;

    case new_type_array_id:
    case new_object_array_id:
      {
        Register G5_klass = G5; // Incoming
        Register G4_length = G4; // Incoming
        Register O0_obj   = O0; // Outgoing

        Address klass_lh(G5_klass, Klass::layout_helper_offset());
        assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise");
        assert(Klass::_lh_header_size_mask == 0xFF, "bytewise");
        // Use this offset to pick out an individual byte of the layout_helper:
        const int klass_lh_header_size_offset = ((BytesPerInt - 1)  // 3 - 2 selects byte {0,1,0,0}
                                                 - Klass::_lh_header_size_shift / BitsPerByte);

        if (id == new_type_array_id) {
          __ set_info("new_type_array", dont_gc_arguments);
        } else {
          __ set_info("new_object_array", dont_gc_arguments);
        }

#ifdef ASSERT
        // assert object type is really an array of the proper kind
        {
          Label ok;
          Register G3_t1 = G3;
          __ ld(klass_lh, G3_t1);
          __ sra(G3_t1, Klass::_lh_array_tag_shift, G3_t1);
          int tag = ((id == new_type_array_id)
                     ? Klass::_lh_array_tag_type_value
                     : Klass::_lh_array_tag_obj_value);
          __ cmp_and_brx_short(G3_t1, tag, Assembler::equal, Assembler::pt, ok);
          __ stop("assert(is an array klass)");
          __ should_not_reach_here();
          __ bind(ok);
        }
#endif // ASSERT

        if (UseTLAB && FastTLABRefill) {
          Label slow_path;
          Register G1_arr_size = G1;
          Register G3_t1 = G3;
          Register O1_t2 = O1;
          assert_different_registers(G5_klass, G4_length, G1_arr_size, G3_t1, O1_t2);

          // check that array length is small enough for fast path
          __ set(C1_MacroAssembler::max_array_allocation_length, G3_t1);
          __ cmp(G4_length, G3_t1);
          __ br(Assembler::greaterUnsigned, false, Assembler::pn, slow_path);
          __ delayed()->nop();

          // if we got here then the TLAB allocation failed, so try
          // refilling the TLAB or allocating directly from eden.
          Label retry_tlab, try_eden;
          __ tlab_refill(retry_tlab, try_eden, slow_path); // preserves G4_length and G5_klass

          __ bind(retry_tlab);

          // get the allocation size: (length << (layout_helper & 0x1F)) + header_size
          __ ld(klass_lh, G3_t1);
          __ sll(G4_length, G3_t1, G1_arr_size);
          __ srl(G3_t1, Klass::_lh_header_size_shift, G3_t1);
          __ and3(G3_t1, Klass::_lh_header_size_mask, G3_t1);
          __ add(G1_arr_size, G3_t1, G1_arr_size);
          __ add(G1_arr_size, MinObjAlignmentInBytesMask, G1_arr_size);  // align up
          __ and3(G1_arr_size, ~MinObjAlignmentInBytesMask, G1_arr_size);

          __ tlab_allocate(O0_obj, G1_arr_size, 0, G3_t1, slow_path);  // preserves G1_arr_size

          __ initialize_header(O0_obj, G5_klass, G4_length, G3_t1, O1_t2);
          __ ldub(klass_lh, G3_t1, klass_lh_header_size_offset);
          __ sub(G1_arr_size, G3_t1, O1_t2);  // body length
          __ add(O0_obj, G3_t1, G3_t1);       // body start
          __ initialize_body(G3_t1, O1_t2);
          __ verify_oop(O0_obj);
          __ retl();
          __ delayed()->nop();

          __ bind(try_eden);
          // get the allocation size: (length << (layout_helper & 0x1F)) + header_size
          __ ld(klass_lh, G3_t1);
          __ sll(G4_length, G3_t1, G1_arr_size);
          __ srl(G3_t1, Klass::_lh_header_size_shift, G3_t1);
          __ and3(G3_t1, Klass::_lh_header_size_mask, G3_t1);
          __ add(G1_arr_size, G3_t1, G1_arr_size);
          __ add(G1_arr_size, MinObjAlignmentInBytesMask, G1_arr_size);
          __ and3(G1_arr_size, ~MinObjAlignmentInBytesMask, G1_arr_size);

          __ eden_allocate(O0_obj, G1_arr_size, 0, G3_t1, O1_t2, slow_path);  // preserves G1_arr_size
          __ incr_allocated_bytes(G1_arr_size, G3_t1, O1_t2);

          __ initialize_header(O0_obj, G5_klass, G4_length, G3_t1, O1_t2);
          __ ldub(klass_lh, G3_t1, klass_lh_header_size_offset);
          __ sub(G1_arr_size, G3_t1, O1_t2);  // body length
          __ add(O0_obj, G3_t1, G3_t1);       // body start
          __ initialize_body(G3_t1, O1_t2);
          __ verify_oop(O0_obj);
          __ retl();
          __ delayed()->nop();

          __ bind(slow_path);
        }

        if (id == new_type_array_id) {
          oop_maps = generate_stub_call(sasm, I0, CAST_FROM_FN_PTR(address, new_type_array), G5_klass, G4_length);
        } else {
          oop_maps = generate_stub_call(sasm, I0, CAST_FROM_FN_PTR(address, new_object_array), G5_klass, G4_length);
        }
        // I0 -> O0: new array
      }
      break;

    case new_multi_array_id:
      { // O0: klass
        // O1: rank
        // O2: address of 1st dimension
        __ set_info("new_multi_array", dont_gc_arguments);
        oop_maps = generate_stub_call(sasm, I0, CAST_FROM_FN_PTR(address, new_multi_array), I0, I1, I2);
        // I0 -> O0: new multi array
      }
      break;

    case register_finalizer_id:
      {
        __ set_info("register_finalizer", dont_gc_arguments);

        // load the klass and check the has finalizer flag
        Label register_finalizer;
        Register t = O1;
        __ load_klass(O0, t);
        __ ld(t, in_bytes(Klass::access_flags_offset()), t);
        __ set(JVM_ACC_HAS_FINALIZER, G3);
        __ andcc(G3, t, G0);
        __ br(Assembler::notZero, false, Assembler::pt, register_finalizer);
        __ delayed()->nop();

        // do a leaf return
        __ retl();
        __ delayed()->nop();

        __ bind(register_finalizer);
        OopMap* oop_map = save_live_registers(sasm);
        int call_offset = __ call_RT(noreg, noreg,
                                     CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), I0);
        oop_maps = new OopMapSet();
        oop_maps->add_gc_map(call_offset, oop_map);

        // Now restore all the live registers
        restore_live_registers(sasm);

        __ ret();
        __ delayed()->restore();
      }
      break;

    case throw_range_check_failed_id:
      { __ set_info("range_check_failed", dont_gc_arguments); // arguments will be discarded
        // G4: index
        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true);
      }
      break;

    case throw_index_exception_id:
      { __ set_info("index_range_check_failed", dont_gc_arguments); // arguments will be discarded
        // G4: index
        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true);
      }
      break;

    case throw_div0_exception_id:
      { __ set_info("throw_div0_exception", dont_gc_arguments);
        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false);
      }
      break;

    case throw_null_pointer_exception_id:
      { __ set_info("throw_null_pointer_exception", dont_gc_arguments);
        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false);
      }
      break;

    case handle_exception_id:
      { __ set_info("handle_exception", dont_gc_arguments);
        oop_maps = generate_handle_exception(id, sasm);
      }
      break;

    case handle_exception_from_callee_id:
      { __ set_info("handle_exception_from_callee", dont_gc_arguments);
        oop_maps = generate_handle_exception(id, sasm);
      }
      break;

    case unwind_exception_id:
      {
        // O0: exception
        // I7: address of call to this method

        __ set_info("unwind_exception", dont_gc_arguments);
        __ mov(Oexception, Oexception->after_save());
        __ add(I7, frame::pc_return_offset, Oissuing_pc->after_save());

        __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address),
                        G2_thread, Oissuing_pc->after_save());
        __ verify_not_null_oop(Oexception->after_save());

        // Restore SP from L7 if the exception PC is a method handle call site.
        __ mov(O0, G5);  // Save the target address.
        __ lduw(Address(G2_thread, JavaThread::is_method_handle_return_offset()), L0);
        __ tst(L0);  // Condition codes are preserved over the restore.
        __ restore();

        __ jmp(G5, 0);
        __ delayed()->movcc(Assembler::notZero, false, Assembler::icc, L7_mh_SP_save, SP);  // Restore SP if required.
      }
      break;

    case throw_array_store_exception_id:
      {
        __ set_info("throw_array_store_exception", dont_gc_arguments);
        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true);
      }
      break;

    case throw_class_cast_exception_id:
      {
        // G4: object
        __ set_info("throw_class_cast_exception", dont_gc_arguments);
        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true);
      }
      break;

    case throw_incompatible_class_change_error_id:
      {
        __ set_info("throw_incompatible_class_cast_exception", dont_gc_arguments);
        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false);
      }
      break;

    case slow_subtype_check_id:
      { // Support for uint StubRoutine::partial_subtype_check( Klass sub, Klass super );
        // Arguments :
        //
        //      ret  : G3
        //      sub  : G3, argument, destroyed
        //      super: G1, argument, not changed
        //      raddr: O7, blown by call
        Label miss;

        __ save_frame(0);               // Blow no registers!

        __ check_klass_subtype_slow_path(G3, G1, L0, L1, L2, L4, NULL, &miss);

        __ mov(1, G3);
        __ ret();                       // Result in G5 is 'true'
        __ delayed()->restore();        // free copy or add can go here

        __ bind(miss);
        __ mov(0, G3);
        __ ret();                       // Result in G5 is 'false'
        __ delayed()->restore();        // free copy or add can go here
      }

    case monitorenter_nofpu_id:
    case monitorenter_id:
      { // G4: object
        // G5: lock address
        __ set_info("monitorenter", dont_gc_arguments);

        int save_fpu_registers = (id == monitorenter_id);
        // make a frame and preserve the caller's caller-save registers
        OopMap* oop_map = save_live_registers(sasm, save_fpu_registers);

        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), G4, G5);

        oop_maps = new OopMapSet();
        oop_maps->add_gc_map(call_offset, oop_map);
        restore_live_registers(sasm, save_fpu_registers);

        __ ret();
        __ delayed()->restore();
      }
      break;

    case monitorexit_nofpu_id:
    case monitorexit_id:
      { // G4: lock address
        // note: really a leaf routine but must setup last java sp
        //       => use call_RT for now (speed can be improved by
        //       doing last java sp setup manually)
        __ set_info("monitorexit", dont_gc_arguments);

        int save_fpu_registers = (id == monitorexit_id);
        // make a frame and preserve the caller's caller-save registers
        OopMap* oop_map = save_live_registers(sasm, save_fpu_registers);

        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), G4);

        oop_maps = new OopMapSet();
        oop_maps->add_gc_map(call_offset, oop_map);
        restore_live_registers(sasm, save_fpu_registers);

        __ ret();
        __ delayed()->restore();
      }
      break;

    case deoptimize_id:
      {
        __ set_info("deoptimize", dont_gc_arguments);
        OopMap* oop_map = save_live_registers(sasm);
        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), G4);
        oop_maps = new OopMapSet();
        oop_maps->add_gc_map(call_offset, oop_map);
        restore_live_registers(sasm);
        DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
        assert(deopt_blob != NULL, "deoptimization blob must have been created");
        AddressLiteral dest(deopt_blob->unpack_with_reexecution());
        __ jump_to(dest, O0);
        __ delayed()->restore();
      }
      break;

    case access_field_patching_id:
      { __ set_info("access_field_patching", dont_gc_arguments);
        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching));
      }
      break;

    case load_klass_patching_id:
      { __ set_info("load_klass_patching", dont_gc_arguments);
        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching));
      }
      break;

    case load_mirror_patching_id:
      { __ set_info("load_mirror_patching", dont_gc_arguments);
        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching));
      }
      break;

    case load_appendix_patching_id:
      { __ set_info("load_appendix_patching", dont_gc_arguments);
        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching));
      }
      break;

    case dtrace_object_alloc_id:
      { // O0: object
        __ set_info("dtrace_object_alloc", dont_gc_arguments);
        // we can't gc here so skip the oopmap but make sure that all
        // the live registers get saved.
        save_live_registers(sasm);

        __ save_thread(L7_thread_cache);
        __ call(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc),
                relocInfo::runtime_call_type);
        __ delayed()->mov(I0, O0);
        __ restore_thread(L7_thread_cache);

        restore_live_registers(sasm);
        __ ret();
        __ delayed()->restore();
      }
      break;

#if INCLUDE_ALL_GCS
    case g1_pre_barrier_slow_id:
      { // G4: previous value of memory
        BarrierSet* bs = Universe::heap()->barrier_set();
        if (bs->kind() != BarrierSet::G1SATBCTLogging) {
          __ save_frame(0);
          __ set((int)id, O1);
          __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), I0);
          __ should_not_reach_here();
          break;
        }

        __ set_info("g1_pre_barrier_slow_id", dont_gc_arguments);

        Register pre_val = G4;
        Register tmp  = G1_scratch;
        Register tmp2 = G3_scratch;

        Label refill, restart;
        bool with_frame = false; // I don't know if we can do with-frame.
        int satb_q_index_byte_offset =
          in_bytes(JavaThread::satb_mark_queue_offset() +
                   PtrQueue::byte_offset_of_index());
        int satb_q_buf_byte_offset =
          in_bytes(JavaThread::satb_mark_queue_offset() +
                   PtrQueue::byte_offset_of_buf());

        __ bind(restart);
        // Load the index into the SATB buffer. PtrQueue::_index is a
        // size_t so ld_ptr is appropriate
        __ ld_ptr(G2_thread, satb_q_index_byte_offset, tmp);

        // index == 0?
        __ cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pn, refill);

        __ ld_ptr(G2_thread, satb_q_buf_byte_offset, tmp2);
        __ sub(tmp, oopSize, tmp);

        __ st_ptr(pre_val, tmp2, tmp);  // [_buf + index] := <address_of_card>
        // Use return-from-leaf
        __ retl();
        __ delayed()->st_ptr(tmp, G2_thread, satb_q_index_byte_offset);

        __ bind(refill);
        __ save_frame(0);

        __ mov(pre_val, L0);
        __ mov(tmp,     L1);
        __ mov(tmp2,    L2);

        __ call_VM_leaf(L7_thread_cache,
                        CAST_FROM_FN_PTR(address,
                                         SATBMarkQueueSet::handle_zero_index_for_thread),
                                         G2_thread);

        __ mov(L0, pre_val);
        __ mov(L1, tmp);
        __ mov(L2, tmp2);

        __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
        __ delayed()->restore();
      }
      break;

    case g1_post_barrier_slow_id:
      {
        BarrierSet* bs = Universe::heap()->barrier_set();
        if (bs->kind() != BarrierSet::G1SATBCTLogging) {
          __ save_frame(0);
          __ set((int)id, O1);
          __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), I0);
          __ should_not_reach_here();
          break;
        }

        __ set_info("g1_post_barrier_slow_id", dont_gc_arguments);

        Register addr = G4;
        Register cardtable = G5;
        Register tmp  = G1_scratch;
        Register tmp2 = G3_scratch;
        jbyte* byte_map_base = barrier_set_cast<CardTableModRefBS>(bs)->byte_map_base;

        Label not_already_dirty, restart, refill, young_card;

#ifdef _LP64
        __ srlx(addr, CardTableModRefBS::card_shift, addr);
#else
        __ srl(addr, CardTableModRefBS::card_shift, addr);
#endif

        AddressLiteral rs(byte_map_base);
        __ set(rs, cardtable);         // cardtable := <card table base>
        __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable]

        __ cmp_and_br_short(tmp, G1SATBCardTableModRefBS::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card);

        __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
        __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable]

        assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code");
        __ cmp_and_br_short(tmp, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);

        __ bind(young_card);
        // We didn't take the branch, so we're already dirty: return.
        // Use return-from-leaf
        __ retl();
        __ delayed()->nop();

        // Not dirty.
        __ bind(not_already_dirty);

        // Get cardtable + tmp into a reg by itself
        __ add(addr, cardtable, tmp2);

        // First, dirty it.
        __ stb(G0, tmp2, 0);  // [cardPtr] := 0  (i.e., dirty).

        Register tmp3 = cardtable;
        Register tmp4 = tmp;

        // these registers are now dead
        addr = cardtable = tmp = noreg;

        int dirty_card_q_index_byte_offset =
          in_bytes(JavaThread::dirty_card_queue_offset() +
                   PtrQueue::byte_offset_of_index());
        int dirty_card_q_buf_byte_offset =
          in_bytes(JavaThread::dirty_card_queue_offset() +
                   PtrQueue::byte_offset_of_buf());

        __ bind(restart);

        // Get the index into the update buffer. PtrQueue::_index is
        // a size_t so ld_ptr is appropriate here.
        __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, tmp3);

        // index == 0?
        __ cmp_and_brx_short(tmp3, G0, Assembler::equal,  Assembler::pn, refill);

        __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, tmp4);
        __ sub(tmp3, oopSize, tmp3);

        __ st_ptr(tmp2, tmp4, tmp3);  // [_buf + index] := <address_of_card>
        // Use return-from-leaf
        __ retl();
        __ delayed()->st_ptr(tmp3, G2_thread, dirty_card_q_index_byte_offset);

        __ bind(refill);
        __ save_frame(0);

        __ mov(tmp2, L0);
        __ mov(tmp3, L1);
        __ mov(tmp4, L2);

        __ call_VM_leaf(L7_thread_cache,
                        CAST_FROM_FN_PTR(address,
                                         DirtyCardQueueSet::handle_zero_index_for_thread),
                                         G2_thread);

        __ mov(L0, tmp2);
        __ mov(L1, tmp3);
        __ mov(L2, tmp4);

        __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
        __ delayed()->restore();
      }
      break;
#endif // INCLUDE_ALL_GCS

    case predicate_failed_trap_id:
      {
        __ set_info("predicate_failed_trap", dont_gc_arguments);
        OopMap* oop_map = save_live_registers(sasm);

        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap));

        oop_maps = new OopMapSet();
        oop_maps->add_gc_map(call_offset, oop_map);

        DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
        assert(deopt_blob != NULL, "deoptimization blob must have been created");
        restore_live_registers(sasm);

        AddressLiteral dest(deopt_blob->unpack_with_reexecution());
        __ jump_to(dest, O0);
        __ delayed()->restore();
      }
      break;

    default:
      { __ set_info("unimplemented entry", dont_gc_arguments);
        __ save_frame(0);
        __ set((int)id, O1);
        __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), O1);
        __ should_not_reach_here();
      }
      break;
  }
  return oop_maps;
}
inline void MacroAssembler::fence()   { membar(LoadLoad | LoadStore | StoreLoad | StoreStore); }
inline void MacroAssembler::acquire() { membar(LoadLoad | LoadStore); }
inline void MacroAssembler::release() { membar(LoadStore | StoreStore); }