void read_counters(int *event1, int *event2, int *event3, int *event4)
{
	*event1 = __insn_mfspr(SPR_PERF_COUNT_0);
	*event2 = __insn_mfspr(SPR_PERF_COUNT_1);
	*event3 = __insn_mfspr(SPR_AUX_PERF_COUNT_0);
	*event4 = __insn_mfspr(SPR_AUX_PERF_COUNT_1);
}
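/*
 * Usage sketch (illustrative, not from the sources above): snapshot the
 * four counters around a measured region and report the deltas.
 * measure_region() is a hypothetical stand-in for the code being profiled.
 */
extern void measure_region(void);

void profile_region(int deltas[4])
{
	int before[4], after[4], i;

	read_counters(&before[0], &before[1], &before[2], &before[3]);
	measure_region();
	read_counters(&after[0], &after[1], &after[2], &after[3]);

	for (i = 0; i < 4; i++)
		deltas[i] = after[i] - before[i];
}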
/* Pause the DMA engine, then save off its state registers. */
static void save_tile_dma_state(struct tile_dma_state *dma)
{
	unsigned long state = __insn_mfspr(SPR_DMA_USER_STATUS);
	unsigned long post_suspend_state;

	/* If we're running, suspend the engine. */
	if ((state & DMA_STATUS_MASK) == SPR_DMA_STATUS__RUNNING_MASK)
		__insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__SUSPEND_MASK);

	/*
	 * Wait for the engine to idle, then save regs.  Note that we
	 * want to record the "running" bit from before suspension,
	 * and the "done" bit from after, so that we can properly
	 * distinguish a case where the user suspended the engine from
	 * the case where the kernel suspended as part of the context
	 * swap.
	 */
	do {
		post_suspend_state = __insn_mfspr(SPR_DMA_USER_STATUS);
	} while (post_suspend_state & SPR_DMA_STATUS__BUSY_MASK);

	dma->src = __insn_mfspr(SPR_DMA_SRC_ADDR);
	dma->src_chunk = __insn_mfspr(SPR_DMA_SRC_CHUNK_ADDR);
	dma->dest = __insn_mfspr(SPR_DMA_DST_ADDR);
	dma->dest_chunk = __insn_mfspr(SPR_DMA_DST_CHUNK_ADDR);
	dma->strides = __insn_mfspr(SPR_DMA_STRIDE);
	dma->chunk_size = __insn_mfspr(SPR_DMA_CHUNK_SIZE);
	dma->byte = __insn_mfspr(SPR_DMA_BYTE);
	dma->status = (state & SPR_DMA_STATUS__RUNNING_MASK) |
		(post_suspend_state & SPR_DMA_STATUS__DONE_MASK);
}
/*
 * Get current overflow status of each performance counter,
 * and auxiliary performance counter.
 */
unsigned long pmc_get_overflow(void)
{
	unsigned long status;

	/*
	 * merge base+aux into a single vector
	 */
	status = __insn_mfspr(SPR_PERF_COUNT_STS);
	status |= __insn_mfspr(SPR_AUX_PERF_COUNT_STS) << TILE_BASE_COUNTERS;
	return status;
}
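/*
 * Sketch (not from the sources above): decode the merged overflow vector
 * returned by pmc_get_overflow().  Bits below TILE_BASE_COUNTERS are the
 * base counters; bits at or above it are the aux counters.
 */
void report_overflows(void)
{
	unsigned long status = pmc_get_overflow();
	int bit;

	for (bit = 0; status != 0; bit++, status >>= 1) {
		if (!(status & 1))
			continue;
		if (bit < TILE_BASE_COUNTERS)
			pr_info("base perf counter %d overflowed\n", bit);
		else
			pr_info("aux perf counter %d overflowed\n",
				bit - TILE_BASE_COUNTERS);
	}
}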
cycles_t get_cycles(void)
{
	unsigned int high = __insn_mfspr(SPR_CYCLE_HIGH);
	unsigned int low = __insn_mfspr(SPR_CYCLE_LOW);
	unsigned int high2 = __insn_mfspr(SPR_CYCLE_HIGH);

	while (unlikely(high != high2)) {
		low = __insn_mfspr(SPR_CYCLE_LOW);
		high = high2;
		high2 = __insn_mfspr(SPR_CYCLE_HIGH);
	}

	return (((cycles_t)high) << 32) | low;
}
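/*
 * Usage sketch (illustrative): time a region in cycles.  The re-read loop
 * in get_cycles() above guards against SPR_CYCLE_LOW wrapping (and carrying
 * into SPR_CYCLE_HIGH) between the two reads, so the 64-bit result is
 * always self-consistent.
 */
static cycles_t time_region(void (*fn)(void))
{
	cycles_t start = get_cycles();
	fn();
	return get_cycles() - start;
}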
struct task_struct *__sched _switch_to(struct task_struct *prev,
				       struct task_struct *next)
{
	/* DMA state is already saved; save off other arch state. */
	save_arch_state(&prev->thread);

#if CHIP_HAS_TILE_DMA()
	/*
	 * Restore DMA in new task if desired.
	 * Note that it is only safe to restart here since interrupts
	 * are disabled, so we can't take any DMATLB miss or access
	 * interrupts before we have finished switching stacks.
	 */
	if (next->thread.tile_dma_state.enabled) {
		restore_tile_dma_state(&next->thread);
		grant_dma_mpls();
	} else {
		restrict_dma_mpls();
	}
#endif

	/* Restore other arch state. */
	restore_arch_state(&next->thread);

#if CHIP_HAS_SN_PROC()
	/*
	 * Restart static network processor in the new process
	 * if it was running before.
	 */
	if (next->thread.sn_proc_running) {
		int snctl = __insn_mfspr(SPR_SNCTL);
		__insn_mtspr(SPR_SNCTL, snctl & ~SPR_SNCTL__FRZPROC_MASK);
	}
#endif

#ifdef CONFIG_HARDWALL
	/* Enable or disable access to the network registers appropriately. */
	if (prev->thread.hardwall != NULL) {
		if (next->thread.hardwall == NULL)
			restrict_network_mpls();
	} else if (next->thread.hardwall != NULL) {
		grant_network_mpls();
	}
#endif

	/*
	 * Switch kernel SP, PC, and callee-saved registers.
	 * In the context of the new task, return the old task pointer
	 * (i.e. the task that actually called __switch_to).
	 * Pass the value to use for SYSTEM_SAVE_K_0 when we reset our sp.
	 */
	return __switch_to(prev, next, next_current_ksp0(next));
}
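/*
 * Sketch (not kernel code): the hardwall MPL decision from _switch_to()
 * above as a standalone helper.  The boolean arguments are hypothetical
 * stand-ins for the prev/next hardwall pointers; the MPLs only change
 * when network-register ownership actually changes across the switch.
 */
static inline void hardwall_switch_mpls(int prev_has_hardwall,
					int next_has_hardwall)
{
	if (prev_has_hardwall && !next_has_hardwall)
		restrict_network_mpls();	/* losing access */
	else if (!prev_has_hardwall && next_has_hardwall)
		grant_network_mpls();		/* gaining access */
	/* otherwise: both or neither own a hardwall; leave the MPLs alone */
}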
/* Restart a DMA that was running before we were context-switched out. */
static void restore_tile_dma_state(struct thread_struct *t)
{
	const struct tile_dma_state *dma = &t->tile_dma_state;

	/*
	 * The only way to restore the done bit is to run a zero
	 * length transaction.
	 */
	if ((dma->status & SPR_DMA_STATUS__DONE_MASK) &&
	    !(__insn_mfspr(SPR_DMA_USER_STATUS) & SPR_DMA_STATUS__DONE_MASK)) {
		__insn_mtspr(SPR_DMA_BYTE, 0);
		__insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK);
		while (__insn_mfspr(SPR_DMA_USER_STATUS) &
		       SPR_DMA_STATUS__BUSY_MASK)
			;
	}

	__insn_mtspr(SPR_DMA_SRC_ADDR, dma->src);
	__insn_mtspr(SPR_DMA_SRC_CHUNK_ADDR, dma->src_chunk);
	__insn_mtspr(SPR_DMA_DST_ADDR, dma->dest);
	__insn_mtspr(SPR_DMA_DST_CHUNK_ADDR, dma->dest_chunk);
	__insn_mtspr(SPR_DMA_STRIDE, dma->strides);
	__insn_mtspr(SPR_DMA_CHUNK_SIZE, dma->chunk_size);
	__insn_mtspr(SPR_DMA_BYTE, dma->byte);

	/*
	 * Restart the engine if we were running and not done.
	 * Clear a pending async DMA fault that we were waiting on return
	 * to user space to execute, since we expect the DMA engine
	 * to regenerate those faults for us now.  Note that we don't
	 * try to clear the TIF_ASYNC_TLB flag, since it's relatively
	 * harmless if set, and it covers both DMA and the SN processor.
	 */
	if ((dma->status & DMA_STATUS_MASK) == SPR_DMA_STATUS__RUNNING_MASK) {
		t->dma_async_tlb.fault_num = 0;
		__insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK);
	}
}
void _prepare_arch_switch(struct task_struct *next)
{
#if CHIP_HAS_SN_PROC()
	int snctl;
#endif
#if CHIP_HAS_TILE_DMA()
	struct tile_dma_state *dma = &current->thread.tile_dma_state;
	if (dma->enabled)
		save_tile_dma_state(dma);
#endif
#if CHIP_HAS_SN_PROC()
	/*
	 * Suspend the static network processor if it was running.
	 * We do not suspend the fabric itself, just like we don't
	 * try to suspend the UDN.
	 */
	snctl = __insn_mfspr(SPR_SNCTL);
	current->thread.sn_proc_running =
		(snctl & SPR_SNCTL__FRZPROC_MASK) == 0;
	if (current->thread.sn_proc_running)
		__insn_mtspr(SPR_SNCTL, snctl | SPR_SNCTL__FRZPROC_MASK);
#endif
}
/*
 * A separate 'blocked' method for put() so that backtraces and
 * profiles will clearly indicate that we're wasting time spinning on
 * egress availability rather than actually posting commands.
 */
int64_t __gxio_dma_queue_wait_for_credits(__gxio_dma_queue_t *dma_queue,
					  int64_t modifier)
{
	int backoff = 16;
	int64_t old;

	do {
		int i;

		/* Back off to avoid spamming memory networks. */
		for (i = backoff; i > 0; i--)
			__insn_mfspr(SPR_PASS);

		/* Check credits again. */
		__gxio_dma_queue_update_credits(dma_queue);
		old = __insn_fetchaddgez(&dma_queue->credits_and_next_index,
					 modifier);

		/* Calculate bounded exponential backoff for next iteration. */
		if (backoff < 256)
			backoff *= 2;
	} while (old + modifier < 0);

	return old;
}
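/*
 * Sketch of the same bounded exponential backoff idiom in isolation
 * (hypothetical helper, not part of the gxio library).  Reads of SPR_PASS
 * are side-effect-free and simply burn a little time off the memory
 * networks between retries.
 */
static void spin_until(int (*condition_met)(void *arg), void *arg)
{
	int backoff = 16;

	while (!condition_met(arg)) {
		int i;

		for (i = backoff; i > 0; i--)
			__insn_mfspr(SPR_PASS);

		if (backoff < 256)	/* cap the delay */
			backoff *= 2;
	}
}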
/*
 * This routine handles page faults.  It determines the address, and the
 * problem, and then passes it to handle_page_fault() for normal DTLB and
 * ITLB issues, and for DMA or SN processor faults when we are in user
 * space.  For the latter, if we're in kernel mode, we just save the
 * interrupt away appropriately and return immediately.  We can't do
 * page faults for user code while in kernel mode.
 */
void do_page_fault(struct pt_regs *regs, int fault_num,
		   unsigned long address, unsigned long write)
{
	int is_page_fault;

	/* This case should have been handled by do_page_fault_ics(). */
	BUG_ON(write & ~1);

#if CHIP_HAS_TILE_DMA()
	/*
	 * If it's a DMA fault, suspend the transfer while we're
	 * handling the miss; we'll restart after it's handled.  If we
	 * don't suspend, it's possible that this process could swap
	 * out and back in, and restart the engine since the DMA is
	 * still 'running'.
	 */
	if (fault_num == INT_DMATLB_MISS ||
	    fault_num == INT_DMATLB_ACCESS ||
	    fault_num == INT_DMATLB_MISS_DWNCL ||
	    fault_num == INT_DMATLB_ACCESS_DWNCL) {
		__insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__SUSPEND_MASK);
		while (__insn_mfspr(SPR_DMA_USER_STATUS) &
		       SPR_DMA_STATUS__BUSY_MASK)
			;
	}
#endif

	/* Validate fault num and decide if this is a first-time page fault. */
	switch (fault_num) {
	case INT_ITLB_MISS:
	case INT_DTLB_MISS:
#if CHIP_HAS_TILE_DMA()
	case INT_DMATLB_MISS:
	case INT_DMATLB_MISS_DWNCL:
#endif
#if CHIP_HAS_SN_PROC()
	case INT_SNITLB_MISS:
	case INT_SNITLB_MISS_DWNCL:
#endif
		is_page_fault = 1;
		break;

	case INT_DTLB_ACCESS:
#if CHIP_HAS_TILE_DMA()
	case INT_DMATLB_ACCESS:
	case INT_DMATLB_ACCESS_DWNCL:
#endif
		is_page_fault = 0;
		break;

	default:
		panic("Bad fault number %d in do_page_fault", fault_num);
	}

#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
	if (EX1_PL(regs->ex1) != USER_PL) {
		struct async_tlb *async;
		switch (fault_num) {
#if CHIP_HAS_TILE_DMA()
		case INT_DMATLB_MISS:
		case INT_DMATLB_ACCESS:
		case INT_DMATLB_MISS_DWNCL:
		case INT_DMATLB_ACCESS_DWNCL:
			async = &current->thread.dma_async_tlb;
			break;
#endif
#if CHIP_HAS_SN_PROC()
		case INT_SNITLB_MISS:
		case INT_SNITLB_MISS_DWNCL:
			async = &current->thread.sn_async_tlb;
			break;
#endif
		default:
			async = NULL;
		}
		if (async) {
			/*
			 * No vmalloc check required, so we can allow
			 * interrupts immediately at this point.
			 */
			local_irq_enable();

			set_thread_flag(TIF_ASYNC_TLB);
			if (async->fault_num != 0) {
				panic("Second async fault %d;"
				      " old fault was %d (%#lx/%ld)",
				      fault_num, async->fault_num,
				      address, write);
			}
			BUG_ON(fault_num == 0);
			async->fault_num = fault_num;
			async->is_fault = is_page_fault;
			async->is_write = write;
			async->address = address;
			return;
		}
	}
#endif

	handle_page_fault(regs, fault_num, is_page_fault, address, write);
}
void finv_buffer_remote(void *buffer, size_t size, int hfh)
{
	char *p, *base;
	size_t step_size, load_count;

#ifdef __tilegx__
	const unsigned long STRIPE_WIDTH = 512;
#else
	const unsigned long STRIPE_WIDTH = 8192;
#endif

#ifdef __tilegx__
	uint_reg_t old_dstream_pf = __insn_mfspr(SPR_DSTREAM_PF);
	__insn_mtspr(SPR_DSTREAM_PF, 0);
#endif

	__finv_buffer(buffer, size);

	__insn_mf();

	if (hfh) {
		step_size = L2_CACHE_BYTES;
#ifdef __tilegx__
		load_count = (size + L2_CACHE_BYTES - 1) / L2_CACHE_BYTES;
#else
		load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) *
			     (1 << CHIP_LOG_NUM_MSHIMS());
#endif
	} else {
		step_size = STRIPE_WIDTH;
		load_count = (1 << CHIP_LOG_NUM_MSHIMS());
	}

	p = (char *)buffer + size - 1;
	force_load(p);

	p -= step_size;
	p = (char *)((unsigned long)p | (step_size - 1));

	base = p - (step_size * (load_count - 2));
	if ((unsigned long)base < (unsigned long)buffer)
		base = buffer;

#pragma unroll 8
	for (; p >= base; p -= step_size)
		force_load(p);

	p = (char *)buffer + size - 1;
	__insn_inv(p);
	p -= step_size;
	p = (char *)((unsigned long)p | (step_size - 1));
	for (; p >= base; p -= step_size)
		__insn_inv(p);

	__insn_mf();

#ifdef __tilegx__
	__insn_mtspr(SPR_DSTREAM_PF, old_dstream_pf);
#endif
}
void *net_thread(void *arg)
{
	int iix = (uintptr_t)arg;                    /* Ingress interface index */
	int eix;                                     /* Egress interface index */
	int i, n;                                    /* Index, Number */
	gxio_mpipe_iqueue_t *iqueue = iqueues[iix];  /* Ingress queue */
	gxio_mpipe_equeue_t *equeue;                 /* Egress queue */
	gxio_mpipe_idesc_t *idescs;                  /* Ingress packet descriptors */
	gxio_mpipe_edesc_t edescs[MAXBATCH];         /* Egress descriptors */
	long slot;

	/* Set up the egress queue. */
	switch (iix) {
	case 0: eix = 1; break;
	case 1: eix = 0; break;
	case 2: eix = 3; break;
	case 3: eix = 2; break;
	default:
		tmc_task_die("Invalid interface index, %d", iix);
		break;
	}
	equeue = &equeues[eix];                      /* Egress queue */

	/* Bind to a single CPU. */
	if (tmc_cpus_set_my_cpu(tmc_cpus_find_nth_cpu(&cpus, DTILEBASE + iix)) < 0) {
		tmc_task_die("Failed to setup CPU affinity\n");
	}
	if (set_dataplane(0) < 0) {
		tmc_task_die("Failed to setup dataplane\n");
	}

	/* Line up all network threads. */
	tmc_sync_barrier_wait(&syncbar);
	tmc_spin_barrier_wait(&spinbar);

	if (iix == 0) {
		/* Pause briefly, to let everyone finish passing the barrier. */
		for (i = 0; i < 10000; i++)
			__insn_mfspr(SPR_PASS);

		/* Allow packets to flow (on all links). */
		sim_enable_mpipe_links(mpipei, -1);
	}

	/*---------------------------------------------------------------*/
	/* Process (forward) packets.                                    */
	/*---------------------------------------------------------------*/

	while (1) {
		/* Receive packet(s). */
		n = gxio_mpipe_iqueue_peek(iqueue, &idescs);
		if (n <= 0)
			continue;
		else if (n > 16)
			n = 16;  //TODO: Experiment with this number.

#if 0
		printf("[%d] Get packet(s), n=%d\n", iix, n);
#endif

		/* Prefetch packet descriptors from L3 to L1. */
		tmc_mem_prefetch(idescs, n * sizeof(*idescs));

		/* Reserve slots.  NOTE: This might spin. */
		slot = gxio_mpipe_equeue_reserve_fast(equeue, n);

		/* Process packet(s). */
		for (i = 0; i < n; i++) {
			/*
			 * Detect call(s), clone the packet and pass it to
			 * another tile, if necessary.
			 * TODO: For now, inspect and record the packet using
			 * this tile.
			 */
			if (ccap_detect_call(&idescs[i])) {
				ccap_trace_add(0, &idescs[i]);  //TODO: Use actual link number.
			}

			/* Send the packets out on the peer port. */
			gxio_mpipe_edesc_copy_idesc(&edescs[i], &idescs[i]);

#if 1
			/* Drop "error" packets (but ignore "checksum" problems). */
			if (idescs[i].be || idescs[i].me ||
			    idescs[i].tr || idescs[i].ce) {
				edescs[i].ns = 1;
			}
#endif

			gxio_mpipe_equeue_put_at(equeue, edescs[i], slot + i);
			gxio_mpipe_iqueue_consume(iqueue, &idescs[i]);
		}
	}

	/* Make the compiler happy. */
	return (void *)NULL;
}
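/*
 * Sketch (not from the example above): the switch that pairs port 0 with 1
 * and port 2 with 3 can also be written as a single XOR, valid only while
 * the interfaces come in adjacent pairs.
 */
static inline int peer_port(int iix)
{
	return iix ^ 1;		/* 0->1, 1->0, 2->3, 3->2 */
}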
/*
 * This routine is responsible for faulting in user pages.
 * It passes the work off to one of the appropriate routines.
 * It returns true if the fault was successfully handled.
 */
static int handle_page_fault(struct pt_regs *regs,
			     int fault_num,
			     int is_page_fault,
			     unsigned long address,
			     int write)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned long stack_offset;
	int fault;
	int si_code;
	int is_kernel_mode;
	pgd_t *pgd;
	unsigned int flags;

	/* on TILE, protection faults are always writes */
	if (!is_page_fault)
		write = 1;

	flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

	is_kernel_mode = (EX1_PL(regs->ex1) != USER_PL);

	tsk = validate_current();

	/*
	 * Check to see if we might be overwriting the stack, and bail
	 * out if so.  The page fault code is a relatively likely
	 * place to get trapped in an infinite regress, and once we
	 * overwrite the whole stack, it becomes very hard to recover.
	 */
	stack_offset = stack_pointer & (THREAD_SIZE-1);
	if (stack_offset < THREAD_SIZE / 8) {
		pr_alert("Potential stack overrun: sp %#lx\n", stack_pointer);
		show_regs(regs);
		pr_alert("Killing current process %d/%s\n",
			 tsk->pid, tsk->comm);
		do_group_exit(SIGKILL);
	}

	/*
	 * Early on, we need to check for migrating PTE entries;
	 * see homecache.c.  If we find a migrating PTE, we wait until
	 * the backing page claims to be done migrating, then we proceed.
	 * For kernel PTEs, we rewrite the PTE and return and retry.
	 * Otherwise, we treat the fault like a normal "no PTE" fault,
	 * rather than trying to patch up the existing PTE.
	 */
	pgd = get_current_pgd();
	if (handle_migrating_pte(pgd, fault_num, address, regs->pc,
				 is_kernel_mode, write))
		return 1;

	si_code = SEGV_MAPERR;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 *
	 * This verifies that the fault happens in kernel space
	 * and that the fault was not a protection fault.
	 */
	if (unlikely(address >= TASK_SIZE &&
		     !is_arch_mappable_range(address, 0))) {
		if (is_kernel_mode && is_page_fault &&
		    vmalloc_fault(pgd, address) >= 0)
			return 1;
		/*
		 * Don't take the mm semaphore here. If we fixup a prefetch
		 * fault we could otherwise deadlock.
		 */
		mm = NULL;  /* happy compiler */
		vma = NULL;
		goto bad_area_nosemaphore;
	}

	/*
	 * If we're trying to touch user-space addresses, we must
	 * be either at PL0, or else with interrupts enabled in the
	 * kernel, so either way we can re-enable interrupts here
	 * unless we are doing atomic access to user space with
	 * interrupts disabled.
	 */
	if (!(regs->flags & PT_FLAGS_DISABLE_IRQ))
		local_irq_enable();

	mm = tsk->mm;

	/*
	 * If we're in an interrupt, have no user context or are running in an
	 * atomic region then we must not take the fault.
	 */
	if (in_atomic() || !mm) {
		vma = NULL;  /* happy compiler */
		goto bad_area_nosemaphore;
	}

	if (!is_kernel_mode)
		flags |= FAULT_FLAG_USER;

	/*
	 * When running in the kernel we expect faults to occur only to
	 * addresses in user space.  All other faults represent errors in the
	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
	 * erroneous fault occurring in a code path which already holds mmap_sem
	 * we will deadlock attempting to validate the fault against the
	 * address space.  Luckily the kernel only validly references user
	 * space from well defined areas of code, which are listed in the
	 * exceptions table.
	 *
	 * As the vast majority of faults will be valid we will only perform
	 * the source reference check when there is a possibility of a deadlock.
	 * Attempt to lock the address space, if we cannot we then validate the
	 * source.  If this is invalid we can skip the address space check,
	 * thus avoiding the deadlock.
	 */
	if (!down_read_trylock(&mm->mmap_sem)) {
		if (is_kernel_mode &&
		    !search_exception_tables(regs->pc)) {
			vma = NULL;  /* happy compiler */
			goto bad_area_nosemaphore;
		}
		down_read(&mm->mmap_sem);
	}

	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (regs->sp < PAGE_OFFSET) {
		/*
		 * accessing the stack below sp is always a bug.
		 */
		if (address < regs->sp)
			goto bad_area;
	}
	if (expand_stack(vma, address))
		goto bad_area;

/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
	si_code = SEGV_ACCERR;
	if (fault_num == INT_ITLB_MISS) {
		if (!(vma->vm_flags & VM_EXEC))
			goto bad_area;
	} else if (write) {
#ifdef TEST_VERIFY_AREA
		if (!is_page_fault && regs->cs == KERNEL_CS)
			pr_err("WP fault at "REGFMT"\n", regs->eip);
#endif
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		flags |= FAULT_FLAG_WRITE;
	} else {
		if (!is_page_fault || !(vma->vm_flags & VM_READ))
			goto bad_area;
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, address, flags);
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}
	if (fault & VM_FAULT_MAJOR)
		tsk->maj_flt++;
	else
		tsk->min_flt++;

#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
	/*
	 * If this was an asynchronous fault,
	 * restart the appropriate engine.
	 */
	switch (fault_num) {
#if CHIP_HAS_TILE_DMA()
	case INT_DMATLB_MISS:
	case INT_DMATLB_MISS_DWNCL:
	case INT_DMATLB_ACCESS:
	case INT_DMATLB_ACCESS_DWNCL:
		__insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK);
		break;
#endif
#if CHIP_HAS_SN_PROC()
	case INT_SNITLB_MISS:
	case INT_SNITLB_MISS_DWNCL:
		__insn_mtspr(SPR_SNCTL,
			     __insn_mfspr(SPR_SNCTL) &
			     ~SPR_SNCTL__FRZPROC_MASK);
		break;
#endif
	}
#endif

	up_read(&mm->mmap_sem);
	return 1;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (!is_kernel_mode) {
		/*
		 * It's possible to have interrupts off here.
		 */
		local_irq_enable();

		force_sig_info_fault("segfault", SIGSEGV, si_code, address,
				     fault_num, tsk, regs);
		return 0;
	}

no_context:
	/* Are we prepared to handle this kernel fault?  */
	if (fixup_exception(regs))
		return 0;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */

	bust_spinlocks(1);

	/* FIXME: no lookup_address() yet */
#ifdef SUPPORT_LOOKUP_ADDRESS
	if (fault_num == INT_ITLB_MISS) {
		pte_t *pte = lookup_address(address);

		if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
			pr_crit("kernel tried to execute"
				" non-executable page - exploit attempt?"
				" (uid: %d)\n", current->uid);
	}
#endif
	if (address < PAGE_SIZE)
		pr_alert("Unable to handle kernel NULL pointer dereference\n");
	else
		pr_alert("Unable to handle kernel paging request\n");
	pr_alert(" at virtual address "REGFMT", pc "REGFMT"\n",
		 address, regs->pc);

	show_regs(regs);

	if (unlikely(tsk->pid < 2)) {
		panic("Kernel page fault running %s!",
		      is_idle_task(tsk) ? "the idle task" : "init");
	}

	/*
	 * More FIXME: we should probably copy the i386 here and
	 * implement a generic die() routine.  Not today.
	 */
#ifdef SUPPORT_DIE
	die("Oops", regs);
#endif
	bust_spinlocks(1);

	do_group_exit(SIGKILL);

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (is_kernel_mode)
		goto no_context;
	pagefault_out_of_memory();
	return 0;

do_sigbus:
	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die */
	if (is_kernel_mode)
		goto no_context;

	force_sig_info_fault("bus error", SIGBUS, BUS_ADRERR, address,
			     fault_num, tsk, regs);
	return 0;
}
void finv_buffer_remote(void *buffer, size_t size, int hfh)
{
	char *p, *base;
	size_t step_size, load_count;

	/*
	 * On TILEPro the striping granularity is a fixed 8KB; on
	 * TILE-Gx it is configurable, and we rely on the fact that
	 * the hypervisor always configures maximum striping, so that
	 * bits 9 and 10 of the PA are part of the stripe function, so
	 * every 512 bytes we hit a striping boundary.
	 */
#ifdef __tilegx__
	const unsigned long STRIPE_WIDTH = 512;
#else
	const unsigned long STRIPE_WIDTH = 8192;
#endif

#ifdef __tilegx__
	/*
	 * On TILE-Gx, we must disable the dstream prefetcher before doing
	 * a cache flush; otherwise, we could end up with data in the cache
	 * that we don't want there.  Note that normally we'd do an mf
	 * after the SPR write to disable the prefetcher, but we do one
	 * below, before any further loads, so there's no need to do it
	 * here.
	 */
	uint_reg_t old_dstream_pf = __insn_mfspr(SPR_DSTREAM_PF);
	__insn_mtspr(SPR_DSTREAM_PF, 0);
#endif

	/*
	 * Flush and invalidate the buffer out of the local L1/L2
	 * and request the home cache to flush and invalidate as well.
	 */
	__finv_buffer(buffer, size);

	/*
	 * Wait for the home cache to acknowledge that it has processed
	 * all the flush-and-invalidate requests.  This does not mean
	 * that the flushed data has reached the memory controller yet,
	 * but it does mean the home cache is processing the flushes.
	 */
	__insn_mf();

	/*
	 * Issue a load to the last cache line, which can't complete
	 * until all the previously-issued flushes to the same memory
	 * controller have also completed.  If we weren't striping
	 * memory, that one load would be sufficient, but since we may
	 * be, we also need to back up to the last load issued to
	 * another memory controller, which would be the point where
	 * we crossed a "striping" boundary (the granularity of striping
	 * across memory controllers).  Keep backing up and doing this
	 * until we are before the beginning of the buffer, or have
	 * hit all the controllers.
	 *
	 * If we are flushing a hash-for-home buffer, it's even worse.
	 * Each line may be homed on a different tile, and each tile
	 * may have up to four lines that are on different
	 * controllers.  So as we walk backwards, we have to touch
	 * enough cache lines to satisfy these constraints.  In
	 * practice this ends up being close enough to "load from
	 * every cache line on a full memory stripe on each
	 * controller" that we simply do that, to simplify the logic.
	 *
	 * On TILE-Gx the hash-for-home function is much more complex,
	 * with the upshot being we can't readily guarantee we have
	 * hit both entries in the 128-entry AMT that were hit by any
	 * load in the entire range, so we just re-load them all.
	 * With larger buffers, we may want to consider using a hypervisor
	 * trap to issue loads directly to each hash-for-home tile for
	 * each controller (doing it from Linux would trash the TLB).
	 */
	if (hfh) {
		step_size = L2_CACHE_BYTES;
#ifdef __tilegx__
		load_count = (size + L2_CACHE_BYTES - 1) / L2_CACHE_BYTES;
#else
		load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) *
			     (1 << CHIP_LOG_NUM_MSHIMS());
#endif
	} else {
		step_size = STRIPE_WIDTH;
		load_count = (1 << CHIP_LOG_NUM_MSHIMS());
	}

	/* Load the last byte of the buffer. */
	p = (char *)buffer + size - 1;
	force_load(p);

	/* Bump down to the end of the previous stripe or cache line. */
	p -= step_size;
	p = (char *)((unsigned long)p | (step_size - 1));

	/* Figure out how far back we need to go. */
	base = p - (step_size * (load_count - 2));
	if ((unsigned long)base < (unsigned long)buffer)
		base = buffer;

	/* Fire all the loads we need. */
	for (; p >= base; p -= step_size)
		force_load(p);

	/*
	 * Repeat, but with finv's instead of loads, to get rid of the
	 * data we just loaded into our own cache and the old home L3.
	 * The finv's are guaranteed not to actually flush the data in
	 * the buffer back to their home, since we just read it, so the
	 * lines are clean in cache; we will only invalidate those lines.
	 */
	p = (char *)buffer + size - 1;
	__insn_finv(p);
	p -= step_size;
	p = (char *)((unsigned long)p | (step_size - 1));
	for (; p >= base; p -= step_size)
		__insn_finv(p);

	/* Wait for these finv's (and thus the first finvs) to be done. */
	__insn_mf();

#ifdef __tilegx__
	/* Reenable the prefetcher. */
	__insn_mtspr(SPR_DSTREAM_PF, old_dstream_pf);
#endif
}
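/*
 * Worked example (hypothetical configuration: two memory controllers,
 * i.e. CHIP_LOG_NUM_MSHIMS() == 1, 64-byte L2 lines, 8 KB stripe) of the
 * two back-off geometries chosen above.
 */
#include <stdio.h>

int main(void)
{
	const unsigned long l2_cache_bytes = 64;
	const unsigned long stripe_width = 8192;
	const unsigned long num_mshims = 2;	/* 1 << CHIP_LOG_NUM_MSHIMS() */

	/* Non-hash-for-home: one load per stripe, one stripe per controller. */
	unsigned long normal_step = stripe_width;
	unsigned long normal_loads = num_mshims;

	/* Hash-for-home (TILEPro formula): one load per cache line across a
	 * full stripe on each controller. */
	unsigned long hfh_step = l2_cache_bytes;
	unsigned long hfh_loads = (stripe_width / l2_cache_bytes) * num_mshims;

	printf("normal: %lu loads, %lu bytes apart\n", normal_loads, normal_step);
	printf("hfh:    %lu loads, %lu bytes apart\n", hfh_loads, hfh_step);
	/* prints "normal: 2 loads, 8192 bytes apart"
	 *    and "hfh:    256 loads, 64 bytes apart" */
	return 0;
}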
/*
 * The interrupt handling path, implemented in terms of HV interrupt
 * emulation on TILE64 and TILEPro, and IPI hardware on TILE-Gx.
 */
void tile_dev_intr(struct pt_regs *regs, int intnum)
{
	int depth = __get_cpu_var(irq_depth)++;
	unsigned long original_irqs;
	unsigned long remaining_irqs;
	struct pt_regs *old_regs;

#if CHIP_HAS_IPI()
	/*
	 * Pending interrupts are listed in an SPR.  We might be
	 * nested, so be sure to only handle irqs that weren't already
	 * masked by a previous interrupt.  Then, mask out the ones
	 * we're going to handle.
	 */
	unsigned long masked = __insn_mfspr(SPR_IPI_MASK_K);
	original_irqs = __insn_mfspr(SPR_IPI_EVENT_K) & ~masked;
	__insn_mtspr(SPR_IPI_MASK_SET_K, original_irqs);
#else
	/*
	 * Hypervisor performs the equivalent of the Gx code above and
	 * then puts the pending interrupt mask into a system save reg
	 * for us to find.
	 */
	original_irqs = __insn_mfspr(SPR_SYSTEM_SAVE_K_3);
#endif
	remaining_irqs = original_irqs;

	/* Track time spent here in an interrupt context. */
	old_regs = set_irq_regs(regs);
	irq_enter();

#ifdef CONFIG_DEBUG_STACKOVERFLOW
	/* Debugging check for stack overflow: less than 1/8th stack free? */
	{
		long sp = stack_pointer - (long) current_thread_info();
		if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) {
			pr_emerg("tile_dev_intr: "
				 "stack overflow: %ld\n",
				 sp - sizeof(struct thread_info));
			dump_stack();
		}
	}
#endif
	while (remaining_irqs) {
		unsigned long irq = __ffs(remaining_irqs);
		remaining_irqs &= ~(1UL << irq);

		/* Count device irqs; Linux IPIs are counted elsewhere. */
		if (irq != IRQ_RESCHEDULE)
			__get_cpu_var(irq_stat).irq_dev_intr_count++;

		generic_handle_irq(irq);
	}

	/*
	 * If we weren't nested, turn on all enabled interrupts,
	 * including any that were reenabled during interrupt
	 * handling.
	 */
	if (depth == 0)
		unmask_irqs(~__get_cpu_var(irq_disable_mask));

	__get_cpu_var(irq_depth)--;

	/*
	 * Track time spent against the current process again and
	 * process any softirqs if they are waiting.
	 */
	irq_exit();
	set_irq_regs(old_regs);
}
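/*
 * Standalone sketch of the __ffs() bit-walk used by the dispatch loop in
 * tile_dev_intr() above; handle_one() is a hypothetical per-event handler.
 */
extern void handle_one(unsigned long bit);

static void dispatch_pending(unsigned long pending)
{
	while (pending) {
		unsigned long bit = __ffs(pending);	/* lowest set bit */

		pending &= ~(1UL << bit);		/* clear it */
		handle_one(bit);			/* handle that event */
	}
}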
/*
 * This routine handles page faults.  It determines the address, and the
 * problem, and then passes it to handle_page_fault() for normal DTLB and
 * ITLB issues, and for DMA or SN processor faults when we are in user
 * space.  For the latter, if we're in kernel mode, we just save the
 * interrupt away appropriately and return immediately.  We can't do
 * page faults for user code while in kernel mode.
 */
void do_page_fault(struct pt_regs *regs, int fault_num,
		   unsigned long address, unsigned long write)
{
	int is_page_fault;

#ifdef CONFIG_KPROBES
	/*
	 * This is to notify the fault handler of the kprobes.  The
	 * exception code is redundant as it is also carried in REGS,
	 * but we pass it anyhow.
	 */
	if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1,
		       regs->faultnum, SIGSEGV) == NOTIFY_STOP)
		return;
#endif

#ifdef __tilegx__
	/*
	 * We don't need early do_page_fault_ics() support, since unlike
	 * Pro we don't need to worry about unlocking the atomic locks.
	 * There is only one current case in GX where we touch any memory
	 * under ICS other than our own kernel stack, and we handle that
	 * here.  (If we crash due to trying to touch our own stack,
	 * we're in too much trouble for C code to help out anyway.)
	 */
	if (write & ~1) {
		unsigned long pc = write & ~1;
		if (pc >= (unsigned long) __start_unalign_asm_code &&
		    pc < (unsigned long) __end_unalign_asm_code) {
			struct thread_info *ti = current_thread_info();
			/*
			 * Our EX_CONTEXT is still what it was from the
			 * initial unalign exception, but now we've faulted
			 * on the JIT page.  We would like to complete the
			 * page fault however is appropriate, and then retry
			 * the instruction that caused the unalign exception.
			 * Our state has been "corrupted" by setting the low
			 * bit in "sp", and stashing r0..r3 in the
			 * thread_info area, so we revert all of that, then
			 * continue as if this were a normal page fault.
			 */
			regs->sp &= ~1UL;
			regs->regs[0] = ti->unalign_jit_tmp[0];
			regs->regs[1] = ti->unalign_jit_tmp[1];
			regs->regs[2] = ti->unalign_jit_tmp[2];
			regs->regs[3] = ti->unalign_jit_tmp[3];
			write &= 1;
		} else {
			pr_alert("%s/%d: ICS set at page fault at %#lx: %#lx\n",
				 current->comm, current->pid, pc, address);
			show_regs(regs);
			do_group_exit(SIGKILL);
			return;
		}
	}
#else
	/* This case should have been handled by do_page_fault_ics(). */
	BUG_ON(write & ~1);
#endif

#if CHIP_HAS_TILE_DMA()
	/*
	 * If it's a DMA fault, suspend the transfer while we're
	 * handling the miss; we'll restart after it's handled.  If we
	 * don't suspend, it's possible that this process could swap
	 * out and back in, and restart the engine since the DMA is
	 * still 'running'.
	 */
	if (fault_num == INT_DMATLB_MISS ||
	    fault_num == INT_DMATLB_ACCESS ||
	    fault_num == INT_DMATLB_MISS_DWNCL ||
	    fault_num == INT_DMATLB_ACCESS_DWNCL) {
		__insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__SUSPEND_MASK);
		while (__insn_mfspr(SPR_DMA_USER_STATUS) &
		       SPR_DMA_STATUS__BUSY_MASK)
			;
	}
#endif

	/* Validate fault num and decide if this is a first-time page fault. */
	switch (fault_num) {
	case INT_ITLB_MISS:
	case INT_DTLB_MISS:
#if CHIP_HAS_TILE_DMA()
	case INT_DMATLB_MISS:
	case INT_DMATLB_MISS_DWNCL:
#endif
		is_page_fault = 1;
		break;

	case INT_DTLB_ACCESS:
#if CHIP_HAS_TILE_DMA()
	case INT_DMATLB_ACCESS:
	case INT_DMATLB_ACCESS_DWNCL:
#endif
		is_page_fault = 0;
		break;

	default:
		panic("Bad fault number %d in do_page_fault", fault_num);
	}

#if CHIP_HAS_TILE_DMA()
	if (!user_mode(regs)) {
		struct async_tlb *async;
		switch (fault_num) {
#if CHIP_HAS_TILE_DMA()
		case INT_DMATLB_MISS:
		case INT_DMATLB_ACCESS:
		case INT_DMATLB_MISS_DWNCL:
		case INT_DMATLB_ACCESS_DWNCL:
			async = &current->thread.dma_async_tlb;
			break;
#endif
		default:
			async = NULL;
		}
		if (async) {
			/*
			 * No vmalloc check required, so we can allow
			 * interrupts immediately at this point.
			 */
			local_irq_enable();

			set_thread_flag(TIF_ASYNC_TLB);
			if (async->fault_num != 0) {
				panic("Second async fault %d;"
				      " old fault was %d (%#lx/%ld)",
				      fault_num, async->fault_num,
				      address, write);
			}
			BUG_ON(fault_num == 0);
			async->fault_num = fault_num;
			async->is_fault = is_page_fault;
			async->is_write = write;
			async->address = address;
			return;
		}
	}
#endif

	handle_page_fault(regs, fault_num, is_page_fault, address, write);
}
static void save_arch_state(struct thread_struct *t)
{
#if CHIP_HAS_SPLIT_INTR_MASK()
	t->interrupt_mask = __insn_mfspr(SPR_INTERRUPT_MASK_0_0) |
		((u64)__insn_mfspr(SPR_INTERRUPT_MASK_0_1) << 32);
#else
	t->interrupt_mask = __insn_mfspr(SPR_INTERRUPT_MASK_0);
#endif
	t->ex_context[0] = __insn_mfspr(SPR_EX_CONTEXT_0_0);
	t->ex_context[1] = __insn_mfspr(SPR_EX_CONTEXT_0_1);
	t->system_save[0] = __insn_mfspr(SPR_SYSTEM_SAVE_0_0);
	t->system_save[1] = __insn_mfspr(SPR_SYSTEM_SAVE_0_1);
	t->system_save[2] = __insn_mfspr(SPR_SYSTEM_SAVE_0_2);
	t->system_save[3] = __insn_mfspr(SPR_SYSTEM_SAVE_0_3);
	t->intctrl_0 = __insn_mfspr(SPR_INTCTRL_0_STATUS);
#if CHIP_HAS_PROC_STATUS_SPR()
	t->proc_status = __insn_mfspr(SPR_PROC_STATUS);
#endif
#if !CHIP_HAS_FIXED_INTVEC_BASE()
	t->interrupt_vector_base = __insn_mfspr(SPR_INTERRUPT_VECTOR_BASE_0);
#endif
#if CHIP_HAS_TILE_RTF_HWM()
	t->tile_rtf_hwm = __insn_mfspr(SPR_TILE_RTF_HWM);
#endif
#if CHIP_HAS_DSTREAM_PF()
	t->dstream_pf = __insn_mfspr(SPR_DSTREAM_PF);
#endif
}
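/*
 * Sketch of the matching restore path (not taken from this excerpt): each
 * mfspr in save_arch_state() becomes an mtspr of the saved value.  Field
 * and SPR names simply mirror the save side; the chip-conditional fields
 * at the end are omitted here for brevity.
 */
static void restore_arch_state_sketch(const struct thread_struct *t)
{
#if CHIP_HAS_SPLIT_INTR_MASK()
	__insn_mtspr(SPR_INTERRUPT_MASK_0_0, (u32)t->interrupt_mask);
	__insn_mtspr(SPR_INTERRUPT_MASK_0_1, t->interrupt_mask >> 32);
#else
	__insn_mtspr(SPR_INTERRUPT_MASK_0, t->interrupt_mask);
#endif
	__insn_mtspr(SPR_EX_CONTEXT_0_0, t->ex_context[0]);
	__insn_mtspr(SPR_EX_CONTEXT_0_1, t->ex_context[1]);
	__insn_mtspr(SPR_SYSTEM_SAVE_0_0, t->system_save[0]);
	__insn_mtspr(SPR_SYSTEM_SAVE_0_1, t->system_save[1]);
	__insn_mtspr(SPR_SYSTEM_SAVE_0_2, t->system_save[2]);
	__insn_mtspr(SPR_SYSTEM_SAVE_0_3, t->system_save[3]);
	__insn_mtspr(SPR_INTCTRL_0_STATUS, t->intctrl_0);
}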
/*
 * Flush and invalidate a VA range that is homed remotely on a single
 * core (if "!hfh") or homed via hash-for-home (if "hfh"), waiting
 * until the memory controller holds the flushed values.
 */
void finv_buffer_remote(void *buffer, size_t size, int hfh)
{
	char *p, *base;
	size_t step_size, load_count;
	const unsigned long STRIPE_WIDTH = 8192;

#ifdef __tilegx__
	/*
	 * On TILE-Gx, we must disable the dstream prefetcher before doing
	 * a cache flush; otherwise, we could end up with data in the cache
	 * that we don't want there.  Note that normally we'd do an mf
	 * after the SPR write to disable the prefetcher, but we do one
	 * below, before any further loads, so there's no need to do it
	 * here.
	 */
	uint_reg_t old_dstream_pf = __insn_mfspr(SPR_DSTREAM_PF);
	__insn_mtspr(SPR_DSTREAM_PF, 0);
#endif

	/*
	 * Flush and invalidate the buffer out of the local L1/L2
	 * and request the home cache to flush and invalidate as well.
	 */
	__finv_buffer(buffer, size);

	/*
	 * Wait for the home cache to acknowledge that it has processed
	 * all the flush-and-invalidate requests.  This does not mean
	 * that the flushed data has reached the memory controller yet,
	 * but it does mean the home cache is processing the flushes.
	 */
	__insn_mf();

	/*
	 * Issue a load to the last cache line, which can't complete
	 * until all the previously-issued flushes to the same memory
	 * controller have also completed.  If we weren't striping
	 * memory, that one load would be sufficient, but since we may
	 * be, we also need to back up to the last load issued to
	 * another memory controller, which would be the point where
	 * we crossed an 8KB boundary (the granularity of striping
	 * across memory controllers).  Keep backing up and doing this
	 * until we are before the beginning of the buffer, or have
	 * hit all the controllers.
	 *
	 * If we are flushing a hash-for-home buffer, it's even worse.
	 * Each line may be homed on a different tile, and each tile
	 * may have up to four lines that are on different
	 * controllers.  So as we walk backwards, we have to touch
	 * enough cache lines to satisfy these constraints.  In
	 * practice this ends up being close enough to "load from
	 * every cache line on a full memory stripe on each
	 * controller" that we simply do that, to simplify the logic.
	 *
	 * FIXME: See bug 9535 for some issues with this code.
	 */
	if (hfh) {
		step_size = L2_CACHE_BYTES;
		load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) *
			     (1 << CHIP_LOG_NUM_MSHIMS());
	} else {
		step_size = STRIPE_WIDTH;
		load_count = (1 << CHIP_LOG_NUM_MSHIMS());
	}

	/* Load the last byte of the buffer. */
	p = (char *)buffer + size - 1;
	force_load(p);

	/* Bump down to the end of the previous stripe or cache line. */
	p -= step_size;
	p = (char *)((unsigned long)p | (step_size - 1));

	/* Figure out how far back we need to go. */
	base = p - (step_size * (load_count - 2));
	if ((long)base < (long)buffer)
		base = buffer;

	/*
	 * Fire all the loads we need.  The MAF only has eight entries
	 * so we can have at most eight outstanding loads, so we
	 * unroll by that amount.
	 */
#pragma unroll 8
	for (; p >= base; p -= step_size)
		force_load(p);

	/*
	 * Repeat, but with inv's instead of loads, to get rid of the
	 * data we just loaded into our own cache and the old home L3.
	 * No need to unroll since inv's don't target a register.
	 */
	p = (char *)buffer + size - 1;
	__insn_inv(p);
	p -= step_size;
	p = (char *)((unsigned long)p | (step_size - 1));
	for (; p >= base; p -= step_size)
		__insn_inv(p);

	/* Wait for the load+inv's (and thus finvs) to have completed. */
	__insn_mf();

#ifdef __tilegx__
	/* Reenable the prefetcher. */
	__insn_mtspr(SPR_DSTREAM_PF, old_dstream_pf);
#endif
}
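/*
 * Usage sketch (hypothetical caller): make sure a remotely-homed,
 * non-hash-for-home buffer is stable at the memory controller before some
 * other agent consumes it.  hand_off_to_device() is a placeholder, not an
 * API from this codebase.
 */
extern void hand_off_to_device(void *buf, size_t len);

static void publish_buffer(void *buf, size_t len)
{
	/* Buffer assumed homed on a single remote core, so hfh == 0. */
	finv_buffer_remote(buf, len, 0);

	hand_off_to_device(buf, len);
}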