/** Appends along the slowest (last) dimension. */
static hid_t make_dataset(ndio_hdf5_t self, nd_type_id_t type_id, unsigned ndim, size_t *shape, hid_t *filespace)
{ hsize_t *sh=0,*ori=0,*ext=0;
  TRY(self->isw);
  STACK_ALLOC(hsize_t,sh ,ndim);
  STACK_ALLOC(hsize_t,ori,ndim);
  STACK_ALLOC(hsize_t,ext,ndim);
  if(self->dataset>=0) // dataset already exists...needs extending, append on slowest dim
  { HTRY(H5Sget_simple_extent_dims(space(self),sh,NULL));
    ZERO(hsize_t,ori,ndim);
    ori[0]=sh[0];
    sh[0]+=shape[ndim-1];
    reverse_hsz_sz(ndim,ext,shape);
    HTRY(H5Dextend(self->dataset,sh));
    HTRY(*filespace=H5Dget_space(self->dataset));
    HTRY(H5Sselect_hyperslab(*filespace,H5S_SELECT_SET,ori,NULL,ext,NULL));
  } else
  { HTRY(self->dataset=H5Dcreate(
          self->file,name(self),
          nd_to_hdf5_type(type_id),
          make_space(self,ndim,shape),
          H5P_DEFAULT, /*(rare) link creation props*/
          dataset_creation_properties(
            /*set_deflate*/(
              set_chunk(self,ndim,shape))),
          H5P_DEFAULT  /*(rare) dataset access props*/
          ));
    reverse_hsz_sz(ndim,sh,shape);
    *filespace=H5S_ALL;
  }
  HTRY(H5Dset_extent(self->dataset,sh));
  return self->dataset;
Error:
  return -1;
}
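/*
 * Illustrative sketch (not part of the plugin): the append path above boils
 * down to the following raw HDF5 calls -- grow the dataset with
 * H5Dset_extent (H5Dextend in the older API), then select the newly added
 * hyperslab for writing.  The helper name "append_rows", the 2-D shape and
 * the double element type are hypothetical; the dataset must have been
 * created chunked with H5S_UNLIMITED maxdims for the extent to grow.
 */
#include <hdf5.h>

static herr_t append_rows(hid_t dset, const double *rows, hsize_t nrows, hsize_t ncols)
{ hsize_t dims[2], start[2], count[2];
  hid_t filespace, memspace;
  herr_t st;
  filespace = H5Dget_space(dset);
  H5Sget_simple_extent_dims(filespace, dims, NULL);   /* current extent */
  H5Sclose(filespace);
  start[0] = dims[0];  start[1] = 0;                  /* append after the last row */
  count[0] = nrows;    count[1] = ncols;
  dims[0] += nrows;
  if (H5Dset_extent(dset, dims) < 0) return -1;       /* grow the slowest dim */
  filespace = H5Dget_space(dset);
  H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, NULL, count, NULL);
  memspace = H5Screate_simple(2, count, NULL);
  st = H5Dwrite(dset, H5T_NATIVE_DOUBLE, memspace, filespace, H5P_DEFAULT, rows);
  H5Sclose(memspace);
  H5Sclose(filespace);
  return st;
}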
int
exec_setup_stack(struct lwp *l, struct exec_package *epp)
{
    u_long max_stack_size;
    u_long access_linear_min, access_size;
    u_long noaccess_linear_min, noaccess_size;

#ifndef USRSTACK32
#define USRSTACK32 (0x00000000ffffffffL & ~PGOFSET)
#endif

    if (epp->ep_flags & EXEC_32) {
        epp->ep_minsaddr = USRSTACK32;
        max_stack_size = MAXSSIZ;
    } else {
        epp->ep_minsaddr = USRSTACK;
        max_stack_size = MAXSSIZ;
    }

    epp->ep_ssize = l->l_proc->p_rlimit[RLIMIT_STACK].rlim_cur;

#ifdef PAX_ASLR
    pax_aslr_stack(l, epp, &max_stack_size);
#endif /* PAX_ASLR */

    l->l_proc->p_stackbase = epp->ep_minsaddr;

    epp->ep_maxsaddr = (u_long)STACK_GROW(epp->ep_minsaddr, max_stack_size);

    /*
     * set up commands for stack.  note that this takes *two*, one to
     * map the part of the stack which we can access, and one to map
     * the part which we can't.
     *
     * arguably, it could be made into one, but that would require the
     * addition of another mapping proc, which is unnecessary
     */
    access_size = epp->ep_ssize;
    access_linear_min = (u_long)STACK_ALLOC(epp->ep_minsaddr, access_size);
    noaccess_size = max_stack_size - access_size;
    noaccess_linear_min = (u_long)STACK_ALLOC(STACK_GROW(epp->ep_minsaddr,
        access_size), noaccess_size);

    if (noaccess_size > 0 && noaccess_size <= MAXSSIZ) {
        NEW_VMCMD2(&epp->ep_vmcmds, vmcmd_map_zero, noaccess_size,
            noaccess_linear_min, NULL, 0, VM_PROT_NONE, VMCMD_STACK);
    }
    KASSERT(access_size > 0 && access_size <= MAXSSIZ);
    NEW_VMCMD2(&epp->ep_vmcmds, vmcmd_map_zero, access_size,
        access_linear_min, NULL, 0, VM_PROT_READ | VM_PROT_WRITE,
        VMCMD_STACK);

    return 0;
}
static hid_t make_space(ndio_hdf5_t self, unsigned ndims, size_t *shape)
{ hsize_t *maxdims=0,*dims=0;
  if(self->space!=-1) // may already exist (e.g. for an append); in that case reset the object
  { HTRY(H5Sclose(self->space));
    self->space=-1;
  }
  TRY(self->isw);
  STACK_ALLOC(hsize_t,maxdims,ndims);
  STACK_ALLOC(hsize_t,dims   ,ndims);
  { unsigned i;
    for(i=0;i<ndims;++i)
    { maxdims[i]=H5S_UNLIMITED;
      dims[ndims-1-i]=shape[i]; // HDF5 orders dims slowest-first; nd shapes are fastest-first
    }
  }
  return self->space=H5Screate_simple(ndims,dims,maxdims);
Error:
  return -1;
}
int
darwin_exec_setup_stack(struct lwp *l, struct exec_package *epp)
{
    u_long max_stack_size;
    u_long access_linear_min, access_size;
    u_long noaccess_linear_min, noaccess_size;

    if (epp->ep_flags & EXEC_32) {
        epp->ep_minsaddr = DARWIN_USRSTACK32;
        max_stack_size = MAXSSIZ;
    } else {
        epp->ep_minsaddr = DARWIN_USRSTACK;
        max_stack_size = MAXSSIZ;
    }
    epp->ep_maxsaddr = (u_long)STACK_GROW(epp->ep_minsaddr, max_stack_size);
    epp->ep_ssize = l->l_proc->p_rlimit[RLIMIT_STACK].rlim_cur;

    /*
     * set up commands for stack.  note that this takes *two*, one to
     * map the part of the stack which we can access, and one to map
     * the part which we can't.
     *
     * arguably, it could be made into one, but that would require the
     * addition of another mapping proc, which is unnecessary
     */
    access_size = epp->ep_ssize;
    access_linear_min = (u_long)STACK_ALLOC(epp->ep_minsaddr, access_size);
    noaccess_size = max_stack_size - access_size;
    noaccess_linear_min = (u_long)STACK_ALLOC(STACK_GROW(epp->ep_minsaddr,
        access_size), noaccess_size);

    if (noaccess_size > 0) {
        NEW_VMCMD2(&epp->ep_vmcmds, vmcmd_map_zero, noaccess_size,
            noaccess_linear_min, NULL, 0, VM_PROT_NONE, VMCMD_STACK);
    }
    KASSERT(access_size > 0);
    NEW_VMCMD2(&epp->ep_vmcmds, vmcmd_map_zero, access_size,
        access_linear_min, NULL, 0, VM_PROT_READ | VM_PROT_WRITE,
        VMCMD_STACK);

    return 0;
}
static unsigned hdf5_subarray(ndio_t file, nd_t dst, size_t *pos, size_t *step)
{ ndio_hdf5_t self=(ndio_hdf5_t)ndioContext(file);
  hsize_t *pos_,*shape_,*step_;
  hid_t m=-1,f=-1;
  STACK_ALLOC(hsize_t,pos_  ,ndndim(dst));
  STACK_ALLOC(hsize_t,shape_,ndndim(dst));
  STACK_ALLOC(hsize_t,step_ ,ndndim(dst));
  reverse_hsz_sz(ndndim(dst),shape_,ndshape(dst));
  reverse_hsz_sz(ndndim(dst),pos_  ,pos);
  reverse_hsz_sz(ndndim(dst),step_ ,step);
  HTRY(f=H5Dget_space(dataset(self)));
  HTRY(H5Sselect_hyperslab(f,H5S_SELECT_SET,pos_,step_,shape_,NULL/*block*/));
  HTRY(m=H5Screate_simple(ndndim(dst),shape_,NULL));
  HTRY(H5Dread(dataset(self),dtype(self),m,f,H5P_DEFAULT,nddata(dst)));
  H5Sclose(m); // release both the memory and file dataspaces
  H5Sclose(f);
  return 1;
Error:
  if(f>-1) H5Sclose(f);
  if(m>-1) H5Sclose(m);
  return 0;
}
int pass_audio_output(Filter_Audio *f_a, const int16_t *data, unsigned int samples)
{
    if (!f_a || (!f_a->echo_enabled && !f_a->gain_enabled)) {
        return -1;
    }

    unsigned int nsx_samples = f_a->fs / 100;
    if (!samples || (samples % nsx_samples) != 0) {
        return -1;
    }

    _Bool resample = 0;
    unsigned int resampled_samples = 0;
    if (f_a->fs != 16000) {
        samples = (samples / nsx_samples) * 160;
        nsx_samples = 160;
        resample = 1;
    }

    unsigned int temp_samples = samples;
    /* one float block followed by one int16 scratch block, both nsx_samples long */
    float *d_f = (float *)STACK_ALLOC(nsx_samples * sizeof(float) + nsx_samples * sizeof(int16_t));

    while (temp_samples) {
        if (resample) {
            int16_t *d = (int16_t *)(d_f + nsx_samples);
            downsample_audio_echo_in(f_a, d, data + resampled_samples);

            if (WebRtcAgc_AddFarend(f_a->gain_control, d, nsx_samples) == -1)
                return -1;

            S16ToFloatS16(d, nsx_samples, d_f);
            resampled_samples += f_a->fs / 100;
        } else {
            S16ToFloatS16(data + (samples - temp_samples), nsx_samples, d_f);
        }

        if (WebRtcAec_BufferFarend(f_a->echo_cancellation, d_f, nsx_samples) == -1) {
            return -1;
        }

        temp_samples -= nsx_samples;
    }

    return 0;
}
static ndio_hdf5_t set_chunk(ndio_hdf5_t self, unsigned ndim, size_t *shape)
{ hsize_t *sh;
  hid_t out;
  STACK_ALLOC(hsize_t,sh,ndim);
  reverse_hsz_sz(ndim,sh,shape);
  { int i=0;
    const int t=max(((int)ndim)-3,0);
    for(i=0;i<t && prod_hsz(sh,ndim)>CHUNK_THRESH;++i)
      sh[i]=1; // flatten outer dimensions
    while(prod_hsz(sh,ndim)>CHUNK_THRESH)
    { for(i=t;i<(int)ndim && prod_hsz(sh,ndim)>CHUNK_THRESH;++i)
        sh[i]/=2; // halve the remaining innermost dimensions
    }
  }
  HTRY(H5Pset_chunk(out=dataset_creation_properties(self),ndim,sh));
  return self;
Error:
  return 0;
}
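/*
 * Standalone sketch of the chunking heuristic above, outside the plugin:
 * outer dimensions are flattened to 1, then the innermost dimensions are
 * halved until the chunk holds at most THRESH elements.  THRESH and the
 * example shape are made up for illustration; the real code uses
 * CHUNK_THRESH and operates on hsize_t arrays.
 */
#include <stdio.h>

#define THRESH (64*1024)   /* hypothetical element-count limit per chunk */

static unsigned long prod(const unsigned long *v, unsigned n)
{ unsigned long p=1; unsigned i;
  for(i=0;i<n;++i) p*=v[i];
  return p;
}

int main(void)
{ unsigned long sh[5] = {7, 5, 512, 512, 512}; /* slowest ... fastest */
  const unsigned ndim = 5;
  const int t = (int)ndim - 3 > 0 ? (int)ndim - 3 : 0;
  int i;
  for(i=0;i<t && prod(sh,ndim)>THRESH;++i)
    sh[i]=1;                         /* flatten the outer dims first */
  while(prod(sh,ndim)>THRESH)
    for(i=t;i<(int)ndim && prod(sh,ndim)>THRESH;++i)
      sh[i]/=2;                      /* then halve the inner dims */
  for(i=0;i<(int)ndim;++i) printf("%lu ", sh[i]);
  printf("-> %lu elements per chunk\n", prod(sh,ndim));
  return 0;
}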
static nd_t hdf5_shape(ndio_t file)
{ hid_t s;
  nd_t out=0;
  unsigned ndims;
  hsize_t *sh=0;
  ndio_hdf5_t self=(ndio_hdf5_t)ndioContext(file);
  TRY(self->isr);
  TRY((s=space(self))>-1);
  TRY(out=ndinit());
  ndcast(out,hdf5_to_nd_type(dtype(self)));
  HTRY(ndims=H5Sget_simple_extent_ndims(space(self)));
  STACK_ALLOC(hsize_t,sh,ndims);
  HTRY(H5Sget_simple_extent_dims(space(self),sh,NULL));
  { unsigned i;
    for(i=0;i<ndims;++i)
      ndShapeSet(out,ndims-1-i,sh[i]);
  }
  return out;
Error:
  ndfree(out);
  return 0;
}
/*
 * create_elf_tables
 */
static int create_elf_tables_aux(struct linux_binprm *bprm,
        unsigned long ntdll_load_addr, elf_off_t ntdll_phoff, elf_half_t ntdll_phnum, unsigned long ntdll_start_thunk,
        unsigned long exeso_load_addr, elf_off_t exeso_phoff, elf_half_t exeso_phnum, unsigned long exeso_start_thunk,
        unsigned long interp_load_addr, unsigned long interp_entry, unsigned long init_entry)
{
    unsigned long p = bprm->p;
    int argc = bprm->argc;
    int envc = bprm->envc;
    elf_addr_t __user *argv;
    elf_addr_t __user *envp;
    elf_addr_t __user *sp;
    elf_addr_t __user *u_platform;
    const char *k_platform = ELF_PLATFORM;
    int items;
    elf_addr_t *elf_info;
    elf_addr_t *elf_info2;
    int ei_index = 0;
    const struct cred *cred = current_cred();

    /*
     * If this architecture has a platform capability string, copy it
     * to userspace.  In some cases (Sparc), this info is impossible
     * for userspace to get any other way, in others (i386) it is
     * merely difficult.
     */
    u_platform = NULL;
    if (k_platform) {
        size_t len = strlen(k_platform) + 1;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */
        p = arch_align_stack(p);

        u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
        if (__copy_to_user(u_platform, k_platform, len))
            return -EFAULT;
    }

    /* Create the ELF interpreter info */
    elf_info = (elf_addr_t *)current->mm->saved_auxv;
#define NEW_AUX_ENT(id, val) \
    do { elf_info[ei_index++] = id; elf_info[ei_index++] = val; } while (0)

#ifdef ARCH_DLINFO11
    /*
     * ARCH_DLINFO must come first so PPC can do its special alignment of
     * AUXV.
     */
    ARCH_DLINFO;
#endif
    NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
    NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
    NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
    NEW_AUX_ENT(AT_PHDR, ntdll_load_addr + ntdll_phoff);
    NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
    NEW_AUX_ENT(AT_PHNUM, ntdll_phnum);
    NEW_AUX_ENT(AT_BASE, interp_load_addr);
    NEW_AUX_ENT(AT_FLAGS, 0);
    NEW_AUX_ENT(AT_ENTRY, ntdll_start_thunk);
    NEW_AUX_ENT(AT_UID, cred->uid);
    NEW_AUX_ENT(AT_EUID, cred->euid);
    NEW_AUX_ENT(AT_GID, cred->gid);
    NEW_AUX_ENT(AT_EGID, cred->egid);
    NEW_AUX_ENT(AT_SECURE, (elf_addr_t)security_bprm_secureexec(bprm));
#if 0
    if (k_platform) {
        /* FIXME */
        NEW_AUX_ENT(AT_PLATFORM, (elf_addr_t)(unsigned long)u_platform);
    }
#endif
    if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
        NEW_AUX_ENT(AT_EXECFD, (elf_addr_t)bprm->interp_data);
    }
#undef NEW_AUX_ENT

    /* AT_NULL is zero; clear the rest too */
    memset(&elf_info[ei_index], 0,
           sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

    /* And advance past the AT_NULL entry. */
    ei_index += 2;

    sp = STACK_ADD(p, ei_index * 2);

    items = (argc + 1) + (envc + 1);
    items += 1; /* ELF interpreters only put argc on the stack */
    items += 3; /* interp entry address & _init address & load_base */
    bprm->p = STACK_ROUND(sp, items);

    /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
    sp = (elf_addr_t __user *)bprm->p - items - ei_index;
    bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
    sp = (elf_addr_t __user *)bprm->p;
#endif

    /* Now, let's put argc (and argv, envp if appropriate) on the stack */
    if (__put_user(argc, sp))
        return -EFAULT;
    ++sp;
    argv = sp;
    envp = argv + argc + 1;

    /* Populate argv and envp */
    p = current->mm->arg_end = current->mm->arg_start;
    while (argc-- > 0) {
        size_t len;
        __put_user((elf_addr_t)p, argv);
        ++argv;
        len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
        if (!len || len > MAX_ARG_STRLEN)
            return 0;
        p += len;
    }
    if (__put_user(0, argv))
        return -EFAULT;
    current->mm->arg_end = current->mm->env_start = p;
    while (envc-- > 0) {
        size_t len;
        __put_user((elf_addr_t)p, envp);
        ++envp;
        len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
        if (!len || len > MAX_ARG_STRLEN)
            return 0;
        p += len;
    }
    if (__put_user(0, envp))
        return -EFAULT;
    current->mm->env_end = p;

    /* Put the elf_info on the stack in the right place.  */
    sp = (elf_addr_t __user *)envp + 1;
    if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
        return -EFAULT;
    sp += ei_index;

    elf_info2 = (elf_addr_t *)kmalloc(sizeof(current->mm->saved_auxv), GFP_KERNEL);
    if (!elf_info2)
        return -ENOMEM;

    ei_index = 0;
#define NEW_AUX_ENT(id, val) \
    do { elf_info2[ei_index++] = id; elf_info2[ei_index++] = val; } while (0)

#ifdef ARCH_DLINFO11
    /*
     * ARCH_DLINFO must come first so PPC can do its special alignment of
     * AUXV.
     */
    ARCH_DLINFO;
#endif
    NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
    NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
    NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
    NEW_AUX_ENT(AT_PHDR, exeso_load_addr + exeso_phoff);
    NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
    NEW_AUX_ENT(AT_PHNUM, exeso_phnum);
    NEW_AUX_ENT(AT_BASE, interp_load_addr);
    NEW_AUX_ENT(AT_FLAGS, 0);
    NEW_AUX_ENT(AT_ENTRY, exeso_start_thunk);
    NEW_AUX_ENT(AT_UID, cred->uid);
    NEW_AUX_ENT(AT_EUID, cred->euid);
    NEW_AUX_ENT(AT_GID, cred->gid);
    NEW_AUX_ENT(AT_EGID, cred->egid);
    NEW_AUX_ENT(AT_SECURE, (elf_addr_t)security_bprm_secureexec(bprm));
#if 0
    if (k_platform) {
        /* FIXME */
        NEW_AUX_ENT(AT_PLATFORM, (elf_addr_t)(unsigned long)u_platform);
    }
#endif
    if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
        NEW_AUX_ENT(AT_EXECFD, (elf_addr_t)bprm->interp_data);
    }
#undef NEW_AUX_ENT

    /* AT_NULL is zero; clear the rest too */
    memset(&elf_info2[ei_index], 0,
           sizeof(current->mm->saved_auxv) - ei_index * sizeof elf_info2[0]);
    ei_index += 2;

    if (copy_to_user(sp, elf_info2, ei_index * sizeof(elf_addr_t))) {
        kfree(elf_info2);
        return -EFAULT;
    }
    kfree(elf_info2);
    sp += ei_index;

    __put_user(interp_entry, sp);
    ++sp;
    __put_user(init_entry, sp);
    ++sp;
    __put_user(exeso_load_addr, sp);

    memset(current->mm->saved_auxv, 0, sizeof(current->mm->saved_auxv));

    return 0;
} /* end create_elf_tables */
int filter_audio(Filter_Audio *f_a, int16_t *data, unsigned int samples)
{
    if (!f_a) {
        return -1;
    }

    unsigned int nsx_samples = f_a->fs / 100;
    if (!samples || (samples % nsx_samples) != 0) {
        return -1;
    }

    _Bool resample = 0;
    unsigned int resampled_samples = 0;
    if (f_a->fs != 16000) {
        samples = (samples / nsx_samples) * 160;
        nsx_samples = 160;
        resample = 1;
    }

    unsigned int temp_samples = samples;
    unsigned int smp = f_a->fs / 100;
    int novoice = 1;

    /* scratch layout: two int16 blocks and two float blocks of nsx_samples,
       followed by one float block of smp samples for the upsampled output */
    int16_t *d_l = (int16_t *)STACK_ALLOC(nsx_samples * (2 * sizeof(int16_t) + 2 * sizeof(float)) + smp * sizeof(float));
    int16_t *temp = d_l + nsx_samples;
    float *d_f_l = (float *)(temp + nsx_samples);
    float *d_f_h = d_f_l + nsx_samples;
    float *d_f_u = d_f_h + nsx_samples;

    while (temp_samples) {
        int16_t *d_h = NULL;
        memset(temp, 0, nsx_samples * sizeof(int16_t));

        if (resample) {
            d_h = temp;
            downsample_audio(f_a, d_l, d_h, data + resampled_samples, smp);
        } else {
            memcpy(d_l, data + (samples - temp_samples), nsx_samples * sizeof(int16_t));
        }

        if (f_a->vad_enabled) {
            if (WebRtcVad_Process(f_a->Vad_handle, 16000, d_l, nsx_samples) == 1) {
                novoice = 0;
            }
        } else {
            novoice = 0;
        }

        if (f_a->gain_enabled) {
            int32_t inMicLevel = 128, outMicLevel;
            if (WebRtcAgc_VirtualMic(f_a->gain_control, d_l, d_h, nsx_samples, inMicLevel, &outMicLevel) == -1)
                return -1;
        }

        S16ToFloatS16(d_l, nsx_samples, d_f_l);
        memset(d_f_h, 0, nsx_samples * sizeof(float));
        if (resample) {
            S16ToFloatS16(d_h, nsx_samples, d_f_h);
        }

        if (f_a->echo_enabled) {
            if (WebRtcAec_Process(f_a->echo_cancellation, d_f_l, d_f_h, d_f_l, d_f_h, nsx_samples,
                                  f_a->msInSndCardBuf, 0) == -1) {
                return -1;
            }
            if (resample) {
                FloatS16ToS16(d_f_h, nsx_samples, d_h);
            }
            FloatS16ToS16(d_f_l, nsx_samples, d_l);
        }

        if (f_a->noise_enabled) {
            if (WebRtcNsx_Process(f_a->noise_sup_x, d_l, d_h, d_l, d_h) == -1) {
                return -1;
            }
        }

        if (f_a->gain_enabled) {
            int32_t inMicLevel = 128, outMicLevel;
            uint8_t saturationWarning;
            if (WebRtcAgc_Process(f_a->gain_control, d_l, d_h, nsx_samples, d_l, d_h, inMicLevel,
                                  &outMicLevel, 0, &saturationWarning) == -1) {
                return -1;
            }
        }

        if (resample) {
            upsample_audio(f_a, data + resampled_samples, smp, d_l, d_h, nsx_samples);
            S16ToFloat(data + resampled_samples, smp, d_f_u);
            run_filter_zam(&f_a->hpfa, d_f_u, smp);
            run_filter_zam(&f_a->hpfb, d_f_u, smp);
            if (f_a->lowpass_enabled) {
                run_filter_zam(&f_a->lpfa, d_f_u, smp);
                run_filter_zam(&f_a->lpfb, d_f_u, smp);
            }
            run_saturator_zam(d_f_u, smp);
            FloatToS16(d_f_u, smp, data + resampled_samples);
            resampled_samples += smp;
        } else {
            S16ToFloat(d_l, nsx_samples, d_f_l);
            run_filter_zam(&f_a->hpfa, d_f_l, nsx_samples);
            run_filter_zam(&f_a->hpfb, d_f_l, nsx_samples);
            if (f_a->lowpass_enabled) {
                run_filter_zam(&f_a->lpfa, d_f_l, nsx_samples);
                run_filter_zam(&f_a->lpfb, d_f_l, nsx_samples);
            }
            run_saturator_zam(d_f_l, nsx_samples);
            FloatToS16(d_f_l, nsx_samples, d_l);
            memcpy(data + (samples - temp_samples), d_l, nsx_samples * sizeof(int16_t));
        }

        temp_samples -= nsx_samples;
    }

    return !novoice;
}
#ifndef CBLAS

void NAME(char *UPLO, char *TRANS, char *DIAG,
          blasint *N, FLOAT *a, blasint *LDA, FLOAT *x, blasint *INCX){

  char uplo_arg  = *UPLO;
  char trans_arg = *TRANS;
  char diag_arg  = *DIAG;

  blasint n    = *N;
  blasint lda  = *LDA;
  blasint incx = *INCX;

  blasint info;
  int uplo;
  int unit;
  int trans, buffer_size;
  FLOAT *buffer;
#ifdef SMP
  int nthreads;
#endif

  PRINT_DEBUG_NAME;

  TOUPPER(uplo_arg);
  TOUPPER(trans_arg);
  TOUPPER(diag_arg);

  trans = -1;
  unit  = -1;
  uplo  = -1;

  if (trans_arg == 'N') trans = 0;
  if (trans_arg == 'T') trans = 1;
  if (trans_arg == 'R') trans = 2;
  if (trans_arg == 'C') trans = 3;

  if (diag_arg  == 'U') unit  = 0;
  if (diag_arg  == 'N') unit  = 1;

  if (uplo_arg  == 'U') uplo  = 0;
  if (uplo_arg  == 'L') uplo  = 1;

  info = 0;

  if (incx == 0)        info = 8;
  if (lda  < MAX(1, n)) info = 6;
  if (n < 0)            info = 4;
  if (unit  < 0)        info = 3;
  if (trans < 0)        info = 2;
  if (uplo  < 0)        info = 1;

  if (info != 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#else

void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
           blasint n, FLOAT *a, blasint lda, FLOAT *x, blasint incx) {

  int trans, uplo, unit, buffer_size;
  blasint info;
  FLOAT *buffer;
#ifdef SMP
  int nthreads;
#endif

  PRINT_DEBUG_CNAME;

  unit  = -1;
  uplo  = -1;
  trans = -1;
  info  =  0;

  if (order == CblasColMajor) {
    if (Uplo == CblasUpper)         uplo  = 0;
    if (Uplo == CblasLower)         uplo  = 1;

    if (TransA == CblasNoTrans)     trans = 0;
    if (TransA == CblasTrans)       trans = 1;
    if (TransA == CblasConjNoTrans) trans = 2;
    if (TransA == CblasConjTrans)   trans = 3;

    if (Diag == CblasUnit)          unit  = 0;
    if (Diag == CblasNonUnit)       unit  = 1;

    info = -1;

    if (incx == 0)        info = 8;
    if (lda  < MAX(1, n)) info = 6;
    if (n < 0)            info = 4;
    if (unit  < 0)        info = 3;
    if (trans < 0)        info = 2;
    if (uplo  < 0)        info = 1;
  }

  if (order == CblasRowMajor) {
    if (Uplo == CblasUpper)         uplo  = 1;
    if (Uplo == CblasLower)         uplo  = 0;

    if (TransA == CblasNoTrans)     trans = 1;
    if (TransA == CblasTrans)       trans = 0;
    if (TransA == CblasConjNoTrans) trans = 3;
    if (TransA == CblasConjTrans)   trans = 2;

    if (Diag == CblasUnit)          unit  = 0;
    if (Diag == CblasNonUnit)       unit  = 1;

    info = -1;

    if (incx == 0)        info = 8;
    if (lda  < MAX(1, n)) info = 6;
    if (n < 0)            info = 4;
    if (unit  < 0)        info = 3;
    if (trans < 0)        info = 2;
    if (uplo  < 0)        info = 1;
  }

  if (info >= 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#endif

  if (n == 0) return;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  if (incx < 0) x -= (n - 1) * incx * 2;

#ifdef SMP
  // Calibrated on a Xeon E5-2630
  if (1L * n * n > 36L * sizeof(FLOAT) * sizeof(FLOAT) * GEMM_MULTITHREAD_THRESHOLD) {
    nthreads = num_cpu_avail(2);
    if (nthreads > 2 && 1L * n * n < 64L * sizeof(FLOAT) * sizeof(FLOAT) * GEMM_MULTITHREAD_THRESHOLD)
      nthreads = 2;
  } else
    nthreads = 1;

  if (nthreads > 1) {
    buffer_size = n > 16 ? 0 : n * 4 + 40;
  } else
#endif
  {
    buffer_size = ((n - 1) / DTB_ENTRIES) * 2 * DTB_ENTRIES + 32 / sizeof(FLOAT);
    if (incx != 1) buffer_size += n * 2;
  }

  STACK_ALLOC(buffer_size, FLOAT, buffer);

#ifdef SMP
  if (nthreads == 1) {
#endif

    (trmv[(trans << 2) | (uplo << 1) | unit])(n, a, lda, x, incx, buffer);

#ifdef SMP
  } else {
    (trmv_thread[(trans << 2) | (uplo << 1) | unit])(n, a, lda, x, incx, buffer, nthreads);
  }
#endif

  STACK_FREE(buffer);

  FUNCTION_PROFILE_END(4, n * n / 2 + n, n * n);

  IDEBUG_END;

  return;
}
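/*
 * Hedged usage sketch: assuming this template is built as the double-complex
 * variant (so the CBLAS entry point above becomes cblas_ztrmv), a caller
 * would use it like this.  The 2x2 matrix and vector values are made up for
 * illustration only.
 */
#include <stdio.h>
#include <cblas.h>

int main(void)
{
    /* column-major upper-triangular 2x2 complex matrix A and vector x,
       stored as interleaved (re, im) doubles */
    double A[] = {1.0, 0.0,  0.0, 0.0,   /* column 0: a11, a21 (a21 unused: Upper) */
                  2.0, 0.0,  3.0, 0.0};  /* column 1: a12, a22 */
    double x[] = {1.0, 0.0,  1.0, 0.0};

    /* x := A * x */
    cblas_ztrmv(CblasColMajor, CblasUpper, CblasNoTrans, CblasNonUnit,
                2, A, 2, x, 1);

    printf("x = (%g%+gi, %g%+gi)\n", x[0], x[1], x[2], x[3]); /* expect (3, 3) */
    return 0;
}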
/*
 * Handle thread creation when address space is available
 * and we can recover from faults (from bad user pointers...)
 */
void rdecl
thread_specret(THREAD *thp)
{
    struct _thread_local_storage *tsp;
    const struct _thread_attr *attr;
    void *init_cc;
    uintptr_t stack_top;
    uintptr_t new_sp;
    int verify;

    thp->status = (void *)EFAULT;

    if((attr = thp->args.wa.attr)) {
        //RD_VERIFY_PTR(act, attr, sizeof(*attr));
        //RD_PROBE_INT(act, attr, sizeof(*attr) / sizeof(int));

        // Check for attributes which we do not support.
        // If there is a stack addr there must be a stack size.
        // If there is a stack size it must be at least PTHREAD_STACK_MIN.
        // If EXPLICIT sched, make sure policy and priority are valid.
        // add validation of the sporadic server attributes
        if(attr->__flags & PTHREAD_SCOPE_PROCESS) {
            verify = ENOTSUP;
        } else if((attr->__stackaddr || attr->__stacksize) && attr->__stacksize < PTHREAD_STACK_MIN) {
            verify = EINVAL;
        } else if(attr->__flags & PTHREAD_EXPLICIT_SCHED) {
            verify = kerschedok(thp, attr->__policy, (struct sched_param *)&attr->__param);
        } else {
            verify = EOK;
        }
        if(verify != EOK) {
            lock_kernel();
            thp->status = (void *)verify;
            thp->flags |= (_NTO_TF_KILLSELF | _NTO_TF_ONLYME);
            return; // RUSH3: this comes out in loader_exit() but EINTR overridden
        }
    }

    // Check if we need to allocate a stack
    if(!(thp->flags & _NTO_TF_ALLOCED_STACK)) {
        uintptr_t guardsize = 0;
        unsigned lazystate = 0;
        unsigned prealloc = 0;

        if(attr) {
            // Get the user requested values.
            thp->un.lcl.stackaddr = attr->__stackaddr;
            thp->un.lcl.stacksize = attr->__stacksize;
            if(attr->__stackaddr != NULL &&
               !WR_PROBE_PTR(thp, thp->un.lcl.stackaddr, thp->un.lcl.stacksize)) {
                lock_kernel();
                thp->status = (void *)EINVAL;
                thp->flags |= (_NTO_TF_KILLSELF | _NTO_TF_ONLYME);
                return;
            }
            guardsize = attr->__guardsize;
            prealloc = attr->__prealloc;
            lazystate = attr->__flags & PTHREAD_NOTLAZYSTACK_MASK;
        }
        if(thp->un.lcl.stacksize == 0) {
            if(__cpu_flags & CPU_FLAG_MMU) {
                thp->un.lcl.stacksize = DEF_VIRTUAL_THREAD_STACKSIZE;
            } else {
                thp->un.lcl.stacksize = DEF_PHYSICAL_THREAD_STACKSIZE;
            }
        }
        if(!thp->un.lcl.stackaddr) {
            lock_kernel();
            if(thp->process->pid != PROCMGR_PID && procmgr.process_stack_code) {
                unspecret_kernel();
                if(thp->state != STATE_STACK) {
                    // Must do modification of user address spaces at process time
                    struct sigevent event;

                    CRASHCHECK(thp != actives[KERNCPU]);
                    event.sigev_notify = SIGEV_PULSE;
                    event.sigev_coid = PROCMGR_COID;
                    event.sigev_value.sival_int = SYNC_OWNER(thp);
                    event.sigev_priority = thp->priority;
                    event.sigev_code = procmgr.process_stack_code;
                    if(sigevent_proc(&event)) {
                        // Pulse failed...
                        thp->status = (void *)EAGAIN;
                        thp->flags |= (_NTO_TF_KILLSELF | _NTO_TF_ONLYME);
                        return;
                    }
                    // we may not be running after sigevent_proc()
                    unready(thp, STATE_STACK);
                    thp->prev.thread = (void *)guardsize;
                    thp->next.thread = (void *)lazystate;
                    thp->status = (void *)prealloc;
                }
                return;
            }
            guardsize = 0;
            if(procmgr_stack_alloc(thp) != EOK) {
                thp->status = (void *)EAGAIN;
                thp->flags |= (_NTO_TF_KILLSELF | _NTO_TF_ONLYME);
                return;
            }
            thp->flags |= _NTO_TF_ALLOCED_STACK;
            unlock_kernel();
            SPECRET_PREEMPT(thp);
        }
    }

    // Inherit or assign a scheduling policy and params.
    if(attr) {
        if(attr->__flags & PTHREAD_MULTISIG_DISALLOW) {
            thp->flags |= _NTO_TF_NOMULTISIG;
        }
        thp->args.wa.exitfunc = attr->__exitfunc;
    }

    // Clear detach state if there is a parent
    // Get the *real* attribute structure pointer - we may have
    // NULL'd out thp->args.wa.attr and then been preempted
    attr = thp->args.wa.real_attr;
    if(thp->join && (!attr || !(attr->__flags & PTHREAD_CREATE_DETACHED))) {
        thp->flags &= ~_NTO_TF_DETACHED;
    }

    // Make thread lookups valid
    lock_kernel();
    vector_flag(&thp->process->threads, thp->tid, 0);
    thp->args.wa.attr = 0;

    if(actives[KERNCPU] != thp) {
        return;
    }

    // Load the necessary registers for the thread to start execution.
    stack_top = STACK_INIT((uintptr_t)thp->un.lcl.stackaddr, thp->un.lcl.stacksize);
    STACK_ALLOC(thp->un.lcl.tls, new_sp, stack_top, sizeof *thp->un.lcl.tls);
    STACK_ALLOC(init_cc, new_sp, new_sp, STACK_INITIAL_CALL_CONVENTION_USAGE);
    SETKSP(thp, new_sp);

    // Could fault again while setting tls in stack...
    unlock_kernel();
    SPECRET_PREEMPT(thp);

    SET_XFER_HANDLER(&threadstack_fault_handlers);

    tsp = thp->un.lcl.tls;
    memset(tsp, 0, sizeof(*tsp));

    // Set the initial calling convention usage section to zero - will
    // help any stack traceback code to determine when it has hit the
    // top of the stack.
    memset(init_cc, 0, STACK_INITIAL_CALL_CONVENTION_USAGE);

    if(attr) {
        tsp->__flags = attr->__flags & (PTHREAD_CSTATE_MASK|PTHREAD_CTYPE_MASK);
    }
    tsp->__arg = thp->args.wa.arg;
    tsp->__exitfunc = thp->args.wa.exitfunc;
    if(tsp->__exitfunc == NULL && thp->process->valid_thp != NULL) {
        /* We don't have thread termination (exitfunc) for this thread.
           Likely it was created with SIGEV_THREAD. Use the same one as for
           the valid_thp's. This mostly works since all threads created via
           pthread_create have the same exit function. */
        tsp->__exitfunc = thp->process->valid_thp->un.lcl.tls->__exitfunc;
    }
    tsp->__errptr = &tsp->__errval;
    if(thp->process->pid == PROCMGR_PID) {
        tsp->__stackaddr = (uint8_t *)thp->un.lcl.stackaddr;
    } else {
        tsp->__stackaddr = (uint8_t *)thp->un.lcl.stackaddr + ((attr == NULL) ? 0 : attr->__guardsize);
    }
    tsp->__pid = thp->process->pid;
    tsp->__tid = thp->tid + 1;
    tsp->__owner = SYNC_OWNER(thp);

    // Touch additional stack if requested in attr
    // @@@ NYI
    // if(attr->guaranteedstacksize) ...

    SET_XFER_HANDLER(NULL);

    cpu_thread_waaa(thp);

    // Let the parent continue. The tid was stuffed during thread_create().
    if(thp->join && thp->join->state == STATE_WAITTHREAD) {
        lock_kernel();
        ready(thp->join);
        thp->join = NULL;
    }

    //
    // Don't change priority until parent thread freed to run again
    // - we might get a priority inversion otherwise.
    //
    if((attr != NULL) && (attr->__flags & PTHREAD_EXPLICIT_SCHED)) {
        lock_kernel();
        if(sched_thread(thp, attr->__policy, (struct sched_param *)&attr->__param) != EOK) {
            /* We should have some error handling if sched_thread() fails ...
            thp->status = (void *)EAGAIN;
            thp->flags |= (_NTO_TF_KILLSELF | _NTO_TF_ONLYME);
            return;
            */
        }
    }

    /* Only done once for the first thread running */
    if(thp->process->process_priority == 0) {
        thp->process->process_priority = thp->priority;
    }

    /* a thread is born unto a STOPPED process - make sure it stops too! */
    if ( thp->process->flags & (_NTO_PF_DEBUG_STOPPED|_NTO_PF_STOPPED) ) {
        thp->flags |= _NTO_TF_TO_BE_STOPPED;
    }

    thp->flags &= ~_NTO_TF_WAAA;
}