static char *tp_from_combined_area(const struct tls_info *info,
                                   void *combined_area, size_t tdb_size) {
  size_t tls_size = info->tdata_size + info->tbss_size;
  ptrdiff_t tdboff = __nacl_tp_tdb_offset(tdb_size);
  if (tdboff < 0) {
    /*
     * The combined area is big enough to hold the TDB and then be aligned
     * up to the $tp alignment requirement.  If the whole area is aligned
     * to the $tp requirement, then aligning the beginning of the area
     * would give us the beginning unchanged, which is not what we need.
     * Instead, align from the putative end of the TDB, to decide where
     * $tp--the true end of the TDB--should actually lie.
     */
    return aligned_addr((char *) combined_area + tdb_size,
                        info->tls_alignment);
  } else {
    /*
     * The linker increases the size of the TLS block up to its alignment
     * requirement, and that total is subtracted from the $tp address to
     * access the TLS area.  To keep that final address properly aligned,
     * we need to align up from the allocated space and then add the
     * aligned size.
     */
    tls_size = aligned_size(tls_size, info->tls_alignment);
    return aligned_addr((char *) combined_area, info->tls_alignment) +
           tls_size;
  }
}
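/*
 * aligned_size() and aligned_addr() are defined elsewhere in this file and
 * are not shown in this excerpt.  As an illustration of the behavior the
 * code above relies on (rounding up to a power-of-two alignment), a minimal
 * sketch might look like this:
 */
static size_t aligned_size(size_t size, size_t alignment) {
  /* Round size up to the next multiple of alignment (a power of two). */
  return (size + alignment - 1) & -alignment;
}

static char *aligned_addr(void *start, size_t alignment) {
  /* The same rounding, applied to an address. */
  return (char *) aligned_size((size_t) start, alignment);
}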
static inline nc_thread_descriptor_t *nc_get_tdb(void) {
  /*
   * Fetch the thread-specific data pointer.  This is usually just
   * a wrapper around __libnacl_irt_tls.tls_get(), but we don't use
   * that here so that the IRT build can override the definition.
   */
  return (void *) ((char *) __nacl_read_tp() +
                   __nacl_tp_tdb_offset(TDB_SIZE));
}
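/*
 * The layout decisions in this file hinge on the per-architecture
 * parameters __nacl_tp_tdb_offset() and __nacl_tp_tls_offset().  As an
 * illustration of the conventions the code above assumes (a sketch, not
 * the authoritative per-architecture definitions; the ILLUSTRATION_*
 * guards are hypothetical): on x86 the TDB sits directly at $tp with the
 * TLS block below it, while on ARM the TDB ends at $tp and the TLS block
 * starts 8 bytes above $tp.
 */
#if defined(ILLUSTRATION_X86)
static inline ptrdiff_t __nacl_tp_tdb_offset(size_t tdb_size) {
  return 0;                      /* TDB sits directly at $tp. */
}
static inline ptrdiff_t __nacl_tp_tls_offset(size_t tls_size) {
  return -(ptrdiff_t) tls_size;  /* TLS block sits below $tp. */
}
#elif defined(ILLUSTRATION_ARM)
static inline ptrdiff_t __nacl_tp_tdb_offset(size_t tdb_size) {
  return -(ptrdiff_t) tdb_size;  /* TDB ends exactly at $tp. */
}
static inline ptrdiff_t __nacl_tp_tls_offset(size_t tls_size) {
  return 8;                      /* TLS starts after the 8-byte header. */
}
#endif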
void *__nacl_tls_initialize_memory(void *combined_area, size_t tdb_size) {
  const struct tls_info *info = get_tls_info();
  size_t tls_size = info->tdata_size + info->tbss_size;
  char *combined_area_end =
      (char *) combined_area + __nacl_tls_combined_size(tdb_size);
  void *tp = tp_from_combined_area(info, combined_area, tdb_size);
  char *start = tp;

  if (__nacl_tp_tls_offset(0) > 0) {
    /*
     * From $tp, we skip the header size and then must round up from
     * there to the required alignment (which is what the linker will
     * do when calculating TPOFF relocations at link time).  The end
     * result is that the offset from $tp matches the one chosen by
     * the linker exactly, and that the final address is aligned to
     * info->tls_alignment (since $tp was already aligned to at least
     * that much).
     */
    start += aligned_size(__nacl_tp_tls_offset(tls_size),
                          info->tls_alignment);
  } else {
    /*
     * We'll subtract the aligned size of the TLS block from $tp, which
     * must itself already be adequately aligned.
     */
    start += __nacl_tp_tls_offset(aligned_size(tls_size,
                                               info->tls_alignment));
  }

  /* Sanity check.  (But avoid pulling in assert() here.) */
  if (start + info->tdata_size + info->tbss_size > combined_area_end)
    simple_abort();

  memcpy(start, info->tdata_start, info->tdata_size);
  memset(start + info->tdata_size, 0, info->tbss_size);

  if (__nacl_tp_tdb_offset(tdb_size) == 0) {
    /*
     * On x86 (but not on ARM), the TDB sits directly at $tp and the
     * first word there must hold the $tp pointer itself.
     */
    void *tdb = (char *) tp + __nacl_tp_tdb_offset(tdb_size);
    *(void **) tdb = tdb;
  }

  return tp;
}
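/*
 * __nacl_tls_combined_size() is defined elsewhere; conceptually it must
 * reserve room for the TLS block, the TDB, and enough slop for
 * tp_from_combined_area()'s alignment adjustments, whatever the alignment
 * of the raw allocation.  A minimal sketch under that assumption (the
 * function name here is hypothetical, for illustration only):
 */
size_t example_tls_combined_size(size_t tdb_size) {
  const struct tls_info *info = get_tls_info();
  size_t tls_size = info->tdata_size + info->tbss_size;
  /* Worst case: the raw block is misaligned by (tls_alignment - 1). */
  return aligned_size(tls_size, info->tls_alignment) + tdb_size +
         info->tls_alignment - 1;
}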
void *__nacl_tls_data_bss_initialize_from_template(void *combined_area,
                                                   size_t tdb_size) {
  if (__nacl_tp_tdb_offset(tdb_size) != 0) {
    /*
     * This needs more work for ARM.
     * For now, abort via null pointer dereference.
     */
    while (1)
      *(volatile int *) 0;
  } else {
    const struct tls_info *info = get_tls_info();
    size_t tls_size = info->tdata_size + info->tbss_size;
    void *tdb = aligned_addr((char *) combined_area + tls_size +
                             SAFETY_PADDING,
                             info->tls_alignment);
    return _dl_allocate_tls(tdb);
  }
}
static struct nc_combined_tdb *get_irt_tdb(void *thread_ptr) {
  struct nc_combined_tdb *tdb = (void *) ((uintptr_t) thread_ptr +
                                          __nacl_tp_tdb_offset(sizeof(*tdb)));
  return tdb;
}
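/*
 * For reference, the combined IRT thread block above assumes a layout in
 * which the thread descriptor and its basic_data record live side by side
 * in a single allocation.  A sketch of that shape (the name is
 * hypothetical; the real declaration lives in the pthread headers):
 */
struct example_combined_tdb {
  nc_thread_descriptor_t tdb;        /* descriptor first, so get_irt_tdb()'s
                                        offset arithmetic lands on it */
  nc_basic_thread_data_t basic_data; /* join/detach bookkeeping record */
};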
int pthread_create(pthread_t *thread_id,
                   const pthread_attr_t *attr,
                   void *(*start_routine)(void *),
                   void *arg) {
  int retval = EAGAIN;
  void *esp;
  /* Declare the variables outside of the while scope. */
  nc_thread_memory_block_t *stack_node = NULL;
  char *thread_stack = NULL;
  nc_thread_descriptor_t *new_tdb = NULL;
  nc_basic_thread_data_t *new_basic_data = NULL;
  nc_thread_memory_block_t *tls_node = NULL;
  size_t stacksize = PTHREAD_STACK_DEFAULT;
  void *new_tp;

  /* TODO(gregoryd): right now a single lock is used; try to optimize? */
  pthread_mutex_lock(&__nc_thread_management_lock);

  do {
    /* Allocate the combined TLS + TDB block---see tls.h for explanation. */
    tls_node = nc_allocate_memory_block_mu(TLS_AND_TDB_MEMORY,
                                           __nacl_tls_combined_size(TDB_SIZE));
    if (NULL == tls_node)
      break;

    new_tp = __nacl_tls_initialize_memory(nc_memory_block_to_payload(tls_node),
                                          TDB_SIZE);

    new_tdb = (nc_thread_descriptor_t *)
        ((char *) new_tp + __nacl_tp_tdb_offset(TDB_SIZE));

    /*
     * TODO(gregoryd): consider creating a pool of basic_data structs,
     * similar to stack and TLS+TDB (probably when adding the support for
     * variable stack size).
     */
    new_basic_data = malloc(sizeof(*new_basic_data));
    if (NULL == new_basic_data) {
      /*
       * The tdb should be zero initialized.
       * This just re-emphasizes this requirement.
       */
      new_tdb->basic_data = NULL;
      break;
    }

    nc_tdb_init(new_tdb, new_basic_data);
    new_tdb->tls_node = tls_node;

    /*
     * All the required members of the tdb must be initialized before
     * the thread is started, and actually before the global lock is
     * released, since another thread can call pthread_join() or
     * pthread_detach().
     */
    new_tdb->start_func = start_routine;
    new_tdb->state = arg;

    if (attr != NULL) {
      new_tdb->joinable = attr->joinable;
      stacksize = attr->stacksize;
    }

    /* Allocate the stack for the thread. */
    stack_node = nc_allocate_memory_block_mu(THREAD_STACK_MEMORY, stacksize);
    if (NULL == stack_node) {
      retval = EAGAIN;
      break;
    }
    thread_stack = align((uint32_t) nc_memory_block_to_payload(stack_node),
                         kStackAlignment);
    new_tdb->stack_node = stack_node;

    retval = 0;
  } while (0);

  if (0 != retval) {
    pthread_mutex_unlock(&__nc_thread_management_lock);
    goto ret;  /* error */
  }

  /*
   * Speculatively increase the thread count.  If thread creation
   * fails, we will decrease it back.  This way the thread count will
   * never be lower than the actual number of threads, but can briefly
   * be higher than that.
   */
  ++__nc_running_threads_counter;

  /*
   * Save the new thread id.  This cannot be done after the syscall,
   * because the child thread could have already finished by that
   * time.  If thread creation fails, it will be overridden with
   * NACL_PTHREAD_ILLEGAL_THREAD_ID later.
   */
  *thread_id = new_basic_data;

  pthread_mutex_unlock(&__nc_thread_management_lock);

  /*
   * Calculate the top-of-stack location.  The very first location is a
   * zero address of architecture-dependent width, needed to satisfy the
   * normal ABI alignment requirements for the stack.  (On some machines
   * this is the dummy return address of the thread-start function.)
   *
   * Both thread_stack and stacksize are multiples of 16.
   */
  esp = (void *) (thread_stack + stacksize - kStackPadBelowAlign);
  memset(esp, 0, kStackPadBelowAlign);

  /* Start the thread. */
  retval = irt_thread.thread_create(FUN_TO_VOID_PTR(nc_thread_starter),
                                    esp, new_tp);
  if (0 != retval) {
    pthread_mutex_lock(&__nc_thread_management_lock);
    /* TODO(gregoryd): replace with atomic decrement? */
    --__nc_running_threads_counter;
    pthread_mutex_unlock(&__nc_thread_management_lock);
    goto ret;
  }

  assert(0 == retval);

ret:
  if (0 != retval) {
    /* Failed to create a thread. */
    pthread_mutex_lock(&__nc_thread_management_lock);

    nc_release_tls_node(tls_node, new_tdb);

    if (new_basic_data) {
      nc_release_basic_data_mu(new_basic_data);
    }

    if (stack_node) {
      stack_node->is_used = 0;
      nc_free_memory_block_mu(THREAD_STACK_MEMORY, stack_node);
    }

    pthread_mutex_unlock(&__nc_thread_management_lock);
    *thread_id = NACL_PTHREAD_ILLEGAL_THREAD_ID;
  }

  return retval;
}
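/*
 * Usage illustration (not part of the library): spawning a joinable thread
 * through the pthread_create() above and waiting for it.  The worker and
 * wrapper names are hypothetical; the calls are the standard POSIX ones
 * this file implements, and <pthread.h> is assumed to be included already.
 */
static void *example_worker(void *arg) {
  /* Flip the flag the parent passed in, then exit. */
  *(int *) arg = 1;
  return NULL;
}

static int example_spawn_and_join(void) {
  pthread_t tid;
  int done = 0;
  int err = pthread_create(&tid, NULL, example_worker, &done);
  if (err != 0)
    return err;  /* EAGAIN when the TLS+TDB or stack allocation fails. */
  return pthread_join(tid, NULL);  /* After a successful join, done == 1. */
}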