int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr,
                   void* (*start_routine)(void*), void* arg) {
  ErrnoRestorer errno_restorer;

  // Inform the rest of the C library that at least one thread
  // was created. This will enforce certain functions to acquire/release
  // locks (e.g. atexit()) to protect shared global structures.
  // This works because pthread_create() is not called by the C library
  // initialization routine that sets up the main thread's data structures.
  __isthreaded = 1;

  pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(calloc(sizeof(*thread), 1));
  if (thread == NULL) {
    __libc_format_log(ANDROID_LOG_WARN, "libc", "pthread_create failed: couldn't allocate thread");
    return EAGAIN;
  }
  thread->allocated_on_heap = true;

  if (attr == NULL) {
    pthread_attr_init(&thread->attr);
  } else {
    thread->attr = *attr;
    attr = NULL; // Prevent misuse below.
  }

  // Make sure the stack size is PAGE_SIZE aligned.
  size_t stack_size = (thread->attr.stack_size + (PAGE_SIZE-1)) & ~(PAGE_SIZE-1);

  if (thread->attr.stack_base == NULL) {
    // The caller didn't provide a stack, so allocate one.
    thread->attr.stack_base = __create_thread_stack(stack_size, thread->attr.guard_size);
    if (thread->attr.stack_base == NULL) {
      free(thread);
      __libc_format_log(ANDROID_LOG_WARN, "libc", "pthread_create failed: couldn't allocate %zd-byte stack", stack_size);
      return EAGAIN;
    }
  } else {
    // The caller did provide a stack, so remember we're not supposed to free it.
    thread->attr.flags |= PTHREAD_ATTR_FLAG_USER_STACK;
  }

  // Make room for TLS.
  void** tls = (void**)((uint8_t*)(thread->attr.stack_base) + stack_size - BIONIC_TLS_SLOTS * sizeof(void*));

  // Create a mutex for the thread in TLS_SLOT_SELF to wait on once it starts so we can keep
  // it from doing anything until after we notify the debugger about it
  //
  // This also provides the memory barrier we need to ensure that all
  // memory accesses previously performed by this thread are visible to
  // the new thread.
  pthread_mutex_t* start_mutex = (pthread_mutex_t*) &tls[TLS_SLOT_SELF];
  pthread_mutex_init(start_mutex, NULL);
  ScopedPthreadMutexLocker start_locker(start_mutex);

  tls[TLS_SLOT_THREAD_ID] = thread;

  int flags = CLONE_FILES | CLONE_FS | CLONE_VM | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM;

  int tid = __pthread_clone(start_routine, tls, flags, arg);
  if (tid < 0) {
    int clone_errno = errno;
    if ((thread->attr.flags & PTHREAD_ATTR_FLAG_USER_STACK) == 0) {
      munmap(thread->attr.stack_base, stack_size);
    }
    free(thread);
    __libc_format_log(ANDROID_LOG_WARN, "libc", "pthread_create failed: clone failed: %s", strerror(errno));
    return clone_errno;
  }

  thread->tid = tid;

  int init_errno = _init_thread(thread, true);
  if (init_errno != 0) {
    // Mark the thread detached and let its __thread_entry run to
    // completion. (It'll just exit immediately, cleaning up its resources.)
    thread->internal_flags |= kPthreadInitFailed;
    thread->attr.flags |= PTHREAD_ATTR_FLAG_DETACHED;
    return init_errno;
  }

  // Notify any debuggers about the new thread.
  {
    ScopedPthreadMutexLocker debugger_locker(&gDebuggerNotificationLock);
    _thread_created_hook(thread->tid);
  }

  // Publish the pthread_t and let the thread run.
  *thread_out = (pthread_t) thread;

  return 0;
}
int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr,
                   void* (*start_routine)(void*), void* arg) {
  ErrnoRestorer errno_restorer;

  // Inform the rest of the C library that at least one thread was created.
  __isthreaded = 1;

  pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(calloc(sizeof(*thread), 1));
  if (thread == NULL) {
    __libc_format_log(ANDROID_LOG_WARN, "libc", "pthread_create failed: couldn't allocate thread");
    return EAGAIN;
  }

  if (attr == NULL) {
    pthread_attr_init(&thread->attr);
  } else {
    thread->attr = *attr;
    attr = NULL; // Prevent misuse below.
  }

  // Make sure the stack size and guard size are multiples of PAGE_SIZE.
  thread->attr.stack_size = (thread->attr.stack_size + (PAGE_SIZE-1)) & ~(PAGE_SIZE-1);
  thread->attr.guard_size = (thread->attr.guard_size + (PAGE_SIZE-1)) & ~(PAGE_SIZE-1);

  if (thread->attr.stack_base == NULL) {
    // The caller didn't provide a stack, so allocate one.
    thread->attr.stack_base = __create_thread_stack(thread);
    if (thread->attr.stack_base == NULL) {
      free(thread);
      return EAGAIN;
    }
  } else {
    // The caller did provide a stack, so remember we're not supposed to free it.
    thread->attr.flags |= PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK;
  }

  // Make room for the TLS area.
  // The child stack is the same address, just growing in the opposite direction.
  // At offsets >= 0, we have the TLS slots.
  // At offsets < 0, we have the child stack.
  thread->tls = reinterpret_cast<void**>(reinterpret_cast<uint8_t*>(thread->attr.stack_base) +
                                         thread->attr.stack_size - BIONIC_TLS_SLOTS * sizeof(void*));
  void* child_stack = thread->tls;
  __init_tls(thread);

  // Create a mutex for the thread in TLS to wait on once it starts so we can keep
  // it from doing anything until after we notify the debugger about it
  //
  // This also provides the memory barrier we need to ensure that all
  // memory accesses previously performed by this thread are visible to
  // the new thread.
  pthread_mutex_init(&thread->startup_handshake_mutex, NULL);
  pthread_mutex_lock(&thread->startup_handshake_mutex);

  thread->start_routine = start_routine;
  thread->start_routine_arg = arg;

  int flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
      CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID;
  void* tls = thread->tls;
#if defined(__i386__)
  // On x86 (but not x86-64), CLONE_SETTLS takes a pointer to a struct user_desc rather than
  // a pointer to the TLS itself.
  user_desc tls_descriptor;
  __init_user_desc(&tls_descriptor, false, tls);
  tls = &tls_descriptor;
#endif
  int rc = clone(__pthread_start, child_stack, flags, thread, &(thread->tid), tls, &(thread->tid));
  if (rc == -1) {
    int clone_errno = errno;
    // We don't have to unlock the mutex at all because clone(2) failed so there's no child waiting to
    // be unblocked, but we're about to unmap the memory the mutex is stored in, so this serves as a
    // reminder that you can't rewrite this function to use a ScopedPthreadMutexLocker.
    pthread_mutex_unlock(&thread->startup_handshake_mutex);
    if ((thread->attr.flags & PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK) == 0) {
      munmap(thread->attr.stack_base, thread->attr.stack_size);
    }
    free(thread);
    __libc_format_log(ANDROID_LOG_WARN, "libc", "pthread_create failed: clone failed: %s", strerror(errno));
    return clone_errno;
  }

  int init_errno = __init_thread(thread, true);
  if (init_errno != 0) {
    // Mark the thread detached and replace its start_routine with a no-op.
    // Letting the thread run is the easiest way to clean up its resources.
    thread->attr.flags |= PTHREAD_ATTR_FLAG_DETACHED;
    thread->start_routine = __do_nothing;
    pthread_mutex_unlock(&thread->startup_handshake_mutex);
    return init_errno;
  }

  // Notify any debuggers about the new thread.
  {
    ScopedPthreadMutexLocker debugger_locker(&g_debugger_notification_lock);
    _thread_created_hook(thread->tid);
  }

  // Publish the pthread_t and unlock the mutex to let the new thread start running.
  *thread_out = reinterpret_cast<pthread_t>(thread);
  pthread_mutex_unlock(&thread->startup_handshake_mutex);

  return 0;
}