Esempio n. 1
0
/* We have only one thread that ever re-initialises GPUs, thus if any GPU
 * init command fails due to a completely wedged GPU, the thread will never
 * return, unable to harm other GPUs. If it does return, it means we only had
 * a soft failure and then the reinit_gpu thread is ready to tackle another
 * GPU.
 *
 * userdata is the struct thr_info of this reinit thread; the devices to
 * restart are popped from its queue (mythr->q). For every popped device the
 * function cancels each OpenCL mining thread mapped to it, gives each one a
 * fresh queue and OpenCL state, restarts it, records the re-init datestamp on
 * the device and finally pushes a ping onto each restarted thread's queue.
 *
 * The function stops (returning NULL) when the queue yields NULL, when the
 * reported device count differs from nDevs, or when a thread cannot be
 * re-created. */
void *reinit_gpu(void *userdata)
{
	struct thr_info *mythr = userdata;
	struct cgpu_info *sel_cgpu;	/* device currently being re-initialised */
	struct thr_info *thr;
	struct timeval now;
	char name[256];
	int thr_id;
	int gpu;

	pthread_detach(pthread_self());

select_cgpu:
	sel_cgpu = tq_pop(mythr->q, NULL);
	if (!sel_cgpu)
		goto out;

	if (clDevicesNum() != nDevs) {
		applog(LOG_WARNING, "Hardware not reporting same number of active devices, will not attempt to restart GPU");
		goto out;
	}

	gpu = sel_cgpu->device_id;

	/* Pass 1: cancel every OpenCL mining thread that belongs to this GPU. */
	for (thr_id = 0; thr_id < mining_threads; ++thr_id) {
		/* thr_info[] elements always have an address, so no NULL check
		 * is needed on thr itself. */
		thr = &thr_info[thr_id];
		if (thr->cgpu->api != &opencl_api)
			continue;
		if (dev_from_id(thr_id) != gpu)
			continue;

		thr->rolling = thr->cgpu->rolling = 0;
		/* Reports the last time we tried to revive a sick GPU */
		gettimeofday(&thr->sick, NULL);
		/* pthread_cancel() returning 0 means the request was delivered,
		 * i.e. the thread was still around. */
		if (!pthread_cancel(thr->pth)) {
			applog(LOG_WARNING, "Thread %d still exists, killing it off", thr_id);
		} else
			applog(LOG_WARNING, "Thread %d no longer exists", thr_id);
	}

	/* Pass 2: rebuild queue and OpenCL state, then restart each thread. */
	for (thr_id = 0; thr_id < mining_threads; ++thr_id) {
		int virtual_gpu;

		thr = &thr_info[thr_id];
		if (thr->cgpu->api != &opencl_api)
			continue;
		if (dev_from_id(thr_id) != gpu)
			continue;

		virtual_gpu = thr->cgpu->virtual_gpu;
		/* Lose this ram cause we may get stuck here! */
		//tq_freeze(thr->q);

		thr->q = tq_new();
		if (!thr->q)
			quit(1, "Failed to tq_new in reinit_gpu");

		/* Lose this ram cause we may dereference in the dying thread! */
		//free(clState);

		applog(LOG_INFO, "Reinit GPU thread %d", thr_id);
		clStates[thr_id] = initCl(virtual_gpu, name, sizeof(name));
		if (!clStates[thr_id]) {
			applog(LOG_ERR, "Failed to reinit GPU thread %d", thr_id);
			/* Give up on this device and wait for the next request. */
			goto select_cgpu;
		}
		applog(LOG_INFO, "initCl() finished. Found %s", name);

		if (unlikely(thr_info_create(thr, NULL, miner_thread, thr))) {
			applog(LOG_ERR, "thread %d create failed", thr_id);
			return NULL;
		}
		applog(LOG_WARNING, "Thread %d restarted", thr_id);
	}

	/* Stamp the device that was actually re-initialised. The loops above
	 * visit every mining thread, so the datestamp must go through the
	 * pointer popped from the queue, not through the last thread seen. */
	gettimeofday(&now, NULL);
	get_datestamp(sel_cgpu->init, &now);

	/* Pass 3: ping each restarted thread through its new queue. */
	for (thr_id = 0; thr_id < mining_threads; ++thr_id) {
		thr = &thr_info[thr_id];
		if (thr->cgpu->api != &opencl_api)
			continue;
		if (dev_from_id(thr_id) != gpu)
			continue;

		tq_push(thr->q, &ping);
	}

	goto select_cgpu;
out:
	return NULL;
}
Esempio n. 2
0
/* We have only one thread that ever re-initialises GPUs, thus if any GPU
 * init command fails due to a completely wedged GPU, the thread will never
 * return, unable to harm other GPUs. If it does return, it means we only had
 * a soft failure and then the reinit_gpu thread is ready to tackle another
 * GPU.
 *
 * userdata is the struct thr_info of this reinit thread; the devices to
 * restart are popped from its queue (mythr->q). For every popped device the
 * function shuts down each OpenCL mining thread mapped to it, gives each one
 * a fresh queue and OpenCL state, restarts it, records the re-init datestamp
 * on the device and finally posts each restarted thread's semaphore.
 *
 * mining_thr[] is walked under a read lock on mining_thr_lock; every exit
 * path out of a locked section releases that lock first.
 *
 * The function stops (returning NULL) when the queue yields NULL, when the
 * reported device count differs from nDevs, or when a thread cannot be
 * re-created. */
void *reinit_gpu(void *userdata)
{
  struct thr_info *mythr = (struct thr_info *)userdata;
  struct cgpu_info *sel_cgpu; /* device currently being re-initialised */
  struct thr_info *thr;
  struct timeval now;
  char name[256];
  int thr_id;
  int gpu;

  pthread_detach(pthread_self());

select_cgpu:
  sel_cgpu = (struct cgpu_info *)tq_pop(mythr->q, NULL);
  if (!sel_cgpu)
    goto out;

  if (clDevicesNum() != nDevs) {
    applog(LOG_WARNING, "Hardware not reporting same number of active devices, will not attempt to restart GPU");
    goto out;
  }

  gpu = sel_cgpu->device_id;

  /* Pass 1: stop every OpenCL mining thread that belongs to this GPU. */
  rd_lock(&mining_thr_lock);
  for (thr_id = 0; thr_id < mining_threads; ++thr_id) {
    thr = mining_thr[thr_id];
    if (thr->cgpu->drv->drv_id != DRIVER_opencl)
      continue;
    if (dev_from_id(thr_id) != gpu)
      continue;

    thr->rolling = thr->cgpu->rolling = 0;
    /* Reports the last time we tried to revive a sick GPU */
    cgtime(&thr->sick);
    /* Signal 0 sends nothing; it only probes whether the thread exists. */
    if (!pthread_kill(thr->pth, 0)) {
      applog(LOG_WARNING, "Thread %d still exists, killing it off", thr_id);
      /* NOTE(review): presumably bounds the cancel+join to 5000 ms so a
       * wedged thread cannot block us forever - confirm against
       * cg_completion_timeout(). */
      cg_completion_timeout(&thr_info_cancel_join, thr, 5000);
      thr->cgpu->drv->thread_shutdown(thr);
    } else
      applog(LOG_WARNING, "Thread %d no longer exists", thr_id);
  }
  rd_unlock(&mining_thr_lock);

  /* Pass 2: rebuild queue and OpenCL state, then restart each thread. */
  rd_lock(&mining_thr_lock);
  for (thr_id = 0; thr_id < mining_threads; ++thr_id) {
    int virtual_gpu;

    thr = mining_thr[thr_id];
    if (thr->cgpu->drv->drv_id != DRIVER_opencl)
      continue;
    if (dev_from_id(thr_id) != gpu)
      continue;

    virtual_gpu = thr->cgpu->virtual_gpu;
    /* Lose this ram cause we may get stuck here! */
    //tq_freeze(thr->q);

    thr->q = tq_new();
    if (!thr->q)
      quit(1, "Failed to tq_new in reinit_gpu");

    /* Lose this ram cause we may dereference in the dying thread! */
    //free(clState);

    applog(LOG_INFO, "Reinit GPU thread %d", thr_id);
    clStates[thr_id] = initCl(virtual_gpu, name, sizeof(name), &thr->cgpu->algorithm);
    if (!clStates[thr_id]) {
      applog(LOG_ERR, "Failed to reinit GPU thread %d", thr_id);
      /* Drop the read lock before abandoning this device; otherwise it
       * would stay held while we block on the next request. */
      rd_unlock(&mining_thr_lock);
      goto select_cgpu;
    }
    applog(LOG_INFO, "initCl() finished. Found %s", name);

    if (unlikely(thr_info_create(thr, NULL, miner_thread, thr))) {
      applog(LOG_ERR, "thread %d create failed", thr_id);
      /* Do not exit the thread with the lock still held. */
      rd_unlock(&mining_thr_lock);
      return NULL;
    }
    applog(LOG_WARNING, "Thread %d restarted", thr_id);
  }
  rd_unlock(&mining_thr_lock);

  /* Stamp the device that was actually re-initialised. The loops above
   * visit every mining thread, so the datestamp must go through the
   * pointer popped from the queue, not through the last thread seen. */
  cgtime(&now);
  get_datestamp(sel_cgpu->init, sizeof(sel_cgpu->init), &now);

  /* Pass 3: post the semaphore of each restarted thread. */
  rd_lock(&mining_thr_lock);
  for (thr_id = 0; thr_id < mining_threads; ++thr_id) {
    thr = mining_thr[thr_id];
    if (thr->cgpu->drv->drv_id != DRIVER_opencl)
      continue;
    if (dev_from_id(thr_id) != gpu)
      continue;

    cgsem_post(&thr->sem);
  }
  rd_unlock(&mining_thr_lock);

  goto select_cgpu;
out:
  return NULL;
}