Beispiel #1
0
/*
 * Signal handler for fand.
 *
 * Sets every fan to fan_max, logs which signal was received, stops the
 * watchdog if (and only if) the signal is SIGUSR1, and terminates the
 * process with exit status 3.
 *
 * sig: number of the signal being handled.
 */
void fand_interrupt(int sig)
{
  int idx = 0;

  /* Drive all fans to fan_max before the daemon goes away. */
  while (idx < total_fans) {
    write_fan_speed(idx + fan_offset, fan_max);
    idx++;
  }

  syslog(LOG_WARNING, "Shutting down fand on signal %s", strsignal(sig));

  /* For any signal other than SIGUSR1 the watchdog is left untouched. */
  if (SIGUSR1 == sig)
    stop_watchdog();

  exit(3);
}
Beispiel #2
0
/*
 * Emergency shutdown path.
 *
 * Sets every fan to fan_max, logs the reason at LOG_EMERG, performs the
 * platform-specific power-off sequence selected at compile time, stops the
 * watchdog and exits the process with status 2.
 *
 * why: human-readable reason, written to syslog.
 *
 * The function is declared to return int but never returns: it always ends
 * in exit(2).
 *
 * The platform sections form a single #if/#elif/#else chain so that exactly
 * one of them is compiled.  (Previously a CONFIG_WEDGE100 build ran its
 * power-off sequence and then also logged "Need to implement actual
 * shutdown!" from the fallback branch.)
 */
int server_shutdown(const char *why) {
  int fan;
  for (fan = 0; fan < total_fans; fan++) {
    write_fan_speed(fan + fan_offset, fan_max);
  }

  syslog(LOG_EMERG, "Shutting down:  %s", why);
#if defined(CONFIG_WEDGE100)
  write_device(USERVER_POWER, "0");
  sleep(5);
  write_device(MAIN_POWER, "0");
#elif defined(CONFIG_WEDGE)
  write_device(GPIO_USERVER_POWER_DIRECTION, "out");
  write_device(GPIO_USERVER_POWER, "0");
  /*
   * Putting T2 in reset generates a non-maskable interrupt to uS,
   * the kernel running on uS might panic depending on its version.
   * sleep 5s here to make sure uS is completely down.
   */
  sleep(5);

  if (write_device(GPIO_T2_POWER_DIRECTION, "out") ||
      write_device(GPIO_T2_POWER, "1")) {
    /*
     * We're here because something has gone badly wrong.  If we
     * didn't manage to shut down the T2, cut power to the whole box,
     * using the PMBus OPERATION register.  This will require a power
     * cycle (removal of both power inputs) to recover.
     */
    syslog(LOG_EMERG, "T2 power off failed;  turning off via ADM1278");
    system("rmmod adm1275");
    system("i2cset -y 12 0x10 0x01 00");
  }
#else
  // TODO(7088822):  try throttling, then shutting down server.
  syslog(LOG_EMERG, "Need to implement actual shutdown!\n");
#endif

  /*
   * We have to stop the watchdog, or the system will be automatically
   * rebooted some seconds after fand exits (and stops kicking the
   * watchdog).
   */

  stop_watchdog();

  sleep(2);
  exit(2);
}
Beispiel #3
0
/*
 * Derive a new fan speed from the latest reading in th->temps.
 *
 * The reading is inversely related to temperature (reading = 1/T): a smaller
 * value means a hotter system and therefore a faster fan.
 *
 *   reading >= cold_limit : fan set to OXSEMI_FAN_SPEED_RATIO_MIN
 *   reading <  hot_limit  : fan set to OXSEMI_FAN_SPEED_RATIO_MAX
 *   in between            : linear scale from min_fan_speed_ratio (at the
 *                           cold limit) to the maximum (at the hot limit),
 *                           with the step relative to th->set_speed limited
 *                           to +/- MAX_FAN_RATIO_CHANGE per call
 *
 * Nothing happens unless the reading moved by at least MIN_TEMP_COUNT_CHANGE
 * since th->last_var.  th->last_var is refreshed on every path except when
 * the step had to be clamped.
 */
static void update_fan_speed(struct thermostat *th)
{
	int reading = th->temps;
	int speed;

	/* Ignore changes that are too small to act on. */
	if (abs(reading - th->last_var) < MIN_TEMP_COUNT_CHANGE)
		return;

	if (reading >= cold_limit) {
		/* at or above the cold limit - too cold for the fan */
		th->last_var = reading;
		speed = OXSEMI_FAN_SPEED_RATIO_MIN;
	} else if (reading < hot_limit) {
		/* too hot for proportional control */
		th->last_var = reading;
		speed = OXSEMI_FAN_SPEED_RATIO_MAX;
	} else {
		/*
		 * Proportional band: min_fan_speed_ratio is the user-selected
		 * starting value, so scale from it at the cold limit up to the
		 * maximum at the hot limit.
		 */
		speed = OXSEMI_FAN_SPEED_RATIO_MAX -
			(OXSEMI_FAN_SPEED_RATIO_MAX - min_fan_speed_ratio) * (reading - hot_limit)/(cold_limit - hot_limit);

		/* A stored speed of 0 is replaced by the minimum ratio as the baseline. */
		if (th->set_speed == 0)
			th->set_speed = min_fan_speed_ratio;

		/* Limit how far the speed may move in a single update. */
		if ((speed - th->set_speed) > MAX_FAN_RATIO_CHANGE) {
			speed = th->set_speed + MAX_FAN_RATIO_CHANGE;
		} else if ((speed - th->set_speed) < -MAX_FAN_RATIO_CHANGE) {
			speed = th->set_speed - MAX_FAN_RATIO_CHANGE;
		} else {
			/* Target reached without clamping: remember this reading. */
			th->last_var = reading;
		}
	}

	write_fan_speed(th, speed);
	th->set_speed = speed;
}
Beispiel #4
0
/*
 * Module exit: stop the monitor thread (if any), set the fan speed to 0,
 * remove the "therm-fan" proc entry, deregister the misc device and free
 * the thermostat state.
 */
static void __exit oxsemi_therm_exit(void)
{
	/* Only stop the thread when one is recorded. */
	if (thread_therm != NULL)
		kthread_stop(thread_therm);

	/*
	 * Stop the fan so that it does not keep running.  The
	 * SYS_CTRL_RSTEN_MISC_BIT reset is deliberately left alone because
	 * other modules may use it.
	 */
	write_fan_speed(thermostat, 0);

	remove_proc_entry("therm-fan", NULL);
	misc_deregister(&oxsemi_therm_miscdev);

	/* Release the state and clear the global pointer to it. */
	kfree(thermostat);
	thermostat = NULL;
}
Beispiel #5
0
/*
 * fand entry point.
 *
 * Installs fand_interrupt for SIGTERM/SIGINT/SIGUSR1, parses the command
 * line (-l/-m/-h fan speeds, -b/-t temperature thresholds, -r report
 * interval, -v verbose), daemonizes, starts the watchdog in persistent mode
 * and then loops forever:
 *
 *   1. read the temperature sensors (counting bad reads),
 *   2. periodically log the readings,
 *   3. call server_shutdown() if a limit is exceeded or too many reads failed,
 *   4. pick a new fan speed (table lookup on Yosemite, low/medium/high state
 *      machine otherwise) and apply it when no fan has failed,
 *   5. sleep, then verify each fan's speed and track failures; any failed
 *      fan forces all fans to fan_max,
 *   6. kick the watchdog.
 *
 * The loop never exits, so the function does not return.
 */
int main(int argc, char **argv) {
  /* Sensor values */

#if defined(CONFIG_WEDGE)
  int intake_temp;
  int exhaust_temp;
  int switch_temp;
  int userver_temp;
#else
  float intake_temp;
  float exhaust_temp;
  float userver_temp;
#endif

  int fan_speed = fan_high;
  int bad_reads = 0;
  int fan_failure = 0;
  int fan_speed_changes = 0;
  int old_speed;

  int fan_bad[FANS];
  int fan;

  unsigned log_count = 0; // How many times have we logged our temps?
  int opt;
  int prev_fans_bad = 0;

  struct sigaction sa;

  sa.sa_handler = fand_interrupt;
  sa.sa_flags = 0;
  sigemptyset(&sa.sa_mask);

  sigaction(SIGTERM, &sa, NULL);
  sigaction(SIGINT, &sa, NULL);
  sigaction(SIGUSR1, &sa, NULL);

  // Start writing to syslog as early as possible for diag purposes.

  openlog("fand", LOG_CONS, LOG_DAEMON);

#if defined(CONFIG_WEDGE) && !defined(CONFIG_WEDGE100)
  if (is_two_fan_board(false)) {
    /* Alternate, two fan configuration */
    total_fans = 2;
    fan_offset = 2; /* fan 3 is the first */

    fan_low = SIXPACK_FAN_LOW;
    fan_medium = SIXPACK_FAN_MEDIUM;
    fan_high = SIXPACK_FAN_HIGH;
    fan_max = SIXPACK_FAN_MAX;
    fan_speed = fan_high;
  }
#endif

  while ((opt = getopt(argc, argv, "l:m:h:b:t:r:v")) != -1) {
    switch (opt) {
    case 'l':
      fan_low = atoi(optarg);
      break;
    case 'm':
      fan_medium = atoi(optarg);
      break;
    case 'h':
      fan_high = atoi(optarg);
      break;
    case 'b':
      temp_bottom = INTERNAL_TEMPS(atoi(optarg));
      break;
    case 't':
      temp_top = INTERNAL_TEMPS(atoi(optarg));
      break;
    case 'r':
      report_temp = atoi(optarg);
      break;
    case 'v':
      verbose = true;
      break;
    default:
      usage();
      break;
    }
  }

  if (optind > argc) {
    usage();
  }

  /* Inconsistent thresholds are only warned about; the daemon still starts. */
  if (temp_bottom > temp_top) {
    fprintf(stderr,
            "Should temp-bottom (%d) be higher than "
            "temp-top (%d)?  Starting anyway.\n",
            EXTERNAL_TEMPS(temp_bottom),
            EXTERNAL_TEMPS(temp_top));
  }

  if (fan_low > fan_medium || fan_low > fan_high || fan_medium > fan_high) {
    fprintf(stderr,
            "fan RPMs not strictly increasing "
            "-- %d, %d, %d, starting anyway\n",
            fan_low,
            fan_medium,
            fan_high);
  }

  daemon(1, 0);

  if (verbose) {
    syslog(LOG_DEBUG, "Starting up;  system should have %d fans.",
           total_fans);
  }

  /* Bring every fan to the initial speed and mark it healthy. */
  for (fan = 0; fan < total_fans; fan++) {
    fan_bad[fan] = 0;
    write_fan_speed(fan + fan_offset, fan_speed);
    write_fan_led(fan + fan_offset, FAN_LED_BLUE);
  }

#if defined(CONFIG_YOSEMITE)
  /* Ensure that we can read from sensors before proceeding. */

  int found = 0;
  userver_temp = 100;
  while (!found) {
    for (int node = 1; node <= TOTAL_1S_SERVERS && !found; node++) {
      if (!yosemite_sensor_read(node, BIC_SENSOR_SOC_THERM_MARGIN,
                               &userver_temp) &&
          userver_temp < 0) {
        syslog(LOG_DEBUG, "SOC_THERM_MARGIN first valid read of %f.",
               userver_temp);
        found = 1;
      }
      sleep(5);
    }
    // XXX:  Will it ever be a problem that we don't exit this until
    //       we see a valid value?
  }
#endif

  /* Start watchdog in manual mode */
  start_watchdog(0);

  /* Set watchdog to persistent mode so timer expiry will happen independent
   * of this process's liveliness. */
  set_persistent_watchdog(WATCHDOG_SET_PERSISTENT);

  sleep(5);  /* Give the fans time to come up to speed */

  while (1) {
    int max_temp;
    old_speed = fan_speed;

    /* Read sensors */

#if defined(CONFIG_WEDGE) || defined(CONFIG_WEDGE100)
    read_temp(INTAKE_TEMP_DEVICE, &intake_temp);
    read_temp(EXHAUST_TEMP_DEVICE, &exhaust_temp);
    read_temp(CHIP_TEMP_DEVICE, &switch_temp);
    read_temp(USERVER_TEMP_DEVICE, &userver_temp);

    /*
     * uServer can be powered down, but all of the rest of the sensors
     * should be readable at any time.
     */

    if ((intake_temp == BAD_TEMP || exhaust_temp == BAD_TEMP ||
         switch_temp == BAD_TEMP)) {
      bad_reads++;
    }
#else
    intake_temp = exhaust_temp = userver_temp = BAD_TEMP;
    if (yosemite_sensor_read(FRU_SPB, SP_SENSOR_INLET_TEMP, &intake_temp) ||
        yosemite_sensor_read(FRU_SPB, SP_SENSOR_OUTLET_TEMP, &exhaust_temp))
      bad_reads++;

    /*
     * There are a number of 1S servers;  any or all of them
     * could be powered off and returning no values.  Ignore these
     * invalid values.
     */
    for (int node = 1; node <= TOTAL_1S_SERVERS; node++) {
      float new_temp;
      if (!yosemite_sensor_read(node, BIC_SENSOR_SOC_THERM_MARGIN,
                                &new_temp)) {
        /* Keep the largest value seen across all nodes. */
        if (userver_temp < new_temp) {
          userver_temp = new_temp;
        }
      }

      // Since the yosemite_sensor_read() times out after 8secs, keep WDT from expiring
      kick_watchdog();
    }
#endif

    /* bad_reads is cumulative; it is never reset inside this loop. */
    if (bad_reads > BAD_READ_THRESHOLD) {
      server_shutdown("Some sensors couldn't be read");
    }

    /*
     * Log the readings once every report_temp iterations.  A non-positive
     * interval (e.g. "-r 0", or non-numeric input that atoi() turns into 0)
     * disables this periodic log instead of dividing by zero.
     */
    if (report_temp > 0 && log_count++ % report_temp == 0) {
      syslog(LOG_DEBUG,
#if defined(CONFIG_WEDGE) || defined(CONFIG_WEDGE100)
             "Temp intake %d, t2 %d, "
             " userver %d, exhaust %d, "
             "fan speed %d, speed changes %d",
#else
             "Temp intake %f, max server %f, exhaust %f, "
             "fan speed %d, speed changes %d",
#endif
             intake_temp,
#if defined(CONFIG_WEDGE) || defined(CONFIG_WEDGE100)
             switch_temp,
#endif
             userver_temp,
             exhaust_temp,
             fan_speed,
             fan_speed_changes);
    }

    /* Protection heuristics */

    if (intake_temp > INTAKE_LIMIT) {
      server_shutdown("Intake temp limit reached");
    }

#if defined(CONFIG_WEDGE) || defined(CONFIG_WEDGE100)
    if (switch_temp > SWITCH_LIMIT) {
      server_shutdown("T2 temp limit reached");
    }
#endif

    if (userver_temp + USERVER_TEMP_FUDGE > USERVER_LIMIT) {
      server_shutdown("uServer temp limit reached");
    }

    /*
     * Calculate change needed -- we should eventually
     * do something more sophisticated, like PID.
     *
     * We should use the intake temperature to adjust this
     * as well.
     */

#if defined(CONFIG_YOSEMITE)
    /* Use tables to lookup the new fan speed for Yosemite. */

    int intake_speed = temp_to_fan_speed(intake_temp, intake_map,
                                         INTAKE_MAP_SIZE);
    int cpu_speed = temp_to_fan_speed(userver_temp, cpu_map, CPU_MAP_SIZE);

    if (fan_speed == fan_max && fan_failure != 0) {
      /* Don't change a thing */
    } else if (intake_speed > cpu_speed) {
      fan_speed = intake_speed;
    } else {
      fan_speed = cpu_speed;
    }
#else
    /* Other systems use a simpler built-in table to determine fan speed. */

    if (switch_temp > userver_temp + USERVER_TEMP_FUDGE) {
      max_temp = switch_temp;
    } else {
      max_temp = userver_temp + USERVER_TEMP_FUDGE;
    }

    /*
     * If recovering from a fan problem, spin down fans gradually in case
     * temperatures are still high. Gradual spin down also reduces wear on
     * the fans.
     */
    if (fan_speed == fan_max) {
      if (fan_failure == 0) {
        fan_speed = fan_high;
      }
    } else if (fan_speed == fan_high) {
      if (max_temp + COOLDOWN_SLOP < temp_top) {
        fan_speed = fan_medium;
      }
    } else if (fan_speed == fan_medium) {
      if (max_temp > temp_top) {
        fan_speed = fan_high;
      } else if (max_temp + COOLDOWN_SLOP < temp_bottom) {
        fan_speed = fan_low;
      }
    } else {/* low */
      if (max_temp > temp_bottom) {
        fan_speed = fan_medium;
      }
    }
#endif

    /*
     * Update fans only if there are no failed ones. If any fans failed
     * earlier, all remaining fans should continue to run at max speed.
     */

    if (fan_failure == 0 && fan_speed != old_speed) {
      syslog(LOG_NOTICE,
             "Fan speed changing from %d to %d",
             old_speed,
             fan_speed);
      fan_speed_changes++;
      for (fan = 0; fan < total_fans; fan++) {
        write_fan_speed(fan + fan_offset, fan_speed);
      }
    }

    /*
     * Wait for some change.  Typical I2C temperature sensors
     * only provide a new value every second and a half, so
     * checking again more quickly than that is a waste.
     *
     * We also have to wait for the fan changes to take effect
     * before measuring them.
     */

    sleep(5);

    /* Check fan RPMs */

    for (fan = 0; fan < total_fans; fan++) {
      /*
       * Make sure that we're within some percentage
       * of the requested speed.
       */
      if (fan_speed_okay(fan + fan_offset, fan_speed, FAN_FAILURE_OFFSET)) {
        /* A fan that was previously counted as failed has come back. */
        if (fan_bad[fan] > FAN_FAILURE_THRESHOLD) {
          write_fan_led(fan + fan_offset, FAN_LED_BLUE);
          syslog(LOG_CRIT,
                 "Fan %d has recovered",
                 fan);
        }
        fan_bad[fan] = 0;
      } else {
        fan_bad[fan]++;
      }
    }

    /* Recount the failed fans from scratch on every iteration. */
    fan_failure = 0;
    for (fan = 0; fan < total_fans; fan++) {
      if (fan_bad[fan] > FAN_FAILURE_THRESHOLD) {
        fan_failure++;
        write_fan_led(fan + fan_offset, FAN_LED_RED);
      }
    }

    if (fan_failure > 0) {
      if (prev_fans_bad != fan_failure) {
        syslog(LOG_CRIT, "%d fans failed", fan_failure);
      }

      /*
       * If fans are bad, we need to blast all of the
       * fans at 100%;  we don't bother to turn off
       * the bad fans, in case they are all that is left.
       *
       * Note that we have a temporary bug with setting fans to
       * 100% so we only do fan_max = 99%.
       */

      fan_speed = fan_max;
      for (fan = 0; fan < total_fans; fan++) {
        write_fan_speed(fan + fan_offset, fan_speed);
      }

#if defined(CONFIG_WEDGE) || defined(CONFIG_WEDGE100)
      /*
       * On Wedge, we want to shut down everything if none of the fans
       * are visible, since there isn't automatic protection to shut
       * off the server or switch chip.  On other platforms, the CPUs
       * generating the heat will automatically turn off, so this is
       * unnecessary.
       */

      if (fan_failure == total_fans) {
        int count = 0;
        for (fan = 0; fan < total_fans; fan++) {
          if (fan_bad[fan] > FAN_SHUTDOWN_THRESHOLD)
            count++;
        }
        if (count == total_fans) {
          server_shutdown("all fans are bad for more than 12 cycles");
        }
      }
#endif

      /*
       * Fans can be hot swapped and replaced; in which case the fan daemon
       * will automatically detect the new fan and (assuming the new fan isn't
       * itself faulty), automatically readjust the speeds for all fans down
       * to a more suitable rpm. The fan daemon does not need to be restarted.
       */
    }

    /* Suppress multiple warnings for similar number of fan failures. */
    prev_fans_bad = fan_failure;

    /* if everything is fine, restart the watchdog countdown. If this process
     * is terminated, the persistent watchdog setting will cause the system
     * to reboot after the watchdog timeout. */
    kick_watchdog();
  }
}
Beispiel #6
0
/*
 * Module init for the OXSEMI thermostat / fan driver.
 *
 * Releases the fan/tacho block from reset, routes the GPIO functions for fan
 * PWM, tacho and temperature, programs the tacho and PWM clock dividers,
 * enables thermistor and fan-speed measurement, sets the initial fan speed,
 * starts the "kfand" monitor thread and registers the misc device and the
 * "therm-fan" proc entry.
 *
 * Returns 0 on success (or when a thermostat already exists), -ENOMEM when
 * the state cannot be allocated, -ENODEV when the tacho divider cannot be
 * read or does not read back as programmed, the kthread_run() error code
 * when the monitor thread cannot be created, or the misc_register() error.
 *
 * Every failure path unwinds what was set up so far: the monitor thread is
 * stopped, the global thermostat pointer is cleared and the state is freed.
 * Failure to create the proc entry is only logged and does not fail init.
 */
static int __init oxsemi_therm_init(void)
{
	struct thermostat *th;
	int rc, ret;

	if (thermostat)
		return 0;

	read_reg(SYS_CTRL_RSTEN_CTRL);

/* release fan/tacho from system reset */
	*((volatile unsigned long *) SYS_CTRL_RSTEN_CLR_CTRL) = (1UL << SYS_CTRL_RSTEN_MISC_BIT);

/* Pull Down the GPIO 29 from the software */
	*((volatile unsigned long *) SYSCTRL_GPIO_PULLUP_CTRL_0) |= TEMP_TACHO_PULLUP_CTRL_VALUE;

	*((volatile unsigned long *) SYS_CTRL_GPIO_PRIMSEL_CTRL_0) &= ~(1UL << QUAD_FUNCTION_ENABLE_FAN_PWM);
	*((volatile unsigned long *) SYS_CTRL_GPIO_PRIMSEL_CTRL_0) |= (1UL << PRIMARY_FUNCTION_ENABLE_FAN_TACHO);
	*((volatile unsigned long *) SYS_CTRL_GPIO_PRIMSEL_CTRL_0) |= (1UL << PRIMARY_FUNCTION_ENABLE_FAN_TEMP);

/* disable secondary use */
	*((volatile unsigned long *) SYS_CTRL_GPIO_SECSEL_CTRL_0) &= ~(1UL << QUAD_FUNCTION_ENABLE_FAN_PWM);

/* disable tertiary use */
	*((volatile unsigned long *) SYS_CTRL_GPIO_TERTSEL_CTRL_0) &= ~(1UL << QUAD_FUNCTION_ENABLE_FAN_PWM);

/* set the fan PWM bit in the PWM select register (NOTE(review): the original
 * comment said "disable quadinary use", but the code sets the bit - confirm) */
	*((volatile unsigned long *) SYS_CTRL_GPIO_PWMSEL_CTRL_0) |= (1UL << QUAD_FUNCTION_ENABLE_FAN_PWM);

	read_reg(SYS_CTRL_RSTEN_CTRL);
	read_reg(SYS_CTRL_GPIO_PRIMSEL_CTRL_0);
	read_reg(SYS_CTRL_GPIO_SECSEL_CTRL_0);
	read_reg(SYS_CTRL_GPIO_TERTSEL_CTRL_0);

	th = kmalloc(sizeof(*th), GFP_KERNEL);
	if (!th)
		return -ENOMEM;

	memset(th, 0, sizeof(*th));
	init_MUTEX( &th->sem );

	rc = read_reg(TACHO_CLOCK_DIVIDER);
	if (rc < 0) {
		printk(KERN_ERR "thermAndFan: Thermostat failed to read config\n");
		ret = -ENODEV;
		goto err_free;
	}

	/* Set the Tacho clock divider up */
	write_reg( TACHO_CLOCK_DIVIDER, TACHO_CORE_TACHO_DIVIDER_VALUE );

	/* check tacho divider set correctly */
	rc = read_reg(TACHO_CLOCK_DIVIDER);
	/* Comparing a 10 bit value to a 32 bit return value */
	if ((rc & TACHO_CORE_TACHO_DIVIDER_VALUE) != TACHO_CORE_TACHO_DIVIDER_VALUE) {
		printk(KERN_ERR "thermAndFan: Set Tacho Divider Value Failed readback:%d\n", rc);
		ret = -ENODEV;
		goto err_free;
	}

	write_reg( PWM_CLOCK_DIVIDER, PWM_CORE_CLK_DIVIDER_VALUE );

	printk(KERN_INFO "thermAndFan: initializing - ox810\n");

#ifdef DEBUG
	DumpTachoRegisters();
#endif

	thermostat = th;

	/* Start the thermister measuring */
	write_reg( TACHO_THERMISTOR_CONTROL, (1 << TACHO_THERMISTOR_CONTROL_THERM_ENABLE) );

	/* Start Speed measuring */
	write_reg( TACHO_FAN_SPEED_CONTROL,
			(1 << (TACHO_FAN_SPEED_CONTROL_PWM_ENABLE_BASE
							+ TACHO_FAN_SPEED_CONTROL_PWM_USED))
				| (1 << TACHO_FAN_SPEED_CONTROL_FAN_COUNT_MODE));

	/* be sure to really write fan speed the first time */
	th->last_speed    = -2;
	th->last_var	  = -80;

	/* Set fan to initial speed */
	write_fan_speed(th, min_fan_speed_ratio);

	thread_therm = kthread_run(monitor_task, th, "kfand");

	/*
	 * kthread_run() reports failure with an ERR_PTR; catch every error
	 * value, not just -ENOMEM, so an error pointer is never kept in
	 * thread_therm (module exit would pass it to kthread_stop()).
	 */
	if (IS_ERR(thread_therm)) {
		ret = PTR_ERR(thread_therm);
		printk(KERN_INFO "thermAndFan: Kthread creation failed\n");
		thread_therm = NULL;
		goto err_unpublish;
	}

	ret = misc_register(&oxsemi_therm_miscdev);
	if (ret < 0)
		goto err_stop_thread;

	proc_oxsemi_therm = create_proc_entry("therm-fan", 0, NULL);
	if (proc_oxsemi_therm) {
		proc_oxsemi_therm->read_proc = oxsemi_therm_read;
	} else {
		printk(KERN_ERR "therm-fan: unable to register /proc/therm\n");
	}

	return 0;

err_stop_thread:
	/* The thread must not outlive a failed module load. */
	kthread_stop(thread_therm);
	thread_therm = NULL;
err_unpublish:
	thermostat = NULL;
err_free:
	kfree(th);
	return ret;
}
Beispiel #7
0
/*
 * mfc-daemon entry point.
 *
 * Parses options, installs handlers for SIGHUP and SIGTERM, opens syslog,
 * probes CPUs and fans, handles the pidfile, optionally daemonizes, puts
 * every fan into manual mode and then loops forever: each cycle it reads the
 * CPU temperature and, when it moved by at least 2 since the last
 * temperature acted upon, recomputes the fan speed and writes it to every
 * fan if it changed.  Manual mode is re-asserted every 9th cycle.
 */
int main(int argc, char * const argv[]){
	struct timespec remaining, interval;
	int fan;
	int manual_ticks;
	int change_number;
	int last_speed;
	int cpu_temp;
	int last_acted_temp;
	int speed;

	/* Parse the command line arguments */
	MFC.fork = TRUE;
	parse_options(argc, argv);

	signal(SIGHUP,Signal_Handler);		/* hangup signal */
	signal(SIGTERM,Signal_Handler);		/* software termination signal from kill */

	openlog("mfc-daemon", LOG_PID, LOG_DAEMON);
	MFC.syslog = TRUE;

	/* check machine and pidfile */
	MFC.total_cpus = check_cpu();
	MFC.total_fans = check_fan();
	check_pidfile();
	write_pidfile();
	MFC.pidfile = TRUE;

	if (MFC.fork) {
		start_daemon();
	}

	/* Fans are numbered from 1; switch each one to manual control. */
	for (fan = 1; fan <= MFC.total_fans; ++fan) {
		write_fan_manual(fan, 1);
	}

	/* Sleep interval between polling cycles. */
	interval.tv_sec = TV_SEC;
	interval.tv_nsec = TV_NSEC;

	/* Loop state. */
	manual_ticks = 0;
	change_number = 0;
	last_speed = -1;

	INFO("Start");

	/* Apply an initial speed derived from the current temperature. */
	cpu_temp = get_cpu_temperature();
	last_acted_temp = 0;
	speed = GET_FAN_SPEED(cpu_temp);
	speed = set_min_max_fan_speed(speed);

	for (fan = 1; fan <= MFC.total_fans; ++fan) {
		write_fan_speed(fan, speed);
	}

	for (;;) {
		/* Re-assert manual mode on every 9th cycle. */
		if (++manual_ticks == 9) {
			for (fan = 1; fan <= MFC.total_fans; ++fan) {
				write_fan_manual(fan, 1);
			}
			manual_ticks = 0;
		}

		cpu_temp = get_cpu_temperature();

		/* Only react once the temperature has moved by 2 or more. */
		if (abs(cpu_temp - last_acted_temp) >= 2) {
			/* original note: temp = average of both cpu's */
			speed = GET_FAN_SPEED(cpu_temp);
			speed = set_min_max_fan_speed(speed);

			/* Skip the writes when the resulting speed is unchanged. */
			if (speed != last_speed) {
				for (fan = 1; fan <= MFC.total_fans; ++fan) {
					write_fan_speed(fan, speed);
				}
				change_number = log_fan_speed(speed, change_number, cpu_temp);
				last_speed = speed;
			}
			last_acted_temp = cpu_temp;
		}

		if (nanosleep(&interval, &remaining) == -1) {
			QUIT_DAEMON("Error nanosleep");
		}
	}
}