/*
 * Signal handler used to take fand down.
 *
 * Every fan is first driven to fan_max, the shutdown is logged together with
 * the name of the signal, and the process exits with status 3.  The watchdog
 * is stopped only when the signal is SIGUSR1; for any other signal it is left
 * untouched.
 */
void fand_interrupt(int sig) {
  int idx = 0;

  /* Leave all fans at maximum speed before going away. */
  while (idx < total_fans) {
    write_fan_speed(idx + fan_offset, fan_max);
    idx++;
  }

  syslog(LOG_WARNING, "Shutting down fand on signal %s", strsignal(sig));

  switch (sig) {
  case SIGUSR1:
    stop_watchdog();
    break;
  default:
    break;
  }

  exit(3);
}
/*
 * Emergency shutdown path.
 *
 * Sets every fan to fan_max, logs the reason at LOG_EMERG, performs the
 * platform-specific power-down sequence selected at compile time, stops the
 * watchdog and exits the process with status 2.
 *
 * why: human-readable reason, included in the log message.
 *
 * Although declared to return int, this function never returns: it always
 * ends in exit(2).
 */
int server_shutdown(const char *why) {
  int fan;

  for (fan = 0; fan < total_fans; fan++) {
    write_fan_speed(fan + fan_offset, fan_max);
  }

  syslog(LOG_EMERG, "Shutting down: %s", why);

  /*
   * Exactly one of the three branches below is compiled in.  The Wedge100
   * branch is part of the same #if/#elif chain so that a Wedge100 build does
   * not additionally fall into the "not implemented" branch and log a
   * misleading message after it has already cut power.
   */
#if defined(CONFIG_WEDGE100)
  write_device(USERVER_POWER, "0");
  sleep(5);
  write_device(MAIN_POWER, "0");
#elif defined(CONFIG_WEDGE)
  write_device(GPIO_USERVER_POWER_DIRECTION, "out");
  write_device(GPIO_USERVER_POWER, "0");
  /*
   * Putting T2 in reset generates a non-maskable interrupt to uS,
   * the kernel running on uS might panic depending on its version.
   * sleep 5s here to make sure uS is completely down.
   */
  sleep(5);

  if (write_device(GPIO_T2_POWER_DIRECTION, "out") ||
      write_device(GPIO_T2_POWER, "1")) {
    /*
     * We're here because something has gone badly wrong.  If we
     * didn't manage to shut down the T2, cut power to the whole box,
     * using the PMBus OPERATION register.  This will require a power
     * cycle (removal of both power inputs) to recover.
     */
    syslog(LOG_EMERG, "T2 power off failed; turning off via ADM1278");
    system("rmmod adm1275");
    system("i2cset -y 12 0x10 0x01 00");
  }
#else
  // TODO(7088822): try throttling, then shutting down server.
  syslog(LOG_EMERG, "Need to implement actual shutdown!\n");
#endif

  /*
   * We have to stop the watchdog, or the system will be automatically
   * rebooted some seconds after fand exits (and stops kicking the
   * watchdog).
   */
  stop_watchdog();

  sleep(2);
  exit(2);
}
/* * Use fuzzy logic type approach to creating the new fan speed. * if count < cold_limit fan should be off. * if count > hot_limit fan should be full on. * if count between limits set proportionally to base speed + proportional element. */ static void update_fan_speed(struct thermostat *th) { int var = th->temps; /* remember that var = 1/T ie smaller var higher temperature and faster fan speed needed */ if (abs(var - th->last_var) >= MIN_TEMP_COUNT_CHANGE) { int new_speed; if(var < cold_limit){ if (var < hot_limit) { th->last_var = var; /* too hot for proportional control */ new_speed = OXSEMI_FAN_SPEED_RATIO_MAX; } else { /* fan speed it the user selected starting value for the fan * so scale operatation from nominal at cold limit to max at hot limit. */ new_speed = OXSEMI_FAN_SPEED_RATIO_MAX - (OXSEMI_FAN_SPEED_RATIO_MAX - min_fan_speed_ratio) * (var - hot_limit)/(cold_limit - hot_limit); if (th->set_speed == 0 ) th->set_speed = min_fan_speed_ratio; if ((new_speed - th->set_speed) > MAX_FAN_RATIO_CHANGE) new_speed = th->set_speed + MAX_FAN_RATIO_CHANGE; else if ((new_speed - th->set_speed) < -MAX_FAN_RATIO_CHANGE) new_speed = th->set_speed - MAX_FAN_RATIO_CHANGE; else th->last_var = var; } } else { th->last_var = var; /* var greater than low limit - too cold for fan. */ new_speed = OXSEMI_FAN_SPEED_RATIO_MIN; } write_fan_speed(th, new_speed); th->set_speed = new_speed; } }
/*
 * Module exit: stop the monitoring thread (if there is one), write a fan
 * speed of 0, remove the "therm-fan" proc entry, deregister the misc device
 * and release the thermostat state.
 */
static void __exit oxsemi_therm_exit(void)
{
	if (thread_therm != NULL)
		kthread_stop(thread_therm);

	/*
	 * Stop the fan so that it does not keep running.
	 * SYS_CTRL_RSTEN_MISC_BIT is deliberately left alone because other
	 * modules may use it.
	 */
	write_fan_speed(thermostat, 0);

	remove_proc_entry("therm-fan", NULL);
	misc_deregister(&oxsemi_therm_miscdev);

	kfree(thermostat);
	thermostat = NULL;
}
int main(int argc, char **argv) { /* Sensor values */ #if defined(CONFIG_WEDGE) int intake_temp; int exhaust_temp; int switch_temp; int userver_temp; #else float intake_temp; float exhaust_temp; float userver_temp; #endif int fan_speed = fan_high; int bad_reads = 0; int fan_failure = 0; int fan_speed_changes = 0; int old_speed; int fan_bad[FANS]; int fan; unsigned log_count = 0; // How many times have we logged our temps? int opt; int prev_fans_bad = 0; struct sigaction sa; sa.sa_handler = fand_interrupt; sa.sa_flags = 0; sigemptyset(&sa.sa_mask); sigaction(SIGTERM, &sa, NULL); sigaction(SIGINT, &sa, NULL); sigaction(SIGUSR1, &sa, NULL); // Start writing to syslog as early as possible for diag purposes. openlog("fand", LOG_CONS, LOG_DAEMON); #if defined(CONFIG_WEDGE) && !defined(CONFIG_WEDGE100) if (is_two_fan_board(false)) { /* Alternate, two fan configuration */ total_fans = 2; fan_offset = 2; /* fan 3 is the first */ fan_low = SIXPACK_FAN_LOW; fan_medium = SIXPACK_FAN_MEDIUM; fan_high = SIXPACK_FAN_HIGH; fan_max = SIXPACK_FAN_MAX; fan_speed = fan_high; } #endif while ((opt = getopt(argc, argv, "l:m:h:b:t:r:v")) != -1) { switch (opt) { case 'l': fan_low = atoi(optarg); break; case 'm': fan_medium = atoi(optarg); break; case 'h': fan_high = atoi(optarg); break; case 'b': temp_bottom = INTERNAL_TEMPS(atoi(optarg)); break; case 't': temp_top = INTERNAL_TEMPS(atoi(optarg)); break; case 'r': report_temp = atoi(optarg); break; case 'v': verbose = true; break; default: usage(); break; } } if (optind > argc) { usage(); } if (temp_bottom > temp_top) { fprintf(stderr, "Should temp-bottom (%d) be higher than " "temp-top (%d)? 
Starting anyway.\n", EXTERNAL_TEMPS(temp_bottom), EXTERNAL_TEMPS(temp_top)); } if (fan_low > fan_medium || fan_low > fan_high || fan_medium > fan_high) { fprintf(stderr, "fan RPMs not strictly increasing " "-- %d, %d, %d, starting anyway\n", fan_low, fan_medium, fan_high); } daemon(1, 0); if (verbose) { syslog(LOG_DEBUG, "Starting up; system should have %d fans.", total_fans); } for (fan = 0; fan < total_fans; fan++) { fan_bad[fan] = 0; write_fan_speed(fan + fan_offset, fan_speed); write_fan_led(fan + fan_offset, FAN_LED_BLUE); } #if defined(CONFIG_YOSEMITE) /* Ensure that we can read from sensors before proceeding. */ int found = 0; userver_temp = 100; while (!found) { for (int node = 1; node <= TOTAL_1S_SERVERS && !found; node++) { if (!yosemite_sensor_read(node, BIC_SENSOR_SOC_THERM_MARGIN, &userver_temp) && userver_temp < 0) { syslog(LOG_DEBUG, "SOC_THERM_MARGIN first valid read of %f.", userver_temp); found = 1; } sleep(5); } // XXX: Will it ever be a problem that we don't exit this until // we see a valid value? } #endif /* Start watchdog in manual mode */ start_watchdog(0); /* Set watchdog to persistent mode so timer expiry will happen independent * of this process's liveliness. */ set_persistent_watchdog(WATCHDOG_SET_PERSISTENT); sleep(5); /* Give the fans time to come up to speed */ while (1) { int max_temp; old_speed = fan_speed; /* Read sensors */ #if defined(CONFIG_WEDGE) || defined(CONFIG_WEDGE100) read_temp(INTAKE_TEMP_DEVICE, &intake_temp); read_temp(EXHAUST_TEMP_DEVICE, &exhaust_temp); read_temp(CHIP_TEMP_DEVICE, &switch_temp); read_temp(USERVER_TEMP_DEVICE, &userver_temp); /* * uServer can be powered down, but all of the rest of the sensors * should be readable at any time. 
*/ if ((intake_temp == BAD_TEMP || exhaust_temp == BAD_TEMP || switch_temp == BAD_TEMP)) { bad_reads++; } #else intake_temp = exhaust_temp = userver_temp = BAD_TEMP; if (yosemite_sensor_read(FRU_SPB, SP_SENSOR_INLET_TEMP, &intake_temp) || yosemite_sensor_read(FRU_SPB, SP_SENSOR_OUTLET_TEMP, &exhaust_temp)) bad_reads++; /* * There are a number of 1S servers; any or all of them * could be powered off and returning no values. Ignore these * invalid values. */ for (int node = 1; node <= TOTAL_1S_SERVERS; node++) { float new_temp; if (!yosemite_sensor_read(node, BIC_SENSOR_SOC_THERM_MARGIN, &new_temp)) { if (userver_temp < new_temp) { userver_temp = new_temp; } } // Since the yosemite_sensor_read() times out after 8secs, keep WDT from expiring kick_watchdog(); } #endif if (bad_reads > BAD_READ_THRESHOLD) { server_shutdown("Some sensors couldn't be read"); } if (log_count++ % report_temp == 0) { syslog(LOG_DEBUG, #if defined(CONFIG_WEDGE) || defined(CONFIG_WEDGE100) "Temp intake %d, t2 %d, " " userver %d, exhaust %d, " "fan speed %d, speed changes %d", #else "Temp intake %f, max server %f, exhaust %f, " "fan speed %d, speed changes %d", #endif intake_temp, #if defined(CONFIG_WEDGE) || defined(CONFIG_WEDGE100) switch_temp, #endif userver_temp, exhaust_temp, fan_speed, fan_speed_changes); } /* Protection heuristics */ if (intake_temp > INTAKE_LIMIT) { server_shutdown("Intake temp limit reached"); } #if defined(CONFIG_WEDGE) || defined(CONFIG_WEDGE100) if (switch_temp > SWITCH_LIMIT) { server_shutdown("T2 temp limit reached"); } #endif if (userver_temp + USERVER_TEMP_FUDGE > USERVER_LIMIT) { server_shutdown("uServer temp limit reached"); } /* * Calculate change needed -- we should eventually * do something more sophisticated, like PID. * * We should use the intake temperature to adjust this * as well. */ #if defined(CONFIG_YOSEMITE) /* Use tables to lookup the new fan speed for Yosemite. 
*/ int intake_speed = temp_to_fan_speed(intake_temp, intake_map, INTAKE_MAP_SIZE); int cpu_speed = temp_to_fan_speed(userver_temp, cpu_map, CPU_MAP_SIZE); if (fan_speed == fan_max && fan_failure != 0) { /* Don't change a thing */ } else if (intake_speed > cpu_speed) { fan_speed = intake_speed; } else { fan_speed = cpu_speed; } #else /* Other systems use a simpler built-in table to determine fan speed. */ if (switch_temp > userver_temp + USERVER_TEMP_FUDGE) { max_temp = switch_temp; } else { max_temp = userver_temp + USERVER_TEMP_FUDGE; } /* * If recovering from a fan problem, spin down fans gradually in case * temperatures are still high. Gradual spin down also reduces wear on * the fans. */ if (fan_speed == fan_max) { if (fan_failure == 0) { fan_speed = fan_high; } } else if (fan_speed == fan_high) { if (max_temp + COOLDOWN_SLOP < temp_top) { fan_speed = fan_medium; } } else if (fan_speed == fan_medium) { if (max_temp > temp_top) { fan_speed = fan_high; } else if (max_temp + COOLDOWN_SLOP < temp_bottom) { fan_speed = fan_low; } } else {/* low */ if (max_temp > temp_bottom) { fan_speed = fan_medium; } } #endif /* * Update fans only if there are no failed ones. If any fans failed * earlier, all remaining fans should continue to run at max speed. */ if (fan_failure == 0 && fan_speed != old_speed) { syslog(LOG_NOTICE, "Fan speed changing from %d to %d", old_speed, fan_speed); fan_speed_changes++; for (fan = 0; fan < total_fans; fan++) { write_fan_speed(fan + fan_offset, fan_speed); } } /* * Wait for some change. Typical I2C temperature sensors * only provide a new value every second and a half, so * checking again more quickly than that is a waste. * * We also have to wait for the fan changes to take effect * before measuring them. */ sleep(5); /* Check fan RPMs */ for (fan = 0; fan < total_fans; fan++) { /* * Make sure that we're within some percentage * of the requested speed. 
*/ if (fan_speed_okay(fan + fan_offset, fan_speed, FAN_FAILURE_OFFSET)) { if (fan_bad[fan] > FAN_FAILURE_THRESHOLD) { write_fan_led(fan + fan_offset, FAN_LED_BLUE); syslog(LOG_CRIT, "Fan %d has recovered", fan); } fan_bad[fan] = 0; } else { fan_bad[fan]++; } } fan_failure = 0; for (fan = 0; fan < total_fans; fan++) { if (fan_bad[fan] > FAN_FAILURE_THRESHOLD) { fan_failure++; write_fan_led(fan + fan_offset, FAN_LED_RED); } } if (fan_failure > 0) { if (prev_fans_bad != fan_failure) { syslog(LOG_CRIT, "%d fans failed", fan_failure); } /* * If fans are bad, we need to blast all of the * fans at 100%; we don't bother to turn off * the bad fans, in case they are all that is left. * * Note that we have a temporary bug with setting fans to * 100% so we only do fan_max = 99%. */ fan_speed = fan_max; for (fan = 0; fan < total_fans; fan++) { write_fan_speed(fan + fan_offset, fan_speed); } #if defined(CONFIG_WEDGE) || defined(CONFIG_WEDGE100) /* * On Wedge, we want to shut down everything if none of the fans * are visible, since there isn't automatic protection to shut * off the server or switch chip. On other platforms, the CPUs * generating the heat will automatically turn off, so this is * unnecessary. */ if (fan_failure == total_fans) { int count = 0; for (fan = 0; fan < total_fans; fan++) { if (fan_bad[fan] > FAN_SHUTDOWN_THRESHOLD) count++; } if (count == total_fans) { server_shutdown("all fans are bad for more than 12 cycles"); } } #endif /* * Fans can be hot swapped and replaced; in which case the fan daemon * will automatically detect the new fan and (assuming the new fan isn't * itself faulty), automatically readjust the speeds for all fans down * to a more suitable rpm. The fan daemon does not need to be restarted. */ } /* Suppress multiple warnings for similar number of fan failures. */ prev_fans_bad = fan_failure; /* if everything is fine, restart the watchdog countdown. 
If this process * is terminated, the persistent watchdog setting will cause the system * to reboot after the watchdog timeout. */ kick_watchdog(); } }
/*
 * Module init for the Oxsemi thermistor/fan driver.
 *
 * Releases the fan/tacho block from reset, routes the fan PWM, tacho and
 * temperature pins, programs the tacho and PWM clock dividers, enables
 * thermistor and fan-speed measurement, writes the initial fan speed, starts
 * the "kfand" monitoring thread and finally registers the misc device and
 * the "therm-fan" proc entry.
 *
 * Returns 0 on success (or when the driver is already initialized),
 * -ENOMEM if the thermostat state cannot be allocated, -ENODEV if the tacho
 * divider cannot be read or does not read back as written, or the error
 * reported by kthread_run()/misc_register().  On every failure after the
 * allocation the thermostat state is freed again, the global pointer is
 * cleared, and a thread that was already started is stopped.  A failure to
 * create the proc entry is only logged.
 */
static int __init oxsemi_therm_init(void)
{
	struct thermostat *th;
	int rc, ret;

	if (thermostat)
		return 0;

	read_reg(SYS_CTRL_RSTEN_CTRL);

	/* release fan/tacho from system reset */
	*((volatile unsigned long *) SYS_CTRL_RSTEN_CLR_CTRL) =
		(1UL << SYS_CTRL_RSTEN_MISC_BIT);

	/* Pull Down the GPIO 29 from the software */
	*((volatile unsigned long *) SYSCTRL_GPIO_PULLUP_CTRL_0) |=
		TEMP_TACHO_PULLUP_CTRL_VALUE;

	/* primary function: clear the fan PWM bit, set fan tacho and temp */
	*((volatile unsigned long *) SYS_CTRL_GPIO_PRIMSEL_CTRL_0) &=
		~(1UL << QUAD_FUNCTION_ENABLE_FAN_PWM);
	*((volatile unsigned long *) SYS_CTRL_GPIO_PRIMSEL_CTRL_0) |=
		(1UL << PRIMARY_FUNCTION_ENABLE_FAN_TACHO);
	*((volatile unsigned long *) SYS_CTRL_GPIO_PRIMSEL_CTRL_0) |=
		(1UL << PRIMARY_FUNCTION_ENABLE_FAN_TEMP);

	/* disable secondary use */
	*((volatile unsigned long *) SYS_CTRL_GPIO_SECSEL_CTRL_0) &=
		~(1UL << QUAD_FUNCTION_ENABLE_FAN_PWM);

	/* disable tertiary use */
	*((volatile unsigned long *) SYS_CTRL_GPIO_TERTSEL_CTRL_0) &=
		~(1UL << QUAD_FUNCTION_ENABLE_FAN_PWM);

	/*
	 * Set (not clear) the fan PWM bit in the PWM select register.
	 * NOTE(review): presumably this routes the pin to the PWM function -
	 * confirm against the datasheet.
	 */
	*((volatile unsigned long *) SYS_CTRL_GPIO_PWMSEL_CTRL_0) |=
		(1UL << QUAD_FUNCTION_ENABLE_FAN_PWM);

	read_reg(SYS_CTRL_RSTEN_CTRL);
	read_reg(SYS_CTRL_GPIO_PRIMSEL_CTRL_0);
	read_reg(SYS_CTRL_GPIO_SECSEL_CTRL_0);
	read_reg(SYS_CTRL_GPIO_TERTSEL_CTRL_0);

	/* Zeroed allocation replaces the former kmalloc() + memset() pair. */
	th = kzalloc(sizeof(*th), GFP_KERNEL);
	if (!th)
		return -ENOMEM;

	init_MUTEX(&th->sem);

	rc = read_reg(TACHO_CLOCK_DIVIDER);
	if (rc < 0) {
		printk(KERN_ERR "thermAndFan: Thermostat failed to read config\n");
		ret = -ENODEV;
		goto err_free;
	}

	/* Set the Tacho clock divider up */
	write_reg(TACHO_CLOCK_DIVIDER, TACHO_CORE_TACHO_DIVIDER_VALUE);

	/* check tacho divider set correctly */
	rc = read_reg(TACHO_CLOCK_DIVIDER);

	/* Comparing a 10 bit value to a 32 bit return value */
	if ((rc & TACHO_CORE_TACHO_DIVIDER_VALUE) != TACHO_CORE_TACHO_DIVIDER_VALUE) {
		printk(KERN_ERR "thermAndFan: Set Tacho Divider Value Failed readback:%d\n", rc);
		ret = -ENODEV;
		goto err_free;
	}

	write_reg(PWM_CLOCK_DIVIDER, PWM_CORE_CLK_DIVIDER_VALUE);

	printk(KERN_INFO "thermAndFan: initializing - ox810\n");

#ifdef DEBUG
	DumpTachoRegisters();
#endif

	thermostat = th;

	/* Start the thermister measuring */
	write_reg(TACHO_THERMISTOR_CONTROL,
		  (1 << TACHO_THERMISTOR_CONTROL_THERM_ENABLE));

	/* Start Speed measuring */
	write_reg(TACHO_FAN_SPEED_CONTROL,
		  (1 << (TACHO_FAN_SPEED_CONTROL_PWM_ENABLE_BASE
			 + TACHO_FAN_SPEED_CONTROL_PWM_USED))
		  | (1 << TACHO_FAN_SPEED_CONTROL_FAN_COUNT_MODE));

	/* be sure to really write fan speed the first time */
	th->last_speed = -2;
	th->last_var = -80;

	/* Set fan to initial speed */
	write_fan_speed(th, min_fan_speed_ratio);

	/*
	 * kthread_run() reports failure through an ERR_PTR() value, which is
	 * not necessarily -ENOMEM, so test with IS_ERR() instead of comparing
	 * against a single error code.
	 */
	thread_therm = kthread_run(monitor_task, th, "kfand");
	if (IS_ERR(thread_therm)) {
		ret = PTR_ERR(thread_therm);
		printk(KERN_INFO "thermAndFan: Kthread creation failed\n");
		thread_therm = NULL;
		goto err_unpublish;
	}

	ret = misc_register(&oxsemi_therm_miscdev);
	if (ret < 0)
		goto err_stop_thread;

	proc_oxsemi_therm = create_proc_entry("therm-fan", 0, NULL);
	if (proc_oxsemi_therm) {
		proc_oxsemi_therm->read_proc = oxsemi_therm_read;
	} else {
		printk(KERN_ERR "therm-fan: unable to register /proc/therm\n");
	}

	return 0;

err_stop_thread:
	/* The monitor thread uses th, so stop it before th is freed. */
	kthread_stop(thread_therm);
	thread_therm = NULL;
err_unpublish:
	thermostat = NULL;
err_free:
	kfree(th);
	return ret;
}
int main(int argc, char * const argv[]){ /* Parse the command line arguments */ MFC.fork = TRUE; parse_options(argc, argv); signal(SIGHUP,Signal_Handler); /* hangup signal */ signal(SIGTERM,Signal_Handler); /* software termination signal from kill */ struct timespec timx,tim1; openlog("mfc-daemon", LOG_PID, LOG_DAEMON); MFC.syslog = TRUE; /* check machine and pidfile*/ MFC.total_cpus = check_cpu(); MFC.total_fans = check_fan(); check_pidfile(); write_pidfile(); MFC.pidfile = TRUE; if (MFC.fork) { start_daemon(); } int fan; for (fan = 1; fan <= MFC.total_fans; ++fan) { write_fan_manual(fan, 1); } tim1.tv_sec = TV_SEC; tim1.tv_nsec = TV_NSEC; //init int wr_manual=0; int change_number=0; int old_fan_speed=-1; INFO("Start"); int temp = get_cpu_temperature(); int old_temp_change = 0; int fan_speed=GET_FAN_SPEED(temp); fan_speed=set_min_max_fan_speed(fan_speed); for (fan = 1; fan <= MFC.total_fans; ++fan) { write_fan_speed(fan, fan_speed); } while (1){ wr_manual++; if (wr_manual==9){ for (fan = 1; fan <= MFC.total_fans; ++fan) { write_fan_manual(fan, 1); } wr_manual=0; } temp = get_cpu_temperature(); int diff = abs(temp - old_temp_change); if (diff >= 2){ // temp = average of both cpu's fan_speed=GET_FAN_SPEED(temp); fan_speed=set_min_max_fan_speed(fan_speed); if (fan_speed!=old_fan_speed){ for (fan = 1; fan <= MFC.total_fans; ++fan) { write_fan_speed(fan, fan_speed); } change_number=log_fan_speed(fan_speed,change_number,temp); old_fan_speed=fan_speed; } old_temp_change = temp; } if (nanosleep(&tim1,&timx) == -1){ QUIT_DAEMON("Error nanosleep"); } } }