void u8g_com_arduino_fast_parallel_write(u8g_t *u8g, uint8_t val)
{
  U8G_ATOMIC_START();
  u8g_com_arduino_fast_write_data_pin( 0, val&1 );
  val >>= 1;
  u8g_com_arduino_fast_write_data_pin( 1, val&1 );
  val >>= 1;
  u8g_com_arduino_fast_write_data_pin( 2, val&1 );
  val >>= 1;
  u8g_com_arduino_fast_write_data_pin( 3, val&1 );
  val >>= 1;

  u8g_com_arduino_fast_write_data_pin( 4, val&1 );
  val >>= 1;
  u8g_com_arduino_fast_write_data_pin( 5, val&1 );
  val >>= 1;
  u8g_com_arduino_fast_write_data_pin( 6, val&1 );
  val >>= 1;
  u8g_com_arduino_fast_write_data_pin( 7, val&1 );
  val >>= 1;
  U8G_ATOMIC_END();
  
  /* EN cycle time must be 1 micro second */
  u8g_com_arduino_digital_write(u8g, U8G_PI_EN, HIGH);
  u8g_MicroDelay(); /* delay by 1000ns, reference: ST7920: 140ns, SBN1661: 100ns */
  u8g_com_arduino_digital_write(u8g, U8G_PI_EN, LOW);
  u8g_10MicroDelay(); /* ST7920 commands: 72us */
  u8g_10MicroDelay(); /* ST7920 commands: 72us */
}
void u8g_com_arduino_fast_parallel_write(u8g_t *u8g, uint8_t val)
{
  u8g_com_arduino_fast_write_data_pin( 0, val&1 );
  val >>= 1;
  u8g_com_arduino_fast_write_data_pin( 1, val&1 );
  val >>= 1;
  u8g_com_arduino_fast_write_data_pin( 2, val&1 );
  val >>= 1;
  u8g_com_arduino_fast_write_data_pin( 3, val&1 );
  val >>= 1;

  u8g_com_arduino_fast_write_data_pin( 4, val&1 );
  val >>= 1;
  u8g_com_arduino_fast_write_data_pin( 5, val&1 );
  val >>= 1;
  u8g_com_arduino_fast_write_data_pin( 6, val&1 );
  val >>= 1;
  u8g_com_arduino_fast_write_data_pin( 7, val&1 );
  val >>= 1;
  
  /* EN cycle time must be 1 micro second, digitalWrite is slow enough to do this */
  u8g_com_arduino_digital_write(u8g, U8G_PI_EN, HIGH);
  u8g_com_arduino_digital_write(u8g, U8G_PI_EN, LOW);
}