/*---------------------------------------------------------------------------*/
/*
 * Resolve the UDP peer's address into *ipaddr (and, when DNS-SD support is
 * compiled in, the service's port into *port).
 *
 * If UDP_CONNECTION_ADDR parses as a numeric IP address it is used directly
 * and RESOLV_STATUS_CACHED is returned.  Otherwise it is treated as a
 * hostname/service name: a cached lookup result is used when available, or a
 * new (m)DNS query is started and RESOLV_STATUS_RESOLVING is returned so the
 * caller can retry later.
 */
static resolv_status_t
set_connection_address(uip_ipaddr_t *ipaddr, int *port)
{
#ifndef UDP_CONNECTION_ADDR
#if RESOLV_CONF_SUPPORTS_MDNS && RESOLV_CONF_SUPPORTS_DNS_SD
#define UDP_CONNECTION_ADDR _server._udp.local
#elif RESOLV_CONF_SUPPORTS_MDNS
#define UDP_CONNECTION_ADDR cus.local
#elif UIP_CONF_ROUTER
#define UDP_CONNECTION_ADDR fd00:0:0:0:0212:7404:0004:0404
#else
#define UDP_CONNECTION_ADDR fe80:0:0:0:6466:6666:6666:6666
#endif
#endif /* !UDP_CONNECTION_ADDR */

/* Stringify the (possibly macro-valued) address so it can be parsed. */
#define _QUOTEME(x) #x
#define QUOTEME(x) _QUOTEME(x)

  resolv_status_t status = RESOLV_STATUS_ERROR;

  if(uiplib_ipaddrconv(QUOTEME(UDP_CONNECTION_ADDR), ipaddr) == 0) {
    /*
     * We are looking for a hostname and not an IP address.
     */
    uip_ipaddr_t *resolved_addr = NULL;
#if RESOLV_CONF_SUPPORTS_MDNS && RESOLV_CONF_SUPPORTS_DNS_SD
    status = resolv_service_lookup(QUOTEME(UDP_CONNECTION_ADDR), &resolved_addr, port);
#else
    status = resolv_lookup(QUOTEME(UDP_CONNECTION_ADDR), &resolved_addr);
#endif /* RESOLV_CONF_SUPPORTS_MDNS && RESOLV_CONF_SUPPORTS_DNS_SD */
    if(status == RESOLV_STATUS_UNCACHED || status == RESOLV_STATUS_EXPIRED) {
      /* No usable cached answer: kick off a fresh query. */
      PRINTF("Attempting to look up %s\n", QUOTEME(UDP_CONNECTION_ADDR));
#if RESOLV_CONF_SUPPORTS_MDNS && RESOLV_CONF_SUPPORTS_DNS_SD
      resolv_query_service(QUOTEME(UDP_CONNECTION_ADDR));
#else
      resolv_query(QUOTEME(UDP_CONNECTION_ADDR));
#endif /* RESOLV_CONF_SUPPORTS_MDNS && RESOLV_CONF_SUPPORTS_DNS_SD */
      status = RESOLV_STATUS_RESOLVING;
    } else if(status == RESOLV_STATUS_CACHED && resolved_addr != NULL
#if RESOLV_CONF_SUPPORTS_MDNS && RESOLV_CONF_SUPPORTS_DNS_SD
              && port != NULL
#endif /* RESOLV_CONF_SUPPORTS_MDNS && RESOLV_CONF_SUPPORTS_DNS_SD */
              ) {
      PRINTF("Lookup of \"%s\" succeded!\n", QUOTEME(UDP_CONNECTION_ADDR));
    } else if(status == RESOLV_STATUS_RESOLVING) {
      PRINTF("Still looking up \"%s\"...\n", QUOTEME(UDP_CONNECTION_ADDR));
    } else {
      /* BUG FIX: this format string was broken in two by a raw newline
       * inside the literal (invalid C); rejoined into a single literal. */
      PRINTF("Lookup of \"%s\" failed. status = %d\n", QUOTEME(UDP_CONNECTION_ADDR), status);
    }
    if(resolved_addr) {
      uip_ipaddr_copy(ipaddr, resolved_addr);
    }
  } else {
    /* Numeric address parsed directly: nothing to resolve. */
    status = RESOLV_STATUS_CACHED;
  }
  return status;
}
/*---------------------------------------------------------------------------*/
/*
 * Fill in *ipaddr with the peer's address.  When UDP_CONNECTION_ADDR is
 * defined at build time it is stringified and parsed; otherwise a fixed
 * link-local (fe80::/64) address is used.
 */
static void
set_connection_address(uip_ipaddr_t *ipaddr)
{
#define _QUOTEME(x) #x
#define QUOTEME(x) _QUOTEME(x)
#ifdef UDP_CONNECTION_ADDR
  /* Compile-time override: stringify the macro value and parse it. */
  if(!uiplib_ipaddrconv(QUOTEME(UDP_CONNECTION_ADDR), ipaddr)) {
    PRINTF("UDP client failed to parse address '%s'\n",
           QUOTEME(UDP_CONNECTION_ADDR));
  }
#else
  /* Default: hard-coded link-local server address. */
  uip_ip6addr(ipaddr,
              0xfe80, 0x0000, 0x0000, 0x0000,
              0x6500, 0x0012, 0x91c3, 0x2501);
#endif /* UDP_CONNECTION_ADDR */
}
/*---------------------------------------------------------------------------*/
/*
 * Fill in *ipaddr with the peer's address.  Precedence: an explicit
 * UDP_CONNECTION_ADDR build-time define, else a site-local address when
 * built as a router, else a fixed link-local address.
 */
static void
set_connection_address(uip_ipaddr_t *ipaddr)
{
#define _QUOTEME(x) #x
#define QUOTEME(x) _QUOTEME(x)
#ifdef UDP_CONNECTION_ADDR
  /* Compile-time override: stringify the macro value and parse it. */
  if(!uiplib_ipaddrconv(QUOTEME(UDP_CONNECTION_ADDR), ipaddr)) {
    PRINTF("UDP client failed to parse address '%s'\n",
           QUOTEME(UDP_CONNECTION_ADDR));
  }
#elif UIP_CONF_ROUTER
  /* Router build: routable (aaaa::/64) server address. */
  uip_ip6addr(ipaddr, 0xaaaa, 0, 0, 0, 0x0212, 0x7404, 0x0004, 0x0404);
#else
  /* Non-router build: link-local (fe80::/64) server address. */
  uip_ip6addr(ipaddr, 0xfe80, 0, 0, 0, 0x6466, 0x6666, 0x6666, 0x6666);
#endif /* UDP_CONNECTION_ADDR */
}
/*
 * Emit a standalone C source file implementing lookup in a BST stored in
 * Ahnentafel (implicit, breadth-first array) layout: node i's children sit
 * at indices 2i+1 and 2i+2, so the search needs no pointers.
 *
 * bs:     the prepared tree (array `bst` of `bst_size` items) to embed.
 * stream: destination for the generated source.
 *
 * The emitted code returns a pointer to the matching array slot, or 0 when
 * the item is absent (N == 0 is the "empty slot" sentinel).
 */
void ahnentafel_generate_c(const struct ahnentafel_t* bs, FILE* stream)
{
  /* Header: file prologue, item type, helper macros, start of the array. */
  fputs(
      GENERATED_FILE_PROLOGUE
      "#define item_t " QUOTEME(item_t) "\n"
      "#define ARRLEN(a) (sizeof(a)/sizeof(*a))\n"
      "#define N 0\n"
      "static const item_t array[] =" S_EOL
      "{\n", stream);
  /* Dump the tree contents as the array's initializers. */
  CALL_PRINT_ARRAY(item_t, stream, bs->bst, bs->bst_size);
  /* Close the array and emit the iterative binary-search routine. */
  fputs(S_EOL "};\n"
      FUNCTION_DEFINITION S_EOL
      "{" S_EOL
      "int i = 0;" S_EOL
      "if (item == N)" S_EOL
      "return 0;" S_EOL
      "do" S_EOL
      "{" S_EOL
      "if (array[i] == item)" S_EOL
      "return &array[i];" S_EOL
      "if (array[i] > item)" S_EOL
      "i = 2 * i + 1;" S_EOL
      "else" S_EOL
      "i = 2 * i + 2;" S_EOL
      "}" S_EOL
      "while (i < (int)ARRLEN(array));" S_EOL
      "return 0;" S_EOL
      "}" S_EOL, stream);
}
/*
 * Return the library's version string, as baked in via the VERSION
 * build-time define.  When the build did not define VERSION, log a warning
 * through print_error() and return the empty string instead.
 */
char* pacparser_version(void)
{
#ifdef VERSION
  return QUOTEME(VERSION);
#else
  print_error("WARNING: VERSION not defined.");
  return "";
#endif
}
/*
 * Emit a standalone C source file implementing lookup in a hopscotch hash
 * table.  The table has 2^bits_used primary buckets plus an overflow of
 * neighborhood_size-1 slots, so a probe never wraps.
 *
 * h:      the built table (buckets, hash mask, neighborhood size) to embed.
 * stream: destination for the generated source.
 *
 * The emitted lookup hashes the key with the embedded mask expression and
 * then probes exactly neighborhood_size consecutive slots (unrolled via the
 * CHECK() macro), returning a pointer to the match or 0.
 */
void hopscotch_generate_c(const struct hopscotch_t* h, FILE* stream)
{
  int num_buckets = 1 << h->bits_used;
  /* Header: file prologue, item type, start of the bucket array. */
  fputs(
      GENERATED_FILE_PROLOGUE
      "#define item_t " QUOTEME(item_t) "\n"
      "static const item_t array[] =" S_EOL
      "{\n", stream);
  /* Primary buckets plus the neighborhood overflow region. */
  CALL_PRINT_ARRAY(item_t, stream, h->buckets, num_buckets + h->neighborhood_size - 1);
  /* Open the lookup function; the key is the masked hash of `item`. */
  fputs(S_EOL "};\n"
      FUNCTION_DEFINITION S_EOL
      "{" S_EOL
      "item_t key =" S_EOL, stream);
  /* Emit the hash/mask expression for this table's parameters. */
  output_mask(stream, h->mask, h->bits_used);
  fputs(
      ";\n"
      "const item_t *p = &array[key];\n"
      "#define CHECK() if (*p == item) return p; else ++p;\n"
      , stream);
  /* Fully unroll the neighborhood probe. */
  for (int i = 0; i < h->neighborhood_size; i++)
    fputs("CHECK()" S_EOL, stream);
  fputs(
      "return 0;" S_EOL
      "}" S_EOL
      , stream);
}
/*
 * Return a human-readable identifier of the compiler this binary was built
 * with, e.g. "gcc-<__VERSION__>" or "msvc-<_MSC_FULL_VER>".  The string is
 * assembled entirely at preprocessing time and cached in a function-local
 * static, so the reference stays valid for the program's lifetime.
 */
const std::string& GetCompiler()
{
    static const std::string compiler = ""
#ifdef __GNUC__
    // Alternative using the individual version macros, kept for reference:
    //"gcc-" QUOTEME(__GNUC__) "." QUOTEME(__GNUC_MINOR__) "." QUOTEME(__GNUC_PATCHLEVEL__);
    "gcc-" __VERSION__;
#elif defined(_MSC_VER)
#ifdef _MSC_FULL_VER
    "msvc-" QUOTEME(_MSC_FULL_VER);
#else
    "msvc-" QUOTEME(_MSC_VER);
#endif
#elif defined(__VERSION__)
    // Unknown compiler that still reports a version string.
    "unknown-" __VERSION__;
#else
    "unknown";
#endif
    return compiler;
}
/*---------------------------------------------------------------------------*/
/*
 * Fill in *ipaddr with the peer's address.  An explicit UDP_CONNECTION_ADDR
 * define wins; otherwise a routable address is used for router builds and a
 * fixed link-local peer (fe80::2a01:22ff:fe33:4455) for everything else.
 */
static void
set_connection_address(uip_ipaddr_t *ipaddr)
{
#define _QUOTEME(x) #x
#define QUOTEME(x) _QUOTEME(x)
#ifdef UDP_CONNECTION_ADDR
  /* Compile-time override: stringify the macro value and parse it. */
  if(!uiplib_ipaddrconv(QUOTEME(UDP_CONNECTION_ADDR), ipaddr)) {
    PRINTF("UDP client failed to parse address '%s'\n",
           QUOTEME(UDP_CONNECTION_ADDR));
  }
#elif UIP_CONF_ROUTER
  /* Router build: routable (aaaa::/64) server address. */
  uip_ip6addr(ipaddr, 0xaaaa, 0, 0, 0, 0x0212, 0x7404, 0x0004, 0x0404);
#else
  /* Non-router build: link-local server address (fe80::2a01:22ff:fe33:4455). */
  uip_ip6addr(ipaddr, 0xfe80, 0, 0, 0, 0x2a01, 0x22ff, 0xfe33, 0x4455);
#endif /* UDP_CONNECTION_ADDR */
}
/*
 * Return a human-readable description of the build environment:
 * "boost-<BOOST_VERSION>, <BOOST_STDLIB>", with "unknown"/"unknown stdlib"
 * substituted when the corresponding macro is not defined.  Assembled at
 * preprocessing time and cached in a function-local static.
 */
const std::string& GetBuildEnvironment()
{
    static const std::string environment = "boost-"
#ifdef BOOST_VERSION
    QUOTEME(BOOST_VERSION)
#else
    "unknown"
#endif
    ", "
#ifdef BOOST_STDLIB
    // BOOST_STDLIB is already a string literal; no stringification needed.
    BOOST_STDLIB;
#else
    "unknown stdlib";
#endif
    return environment;
}
/*---------------------------------------------------------------------------*/
/*
 * Resolve the UDP server's address into *ipaddr.
 *
 * If UDP_CONNECTION_ADDR parses as a numeric IP address it is used directly
 * and RESOLV_STATUS_CACHED is returned.  Otherwise it is treated as a
 * hostname: a cached DNS answer is copied out when available, or a query is
 * started and RESOLV_STATUS_RESOLVING is returned so the caller can retry.
 */
static resolv_status_t set_connection_address(uip_ipaddr_t *ipaddr)
{
#ifndef UDP_CONNECTION_ADDR
/* mDNS hostname default is disabled here (see the commented condition). */
//#if RESOLV_CONF_SUPPORTS_MDNS
#if 0
#define UDP_CONNECTION_ADDR contiki-udp-server.local
#elif UIP_CONF_ROUTER
#define UDP_CONNECTION_ADDR aaaa:0:0:0:0201:2dcf:4629:04b4
#else
#define UDP_CONNECTION_ADDR fe80:0:0:0:6466:6666:6666:6666
#endif
#endif /* !UDP_CONNECTION_ADDR */
/* Stringify the (possibly macro-valued) address so it can be parsed. */
#define _QUOTEME(x) #x
#define QUOTEME(x) _QUOTEME(x)
  resolv_status_t status = RESOLV_STATUS_ERROR;
  if(uiplib_ipaddrconv(QUOTEME(UDP_CONNECTION_ADDR), ipaddr) == 0) {
    /* Not a numeric address: treat it as a hostname and consult resolv. */
    uip_ipaddr_t *resolved_addr = NULL;
    status = resolv_lookup(QUOTEME(UDP_CONNECTION_ADDR),&resolved_addr);
    if(status == RESOLV_STATUS_UNCACHED || status == RESOLV_STATUS_EXPIRED) {
      /* No usable cached answer: kick off a fresh query. */
      PRINTF("Attempting to look up %s\n",QUOTEME(UDP_CONNECTION_ADDR));
      resolv_query(QUOTEME(UDP_CONNECTION_ADDR));
      status = RESOLV_STATUS_RESOLVING;
    } else if(status == RESOLV_STATUS_CACHED && resolved_addr != NULL) {
      PRINTF("Lookup of \"%s\" succeded!\n",QUOTEME(UDP_CONNECTION_ADDR));
    } else if(status == RESOLV_STATUS_RESOLVING) {
      PRINTF("Still looking up \"%s\"...\n",QUOTEME(UDP_CONNECTION_ADDR));
    } else {
      PRINTF("Lookup of \"%s\" failed. status = %d\n",QUOTEME(UDP_CONNECTION_ADDR),status);
    }
    /* Copy the answer out if we got one (lookup or cache hit). */
    if(resolved_addr)
      uip_ipaddr_copy(ipaddr, resolved_addr);
  } else {
    /* Numeric address parsed directly: nothing to resolve. */
    status = RESOLV_STATUS_CACHED;
  }
  return status;
}
//////////////////////////////////////////////////////////////////// //// OUTPUT ////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////// ffmpegOutput::ffmpegOutput(QWidget *parent) : QDialog(parent), ui(new Ui::ffmpegOutput) { ui->setupUi(this); logging=false; QDir path; #ifdef DEBUGGING logLocation.setPath(QString(QUOTEME(PWD_PRO))); // for debug mode read preset in path #else #ifdef Q_WS_X11 logLocation.setPath(QDir::homePath()+"/.ffmpeg-gui"); #endif // directory not exist, create if (!logLocation.exists()) logLocation.mkdir(logLocation.absolutePath()); // dont copy files to home, only put here the modified/created by users /* #ifdef Q_WS_X11 QDir systemPath; QFileInfoList entries; QFile file; systemPath.setPath("/usr/local/share/ffmpeg/"); entries= systemPath.entryInfoList(QStringList("*.ffpreset"),QDir::Files,QDir::Name); foreach (QFileInfo f, entries) { file.setFileName(f.absoluteFilePath()); file.copy(QDir::homePath()+"/.ffmpeg-gui/"+f.fileName()); } #endif */ #endif html_log = new QFile(logLocation.absolutePath()+"/ffmpegLog.html"); ui->PTE_ffmpeg_error->hide(); }
void linear_sse_generate_c(const struct linear_sse_t* bs, FILE* stream) { fputs( GENERATED_FILE_PROLOGUE "#include <emmintrin.h>\n" , stream); fprintf(stream, "#define CACHE_LINE_BYTES %d\n", CACHE_LINE_BYTES); fputs( "#define item_t " QUOTEME(item_t) "\n" "#ifdef WIN32\n" "#define ALIGN(n) __declspec(align(n))\n" "#else\n" "#define ALIGN(n) __attribute__ ((aligned(n)))\n" "#endif\n" "ALIGN(CACHE_LINE_BYTES) static const item_t array[] =" S_EOL "{\n" , stream); CALL_PRINT_ARRAY(item_t, stream, bs->array, bs->size); fputs( S_EOL "};\n" "#define ARRLEN(a) (sizeof(a)/sizeof(*a))\n" "#define VEC_SIZE 16\n" "#define VEC_ITEMS (VEC_SIZE/sizeof(item_t))\n" FUNCTION_DEFINITION S_EOL "{" S_EOL #if item_bits == 64 "__m128i s = _mm_set_epi32(item >> 32, item & UINT32_MAX, item >> 32, item & UINT32_MAX);" S_EOL #else "__m128i s = _mm_set1_epi" QUOTEME(item_bits) "(item);" S_EOL #endif "int n = ARRLEN(array);" S_EOL "n -= n % VEC_ITEMS;" S_EOL "const __m128i *p = (__m128i*)array;" S_EOL "for (; p < (__m128i*)&array[n]; p++)" S_EOL "{" S_EOL "__m128i g = *p;" S_EOL #if item_bits == 16 "__m128i temp = _mm_cmpeq_epi" QUOTEME(item_bits) "(s, g);" S_EOL "unsigned ans = _mm_movemask_epi8(temp);" S_EOL "if (ans)" S_EOL "for (unsigned i = 0; i < VEC_ITEMS; i++)" S_EOL "if (ans & (1 << (i*2)))" S_EOL "return (item_t*)p + i;" S_EOL #elif item_bits == 32 "__m128i temp = _mm_cmpeq_epi" QUOTEME(item_bits) "(s, g);" S_EOL "unsigned ans = _mm_movemask_ps(*(__m128*)&temp);" S_EOL "if (ans)" S_EOL "for (unsigned i = 0; i < VEC_ITEMS; i++)" S_EOL "if (ans & (1 << i))" S_EOL "return (item_t*)p + i;" S_EOL #elif item_bits == 64 "__m128i temp = _mm_cmpeq_epi32(s, g);" S_EOL "unsigned ans = _mm_movemask_ps(*(__m128*)&temp);" S_EOL "if ((ans & 3) == 3) return (item_t*)p;" S_EOL "if ((ans & 12) == 12) return (item_t*)p + 1;" S_EOL #else #error item_bits must be wither 16,32 or 64 ! 
#endif "}" S_EOL "for (item_t *pp = (item_t*)p; pp <= &array[ARRLEN(array) - 1]; pp++)" S_EOL "if (*pp == item)" S_EOL "return pp;" S_EOL "return 0;" S_EOL "}" S_EOL, stream); }
/*
 * Build the FANN OpenCL program (embedded below as one big source string)
 * for the given context/device and return the kernel named kern_name.
 *
 * The source's %d placeholders are filled in with the FANN activation-enum
 * values via sprintf before compilation (%% in the source survives as a
 * literal % for the generated code's own printf calls).  On build failure,
 * the build status/log and the final source are printed and the process
 * exits.  Caller owns the returned cl_kernel (release with clReleaseKernel).
 *
 * NOTE(review): sprintf into a fixed 128000-byte calloc'd buffer is
 * unbounded — confirm the expanded source always fits.
 */
cl_kernel get_kernel(char *kern_name, cl_context context, cl_device_id device)
{
  cl_program program;
  cl_kernel kernel;
  cl_int err = 0;
  char *fin_program_src;
  /* The OpenCL program source.  Everything in quotes below is device code. */
  const char *program_source =
  /* Reduce a per-work-item value across the work group and store the
   * group's sum at store_arr[store_off]. */
  "void sum_reduce_and_store(__local float *sdata,\n"
  "__global float *store_arr,\n"
  "float value,\n"
  "int store_off)\n"
  "{\n"
  //Note that this draws from NVIDIA's reduction example:
  //- Doesn't use % operator.
  //- Uses contiguous threads.
  //- Uses sequential addressing -- no divergence or bank conflicts.
  //- Is completely unrolled.
  // local size must be a power of 2 and (>= 64 or == 1)
  "unsigned int lsz = get_local_size(0);\n"
  "unsigned int lid = get_local_id(0);\n"
  "sdata[lid] = value;\n"
  "barrier(CLK_LOCAL_MEM_FENCE);\n"
  // do reduction in shared mem
  "if (lsz != 1) {\n"
  "if (lsz >= 512) { if (lid < 256) { sdata[lid] += sdata[lid + 256]; } barrier(CLK_LOCAL_MEM_FENCE); }\n"
  "if (lsz >= 256) { if (lid < 128) { sdata[lid] += sdata[lid + 128]; } barrier(CLK_LOCAL_MEM_FENCE); }\n"
  "if (lsz >= 128) { if (lid < 64) { sdata[lid] += sdata[lid + 64]; } barrier(CLK_LOCAL_MEM_FENCE); }\n"
  //Avoid extra if statements by only using local size >= 64
  "if (lid < 32) { sdata[lid] += sdata[lid + 32]; } barrier(CLK_LOCAL_MEM_FENCE);\n"
  "if (lid < 16) { sdata[lid] += sdata[lid + 16]; } barrier(CLK_LOCAL_MEM_FENCE);\n"
  "if (lid < 8) { sdata[lid] += sdata[lid + 8]; } barrier(CLK_LOCAL_MEM_FENCE);\n"
  "if (lid < 4) { sdata[lid] += sdata[lid + 4]; } barrier(CLK_LOCAL_MEM_FENCE);\n"
  "if (lid < 2) { sdata[lid] += sdata[lid + 2]; } barrier(CLK_LOCAL_MEM_FENCE);\n"
  "if (lid < 1) { sdata[lid] += sdata[lid + 1]; } barrier(CLK_LOCAL_MEM_FENCE);\n"
  "}\n"
  // write result for this block to global mem
  "if (lid == 0) store_arr[store_off] = sdata[0];\n"
  "barrier(CLK_LOCAL_MEM_FENCE);\n"
  "}\n"
  /* Sum arr_len elements of reduce_arr starting at beg_off, using only
   * work group 0, and store the total back at reduce_arr[beg_off]. */
  "void global_sum_and_reduce(__local float *reduce_s,\n"
  "__global float *reduce_arr,\n"
  "int beg_off,\n"
  "int arr_len)\n"
  "{\n"
  "unsigned int lsz = get_local_size(0);\n"
  "unsigned int i;"
  "float value = 0.0f;\n"
  "if (get_group_id(0) != 0)\n"
  "return;\n"
  //Reduce the entire array using one work group
  "for(i = get_local_id(0); i < arr_len; i += lsz)\n"
  "if (i < arr_len)\n"
  "value += reduce_arr[beg_off+i];\n"
  "sum_reduce_and_store(reduce_s, reduce_arr, value, beg_off);\n"
  "}\n"
  /* Kernel: collapse the per-group partial sums (weight deltas, errors,
   * MSE, bit-fail counts) produced by a training run. */
  "__kernel void consolidate_train(__constant unsigned int *sizes,\n"
  "__global unsigned int *num_layers,\n"
  "__global unsigned int *num_neurons, \n"
  "__global unsigned int *num_inputs,\n"
  "__global unsigned int *num_outputs,\n"
  "__global float *MSE_values,\n"
  "__global float *num_bit_fail,\n"
  "__global float *train_errors,\n"
  "__global float *weights_deltas,\n"
  "__local float *reduce_s)\n"
  "{\n"
  "unsigned int input_sz = get_global_size(0);\n"
  "unsigned int gnum;\n"
  "unsigned int l;\n"
  //Calculate the number of groups used in the training run
  "if (sizes[5] %% sizes[7])\n"
  "gnum = 1 + (sizes[5] / sizes[7]);\n"
  "else\n"
  "gnum = sizes[5] / sizes[7];\n"
  //Calculate for all layers
  "for(l = 0; l < num_layers[get_global_id(1)]; ++l) {\n"
  "unsigned int part_layer_off = get_global_id(1)*sizes[1]+l;\n"
  "unsigned int num_neurons_l = num_neurons[part_layer_off];\n"
  "unsigned int n_layer_off = sizes[2]*part_layer_off;\n"
  "unsigned int o_layer_off = sizes[4]*part_layer_off;\n"
  "unsigned int n;\n"
  //Calcalate for all neurons
  "for(n = 0; n < num_neurons[part_layer_off]; ++n) {\n"
  "unsigned int num_outputs_l = num_outputs[n_layer_off+n];\n"
  "unsigned int num_inputs_l = num_inputs[n_layer_off+n];\n"
  "unsigned int o;\n"
  //Calculate for all outputs
  "for(o = 0; o < num_outputs_l; ++o) {\n"
  "unsigned int i;\n"
  //Sum delta data
  "for(i = 0; i < num_inputs_l; ++i) {\n"
  "global_sum_and_reduce(reduce_s, weights_deltas,\n"
  "((o_layer_off+o)*sizes[3]+i)*gnum, gnum);\n"
  // "if(get_global_id(0) == 0)\n"
  // "printf(\"d (l, n, o, i) (delta): (%%5d %%5d %%5d %%5d) (%%5d %%10f)\\n\", l, n, o, i, ((o_layer_off+o)*sizes[3]+i)*gsz, weights_deltas[((o_layer_off+o)*sizes[3]+i)*gsz]);\n"
  "}\n"
  "global_sum_and_reduce(reduce_s, train_errors,\n"
  "(o_layer_off+o)*sizes[5], sizes[5]);\n"
  // "printf(\"e (l, n, o) (errs): (%%5d %%5d %%5d) (%%5d %%10f)\\n\", l, n, o, (o_layer_off+o)*input_sz, train_errors[(o_layer_off+o)*input_sz]);\n"
  "}\n"
  "o_layer_off += num_outputs_l;\n"
  "}\n"
  "}\n"
  "global_sum_and_reduce(reduce_s, MSE_values, get_global_id(1)*sizes[5], sizes[5]);\n"
  "global_sum_and_reduce(reduce_s, num_bit_fail, get_global_id(1)*sizes[5], sizes[5]);\n"
  // "printf(\"m (msev): (%%10f)\\n\", MSE_values[get_global_id(1)*input_sz]);\n"
  // "printf(\"n (fail): (%%10f)\\n\", num_bit_fail[get_global_id(1)*input_sz]);\n"
  "}\n"
  /* Derivative of the activation function selected by act_func; the %d case
   * labels are substituted with FANN enum values on the host. */
  "float activation_derived(float steepness, int act_func,\n"
  "__global float *outputs,\n"
  "__global float *sums, int o_i)\n"
  "{\n"
  "switch (act_func)\n"
  "{\n"
  "case %d:\n"
  "case %d:\n"
  "case %d:\n"
  "return " QUOTEME(fann_linear_derive(steepness, outputs[o_i])) ";\n"
  "case %d:\n"
  "case %d:\n"
  "return " QUOTEME(fann_sigmoid_derive(steepness, fann_clip(outputs[o_i], 0.01f, 0.99f))) ";\n"
  "case %d:\n"
  "case %d:\n"
  "return " QUOTEME(fann_sigmoid_symmetric_derive(steepness, fann_clip(outputs[o_i], -0.98f, 0.98f))) ";\n"
  "case %d:\n"
  "return " QUOTEME(fann_gaussian_derive(steepness, outputs[o_i], sums[o_i])) ";\n"
  "case %d:\n"
  "return " QUOTEME(fann_gaussian_symmetric_derive(steepness, outputs[o_i], sums[o_i])) ";\n"
  "case %d:\n"
  "return " QUOTEME(fann_elliot_derive(steepness, fann_clip(outputs[o_i], 0.01f, 0.99f), sums[o_i])) ";\n"
  "case %d:\n"
  "return " QUOTEME(fann_elliot_symmetric_derive(steepness, fann_clip(outputs[o_i], -0.98f, 0.98f), sums[o_i])) ";\n"
  "case %d:\n"
  "return " QUOTEME(fann_sin_symmetric_derive(steepness, sums[o_i])) ";\n"
  "case %d:\n"
  "return " QUOTEME(fann_cos_symmetric_derive(steepness, sums[o_i])) ";\n"
  "case %d:\n"
  "return " QUOTEME(fann_sin_derive(steepness, sums[o_i])) ";\n"
  "case %d:\n"
  "return " QUOTEME(fann_cos_derive(steepness, sums[o_i])) ";\n"
  "case %d: //This should be an error\n"
  "case %d: //This should be an error\n"
  "case %d: //FIXME\n"
  "return -99.0;\n"
  "}\n"
  "}\n"
  /* Backpropagate errors through all layers, accumulating per-group weight
   * deltas via sum_reduce_and_store. */
  "void backpropagate_MSE(__constant unsigned int *sizes,\n"
  "__global unsigned int *num_layers,\n"
  "__global unsigned int *num_neurons, \n"
  "__global unsigned int *num_inputs,\n"
  "__global unsigned int *num_outputs,\n"
  "__global float *steepness,\n"
  "__global int *activation,\n"
  "__global float *weights,\n"
  "__global float *inputs,\n"
  "__global float *sums,\n"
  "__global float *outputs,\n"
  "__global float *train_errors,\n"
  "__global float *weights_deltas,\n"
  //Shared areas for caching
  "__local float *steep_s,\n"
  "__local int *act_s,\n"
  "__local float *weights_s,\n"
  "__local float *reduce_s )\n"
  "{\n"
  "unsigned int input_id = get_global_id(0);\n"
  "unsigned int lid = get_local_id(0);\n"
  "unsigned int lsz = get_local_size(0);\n"
  "unsigned int gnum;\n"
  "unsigned int gid = get_group_id(0);\n"
  "int l;\n"
  //Calculate the number of groups used in the training run
  "if (sizes[5] %% sizes[7])\n"
  "gnum = 1 + (sizes[5] / sizes[7]);\n"
  "else\n"
  "gnum = sizes[5] / sizes[7];\n"
  //Calculate for all layers
  "for(l = num_layers[get_global_id(1)]-1; l >= 0; --l) {\n"
  "unsigned int part_layer_off = get_global_id(1)*sizes[1]+l;\n"
  "unsigned int num_neurons_l = num_neurons[part_layer_off];\n"
  "unsigned int n_layer_off = sizes[2]*part_layer_off;\n"
  "unsigned int o_layer_off = sizes[4]*part_layer_off;\n"
  "unsigned int output_off = o_layer_off-sizes[4];\n"
  "unsigned int n;\n"
  //Copy steepness & activation to shared mem
  "barrier(CLK_LOCAL_MEM_FENCE);\n"
  "for(n = 0; n < num_neurons_l; n += lsz) {\n"
  "unsigned int neuron_num = n+lid;\n"
  "if (neuron_num < num_neurons[part_layer_off]){\n"
  "steep_s[neuron_num] = steepness[n_layer_off+neuron_num];\n"
  "act_s[neuron_num] = activation[n_layer_off+neuron_num];\n"
  "}\n"
  "}\n"
  "barrier(CLK_LOCAL_MEM_FENCE);\n"
  //Clear all the previous layer's train_errors
  "for(n = 0; n < num_neurons_l && l != 0; ++n) {\n"
  "unsigned int num_outputs_l = num_outputs[n_layer_off+n];\n"
  "unsigned int o;\n"
  //Zero all outputs
  "for(o = 0; o < num_outputs_l; ++o) {\n"
  //Don't overrun data
  "if (sizes[5] > input_id)\n"
  "train_errors[output_off*sizes[5]+input_id] = 0.0f;\n"
  "++output_off;\n"
  "}\n"
  "}\n"
  //Reset
  "output_off = o_layer_off;\n"
  //Calcalate for all neurons
  "for(n = 0; n < num_neurons[part_layer_off]; ++n) {\n"
  "unsigned int num_outputs_l = num_outputs[n_layer_off+n];\n"
  "unsigned int num_inputs_l = num_inputs[n_layer_off+n];\n"
  "unsigned int o;\n"
  //Calculate for all outputs
  "for(o = 0; o < num_outputs_l; ++o) {\n"
  "unsigned int i;\n"
  "unsigned int o_i = output_off*sizes[5]+input_id;\n"
  "float error;\n"
  //Multiply errors with the activation function derivative.
  "if (sizes[5] > input_id)\n"
  "train_errors[o_i] = error =\n"
  "train_errors[o_i]*activation_derived(steep_s[n], act_s[n], outputs, sums, o_i);\n"
  //Weight & sum data from the inputs & bias
  "for(i = 0; i < num_inputs_l; ++i) {\n"
  "unsigned int weights_i = 0;\n"
  "unsigned int prev_output_i = 0;\n"
  "float delta = 0.0f;\n"
  //Weights aren't used for first layer
  "if (l != 0) {\n"
  "weights_i = (sizes[3]*o+i) %% lsz;\n"
  //Load shared memory as appropriate
  "if (weights_i == 0) {\n"
  "barrier(CLK_LOCAL_MEM_FENCE);\n"
  "if (sizes[3]*o+i+lid < sizes[3]*num_outputs_l)\n"
  "weights_s[lid] = weights[output_off*sizes[3]+i+lid];\n"
  "barrier(CLK_LOCAL_MEM_FENCE);\n"
  "}\n"
  "}\n"
  //Don't overrun data
  "if (sizes[5] > input_id) {\n"
  //Figure out what input was used
  "if(i == num_inputs_l-1){\n"
  "prev_output_i = (o_layer_off-sizes[4]+i)*sizes[5]+input_id;\n"
  "delta = error;\n"
  "} else if(l == 0) {\n"
  "delta = inputs[i*sizes[5]+input_id] * error;\n"
  "} else {\n"
  "prev_output_i = (o_layer_off-sizes[4]+i)*sizes[5]+input_id;\n"
  "delta = outputs[prev_output_i] * error;\n"
  "}\n"
  "}\n"
  // "printf(\"(id, l, n, o, i) (delta): (%%5d %%5d %%5d %%5d %%5d) (%%10f)\\n\", input_id, l, n, o, i, error*input);\n"
  // Calculate the weight deltas
  // Due to memory requirements we're reducing the deltas here
  "sum_reduce_and_store(reduce_s, weights_deltas, delta,\n"
  "(output_off*sizes[3]+i)*gnum+gid);\n"
  // "weights_deltas[(output_off*sizes[3]+i)*gnum+gid] = gnum;\n"
  // "printf(\"(id, l, n, o, i) (fin delta): (%%5d %%5d %%5d %%5d %%5d) (%%10f)\\n\", input_id, l, n, o, i, weights_deltas[(output_off*sizes[3]+i)*gnum+gid]);\n"
  //Calculate the error for previous layer
  "if(l != 0 && sizes[5] > input_id)\n"
  "train_errors[prev_output_i] += error * weights_s[weights_i];\n"
  "}\n"
  "++output_off;\n"
  "}\n"
  "}\n"
  "}\n"
  "}\n"
  /* Compute per-input MSE and bit-fail counts for the output layer and
   * seed train_errors with the (possibly log-scaled) output differences. */
  "void compute_MSE(__constant unsigned int *sizes,\n"
  "__global float *f_params,\n"
  "__global unsigned int *num_layers,\n"
  "__global unsigned int *num_neurons, \n"
  "__global unsigned int *num_outputs,\n"
  "__global int *activation,\n"
  "__global float *outputs,\n"
  "__global float *train_errors,\n"
  "__global float *actual_outputs,\n"
  "__global float *MSE_values,\n"
  "__global float *num_bit_fail,\n"
  "__local int *act_s)\n"
  "{\n"
  "unsigned int ann_id = get_global_id(1);\n"
  "unsigned int input_id = get_global_id(0);\n"
  "unsigned int out_neuron_index = ann_id*sizes[1]+num_layers[ann_id]-1;\n"
  "unsigned int out_off = out_neuron_index*sizes[4]*sizes[5]+input_id;\n"
  "unsigned int num_neurons_l = num_neurons[out_neuron_index];\n"
  "unsigned int layer_off = sizes[2]*out_neuron_index;\n"
  "unsigned int n;\n"
  "unsigned int layer_o = 0;\n"
  "unsigned int bit_fail = 0;\n"
  "float MSE_value = 0.0f;\n"
  //Copy steepness & activation to shared mem
  "for(n = 0; n < num_neurons_l; n += get_local_size(0)) {\n"
  "unsigned int neuron_off = n+get_local_id(0);\n"
  "if (neuron_off < num_neurons_l)\n"
  "act_s[neuron_off] = activation[layer_off+neuron_off];\n"
  "}\n"
  "barrier(CLK_LOCAL_MEM_FENCE);\n"
  //Don't use the extra threads
  "if (input_id >= sizes[5])\n"
  "return;\n"
  //Calcalate for all neurons
  "for(n = 0; n < num_neurons_l; ++n) {\n"
  "unsigned int num_outputs_l = num_outputs[layer_off+n];\n"
  "unsigned int act_out_off = (ann_id*sizes[4]+layer_o)*sizes[5]+input_id;\n"
  "unsigned int o;\n"
  //Calculate for all outputs
  "for(o = 0; o < num_outputs_l; ++o) {\n"
  "unsigned int out_index = out_off+(layer_o+o)*sizes[5];\n"
  "float neuron_diff = actual_outputs[act_out_off+o*sizes[5]] - outputs[out_index];\n"
  //Update MSE macro follows
  "switch (act_s[n]) {\n"
  "case %d:\n"
  "case %d:\n"
  "case %d:\n"
  "case %d:\n"
  "case %d:\n"
  "case %d:\n"
  "case %d:\n"
  "case %d:\n"
  "neuron_diff *= 0.5f;\n"
  "}\n"
  "MSE_value += neuron_diff * neuron_diff;\n"
  "if(fabs(neuron_diff) >= f_params[0])\n"
  "++bit_fail;\n"
  // "printf(\"neuron_diff MSE_value: %%10f %%10f\\n\", neuron_diff, MSE_value);\n"
  //Update error
  "if (sizes[9]) {\n"
  "if(neuron_diff < -.9999999f)\n"
  "neuron_diff = -17.0f;\n"
  "else if(neuron_diff > .9999999f)\n"
  "neuron_diff = 17.0f;\n"
  "else\n"
  "neuron_diff = log((1.0f + neuron_diff) / (1.0f - neuron_diff));\n"
  "}\n"
  // "printf(\"train_error out_index: %%10f %%5d\\n\", neuron_diff, out_index);\n"
  "train_errors[out_index] = neuron_diff;\n"
  //Don't update ann->training_params->num_MSE because it can be calculated later
  // "printf(\"(%%5d %%5d %%5d) train_errors actual_output neuron_value: %%10f %%10f %%10f\\n\", input_id, n, o, train_errors[out_index], actual_outputs[act_out_off+o*sizes[5]], outputs[out_index]);\n"
  "}\n"
  "layer_o += num_outputs_l;\n"
  "}\n"
  "unsigned int net_index = ann_id*sizes[5]+input_id;\n"
  "num_bit_fail[net_index] = bit_fail;\n"
  "MSE_values[net_index] = MSE_value;\n"
  "}\n"
  /* Apply the activation function selected by act (clamped to +/-150/steepness);
   * %d case labels substituted with FANN enum values on the host. */
  "float calc_act(float sum, float steepness, int act)\n"
  "{\n"
  "float max_sum;\n"
  "sum *= steepness;\n"
  "max_sum = 150.0f/steepness;\n"
  "if(sum > max_sum)\n"
  "sum = max_sum;\n"
  "else if(sum < -max_sum)\n"
  "sum = -max_sum;\n"
  "switch(act)\n"
  "{\n"
  "case %d:\n"
  "return sum;\n"
  "case %d:\n"
  "return ((sum < 0.0f) ? 0.0f : (sum > 1.0f) ? 1.0f : sum);\n"
  "case %d:\n"
  "return ((sum < -1.0f) ? -1.0f : (sum > 1.0f) ? 1.0f : sum);\n"
  "case %d:\n"
  "return " QUOTEME(fann_sigmoid_real(sum)) ";\n"
  "case %d:\n"
  "return " QUOTEME(fann_sigmoid_symmetric_real(sum)) ";\n"
  "case %d:\n"
  "return " QUOTEME(fann_stepwise(-2.64665293693542480469e+00f, -1.47221934795379638672e+00f, -5.49306154251098632812e-01f, 5.49306154251098632812e-01f, 1.47221934795379638672e+00f, 2.64665293693542480469e+00f, -9.90000009536743164062e-01f, -8.99999976158142089844e-01f, -5.00000000000000000000e-01f, 5.00000000000000000000e-01f, 8.99999976158142089844e-01f, 9.90000009536743164062e-01f, -1.0f, 1.0f, sum)) ";\n"
  "case %d:\n"
  "return " QUOTEME(fann_stepwise(-2.64665246009826660156e+00f, -1.47221946716308593750e+00f, -5.49306154251098632812e-01f, 5.49306154251098632812e-01f, 1.47221934795379638672e+00f, 2.64665293693542480469e+00f, 4.99999988824129104614e-03f, 5.00000007450580596924e-02f, 2.50000000000000000000e-01f, 7.50000000000000000000e-01f, 9.49999988079071044922e-01f, 9.95000004768371582031e-01f, 0.0f, 1.0f, sum)) ";\n"
  "case %d:\n"
  "return ((sum < 0.0f) ? 0.0f : 1.0f);\n"
  "case %d:\n"
  "return ((sum < 0.0f) ? -1.0f : 1.0f);\n"
  "case %d:\n"
  "return " QUOTEME(fann_gaussian_real(sum)) ";\n"
  "case %d:\n"
  "return " QUOTEME(fann_gaussian_symmetric_real(sum)) ";\n"
  "case %d:\n"
  "return " QUOTEME(fann_elliot_real(sum)) ";\n"
  "case %d:\n"
  "return " QUOTEME(fann_elliot_symmetric_real(sum)) ";\n"
  "case %d:\n"
  "return " QUOTEME(fann_sin_symmetric_real(sum)) ";\n"
  "case %d:\n"
  "return " QUOTEME(fann_cos_symmetric_real(sum)) ";\n"
  "case %d:\n"
  "return " QUOTEME(fann_sin_real(sum)) ";\n"
  "case %d:\n"
  "return " QUOTEME(fann_cos_real(sum)) ";\n"
  "case %d:\n"
  "return 0;\n"
  "}\n"
  "}\n"
  /* Kernel: forward-propagate all inputs through all networks. */
  "__kernel void run(__constant unsigned int *sizes,\n"
  "__global float *f_params,\n"
  "__global unsigned int *num_layers,\n"
  "__global unsigned int *num_neurons,\n"
  "__global unsigned int *num_inputs,\n"
  "__global unsigned int *num_outputs,\n"
  "__global float *steepness,\n"
  "__global int *activation,\n"
  "__global float *weights,\n"
  "__global float *inputs,\n"
  "__global float *sums,\n"
  "__global float *outputs,\n"
  "__local float *steep_s,\n"
  "__local int *act_s,\n"
  "__local float *weights_s )\n"
  "{\n"
  "unsigned int input_id = get_global_id(0);\n"
  "unsigned int ann_id = get_global_id(1);\n"
  "unsigned int lid = get_local_id(0);\n"
  "unsigned int lsz = get_local_size(0);\n"
  "unsigned int l;\n"
  //Calculate for all layers
  "for(l = 0; l < num_layers[ann_id]; ++l) {\n"
  "unsigned int part_layer_off = ann_id*sizes[1]+l;\n"
  "unsigned int n;\n"
  "unsigned int num_neurons_l = num_neurons[part_layer_off];\n"
  "unsigned int n_layer_off = sizes[2]*part_layer_off;\n"
  "unsigned int o_layer_off = sizes[4]*part_layer_off;\n"
  "unsigned int output_off = o_layer_off;\n"
  //Copy steepness & activation to shared mem
  "barrier(CLK_LOCAL_MEM_FENCE);\n"
  "for(n = 0; n < num_neurons[part_layer_off]; n += lsz) {\n"
  "unsigned int neuron_num = n+lid;\n"
  "if (neuron_num < num_neurons[part_layer_off]){\n"
  "steep_s[neuron_num] = steepness[n_layer_off+neuron_num];\n"
  "act_s[neuron_num] = activation[n_layer_off+neuron_num];\n"
  "}\n"
  "}\n"
  "barrier(CLK_LOCAL_MEM_FENCE);\n"
  //Calcalate for all neurons
  "for(n = 0; n < num_neurons[part_layer_off]; ++n) {\n"
  "unsigned int num_outputs_l = num_outputs[n_layer_off+n];\n"
  "unsigned int num_inputs_l = num_inputs[n_layer_off+n];\n"
  "unsigned int o;\n"
  //Calculate for all outputs
  "for(o = 0; o < num_outputs_l; ++o) {\n"
  "unsigned int i;\n"
  "float sum = 0.0f;\n"
  //Weight & sum data from the inputs & bias
  "for(i = 0; i < num_inputs_l; ++i) {\n"
  "float in_val;\n"
  "unsigned int weights_i = (sizes[3]*o+i) %% lsz;\n"
  //Load shared memory as appropriate
  "if (weights_i == 0) {\n"
  "barrier(CLK_LOCAL_MEM_FENCE);\n"
  "if (sizes[3]*o+i+lid < sizes[3]*num_outputs_l)\n"
  "weights_s[lid] = weights[output_off*sizes[3]+i+lid];\n"
  "barrier(CLK_LOCAL_MEM_FENCE);\n"
  "}\n"
  //Don't overrun data
  "if (sizes[5] <= input_id)\n"
  "continue;\n"
  "if (i == num_inputs_l-1) {\n"
  //Bias
  "in_val = 1.0f;\n"
  // "printf(\"%%5d %%2d %%2d %%2d %%2d: %%15fCL %%15fCL\\n\", input_id, l, n, o ,i, weights_s[weights_i], in_val);\n"
  "} else if (l == 0) {\n"
  //Handle input from user
  "in_val = inputs[i*sizes[5]+input_id];\n"
  // "printf(\"%%5d %%2d %%2d %%2d %%2d: %%15fCL %%15fCL\\n\", input_id, l, n, o ,i, weights_s[weights_i], in_val);\n"
  "} else {\n"
  //Handle input from neurons
  "in_val = outputs[(o_layer_off-sizes[4]+i)*sizes[5]+input_id];\n"
  // "printf(\"%%5d %%2d %%2d %%2d %%2d: %%15fCL %%15fCL N:%%5d\\n\", input_id, l, n, o ,i, weights_s[weights_i], in_val, (o_layer_off-sizes[4]+i)*sizes[5]+input_id);\n"
  "}\n"
  //Weight & sum
  "sum += weights_s[weights_i]*in_val;\n"
  "}\n"
  //Don't overrun data
  "if (sizes[5] > input_id){\n"
  //Save into output data array
  "sums[output_off*sizes[5]+input_id] = sum;\n"
  "outputs[output_off*sizes[5]+input_id] = calc_act(sum, steep_s[n], act_s[n]);\n"
  // "printf(\"%%5d %%2d %%2d %%2d %%2d: %%15f %%15f N:%%5d\\n\", input_id, l, n, o ,i, sums[output_off*sizes[5]+input_id], outputs[output_off*sizes[5]+input_id], output_off*sizes[5]+input_id);\n"
  "}\n"
  "++output_off;\n"
  "}\n"
  "}\n"
  "}\n"
  "}"
  /*
   + remember to set ann->training_params->num_MSE on return
   + remember to init train errors like in fann_compute_MSE()
   + remember that the group size must be a power of two >= 64 or == 1
     for reduce to work
   + remember to set neuron->num_backprop_done on return
  */
  /* Kernel: one full batch-training epoch = run + compute_MSE + backprop. */
  "__kernel void train_batch(\n"//ANN structure
  "__constant unsigned int *sizes,\n"
  "__global float *f_params,\n"
  "__global unsigned int *num_layers,\n"
  "__global unsigned int *num_neurons,\n"
  "__global unsigned int *num_inputs,\n"
  "__global unsigned int *num_outputs,\n"
  //Network values
  "__global float *steepness,\n"
  "__global int *activation,\n"
  "__global float *weights,\n"
  //Per-run data
  "__global float *inputs,\n"
  "__global float *sums,\n"
  "__global float *outputs,\n"
  //Per-run training memory
  "__global float *train_errors,\n"
  "__global float *actual_outputs,\n"
  "__global float *MSE_values,\n"
  "__global float *num_bit_fail,\n"
  "__global float *weights_deltas,\n"
  //Shared areas
  "__local float *steep_s,\n"
  "__local int *act_s,\n"
  "__local float *weights_s,\n"
  "__local float *reduce_s)\n"
  "{\n"
  //Do the normal procedure of an epoch
  "run(sizes, f_params, num_layers, num_neurons, num_inputs, num_outputs,\n"
  "steepness, activation, weights, inputs, sums, outputs,\n"
  "steep_s, act_s, weights_s);\n"
  "compute_MSE(sizes, f_params, num_layers, num_neurons, num_outputs,\n"
  "activation, outputs, train_errors, actual_outputs,\n"
  "MSE_values, num_bit_fail, act_s);\n"
  "backpropagate_MSE(sizes, num_layers, num_neurons, num_inputs, num_outputs,\n"
  "steepness, activation, weights, inputs, sums,\n"
  "outputs, train_errors, weights_deltas,\n"
  "steep_s, act_s, weights_s, reduce_s);\n"
  "}\n";
  //Insert enum values here because I can't seem to do it at compile time
  fin_program_src = calloc(128000, sizeof(char));
  sprintf(fin_program_src, program_source,
          FANN_LINEAR, FANN_LINEAR_PIECE, FANN_LINEAR_PIECE_SYMMETRIC,
          FANN_SIGMOID, FANN_SIGMOID_STEPWISE, FANN_SIGMOID_SYMMETRIC,
          FANN_SIGMOID_SYMMETRIC_STEPWISE, FANN_GAUSSIAN,
          FANN_GAUSSIAN_SYMMETRIC, FANN_ELLIOT, FANN_ELLIOT_SYMMETRIC,
          FANN_SIN_SYMMETRIC, FANN_COS_SYMMETRIC, FANN_SIN, FANN_COS,
          FANN_THRESHOLD_SYMMETRIC, FANN_THRESHOLD, FANN_GAUSSIAN_STEPWISE,
          FANN_LINEAR_PIECE_SYMMETRIC, FANN_THRESHOLD_SYMMETRIC,
          FANN_SIGMOID_SYMMETRIC, FANN_SIGMOID_SYMMETRIC_STEPWISE,
          FANN_ELLIOT_SYMMETRIC, FANN_GAUSSIAN_SYMMETRIC, FANN_SIN_SYMMETRIC,
          FANN_COS_SYMMETRIC, FANN_LINEAR, FANN_LINEAR_PIECE,
          FANN_LINEAR_PIECE_SYMMETRIC, FANN_SIGMOID, FANN_SIGMOID_SYMMETRIC,
          FANN_SIGMOID_SYMMETRIC_STEPWISE, FANN_SIGMOID_STEPWISE,
          FANN_THRESHOLD, FANN_THRESHOLD_SYMMETRIC, FANN_GAUSSIAN,
          FANN_GAUSSIAN_SYMMETRIC, FANN_ELLIOT, FANN_ELLIOT_SYMMETRIC,
          FANN_SIN_SYMMETRIC, FANN_COS_SYMMETRIC, FANN_SIN, FANN_COS,
          FANN_GAUSSIAN_STEPWISE);
  /* Compile the filled-in source for this device. */
  program = clCreateProgramWithSource(context, 1, (const char**)&fin_program_src, NULL, &err);
  assert(err == CL_SUCCESS);
  err = clBuildProgram(program, 1, &device, "-cl-fast-relaxed-math -Werror", NULL, NULL);
  //Detailed debugging info
  if (err != CL_SUCCESS) {
    size_t len;
    char *buffer = (char *)calloc(128000, sizeof(char));
    printf("Error: Failed to build program executable!\n");
    printf("clBuildProgram return:\n");
    if(err == CL_INVALID_PROGRAM)
      printf("CL_INVALID_PROGRAM\n");
    else if(err == CL_INVALID_VALUE)
      printf("CL_INVALID_VALUE\n");
    else if(err == CL_INVALID_BINARY)
      printf("CL_INVALID_BINARY\n");
    else if(err == CL_INVALID_BUILD_OPTIONS)
      printf("CL_INVALID_BUILD_OPTIONS\n");
    else if(err == CL_INVALID_OPERATION)
      printf("CL_INVALID_OPERATION\n");
    else if(err == CL_COMPILER_NOT_AVAILABLE)
      printf("CL_COMPILER_NOT_AVAILABLE\n");
    else if(err == CL_BUILD_PROGRAM_FAILURE)
      printf("CL_BUILD_PROGRAM_FAILURE\n");
    else if(err == CL_OUT_OF_HOST_MEMORY)
      printf("CL_OUT_OF_HOST_MEMORY\n");
    err = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS, 128000*sizeof(char), buffer, &len);
    assert(err == CL_SUCCESS);
    printf("Build Status:\n");
    if(buffer[0] == CL_BUILD_NONE)
      printf("CL_BUILD_NONE\n");
    else if(buffer[0] == CL_BUILD_ERROR)
      printf("CL_BUILD_ERROR\n");
    else if(buffer[0] == CL_BUILD_SUCCESS)
      printf("CL_BUILD_SUCCESS\n");
    else if(buffer[0] == CL_BUILD_IN_PROGRESS)
      printf("CL_BUILD_IN_PROGRESS\n");
    err = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 128000*sizeof(char), buffer, &len);
    printf("Get Build Info:\n");
    switch (err) {
      case CL_INVALID_DEVICE:
        printf("CL_INVALID_DEVICE\n"); break;
      case CL_INVALID_VALUE:
        printf("CL_INVALID_VALUE\n"); break;
      case CL_INVALID_PROGRAM:
        printf("CL_INVALID_PROGRAM\n"); break;
    }
    printf("Build Info:\n%s\nProgram Source:\n%s\n", buffer, fin_program_src);
    free(buffer);
    exit(1);
  }
  /* Build succeeded: extract the requested kernel and release the rest. */
  kernel = clCreateKernel(program, kern_name, &err);
  clReleaseProgram(program);
  free(fin_program_src);
  return kernel;
}
void parse(const char *buf) { uint16_t count = 0; char *loop; char ch; while ((ch = *buf++)) { switch (ch) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': x = ch - '0'; while (*buf >= '0' && *buf <= '9') { x = x*10 + (*buf++ - '0'); } break; case 'p': send_num(x); send_str(PSTR("\r\n")); break; case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': port = ch - 'a'; pin = x % 8; break; case 'i': *(uint8_t *)(0x21 + port * 3) &= ~(1 << pin); // direction = input x = *(uint8_t *)(0x20 + port * 3) & (1 << pin) ? 1 : 0; // x = pin break; case 'o': if (x % 2) { *(uint8_t *)(0x22 + port * 3) |= (1 << pin); // pin = hi } else { *(uint8_t *)(0x22 + port * 3) &= ~(1 << pin); // pin = low } *(uint8_t *)(0x21 + port * 3) |= (1 << pin); // direction = output break; case 'm': _delay_ms(x); break; case 'u': _delay_loop_2(x*(F_CPU/4000000UL)); break; case '{': count = x; loop = buf; while ((ch = *buf++) && ch != '}') { } case '}': if (count) { count--; buf = loop; } break; case 'k': x = count; break; case '_': while ((ch = *buf++) && ch != '_') { usb_serial_putchar(ch); } send_str(PSTR("\r\n")); break; case 's': x = analogRead(x); break; case 'v': #define QUOTEME_(x) #x #define QUOTEME(x) QUOTEME_(x) send_str(PSTR(QUOTEME(MCU))); send_str(PSTR("\r\n")); break; case 'h': send_str(PSTR("0-9<num>\tenter number\r\n<num>p\t\tprint number\r\n<num>a-f<pin>\tselect pin\r\n<pin>i<num>\tinput\r\n<pin><num>o\toutput\r\n<num>m\t\tmsec delay\r\n<num>u\t\tusec delay\r\n<num>{}\t\trepeat\r\nk<num>\t\tloop count\r\n_<words>_\tprint words\r\n<num>s<num>\tanalog sample\r\nv\t\tprint version\r\nh\t\tprint help\r\n")); break; } } }