// Headers and the IW constant are assumed here so the example is
// self-contained; the actual header paths and weight bound are defined
// by the framework/example, and may differ.
#include <neu/NNet.h>
#include <neu/NNModule.h>
#include <neu/NPLVector.h>
#include <neu/NSys.h>

#include <iostream>

using namespace std;
using namespace neu;

const double IW = 0.5; // bound on random initial weights (value assumed)

int main(int argc, char** argv){
  NNet network;
  network.setSeed(0);

  // a 2-input network with three hidden layers of 2000 sigmoid neurons
  // and a 2-neuron sigmoid output layer; weights are initialized
  // uniformly in [-IW, IW]
  NNet::Layer* inputLayer = new NNet::Layer(2);
  network.addLayer(inputLayer);
  network.addLayer(NNet::sigmoidFunc, 2000, -IW, IW);
  network.addLayer(NNet::sigmoidFunc, 2000, -IW, IW);
  network.addLayer(NNet::sigmoidFunc, 2000, -IW, IW);
  network.addLayer(NNet::sigmoidFunc, 2, -IW, IW);

  NNet::Layer* outputLayer = network.backLayer(0);

  // Normally we would train the network before executing it; for the
  // sake of demonstration, that step is omitted here. See
  // examples/nnet_train.

  // test the performance by executing the network on the C++ side
  double t1 = NSys::now();
  inputLayer->neuron(0)->input(0.25);
  inputLayer->neuron(1)->input(0.70);
  double dt = NSys::now() - t1;

  cout << "c++ time is: " << dt << endl;
  cout << "output[0] = " << outputLayer->neuron(0)->output() << endl;
  cout << "output[1] = " << outputLayer->neuron(1)->output() << endl;

  // an NNModule holds compiled neural networks
  NNModule module;

  t1 = NSys::now();
  // compile the neural network to run with 8 threads, giving it the
  // name "test"
  module.compile("test", network, 8);
  dt = NSys::now() - t1;
  cout << "compile time is: " << dt << endl;

  // the inputs and outputs must be NPLVectors of doubles whose lengths
  // match the input and output layers of the network constructed above
  typedef NPLVector<double, 2> Vec2;

  Vec2 inputs;
  inputs[0] = 0.25;
  inputs[1] = 0.70;

  Vec2 outputs;

  t1 = NSys::now();
  // execute the compiled network
  module.run("test", &inputs, &outputs);
  dt = NSys::now() - t1;
  cout << "run time is: " << dt << endl;

  // verify that these outputs match our first outputs
  cout << "outputs: " << outputs << endl;

  // free the resources used by the compiled neural network named "test"
  module.remove("test");

  return 0;
}
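The demo above times each stage with NSys::now(). As a point of reference, here is a minimal stand-in for it, assuming the function returns wall-clock seconds as a double; the framework's real implementation may differ.

#include <chrono>

// A minimal sketch of an NSys::now()-style timer: seconds since an
// arbitrary epoch, as a double, from a monotonic clock.
inline double now(){
  using namespace std::chrono;
  return duration<double>(steady_clock::now().time_since_epoch()).count();
}

Subtracting two such timestamps, as main() does, yields an elapsed time in seconds that is safe against system clock adjustments because the clock is monotonic.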
bool compile(const nstr& name, NNet& network, size_t threads){
  RunNetwork* runNetwork = new RunNetwork;

  NNet::Layer* inputLayer = network.layer(0);
  size_t numLayers = network.numLayers();

  RunLayer* lastRunLayer = 0;

  // layer 0 is the input layer; a RunLayer, with its own thread queue,
  // is generated for each subsequent layer
  for(size_t l = 1; l < numLayers; ++l){
    RunLayer* runLayer = new RunLayer;
    runLayer->queue = new Queue(threads);

    size_t inputLayerSize = inputLayer->size();

    NNet::Layer* layer = network.layer(l);
    size_t layerSize = layer->size();

    // interior layers read their inputs from the previous layer's
    // output vector; the first hidden layer's inputs are supplied by
    // the caller at run time
    if(l > 1){
      runLayer->inputVecStart = lastRunLayer->outputVecStart;
      runLayer->inputVec = lastRunLayer->outputVec;
    }

    // interior layers own their output vector; the last layer writes
    // into the caller-supplied output vector
    if(l < numLayers - 1){
      double* outputVecPtrStart;
      double* outputVecPtr;
      allocVector(layerSize, &outputVecPtrStart, &outputVecPtr);
      runLayer->outputVecStart = outputVecPtrStart;
      runLayer->outputVec = outputVecPtr;
    }

    // the generated function takes: a pointer to the layer's input
    // vector, a pointer to one neuron's weight vector, a pointer to
    // the output vector, and the index at which to store the output
    TypeVec args;
    args.push_back(getPointer(doubleVecType(inputLayerSize)));
    args.push_back(getPointer(doubleVecType(inputLayerSize)));
    args.push_back(getPointer(doubleType()));
    args.push_back(int32Type());

    FunctionType* ft = FunctionType::get(voidType(), args, false);

    Function* f = Function::Create(ft, Function::ExternalLinkage,
                                   name.c_str(), &module_);

    BasicBlock* entry = BasicBlock::Create(context_, "entry", f);
    builder_.SetInsertPoint(entry);

    auto aitr = f->arg_begin();

    Value* inputVecPtr = aitr;
    inputVecPtr->setName("input_vec_ptr");
    ++aitr;

    Value* weightVecPtr = aitr;
    weightVecPtr->setName("weight_vec_ptr");
    ++aitr;

    Value* outputVecPtr = aitr;
    outputVecPtr->setName("output_vec_ptr");
    ++aitr;

    Value* outputIndex = aitr;
    outputIndex->setName("output_index");

    // multiply the input vector by the weight vector elementwise as a
    // single LLVM vector operation
    Value* inputVec = builder_.CreateLoad(inputVecPtr, "input_vec");
    Value* weightVec = builder_.CreateLoad(weightVecPtr, "weight_vec");
    Value* mulVec = builder_.CreateFMul(inputVec, weightVec, "mul_vec");

    // horizontally sum the products into the neuron's activation
    Value* sumActivation =
      builder_.CreateExtractElement(mulVec, getInt32(0), "sum_elem");

    for(size_t i = 1; i < inputLayerSize; ++i){
      Value* elem =
        builder_.CreateExtractElement(mulVec, getInt32(i), "sum_elem");
      sumActivation =
        builder_.CreateFAdd(sumActivation, elem, "sum_activation");
    }

    // apply the layer's activation function, then store the result at
    // the neuron's output index
    Value* output = getActivationOutput(layer->neuron(0), sumActivation);

    Value* outputElement =
      builder_.CreateGEP(outputVecPtr, outputIndex, "out_elem");
    builder_.CreateStore(output, outputElement);
    builder_.CreateRetVoid();

    // JIT-compile the function and keep both the IR function and the
    // raw function pointer on the RunLayer
    runLayer->f = f;
    runLayer->fp = (void (*)(void*, void*, void*, int))
      engine_->getPointerToFunction(f);

    // one RunNeuron per neuron in the layer, each holding its own
    // weight vector copied out of the C++-side network
    for(size_t j = 0; j < layerSize; ++j){
      NNet::Neuron* nj = layer->neuron(j);

      RunNeuron* runNeuron = new RunNeuron;
      runNeuron->layer = runLayer;
      runNeuron->outputIndex = j;

      double* weightVecPtrStart;
      double* weightVecPtr;
      allocVector(inputLayerSize, &weightVecPtrStart, &weightVecPtr);
      runNeuron->weightVecStart = weightVecPtrStart;
      runNeuron->weightVec = weightVecPtr;

      for(size_t i = 0; i < inputLayerSize; ++i){
        NNet::Neuron* ni = inputLayer->neuron(i);
        weightVecPtr[i] = nj->weight(ni);
      }

      runLayer->queue->add(runNeuron);
    }

    runNetwork->layerVec.push_back(runLayer);

    inputLayer = layer;
    lastRunLayer = runLayer;
  }

  networkMap_.insert(make_pair(name, runNetwork));

  return true;
}
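For reference, here is a plain C++ sketch of what each JIT-generated per-neuron function computes: an elementwise multiply of the input and weight vectors, a horizontal sum, the activation, and a store at the neuron's output index. The name runNeuronRef is hypothetical, and the logistic sigmoid is assumed to stand in for whatever getActivationOutput() emits for the layer's activation function.

#include <cmath>
#include <cstddef>

// Scalar reference for one generated per-neuron function (a sketch,
// not the framework's code): dot the inputs with the weights, apply
// the sigmoid, store at the neuron's slot in the output vector.
void runNeuronRef(const double* inputVec, const double* weightVec,
                  double* outputVec, int outputIndex, std::size_t n){
  double sum = 0.0;
  for(std::size_t i = 0; i < n; ++i){
    sum += inputVec[i] * weightVec[i];
  }
  outputVec[outputIndex] = 1.0 / (1.0 + std::exp(-sum));
}

Note that the generated function carries no length parameter: inputLayerSize is baked into the LLVM vector type at compile time, which is what lets the multiply and sum be emitted as fixed-width vector operations rather than the scalar loop shown above.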