Beispiel #1
0
    int UdpSocket::send(Address addr, unsigned short port, const char* bytes, int len) {
        if (addr.getType() != addr_type)
            return 0;
        
        char addr_buf[sizeof(sockaddr_in6)];
        memset(addr_buf, 0, sizeof(sockaddr_in6));
        
        if (addr_type == Address::IPV4) {
#if !defined(__ANDROID__)
            ((sockaddr_in*)addr_buf)->sin_len = sizeof(sockaddr_in);
#endif
            ((sockaddr_in*)addr_buf)->sin_family = AF_INET;
            memcpy(&((sockaddr_in*)addr_buf)->sin_addr, addr.getAddr(), sizeof(in_addr));
            ((sockaddr_in*)addr_buf)->sin_port = htons(port);
        } else {
#if !defined(__ANDROID__)
            ((sockaddr_in6*)addr_buf)->sin6_len = sizeof(sockaddr_in6);
#endif
            ((sockaddr_in6*)addr_buf)->sin6_family = AF_INET6;
            memcpy(&((sockaddr_in6*)addr_buf)->sin6_addr, addr.getAddr(), sizeof(in6_addr));
            ((sockaddr_in*)addr_buf)->sin_port = htons(port);
        }
        
        return (int)sendto(fd, bytes, len, 0, (sockaddr*)addr_buf, addr_type == Address::IPV4 ? sizeof(sockaddr_in) : sizeof(sockaddr_in6));
    }
DominatingValue<RValue>::saved_type
DominatingValue<RValue>::saved_type::save(CodeGenFunction &CGF, RValue rv) {
  if (rv.isScalar()) {
    llvm::Value *V = rv.getScalarVal();

    // These automatically dominate and don't need to be saved.
    if (!DominatingLLVMValue::needsSaving(V))
      return saved_type(V, ScalarLiteral);

    // Everything else needs an alloca.
    Address addr =
      CGF.CreateDefaultAlignTempAlloca(V->getType(), "saved-rvalue");
    CGF.Builder.CreateStore(V, addr);
    return saved_type(addr.getPointer(), ScalarAddress);
  }

  if (rv.isComplex()) {
    CodeGenFunction::ComplexPairTy V = rv.getComplexVal();
    llvm::Type *ComplexTy =
      llvm::StructType::get(V.first->getType(), V.second->getType(),
                            (void*) nullptr);
    Address addr = CGF.CreateDefaultAlignTempAlloca(ComplexTy, "saved-complex");
    CGF.Builder.CreateStore(V.first,
                            CGF.Builder.CreateStructGEP(addr, 0, CharUnits()));
    CharUnits offset = CharUnits::fromQuantity(
               CGF.CGM.getDataLayout().getTypeAllocSize(V.first->getType()));
    CGF.Builder.CreateStore(V.second,
                            CGF.Builder.CreateStructGEP(addr, 1, offset));
    return saved_type(addr.getPointer(), ComplexAddress);
  }

  assert(rv.isAggregate());
  Address V = rv.getAggregateAddress(); // TODO: volatile?
  if (!DominatingLLVMValue::needsSaving(V.getPointer()))
    return saved_type(V.getPointer(), AggregateLiteral,
                      V.getAlignment().getQuantity());

  Address addr =
    CGF.CreateTempAlloca(V.getType(), CGF.getPointerAlign(), "saved-rvalue");
  CGF.Builder.CreateStore(V.getPointer(), addr);
  return saved_type(addr.getPointer(), AggregateAddress,
                    V.getAlignment().getQuantity());
}
Address GMachine::executeMain()
{
	SuperCombinator* main = getCombinator("main");
	int mainIndex = -1;
	for (size_t ii = 0; ii < globals.size(); ++ii)
	{
		Address addr = globals[ii];
		if (addr.getType() == GLOBAL && addr.getNode()->global == main)
		{
			mainIndex = ii;
			break;
		}
	}
	assert(mainIndex != -1);
	SuperCombinator sc;
	sc.name == "__main";
	sc.arity = 0;
	sc.instructions = std::vector<GInstruction> { GInstruction(GOP::PUSH_GLOBAL, mainIndex), GInstruction(GOP::EVAL) };
	return evaluate(sc);
}
void GMachine::execute(GEnvironment& environment)
{
	const std::vector<GInstruction>& code = environment.combinator->instructions;
	StackFrame<Address>& stack = environment.stack;

	for (size_t index = 0; index < code.size(); index++)
	{
		const GInstruction& instruction = code[index];
		switch (instruction.op)
		{
		case GOP::ALLOC:
			{
				for (int i = 0; i < instruction.value; i++)
				{
					heap.push_back(Node(nullptr));
					environment.stack.push(Address::indirection(&heap.back()));
				}
			}
			break;
		case GOP::EVAL:
			{
				static SuperCombinator unwind { "__uniwnd", Type(), 0, std::vector<GInstruction>{ GInstruction(GOP::UNWIND) } };
				GEnvironment child = environment.child(&unwind);
				child.stack.push(environment.stack.top());
				execute(child);
				environment.stack.top() = child.stack.top();
			}
			break;
		case GOP::MKAP:
			{
				Address func = environment.stack.top();
				environment.stack.pop();
				Address arg = environment.stack.top();
				heap.push_back(Node(func, arg));
				environment.stack.top() = Address::application(&heap.back());
			}
			break;
		case GOP::PACK:
			{
				int tag = instruction.value & (0xFFFF);//Low two bytes
				int arity = instruction.value >> 16;//High two bytes
				heap.push_back(Node(tag, new Address[arity+1]));
				Node& ctor = heap.back();
				for (int ii = 0; ii < arity; ii++)
				{
					ctor.constructor.arguments[ii] = stack.pop();
				}
				ctor.constructor.arguments[arity + 1] = Address::indirection(nullptr);//Use as end of this constructor
				stack.push(Address::constructor(&ctor));
			}
			break;
		case GOP::SPLIT:
			{
				Address top = stack.pop();
				assert(top.getType() == CONSTRUCTOR);
				ConstructorNode& ctor = top.getNode()->constructor;
				for (int ii = 0; ii < instruction.value; ii++)
				{
					assert(ctor.arguments[ii].getType() != NodeType::INDIRECTION || ctor.arguments[ii].getNode() != nullptr);
					stack.push(ctor.arguments[ii]);
				}
			}
			break;
		case GOP::CASEJUMP:
			{
				Address top = stack.top();
				assert(top.getType() == CONSTRUCTOR);
				ConstructorNode& ctor = top.getNode()->constructor;
				if (ctor.tag != instruction.value)
					index++;//Skip the next instruction which is the jump instruction
			}
			break;
		case GOP::JUMP:
			index = instruction.value - 1;
			break;
		case GOP::POP:
			for (int i = 0; i < instruction.value; i++)
			{
				environment.stack.pop();
			}
			break;
		case GOP::PUSH:
			{
				Address addr = environment.stack[instruction.value];
				environment.stack.push(addr);
			}
			break;
		case GOP::PUSH_DICTIONARY_MEMBER:
			{
				assert(stack.base().getType() == NodeType::CONSTRUCTOR);//Must be instance dictionary
				ConstructorNode& ctor = stack.base().getNode()->constructor;
				Address& func = ctor.arguments[instruction.value];
				stack.push(func);
			}
			break;
		case GOP::PUSH_GLOBAL:
			{
				Address addr = globals.at(instruction.value);
				environment.stack.push(addr);
			}
			break;
		case GOP::PUSH_INT:
			{
				heap.push_back(Node(instruction.value));
				environment.stack.push(Address::number(&heap.back()));
			}
			break;
		case GOP::PUSH_DOUBLE:
			{
				heap.push_back(Node(instruction.doubleValue));
				environment.stack.push(Address::numberDouble(&heap.back()));
			}
			break;
		case GOP::SLIDE:
			{
				slide(environment, instruction);
			}
			break;
		case GOP::UNWIND:
			{
				Address top = environment.stack.top();
				switch (top.getType())
				{
				case NUMBER:
					break;
				case APPLICATION:
					{
						Node& n = *top.getNode();
						environment.stack.push(n.apply.func);
						--index;//Redo the unwind instruction
					}
					break;
				case FUNCTION_POINTER:
					{
						int arity = top.getNode()->function.args;
						if (stack.stackSize() - 1 < size_t(arity))
						{
							while (stack.stackSize() > 1)
							{
								stack.pop();
							}
						}
						else
						{
							size_t ii = environment.stack.stackSize() - arity - 1;
							for (; ii < environment.stack.stackSize() - 1; ii++)
							{
								Address& addr = environment.stack[ii];
								assert(addr.getType() == APPLICATION);
								addr = addr.getNode()->apply.arg;
							}
							t_ffi_func func = top.getNode()->function.ptr;
							assert(func != nullptr);
							StackFrame<Address> newStack = stack.makeChildFrame(arity + 1);
							func(this, &newStack);
							Address result = newStack.top();
							for (int i = 0; i < arity; i++)
								environment.stack.pop();
							environment.stack.push(result);
						}
					}
					break;
				case GLOBAL:
					{
						SuperCombinator* comb = top.getNode()->global;
						if (environment.stack.stackSize() - 1 < size_t(comb->arity))
						{
							while (stack.stackSize() > 1)
							{
								stack.pop();
							}
						}
						else
						{
							//Before calling the function, replace all applications on the stack with the actual arguments
							//This gives faster access to a functions arguments when using PUSH
							size_t ii = environment.stack.stackSize() - comb->arity - 1;
							for (; ii < environment.stack.stackSize() - 1; ii++)
							{
								Address& addr = environment.stack[ii];
								assert(addr.getType() == APPLICATION);
								addr = addr.getNode()->apply.arg;
							}

							GEnvironment child = environment.child(comb);
							if (debug)
							{
								std::cerr << "Executing function '" << comb->name << "'" << std::endl;
								std::cerr << "Arguments { ";
								for (size_t i = 0; i < child.stack.stackSize(); i++)
								{
									std::cerr << child.stack[i];
								}
								std::cerr << " }" << std::endl;
							}
							execute(child);
							Address result = child.stack.top();
							for (int i = 0; i < comb->arity; i++)
								environment.stack.pop();
							environment.stack.push(result);
						}
					}
					break;
				case INDIRECTION:
					{
						environment.stack.top() = top.getNode()->indirection;
						--index;//Redo the unwind instruction
					}
					break;
				default:
					break;
				}
			}
			break;
		case GOP::UPDATE:
			{
				Address top = environment.stack.top();
				heap.push_back(Node(top));
				environment.stack[instruction.value] = Address::indirection(&heap.back());
			}
			break;
#define BINOP2(op, opname) \
		case GOP:: opname:\
			{\
			Address rhs = environment.stack.pop(); \
			Address lhs = environment.stack.top(); \
			int result = lhs.getNode()->number op rhs.getNode()->number; \
			heap.push_back(Node(result)); \
			environment.stack.top() = Address::number(&heap.back()); \
			}\
            break;
#define BINOP(f, name) case GOP:: name: binopInt<f>(environment, heap); break;

			BINOP(add<int>, ADD)
			BINOP(subtract<int>, SUBTRACT)
			BINOP(multiply<int>, MULTIPLY)
			BINOP(divide<int>, DIVIDE)
			BINOP(remainder<int>, REMAINDER)
			
#define BINOP_DOUBLE(f, name) case GOP:: name: binopDouble<f>(environment, heap); break;

			BINOP_DOUBLE(add<double>, ADD_DOUBLE)
			BINOP_DOUBLE(subtract<double>, SUBTRACT_DOUBLE)
			BINOP_DOUBLE(multiply<double>, MULTIPLY_DOUBLE)
			BINOP_DOUBLE(divide<double>, DIVIDE_DOUBLE)

#undef BINOP_DOUBLE
		case GOP::NEGATE:
			{
				Address x = environment.stack.top();
				heap.push_back(Node(-x.getNode()->number));
				environment.stack.top() = Address::number(&heap.back());
			}
			break;

			BINOP2(== , COMPARE_EQ)
			BINOP2(!= , COMPARE_NEQ)
			BINOP2(> , COMPARE_GT)
			BINOP2(>=, COMPARE_GE)
			BINOP2(< , COMPARE_LT)
			BINOP2(<=, COMPARE_LE)

#undef BINOP
#undef BINOP2

		default:
			std::cout << "Unimplemented instruction " << int(code[index].op) << std::endl;
			break;
		}
	}
	if (debug)
	{
		std::cerr << "Returning '" << stack.top() << "' from '" << environment.combinator->name << "'" << std::endl;
	}
}
Beispiel #5
0
// CUDA 9.0+ uses new way to launch kernels. Parameters are packed in a local
// array and kernels are launched using cudaLaunchKernel().
void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
                                            FunctionArgList &Args) {
  // Build the shadow stack entry at the very start of the function.

  // Calculate amount of space we will need for all arguments.  If we have no
  // args, allocate a single pointer so we still have a valid pointer to the
  // argument array that we can pass to runtime, even if it will be unused.
  Address KernelArgs = CGF.CreateTempAlloca(
      VoidPtrTy, CharUnits::fromQuantity(16), "kernel_args",
      llvm::ConstantInt::get(SizeTy, std::max<size_t>(1, Args.size())));
  // Store pointers to the arguments in a locally allocated launch_args.
  for (unsigned i = 0; i < Args.size(); ++i) {
    llvm::Value* VarPtr = CGF.GetAddrOfLocalVar(Args[i]).getPointer();
    llvm::Value *VoidVarPtr = CGF.Builder.CreatePointerCast(VarPtr, VoidPtrTy);
    CGF.Builder.CreateDefaultAlignedStore(
        VoidVarPtr, CGF.Builder.CreateConstGEP1_32(KernelArgs.getPointer(), i));
  }

  llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");

  // Lookup cudaLaunchKernel function.
  // cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
  //                              void **args, size_t sharedMem,
  //                              cudaStream_t stream);
  TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl();
  DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl);
  IdentifierInfo &cudaLaunchKernelII =
      CGM.getContext().Idents.get("cudaLaunchKernel");
  FunctionDecl *cudaLaunchKernelFD = nullptr;
  for (const auto &Result : DC->lookup(&cudaLaunchKernelII)) {
    if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Result))
      cudaLaunchKernelFD = FD;
  }

  if (cudaLaunchKernelFD == nullptr) {
    CGM.Error(CGF.CurFuncDecl->getLocation(),
              "Can't find declaration for cudaLaunchKernel()");
    return;
  }
  // Create temporary dim3 grid_dim, block_dim.
  ParmVarDecl *GridDimParam = cudaLaunchKernelFD->getParamDecl(1);
  QualType Dim3Ty = GridDimParam->getType();
  Address GridDim =
      CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "grid_dim");
  Address BlockDim =
      CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "block_dim");
  Address ShmemSize =
      CGF.CreateTempAlloca(SizeTy, CGM.getSizeAlign(), "shmem_size");
  Address Stream =
      CGF.CreateTempAlloca(VoidPtrTy, CGM.getPointerAlign(), "stream");
  llvm::FunctionCallee cudaPopConfigFn = CGM.CreateRuntimeFunction(
      llvm::FunctionType::get(IntTy,
                              {/*gridDim=*/GridDim.getType(),
                               /*blockDim=*/BlockDim.getType(),
                               /*ShmemSize=*/ShmemSize.getType(),
                               /*Stream=*/Stream.getType()},
                              /*isVarArg=*/false),
      "__cudaPopCallConfiguration");

  CGF.EmitRuntimeCallOrInvoke(cudaPopConfigFn,
                              {GridDim.getPointer(), BlockDim.getPointer(),
                               ShmemSize.getPointer(), Stream.getPointer()});

  // Emit the call to cudaLaunch
  llvm::Value *Kernel = CGF.Builder.CreatePointerCast(CGF.CurFn, VoidPtrTy);
  CallArgList LaunchKernelArgs;
  LaunchKernelArgs.add(RValue::get(Kernel),
                       cudaLaunchKernelFD->getParamDecl(0)->getType());
  LaunchKernelArgs.add(RValue::getAggregate(GridDim), Dim3Ty);
  LaunchKernelArgs.add(RValue::getAggregate(BlockDim), Dim3Ty);
  LaunchKernelArgs.add(RValue::get(KernelArgs.getPointer()),
                       cudaLaunchKernelFD->getParamDecl(3)->getType());
  LaunchKernelArgs.add(RValue::get(CGF.Builder.CreateLoad(ShmemSize)),
                       cudaLaunchKernelFD->getParamDecl(4)->getType());
  LaunchKernelArgs.add(RValue::get(CGF.Builder.CreateLoad(Stream)),
                       cudaLaunchKernelFD->getParamDecl(5)->getType());

  QualType QT = cudaLaunchKernelFD->getType();
  QualType CQT = QT.getCanonicalType();
  llvm::Type *Ty = CGM.getTypes().ConvertType(CQT);
  llvm::FunctionType *FTy = dyn_cast<llvm::FunctionType>(Ty);

  const CGFunctionInfo &FI =
      CGM.getTypes().arrangeFunctionDeclaration(cudaLaunchKernelFD);
  llvm::FunctionCallee cudaLaunchKernelFn =
      CGM.CreateRuntimeFunction(FTy, "cudaLaunchKernel");
  CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(),
               LaunchKernelArgs);
  CGF.EmitBranch(EndBlock);

  CGF.EmitBlock(EndBlock);
}