示例#1
0
/**
 * @brief   ChibiOS/RT initialization.
 * @details After executing this function the current instructions stream
 *          becomes the main thread.
 * @pre     Interrupts must be still disabled when @p chSysInit() is invoked
 *          and are internally enabled.
 * @post    The main thread is created with priority @p NORMALPRIO.
 * @note    This function has special, architecture-dependent, requirements,
 *          see the notes into the various port reference manuals.
 *
 * @special
 */
void chSysInit(void) {
  static Thread mainthread;

  port_init();
  scheduler_init();
  vt_init();
#if CH_USE_MEMCORE
  core_init();
#endif
#if CH_USE_HEAP
  heap_init();
#endif
#if CH_DBG_ENABLE_TRACE
  trace_init();
#endif

  /* Now this instructions flow becomes the main thread.*/
  setcurrp(_thread_init(&mainthread, NORMALPRIO));
  currp->p_state = THD_STATE_CURRENT;
  chSysEnable();

#if !CH_NO_IDLE_THREAD
  /* This thread has the lowest priority in the system, its role is just to
     serve interrupts in its context while keeping the lowest energy saving
     mode compatible with the system status.*/
  chThdCreateStatic(_idle_thread_wa, sizeof(_idle_thread_wa), IDLEPRIO,
                    (tfunc_t)_idle_thread, NULL);
#endif
}
示例#2
0
void supervisor_init(void)
{
	trace_init();

	thinkos_thread_create_inf((void *)supervisor_task, (void *)NULL,
							  &supervisor_inf);
}
int mxosrvr_init_seabed_trace_dll()
{
	bool bret = true;

	const char *gp_mxosrvr_trace_filename         = "mxosrvr_trace_";
	const char *gp_mxosrvr_env_trace_enable       = "MXOSRVR_TRACE_ENABLE";	 // enable trace
	const char *gp_mxosrvr_env_trace_ems          = "MXOSRVR_TRACE_EMS";     // trace what used to be sent to the legacy ems collector
	const char *gp_mxosrvr_env_trace_legacy       = "MXOSRVR_TRACE_LEGACY";
	bool        gv_mxosrvr_trace_enable = false;

	msg_getenv_bool(gp_mxosrvr_env_trace_enable, &gv_mxosrvr_trace_enable);

	if(gv_mxosrvr_trace_enable)
	{
		msg_getenv_bool(gp_mxosrvr_env_trace_ems, &gv_trace_ems_dll);
		msg_getenv_bool(gp_mxosrvr_env_trace_legacy, &gv_trace_legacy_dll);

		trace_init((char*)gp_mxosrvr_trace_filename,
                  true,
                  NULL,
                  false);

	}
	return 0;
}
示例#4
0
void init(int argc, char* argv[]) {
    {extern void input_init(int, char *[]); input_init(argc, argv);}
    {extern void main_init(int, char *[]); main_init(argc, argv);}
    {extern void prof_init(int, char *[]); prof_init(argc, argv);}
    {extern void trace_init(int, char *[]); trace_init(argc, argv);}
    {extern void type_init(int, char *[]); type_init(argc, argv);}
}
void app_start(int, char **)
{
    // set the baud rate for output printing
	get_stdio_serial().baud(YOTTA_CFG_K64F_BORDER_ROUTER_BAUD);

    // set heap size and memory error handler for this application
    ns_dyn_mem_init(app_stack_heap, APP_DEFINED_HEAP_SIZE, app_heap_error_handler, 0);

    trace_init(); // set up the tracing library
    set_trace_print_function(trace_printer);
    set_trace_config(TRACE_MODE_COLOR | TRACE_ACTIVE_LEVEL_DEBUG | TRACE_CARRIAGE_RETURN);

    const char *mac_src = STR(YOTTA_CFG_K64F_BORDER_ROUTER_BACKHAUL_MAC_SRC);

    if (strcmp(mac_src, "BOARD") == 0) {
        /* Setting the MAC Address from UID (A yotta function)
         * Takes UID Mid low and UID low and shuffles them around. */
        mbed_mac_address((char *)mac);
    } else if (strcmp(mac_src, "CONFIG") == 0) {
        /* MAC is defined by the user through yotta configuration */
        const uint8_t mac48[] = YOTTA_CFG_K64F_BORDER_ROUTER_BACKHAUL_MAC;

        for (uint32_t i = 0; i < sizeof(mac); ++i) {
            mac[i] = mac48[i];
        }
    }

    // run LED toggler in the Minar scheduler
    minar::Scheduler::postCallback(mbed::util::FunctionPointer0<void>
                                   (toggle_led1).bind()).period(minar::milliseconds(500));

    tr_info("Starting K64F border router...");
    border_router_start();
}
示例#6
0
int
trace_enter(int fd, char *line, int closed)
{
	struct trace *tmp;
	int res;

	assert(fd >= 0);

	if (trace_refs == NULL || fd >= trace_refsize) {
		res = trace_init(fd);
		if (res == -1)
			goto error;
	}

	if ((tmp = TAILQ_LAST(trace_refs[fd], traceq)) != NULL) {
		if (tmp->closed)
			trace_free(fd);
	}

	if ((tmp = malloc(sizeof(struct trace))) == NULL)
		goto error;

	tmp->line = line;
	tmp->closed = closed;
	TAILQ_INSERT_TAIL(trace_refs[fd], tmp, next);

	return (0);

 error:
	free(line);
	return (-1);
}
示例#7
0
int main() {

    printf("============== Before allocating blocks ==============\n");
    display_mallinfo();

    MbedClient mbed_client;

    m2mclient = &mbed_client;

    trace_init();
    set_trace_print_function( trace_printer );
    set_trace_config(TRACE_MODE_COLOR|TRACE_ACTIVE_LEVEL_DEBUG|TRACE_CARRIAGE_RETURN);

    signal(SIGINT, (signalhandler_t)ctrl_c_handle_function);

    bool result = mbed_client.create_interface();
    if(true == result) {
        printf("\nInterface created\n");
    }
    result = mbed_client.create_bootstrap_object();
    if(true == result) {
        printf("Bootstrap object created");
    }

    result = mbed_client.create_register_object();
    if(true == result) {
        printf("Register object created");
    }

    result = mbed_client.create_device_object();
    if(true == result) {
        printf("\nDevice object created !!\n");
    }

    result = mbed_client.create_generic_object();

    if(true == result) {
        printf("\nGeneric object created\n");
    }

//    printf("Bootstrapping endpoint\n");
//    mbed_client.test_bootstrap();

    printf("Registering endpoint\n");
    mbed_client.test_register();


    pthread_create(&bootstrap_thread, NULL, &wait_for_bootstrap, (void*) &mbed_client);
    pthread_create(&observation_thread, NULL, &send_observation, (void*) &mbed_client);
    pthread_create(&unregister_thread, NULL, &wait_for_unregister, (void*) &mbed_client);

    pthread_join(bootstrap_thread, NULL);
    pthread_join(unregister_thread, NULL);
    pthread_join(observation_thread, NULL);

    printf("\n============== After freeing blocks ==============\n");
    display_mallinfo();

    exit(EXIT_SUCCESS);
}
示例#8
0
文件: sync.c 项目: eieio/am33x-cm3
void init_m3_state_machine(void)
{
	int i = 0;

	/* Flush out NVIC interrupts */
	for (i=0; i<AM335X_NUM_EXT_INTERRUPTS; i++)
	{
		nvic_disable_irq(i);
		nvic_clear_irq(i);
	}

	msg_init();

	trace_init();

	pm_init();

	/* Enable only the MBX IRQ */
	nvic_enable_irq(AM335X_IRQ_MBINT0);

	/*
	 * In the remote case where we disabled the MPU CLOCK
	 * enable it again, no harm in writing to the reg
	 * even if this was not needed
	 */
	mpu_enable();
	a8_notify(CMD_STAT_PASS);
}
示例#9
0
static int initr_trace(void)
{
#ifdef CONFIG_TRACE
	trace_init(gd->trace_buff, CONFIG_TRACE_BUFFER_SIZE);
#endif

	return 0;
}
示例#10
0
void EXACT_XACTION_BEGIN(void) {
#ifdef COUNT_OPS
  tx = 0;
#else
  if (trace==NULL) trace_init();
  fprintf(trace, "x\n");
#endif
}
示例#11
0
void EXACT_XACTION_END(void) {
  if (trace==NULL) trace_init();
#ifdef COUNT_OPS
  fprintf(trace, "%d\n", tx);
#else
  fprintf(trace, "X\n");
#endif
}
示例#12
0
int main(int argc, char ** argv)
{
	usb_cdc_class_t * cdc;

	DCC_LOG_INIT();
	DCC_LOG_CONNECT();

	DCC_LOG(LOG_TRACE, "1. cm3_udelay_calibrate()");
	cm3_udelay_calibrate();

	DCC_LOG(LOG_TRACE, "2. thinkos_init()");
	thinkos_init(THINKOS_OPT_PRIORITY(8) | THINKOS_OPT_ID(7));

	DCC_LOG(LOG_TRACE, "3. io_init()");
	io_init();

	DCC_LOG(LOG_TRACE, "4. external_bus_init()");
	external_bus_init();

	DCC_LOG(LOG_TRACE, "5. stdio_init()");
	stdio_init();
	printf("\n---\n");

	DCC_LOG(LOG_TRACE, "6. trace_init()");
	trace_init();

	DCC_LOG(LOG_TRACE, "7. env_init()");
	env_init();

	/* create some threads */
	DCC_LOG(LOG_TRACE, "8. monitor_init()");
	monitor_init();

	DCC_LOG(LOG_TRACE, "9. watchdog_init()");
	watchdog_init();

	DCC_LOG(LOG_TRACE, "10. net_init()");
	net_init();

	DCC_LOG(LOG_TRACE, "11. pkt_xmt_init()");
	pkt_xmt_init();
	net_pkt_mode(true);

	DCC_LOG(LOG_TRACE, "12. console_shell_init()");
	console_shell_init();

	DCC_LOG(LOG_TRACE, "13. usb_cdc_init()");
	usb_cdc_sn_set(*((uint64_t *)STM32F_UID));
	cdc = usb_cdc_init(&stm32f_otg_fs_dev, 
					   cdc_acm_def_str, 
					   cdc_acm_def_strcnt);

	DCC_LOG(LOG_TRACE, "14. usb_shell()");
	for (;;) {
		usb_shell(cdc);
	}
}
示例#13
0
void TA(EXACT_traceWrite)(struct oobj *obj, int offset, int istran) {
  VALUETYPE *ptr = (VALUETYPE *)(FIELDBASE(obj) + offset);
#ifdef COUNT_OPS
  if (istran) tx++;
#else
  if (trace==NULL) trace_init();
  fprintf(trace, "%c %p %d %d\n", istran ? 'w' : 'W', ptr, (int)sizeof(*ptr),
	  (int)FNI_ObjectSize(obj));
#endif
}
示例#14
0
文件: trace.c 项目: saucjedi/nagi
u8 *cmd_trace_on(u8 *c)
{
	if (trace_state != 0)
		return c + 1;
	else
	{
		trace_init();
		return c;
	}
}
示例#15
0
void *malloc(size_t s) {
  if(!marker) trace_init();
  if(s>0x7fffffff) return 0;
  size_t *p = real_malloc(s+sizeof(s));
  if(!p) return p;
  *p++ = s;
  void *result = p;
  VALGRIND_MEMPOOL_ALLOC(&marker, result, s);
  return result;
}
__LIBBOOTIMAGE_PUBLIC_API__ struct bootimage* bootimage_initialize(){

	/* Allocate and zero memory to store the bootimage struct
	   This will contain metadata for a loaded bootimage
	 */
	trace_init();
	struct bootimage* bi = calloc(1,sizeof(struct bootimage));
	D("bi=%p",bi);
	return bi ;

}
示例#17
0
void *realloc(void* a, size_t news) {
  if(!marker) trace_init();
  if(!a) return malloc(news);
  size_t *x = a;
  --x;
  size_t olds = *x;
  void *result = malloc(news);
  if(!result) return result;
  memcpy(result, a, olds);
  free(a);
  return result;
}
示例#18
0
/********************
 * dres_open
 ********************/
EXPORTED dres_t *
dres_open(char *file)
{
    struct stat st;
    char        path[PATH_MAX], *suffix;
    dres_t     *dres;
    size_t      len;
    int         cid;

    trace_init();
    cid = TRACE_DEFAULT_CONTEXT;
    trace_add_module(cid, &trcdres);

    
    /*
     * try to load the given file if it is found and a regular file
     */

    if (stat(file, &st) == 0 && S_ISREG(st.st_mode)) {
        if ((dres = dres_load(file)) != NULL ||
            (dres = dres_parse_file(file)) != NULL)
            return dres;

        return NULL;
    }


    /*
     * otherwise try to load it with binary and plain suffices
     */

    if ((len = strlen(file)) >= sizeof(path) ||
        len + sizeof(DRES_SUFFIX_BINARY) > sizeof(path) ||
        len + sizeof(DRES_SUFFIX_PLAIN) > sizeof(path)) {
        errno = EOVERFLOW;
        return NULL;
    }
    
    strcpy(path, file);
    suffix = path + len;
    *suffix++ = '.';
    
    strcpy(suffix, DRES_SUFFIX_BINARY);
    if ((dres = dres_load(path)) != NULL)
        return dres;
    
    strcpy(suffix, DRES_SUFFIX_PLAIN);
    return dres_parse_file(path);
}
示例#19
0
文件: main.c 项目: dbittman/seakernel
/* This is the C kernel entry point */
void kmain(struct multiboot *mboot_header, addr_t initial_stack)
{
	/* Store passed values, and initiate some early things
	 * We want serial log output as early as possible */
	kernel_state_flags=0;
	mtboot = mboot_header;
	initial_boot_stack = initial_stack;
	loader_parse_kernel_elf(mboot_header, &kernel_sections);
#if CONFIG_MODULES
	loader_init_kernel_symbols();
#endif
	serial_init();
	cpu_early_init();
#if CONFIG_MODULES
	loader_init_modules();
#endif
	syscall_init();
	fs_initrd_load(mtboot);
	cpu_timer_install(1000);
	cpu_processor_init_1();

	/* Now get the management stuff going */
	printk(1, "[kernel]: Starting system management\n");
	mm_init(mtboot);
	syslog_init();
	parse_kernel_command_line((char *)(addr_t)mtboot->cmdline);
	tm_init_multitasking();
	dm_init();
	fs_init();
	net_init();
	trace_init();
	/* Load the rest... */
	printk(KERN_MILE, "[kernel]: Kernel is setup (kv=%d, bpl=%d: ok)\n", 
	       CONFIG_VERSION_NUMBER, BITS_PER_LONG);
	printk(KERN_DEBUG, "[kernel]: structure sizes: process=%d bytes, thread=%d bytes, inode=%d bytes\n",
			sizeof(struct process), sizeof(struct thread), sizeof(struct inode));
	cpu_interrupt_set(1);
	sys_setup();
	cpu_processor_init_2();
	timer_calibrate();
#if CONFIG_SMP
	if(boot_cpus)
		cpu_boot_all_aps();
#endif
	tm_clone(0, __init_entry, 0);
	sys_setsid();
	kt_kernel_idle_task();
}
示例#20
0
int
main (int argc, char **argv)
{
  struct addrinfo hints, *res;
  trace_t trace;

  set_program_name (argv[0]);

  /* Parse command line */
  iu_argp_init ("traceroute", program_authors);
  argp_parse (&argp, argc, argv, 0, NULL, NULL);

  /* Hostname lookup first for better information */
  memset (&hints, 0, sizeof (hints));
  hints.ai_family = AF_INET;
  hints.ai_flags = AI_CANONNAME;

  if ((hostname == NULL) || (*hostname == '\0')
      || getaddrinfo (hostname, NULL, &hints, &res))
    error (EXIT_FAILURE, 0, "unknown host");

  memcpy (&dest, res->ai_addr, res->ai_addrlen);
  dest.sin_port = htons (opt_port);

  getnameinfo (res->ai_addr, res->ai_addrlen, addrstr, sizeof (addrstr),
                  NULL, 0, NI_NUMERICHOST);

  printf ("traceroute to %s (%s), %d hops max\n",
	  res->ai_canonname, addrstr, opt_max_hops);

  freeaddrinfo (res);

  trace_init (&trace, dest, opt_type);

  int hop = 1;
  while (!stop)
    {
      if (hop > opt_max_hops)
	exit (EXIT_SUCCESS);
      do_try (&trace, hop, opt_max_hops, opt_max_tries);
      trace_inc_ttl (&trace);
      trace_inc_port (&trace);
      hop++;
    }

  exit (EXIT_SUCCESS);
}
示例#21
0
int am335_init(void)
{
	int i;

	/*
	 * Each interrupt has a priority register associated with it
	 * 8 bits... only 7:6:5:4 are available for SA
	 * out of the 16 levels here... using a priority grouping
	 * these 4 bits can be further split into preempt priority
	 * and subpriority fields
	 */
	scr_enable_sleepdeep();
	scr_enable_sleeponexit();

	/* Disable all the external interrupts */
	for (i=0; i < CM3_NUM_EXT_INTERRUPTS; i++)
		nvic_disable_irq(i);

	/* Disable Tamper swakeup, a new addition for AM43XX SOCs */
	if (soc_id == AM43XX_SOC_ID)
		nvic_disable_irq(CM3_IRQ_TPM_WAKE);

	/* Clean the IPC registers */
	m3_param_reset();

	trace_init();

	pm_reset();

	setup_soc();

	/* Enable only the MBX IRQ */
	nvic_enable_irq(CM3_IRQ_MBINT0);
	nvic_enable_irq(53);

	m3_firmware_version();

	/* TODO: Enable PRCM_INT2 with a dummy handler */

	/* Notify A8 of init completion */
	a8_notify(CMD_STAT_PASS);

	/* Ok we are done here */

	return 0;
}
void app_start(int, char **)
{
	char if_desciption[] = "6LoWPAN_NODE";

	pc.baud(115200);  //Setting the Baud-Rate for trace output
    ns_dyn_mem_init(app_stack_heap, APP_DEFINED_HEAP_SIZE, app_heap_error_handler,0);
    randLIB_seed_random();
    platform_timer_enable();
    eventOS_scheduler_init();
    trace_init();
	set_trace_print_function( trace_printer );
    set_trace_config(TRACE_ACTIVE_LEVEL_DEBUG|TRACE_CARRIAGE_RETURN);
	tr_debug("M \r\n");
    net_init_core();
    rf_phy_device_register_id = rf_device_register();
    net_rf_id = arm_nwk_interface_init(NET_INTERFACE_RF_6LOWPAN, rf_phy_device_register_id, if_desciption);
    eventOS_event_handler_create(&tasklet_main, ARM_LIB_TASKLET_INIT_EVENT);
}
示例#23
0
void maintenanceTask() {

	System_printf("Main Task started\n");
	System_flush();

#if 1
	/* Enable radio SPI */
	spicom_init();
#endif

#if 1
	/* Enable Trace */
	trace_init();
#endif

#if 1
	/* Digital io */
	dio_init();
#endif


#ifdef RN_ACTIVE
	rn_sysbios_init();
	rn_module_init();
	ap_sim_module_init();

	//rn_sysbios_start();

	rn_module_start(1);
	ap_sim_module_start(1);

	rn_module_start(2);
	ap_sim_module_start(2);

#endif

	maintenanceMainTask();
	/* Should never return but in case*/
	Task_exit();
}
示例#24
0
文件: main.c 项目: theepot/esc64
int main (void)
{
	//set clock divider at 1x
	CLKPR = 0x80;
	CLKPR = 0x00;

	led_init();
	trace_init();
	usart_init();
	InitPins();
	stdout = &uart_output;
	stderr = &uart_output;
	
	puts("begin");

	test();

	puts("end");

	for(;;);
	return 0;
}
示例#25
0
int main()
{
	char     *env_input;
	char     path[4096];

	/* CGI initial print */
	printf("Content-type: text/html\n\n");

	/* create trace/log */
	trace_init("list.log");

	/* get GET content from environment variable */ 
	env_input = getenv("QUERY_STRING");
	if (env_input == NULL) {
		trace(WEB, "No env variable: QUERY_STRING.\n", NULL);
		goto exit;
	}

	/* extract page number from GET content */
	sscanf(env_input, "p=%s", path);

	/* echo HTML */
	cat("head.cat");
	printf("file: %s <br/>", basename(path));
	printf("[dmath]%s[/dmath]", first_line(path));
	cat("neck.cat");

	echo_tex_li(path);
	
	cat("ass.cat");
	cat("tail.cat");

exit:
	/* delete trace/log */
	trace_unfree();
	trace_uninit();
	return 0;
}
示例#26
0
void mesh_system_init(void)
{
    if (mesh_initialized == false) {
#ifndef YOTTA_CFG
        ns_hal_init(app_stack_heap, MBED_MESH_API_HEAP_SIZE,
                    mesh_system_heap_error_handler, NULL);
        eventOS_scheduler_mutex_wait();
        net_init_core();
        eventOS_scheduler_mutex_release();
#else
        ns_dyn_mem_init(app_stack_heap, MBED_MESH_API_HEAP_SIZE,
                        mesh_system_heap_error_handler, NULL);
        randLIB_seed_random();
        platform_timer_enable();
        eventOS_scheduler_init();
        trace_init(); // trace system needs to be initialized right after eventOS_scheduler_init
        net_init_core();
        /* initialize 6LoWPAN socket adaptation layer */
        ns_sal_init_stack();
#endif
        mesh_initialized = true;
    }
}
示例#27
0
/* Function	: trace_write
 * Description	: TAU trace output writer
 */
int trace_write(t_ev**		buf,
		t_ev** 		cur, 
		x_uint64*	last_ti, 
		x_int32 	ev,
		x_uint16        nid,   /* -- node id         -- */
		x_uint16        tid,   /* -- thread id       -- */
		x_int64         par,   /* -- event parameter -- */
		x_uint64        ti,    /* -- time [us]       -- */
		char 		*output_path)
{
	static int init_done =0;
	int retval = 0;
	
	/* Check if the buffer is already initialized */
	if(*buf == NULL && *cur== NULL){
		if((retval = trace_init(buf,cur,ev,nid,tid,par,ti))){
			return retval;
		}
	}else{
		/* 
		 * cur is pointing to the last trace event.
		 * We have to advance cur to the next available 
		 * space and check for out-of-bound.
		 */
		if( (*cur)-(*buf) >= MAX_NUM_TRACE-3 ){
			trace_dump_cont(buf, cur, output_path);
		}else{
			/* Advance the current pointer */
			*cur = (*cur)+1;
		}	
		/* Write the trace event */
		__trace_write(cur,ev,nid,tid,par,ti);
	}
	*last_ti = ti;	
	//printf("DEBUG: trace_write: ev= %d, retval = %d\n", ev,retval);
	return retval;
}
示例#28
0
/**
    Purpose
    -------
    SPOTRF computes the Cholesky factorization of a real symmetric
    positive definite matrix dA.

    The factorization has the form
       dA = U**T * U,   if UPLO = MagmaUpper, or
       dA = L  * L**T,  if UPLO = MagmaLower,
    where U is an upper triangular matrix and L is lower triangular.

    This is the block version of the algorithm, calling Level 3 BLAS.

    Arguments
    ---------
    @param[in]
    uplo    magma_uplo_t
      -     = MagmaUpper:  Upper triangle of dA is stored;
      -     = MagmaLower:  Lower triangle of dA is stored.

    @param[in]
    n       INTEGER
            The order of the matrix dA.  N >= 0.

    @param[in,out]
    dA      REAL array on the GPU, dimension (LDDA,N)
            On entry, the symmetric matrix dA.  If UPLO = MagmaUpper, the leading
            N-by-N upper triangular part of dA contains the upper
            triangular part of the matrix dA, and the strictly lower
            triangular part of dA is not referenced.  If UPLO = MagmaLower, the
            leading N-by-N lower triangular part of dA contains the lower
            triangular part of the matrix dA, and the strictly upper
            triangular part of dA is not referenced.
    \n
            On exit, if INFO = 0, the factor U or L from the Cholesky
            factorization dA = U**T * U or dA = L * L**T.

    @param[in]
    ldda     INTEGER
            The leading dimension of the array dA.  LDDA >= max(1,N).
            To benefit from coalescent memory accesses LDDA must be
            divisible by 16.

    @param[out]
    info    INTEGER
      -     = 0:  successful exit
      -     < 0:  if INFO = -i, the i-th argument had an illegal value
      -     > 0:  if INFO = i, the leading minor of order i is not
                  positive definite, and the factorization could not be
                  completed.

    @ingroup magma_sposv_comp
    ********************************************************************/
extern "C" magma_int_t
magma_spotrf2_mgpu(int num_gpus, magma_uplo_t uplo, magma_int_t m, magma_int_t n,
                   magma_int_t off_i, magma_int_t off_j, magma_int_t nb,
                   float **d_lA,  magma_int_t ldda,
                   float **d_lP,  magma_int_t lddp,
                   float *A,      magma_int_t lda,   magma_int_t h,
                   magma_queue_t stream[][3], magma_event_t event[][5],
                   magma_int_t *info )
{
#define Alo(i, j)  (A +             ((j)+off_j)*lda  + (nb*(((i)/nb)%h)+off_i))
#define Aup(i, j)  (A + (nb*(((j)/nb)%h)+off_j)*lda  +               (i+off_i))

#define  dlA(id, i, j)    (d_lA[(id)] + (j)*ldda + (i))
#define  dlP(id, i, j, k) (d_lP[(id)] + (k)*nb*lddp + (j)*lddp + (i))
#define dlPT(id, i, j, k) (d_lP[(id)] + (k)*nb*lddp + (j)*nb   + (i))

    magma_int_t     j, jb, nb0, nb2, dd, d, id, j_local, j_local2, buf;
    float c_one     = MAGMA_S_ONE;
    float c_neg_one = MAGMA_S_NEG_ONE;
    float          d_one     =  1.0;
    float          d_neg_one = -1.0;
    int upper = (uplo == MagmaUpper);
    float *dlpanel;
    //magma_event_t event0[MagmaMaxGPUs], // syrk
    //            event1[MagmaMaxGPUs], // send off-diagonal
    //            event2[MagmaMaxGPUs], // send diagonal
    //            event3[MagmaMaxGPUs]; // trsm
    magma_int_t n_local[MagmaMaxGPUs], ldpanel;
    int stream0 = 0, stream1 = 1;
    #ifdef STRSM_WORK
    float *d_dinvA[MagmaMaxGPUs][2], *d_x[MagmaMaxGPUs][2]; /* used by strsm_work */
    #endif
    
    *info = 0;
    if (! upper && uplo != MagmaLower) {
        *info = -1;
    } else if (n < 0) {
        *info = -2;
    } else if (!upper && num_gpus*ldda < max(1,n)) {
        *info = -4;
    } else if (upper && ldda < max(1,m)) {
        *info = -4;
    }
    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    for( d=0; d < num_gpus; d++ ) {
        /* local-n and local-ld */
        if (upper) {
            n_local[d] = ((n/nb)/num_gpus)*nb;
            if (d < (n/nb)%num_gpus)
                n_local[d] += nb;
            else if (d == (n/nb)%num_gpus)
                n_local[d] += n%nb;
        } else {
            n_local[d] = ((m/nb)/num_gpus)*nb;
            if (d < (m/nb)%num_gpus)
                n_local[d] += nb;
            else if (d == (m/nb)%num_gpus)
                n_local[d] += m%nb;
        }
        //magma_setdevice(d);
        //magma_event_create( &event0[d] );
        //magma_event_create( &event1[d] );
        //magma_event_create( &event2[d] );
        //magma_event_create( &event3[d] );
    }
    magma_setdevice(0);

    /* == initialize the trace */
    trace_init( 1, num_gpus, 3, (magma_queue_t*)stream );

    /* Use blocked code. */
    if (upper) {
        /* ---------------------------------------------- */
        /* Upper-triangular case                          */
        /* > Compute the Cholesky factorization A = U'*U. */
        /* ---------------------------------------------- */
        
#if defined(PRECISION_d) && defined(STRSM_WORK)
        /* invert the diagonals
         * Allocate device memory for the inversed diagonal blocks, size=m*NB
         */
        for( d=0; d < num_gpus; d++ ) {
            magma_setdevice(d);
            for( j=0; j < 2; j++ ) {
                magma_smalloc( &d_dinvA[d][j], nb*nb );
                magma_smalloc( &d_x[d][j],      n*nb );
                cudaMemset(d_dinvA[d][j], 0, nb*nb*sizeof(float));
                cudaMemset(d_x[d][j],     0,  n*nb*sizeof(float));
            }
        }
        magma_setdevice(0);
#endif
        
        for (j=0; j < m; j += nb) {
            /* Set the GPU number that holds the current panel */
            id  = (j/nb)%num_gpus;
            buf = (j/nb)%num_gpus;
            
            /* Set the local index where the current panel is */
            j_local = j/(nb*num_gpus);
            jb = min(nb, (m-j));
            
            if ( j > 0 ) {
                /* needed on pluto... */
                magma_setdevice(id);
                magma_queue_sync( stream[id][stream0] ); // wait for the column on CPU

                /* broadcast off-diagonal column to all gpus */
                d = (j/nb+1)%num_gpus;
                for( dd=0; dd < num_gpus; dd++ ) {
                    if ( d != id ) {
                        magma_setdevice(d);
                
                        /* wait for it on CPU */
                        magma_queue_wait_event( stream[d][stream0], event[id][1] );
                
                        /* send it to GPU */
                        trace_gpu_start( d, stream0, "comm", "rows to GPUs" );
                        magma_ssetmatrix_async( j, jb,
                                                Aup(0,j),        lda,
                                                dlP(d,jb,0,buf), lddp,
                                                stream[d][stream0] );
                        trace_gpu_end( d, stream0 );
                        magma_event_record( event[d][1], stream[d][stream0] );
                    }
                    d = (d+1)%num_gpus;
                }
            }
            
            /* Update the current diagonal block */
            magma_setdevice(id);
            if ( j > 0 ) {
                magmablasSetKernelStream(stream[id][stream1]);
                trace_gpu_start( id, stream1, "syrk", "syrk" );
                magma_ssyrk(MagmaUpper, MagmaTrans, jb, j,
                            d_neg_one, dlA(id, 0, nb*j_local), ldda,
                            d_one,     dlA(id, j, nb*j_local), ldda);
                trace_gpu_end( id, stream1 );
                magma_event_record( event[id][0], stream[id][stream1] );
            }

            /* send the diagonal to cpu */
            magma_queue_wait_event( stream[id][stream0], event[id][0] ); // wait for syrk
            trace_gpu_start( id, stream0, "comm", "D to CPU" );
            magma_sgetmatrix_async( jb, jb,
                                    dlA(id, j, nb*j_local), ldda,
                                    Aup(j,j),               lda,
                                    stream[id][stream0] );
            trace_gpu_end( id, stream0 );

            if ( j > 0 ) {
                /* Compute the local block column of the panel. */
                d = (j/nb+1)%num_gpus;
                for( dd=0; dd < num_gpus; dd++ ) {
                    j_local2 = j_local+1;
                    if ( d > id ) j_local2 --;
                    nb0 = nb*j_local2;
                
                    if ( n_local[d] > nb0 ) {
                        /* wait for the off-diagonal */
                        if ( d != id ) {
                            //magma_queue_sync( stream[id][3] );
                            dlpanel = dlP(d, jb, 0, buf);
                            ldpanel = lddp;
                
                            /* wait for the offdiagonal column */
                            magma_queue_wait_event( stream[d][stream1], event[d][1] );
                        } else {
                            dlpanel = dlA(d, 0, nb*j_local);
                            ldpanel = ldda;
                        }
                        
                        /* update the panel */
                        magma_setdevice(d);
                        magmablasSetKernelStream(stream[d][stream1]);
                        trace_gpu_start( d, stream1, "gemm", "gemm" );
                        magma_sgemm(MagmaTrans, MagmaNoTrans,
                                    jb, n_local[d]-nb0, j,
                                    c_neg_one, dlpanel,        ldpanel,
                                               dlA(d, 0, nb0), ldda,
                                    c_one,     dlA(d, j, nb0), ldda);
                        trace_gpu_end( d, stream1 );
                    }
                    d = (d+1)%num_gpus;
                }
            }
            
            /* factor the diagonal */
            magma_setdevice(id);
            magma_queue_sync( stream[id][stream0] ); // wait for the diagonal
            trace_cpu_start( 0, "getrf", "getrf" );
            lapackf77_spotrf(MagmaUpperStr, &jb, Aup(j,j), &lda, info);
            trace_cpu_end( 0 );
            if (*info != 0) {
                *info = *info + j;
                break;
            }

            /* send the diagonal to gpus */
            if ( (j+jb) < n) {
                d = (j/nb+1)%num_gpus;
                for( dd=0; dd < num_gpus; dd++ ) {
                    magma_setdevice(d);
                    if ( d == id ) {
                        dlpanel = dlA(d, j, nb*j_local);
                        ldpanel = ldda;
                    } else {
                        dlpanel = dlP(d, 0, 0, buf);
                        ldpanel = lddp;
                    }
                    
                    trace_gpu_start( d, stream0, "comm", "D to GPUs" );
                    magma_ssetmatrix_async( jb, jb,
                                            Aup(j,j), lda,
                                            dlpanel,  ldpanel,
                                            stream[d][stream0] );
                    trace_gpu_end( d, stream0 );
                    magma_event_record( event[d][2], stream[d][stream0] );
                    d = (d+1)%num_gpus;
                }
            } else {
                magma_setdevice(id);
                trace_gpu_start( id, stream0, "comm", "D to GPUs" );
                magma_ssetmatrix_async( jb, jb,
                                        Aup(j,j),               lda,
                                        dlA(id, j, nb*j_local), ldda,
                                        stream[id][stream0] );
                trace_gpu_end( id, stream0 );
            }
            
            /* panel-factorize the off-diagonal */
            if ( (j+jb) < n) {
                d = (j/nb+1)%num_gpus;
                for( dd=0; dd < num_gpus; dd++ ) {
                    /* next column */
                    j_local2 = j_local+1;
                    if ( d > id ) j_local2--;
                    if ( d == id ) {
                        dlpanel = dlA(d, j, nb*j_local);
                        ldpanel = ldda;
                    } else {
                        dlpanel = dlP(d, 0, 0, buf);
                        ldpanel = lddp;
                    }
                    nb2 = n_local[d]-nb*j_local2;
                    nb0 = min(nb, nb2 );
                    
                    magma_setdevice(d);
                    magmablasSetKernelStream(stream[d][stream1]);
                    magma_queue_wait_event( stream[d][stream1], event[d][2] ); // wait for the diagonal
                    if ( j+jb < m && d == (j/nb+1)%num_gpus ) {
                        /* owns the next column, look-ahead the column */
                        trace_gpu_start( d, stream1, "trsm", "trsm" );
#if defined(PRECISION_d) && defined(STRSM_WORK)
                        magmablas_strsm_work( MagmaLeft, MagmaUpper, MagmaTrans, MagmaNonUnit,
                                              jb, nb0, c_one,
                                              dlpanel,                ldpanel,
                                              dlA(d, j, nb*j_local2), ldda,
                                              d_dinvA[d][0], d_x[d][0] );
                        /*nb2 = n_local[d] - j_local2*nb;
                        magmablas_strsm_work( MagmaLeft, MagmaUpper, MagmaTrans, MagmaNonUnit,
                                              jb, nb2, c_one,
                                              dlpanel,                ldpanel,
                                              dlA(d, j, nb*j_local2), ldda,
                                              d_dinvA[d], d_x[d] ); */
#else
                        /*nb2 = n_local[d] - j_local2*nb;
                        magma_strsm( MagmaLeft, MagmaUpper, MagmaTrans, MagmaNonUnit,
                                     jb, nb2, c_one,
                                     dlpanel,                ldda,
                                     dlA(d, j, nb*j_local2), ldda);
                        */
                        magma_strsm( MagmaLeft, MagmaUpper, MagmaTrans, MagmaNonUnit,
                                     jb, nb0, c_one,
                                     dlpanel,                ldpanel,
                                     dlA(d, j, nb*j_local2), ldda);
#endif
                        trace_gpu_end( d, stream1 );
                        magma_event_record( event[d][3], stream[d][stream1] );
                        
                        /* send the column to cpu */
                        if ( j+jb < m ) {
                            trace_gpu_start( d, stream0, "comm", "rows to CPU" );
                            magma_queue_wait_event( stream[d][stream0], event[d][3] ); // wait for lookahead
                            magma_sgetmatrix_async( (j+jb), nb0,
                                                    dlA(d, 0, nb*j_local2), ldda,
                                                    Aup(0,j+jb),            lda,
                                                    stream[d][stream0] );
                            trace_gpu_end( d, stream0 );
                            magma_event_record( event[d][1], stream[d][stream0] );
                        }
                        
                        /* update the remaining blocks */
                        nb2 = nb2 - nb0;
#if defined(PRECISION_d) && defined(STRSM_WORK)
                        magmablas_strsm_work( MagmaLeft, MagmaUpper, MagmaTrans, MagmaNonUnit,
                                              jb, nb2, c_one,
                                              dlpanel,                    ldpanel,
                                              dlA(d, j, nb*j_local2+nb0), ldda,
                                              d_dinvA[d][1], d_x[d][1] );
#else
                        magma_strsm( MagmaLeft, MagmaUpper, MagmaTrans, MagmaNonUnit,
                                     jb, nb2, c_one,
                                     dlpanel,                    ldpanel,
                                     dlA(d, j, nb*j_local2+nb0), ldda);
#endif
                    } else if ( nb2 > 0 ) {
                        /* update the entire trailing matrix */
                        trace_gpu_start( d, stream1, "trsm", "trsm" );
#if defined(PRECISION_d) && defined(STRSM_WORK)
                        magmablas_strsm_work( MagmaLeft, MagmaUpper, MagmaTrans, MagmaNonUnit,
                                              jb, nb2, c_one,
                                              dlpanel,                ldpanel,
                                              dlA(d, j, nb*j_local2), ldda,
                                    d_dinvA[d][1], d_x[d][1] );
#else
                        magma_strsm( MagmaLeft, MagmaUpper, MagmaTrans, MagmaNonUnit,
                                     jb, nb2, c_one,
                                     dlpanel,                ldpanel,
                                     dlA(d, j, nb*j_local2), ldda);
#endif
                        trace_gpu_end( d, stream1 );
                    }
                    d = (d+1)%num_gpus;
                }
            } /* end of strsm */
        } /* end of for j=1, .., n */
    } else {
        /* -------------------------------------------- */
        /* Lower-triangular case                        */
        /* Compute the Cholesky factorization A = L*L'. */
        /* -------------------------------------------- */
#if defined(PRECISION_d) && defined(STRSM_WORK)
        /*
         * Allocate device memory for the inversed diagonal blocks, size=N*BLOCK_SIZE
         */
        for( d=0; d < num_gpus; d++ ) {
            magma_setdevice(d);
            for( j=0; j < 2; j++ ) {
                magma_smalloc( &d_dinvA[d][j], nb*nb );
                magma_smalloc( &d_x[d][j],     nb*m  );
                cudaMemset(d_dinvA[d][j], 0, nb*nb*sizeof(float));
                cudaMemset(d_x[d][j],     0, nb* m*sizeof(float));
            }
        }
        magma_setdevice(0);
#endif

        for (j=0; j < n; j += nb) {
            /* Set the GPU number that holds the current panel */
            id  = (j/nb)%num_gpus;
            buf = (j/nb)%num_gpus;
            
            /* Set the local index where the current panel is */
            j_local = j/(nb*num_gpus);
            jb = min(nb, (n-j));
            
            if ( j > 0 ) {
                /* needed on pluto... */
                magma_setdevice(id);
                magma_queue_sync( stream[id][stream0] ); // wait for the column on CPU

                /* broadcast offdiagonal row to all gpus */
                d = (j/nb+1)%num_gpus;
                for( dd=0; dd < num_gpus; dd++ ) {
                    if ( d != id ) {
                        magma_setdevice(d);
                        /* wait for it on CPU */
                        magma_queue_wait_event( stream[d][stream0], event[id][1] );
            
                        /* send it to GPU */
                        magma_ssetmatrix_async( jb, j,
                                                Alo(j,0),         lda,
                                                dlPT(d,0,jb,buf), nb,
                                                stream[d][stream0] );
                        magma_event_record( event[d][1], stream[d][stream0] );
                    }
                    d = (d+1)%num_gpus;
                }
            }

            /* Update the current diagonal block */
            magma_setdevice(id);
            if ( j > 0 ) {
                magmablasSetKernelStream(stream[id][stream1]);
                magma_ssyrk(MagmaLower, MagmaNoTrans, jb, j,
                            d_neg_one, dlA(id, nb*j_local, 0), ldda,
                            d_one,     dlA(id, nb*j_local, j), ldda);
                magma_event_record( event[id][0], stream[id][stream1] );
            }
            
            /* send the diagonal to cpu */
            magma_queue_wait_event( stream[id][stream0], event[id][0] ); // wait for syrk
            magma_sgetmatrix_async( jb, jb,
                                    dlA(id, nb*j_local, j), ldda,
                                    Alo(j,j),               lda,
                                    stream[id][stream0] );

            /* update the offdiagonal blocks */
            if ( j > 0 ) {
                /* compute the block-rows of the panel */
                d = (j/nb+1)%num_gpus;
                for( dd=0; dd < num_gpus; dd++ ) {
                    j_local2 = j_local+1;
                    if ( d > id ) j_local2 --;
                    nb0 = nb*j_local2;
            
                    if ( nb0 < n_local[d] ) {
                        if ( d != id ) {
                            dlpanel = dlPT(d, 0, jb, buf);
                            ldpanel = nb;
            
                            /* wait for offdiagonal row */
                            magma_queue_wait_event( stream[d][stream1], event[d][1] );
                        } else {
                            dlpanel = dlA(d, nb*j_local, 0);
                            ldpanel = ldda;
                        }
            
                        magma_setdevice(d);
                        magmablasSetKernelStream(stream[d][stream1]);
                        magma_sgemm( MagmaNoTrans, MagmaTrans,
                                     n_local[d]-nb0, jb, j,
                                     c_neg_one, dlA(d, nb0, 0), ldda,
                                                dlpanel,        ldpanel,
                                     c_one,     dlA(d, nb0, j), ldda);
                    }
                    d = (d+1)%num_gpus;
                }
            }

            /* factor the diagonal */
            magma_setdevice(id);
            magma_queue_sync( stream[id][stream0] );
            lapackf77_spotrf(MagmaLowerStr, &jb, Alo(j,j), &lda, info);
            if (*info != 0) {
                *info = *info + j;
                break;
            }

            /* send the diagonal to gpus */
            if ( (j+jb) < m ) {
                d = (j/nb+1)%num_gpus;
                for( dd=0; dd < num_gpus; dd++ ) {
                    magma_setdevice(d);
                    if ( d == id ) {
                        dlpanel = dlA(d, nb*j_local, j);
                        ldpanel = ldda;
                    } else {
                        dlpanel = dlPT(d, 0, 0, buf);
                        ldpanel = nb;
                    }
                    magma_ssetmatrix_async( jb, jb,
                                            Alo(j,j), lda,
                                            dlpanel,  ldpanel,
                                            stream[d][stream0] );
                    magma_event_record( event[d][2], stream[d][stream0] );
                    d = (d+1)%num_gpus;
                }
            } else {
                magma_setdevice(id);
                magma_ssetmatrix_async( jb, jb,
                                        Alo(j,j),               lda,
                                        dlA(id, nb*j_local, j), ldda,
                                        stream[id][stream0] );
            }

            /* factorize off-diagonal blocks */
            if ( (j+jb) < m ) {
                d = (j/nb+1)%num_gpus;
                for( dd=0; dd < num_gpus; dd++ ) {
                    /* next column */
                    j_local2 = j_local+1;
                    if ( d > id ) j_local2--;
                    if ( d == id ) {
                        dlpanel = dlA(d, nb*j_local, j);
                        ldpanel = ldda;
                    } else {
                        dlpanel = dlPT(d, 0, 0, buf);
                        ldpanel = nb;
                    }
                    nb2 = n_local[d] - j_local2*nb;
                    nb0 = min(nb, nb2 );
            
                    magma_setdevice(d);
                    magmablasSetKernelStream(stream[d][stream1]);
                    magma_queue_wait_event( stream[d][stream1], event[d][2] ); // wait for the diagonal
                    if ( j+jb < n && d == (j/nb+1)%num_gpus ) {
                        /* owns the next column, look-ahead the column */
#if defined(PRECISION_d) && defined(STRSM_WORK)
                        magmablas_strsm_work( MagmaRight, MagmaLower, MagmaTrans, MagmaNonUnit,
                                              nb0, jb, c_one,
                                              dlpanel,                ldpanel,
                                              dlA(d, nb*j_local2, j), ldda,
                                              d_dinvA[d][0], d_x[d][0]);
#else
                        magma_strsm( MagmaRight, MagmaLower, MagmaTrans, MagmaNonUnit,
                                     nb0, jb, c_one,
                                     dlpanel,                ldpanel,
                                     dlA(d, nb*j_local2, j), ldda);
#endif
                        magma_event_record( event[d][3], stream[d][stream1] );

                        /* send the column to cpu */
                        if ( j+jb < n ) {
                            magma_queue_wait_event( stream[d][stream0], event[d][3] ); // wait for lookahead
                            magma_sgetmatrix_async( nb0, j+jb,
                                                    dlA(d, nb*j_local2, 0), ldda,
                                                    Alo(j+jb,0),            lda,
                                                    stream[d][stream0] );
                            magma_event_record( event[d][1], stream[d][stream0] );
                        }

                        /* update the remaining blocks */
                        nb2 = nb2 - nb0;
#if defined(PRECISION_d) && defined(STRSM_WORK)
                        magmablas_strsm_work( MagmaRight, MagmaLower, MagmaTrans, MagmaNonUnit,
                                              nb2, jb, c_one,
                                              dlpanel,                    ldpanel,
                                              dlA(d, nb*j_local2+nb0, j), ldda,
                                              d_dinvA[d][1], d_x[d][1] );
#else
                        magma_strsm( MagmaRight, MagmaLower, MagmaTrans, MagmaNonUnit,
                                     nb2, jb, c_one,
                                     dlpanel,                    ldpanel,
                                     dlA(d, nb*j_local2+nb0, j), ldda);
#endif
                    } else if ( nb2 > 0 ) {
                        /* update the entire trailing matrix */
#if defined(PRECISION_d) && defined(STRSM_WORK)
                        magmablas_strsm_work( MagmaRight, MagmaLower, MagmaTrans, MagmaNonUnit,
                                              nb2, jb, c_one,
                                              dlpanel,                ldpanel,
                                              dlA(d, nb*j_local2, j), ldda,
                                              d_dinvA[d][1], d_x[d][1] );
#else
                        magma_strsm( MagmaRight, MagmaLower, MagmaTrans, MagmaNonUnit,
                                     nb2, jb, c_one,
                                     dlpanel,                ldpanel,
                                     dlA(d, nb*j_local2, j), ldda);
#endif
                    }
                    d = (d+1)%num_gpus;
                }
            }
        }
    } /* end of else not upper */

    /* == finalize the trace == */
    trace_finalize( "spotrf.svg", "trace.css" );

    /* clean up */
    for( d=0; d < num_gpus; d++ ) {
        magma_setdevice(d);
        magma_queue_sync( stream[d][0] );
        magma_queue_sync( stream[d][1] );
        magmablasSetKernelStream(NULL);

        //magma_event_destroy( event0[d] );
        //magma_event_destroy( event1[d] );
        //magma_event_destroy( event2[d] );
        //magma_event_destroy( event3[d] );
    }
    magma_setdevice(0);

    return *info;
} /* magma_spotrf_mgpu */
示例#29
0
/**
    Purpose   
    =======   

    SSYTRF_nopiv_gpu computes the LDLt factorization of a real symmetric   
    matrix A.

    The factorization has the form   
       A = U^H * D * U , if UPLO = 'U', or   
       A = L  * D * L^H, if UPLO = 'L',   
    where U is an upper triangular matrix, L is lower triangular, and
    D is a diagonal matrix.

    This is the block version of the algorithm, calling Level 3 BLAS.   

    Arguments
    ---------
    @param[in]
    UPLO    CHARACTER*1   
      -     = 'U':  Upper triangle of A is stored;   
      -     = 'L':  Lower triangle of A is stored.   

    @param[in]
    N       INTEGER   
            The order of the matrix A.  N >= 0.   

    @param[in,out]
    dA      REAL array on the GPU, dimension (LDA,N)   
            On entry, the symmetric matrix A.  If UPLO = 'U', the leading   
            N-by-N upper triangular part of A contains the upper   
            triangular part of the matrix A, and the strictly lower   
            triangular part of A is not referenced.  If UPLO = 'L', the   
            leading N-by-N lower triangular part of A contains the lower   
            triangular part of the matrix A, and the strictly upper   
            triangular part of A is not referenced.   
    \n
            On exit, if INFO = 0, the factor U or L from the Cholesky   
            factorization A = U^H D U or A = L D L^H.   
    \n 
            Higher performance is achieved if A is in pinned memory, e.g.
            allocated using cudaMallocHost.

    @param[in]
    LDA     INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    @param[out]
    INFO    INTEGER   
      -     = 0:  successful exit   
      -     < 0:  if INFO = -i, the i-th argument had an illegal value 
                  if INFO = -6, the GPU memory allocation failed 
      -     > 0:  if INFO = i, the leading minor of order i is not   
                  positive definite, and the factorization could not be   
                  completed.   
    
    @ingroup magma_ssytrf_comp
    ******************************************************************* */
extern "C" magma_int_t
magma_ssytrf_nopiv_gpu(
    magma_uplo_t uplo, magma_int_t n,
    magmaFloat_ptr dA, magma_int_t ldda,
    magma_int_t *info)
{
    #define  A(i, j)  (A)
    #define dA(i, j)  (dA +(j)*ldda + (i))
    #define dW(i, j)  (dW +(j)*ldda + (i))
    #define dWt(i, j) (dW +(j)*nb   + (i))

    /* Local variables */
    float zone  = MAGMA_S_ONE;
    float mzone = MAGMA_S_NEG_ONE;
    int                upper = (uplo == MagmaUpper);
    magma_int_t j, k, jb, nb, ib, iinfo;

    *info = 0;
    if (! upper && uplo != MagmaLower) {
      *info = -1;
    } else if (n < 0) {
      *info = -2;
    } else if (ldda < max(1,n)) {
      *info = -4;
    }
    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return MAGMA_ERR_ILLEGAL_VALUE;
    }

    /* Quick return */
    if ( n == 0 )
      return MAGMA_SUCCESS;

    nb = magma_get_ssytrf_nopiv_nb(n);
    ib = min(32, nb); // inner-block for diagonal factorization

    magma_queue_t orig_stream;
    magmablasGetKernelStream( &orig_stream );


    magma_queue_t stream[2];
    magma_event_t event;
    magma_queue_create(&stream[0]);
    magma_queue_create(&stream[1]);
    magma_event_create( &event );
    trace_init( 1, 1, 2, stream );

    // CPU workspace
    float *A;
    if (MAGMA_SUCCESS != magma_smalloc_pinned( &A, nb*nb )) {
        *info = MAGMA_ERR_HOST_ALLOC;
        return *info;
    }

    // GPU workspace
    magmaFloat_ptr dW;
    if (MAGMA_SUCCESS != magma_smalloc( &dW, (1+nb)*ldda )) {
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }

    /* Use hybrid blocked code. */
    if (upper) {
        //=========================================================
        // Compute the LDLt factorization A = U'*D*U without pivoting.
        // main loop
        for (j=0; j<n; j += nb) {
            jb = min(nb, (n-j));
            
            // copy A(j,j) back to CPU
            trace_gpu_start( 0, 0, "get", "get" );
            //magma_queue_wait_event( stream[1], event );                                                                
            magma_event_sync(event);
            magma_sgetmatrix_async(jb, jb, dA(j, j), ldda, A(j,j), nb, stream[1]);
            trace_gpu_end( 0, 0 );

            // factorize the diagonal block
            magma_queue_sync(stream[1]);
            trace_cpu_start( 0, "potrf", "potrf" );
            ssytrf_nopiv_cpu(MagmaUpper, jb, ib, A(j, j), nb, info);
            trace_cpu_end( 0 );
            if (*info != 0){
                *info = *info + j;
                break;
            }
            
            // copy A(j,j) back to GPU
            trace_gpu_start( 0, 0, "set", "set" );
            magma_ssetmatrix_async(jb, jb, A(j, j), nb, dA(j, j), ldda, stream[0]);
            trace_gpu_end( 0, 0 );
                
            if ( (j+jb) < n) {
                // compute the off-diagonal blocks of current block column
                magmablasSetKernelStream( stream[0] );
                trace_gpu_start( 0, 0, "trsm", "trsm" );
                magma_strsm(MagmaLeft, MagmaUpper, MagmaConjTrans, MagmaUnit, 
                            jb, (n-j-jb), 
                            zone, dA(j, j),    ldda, 
                            dA(j, j+jb), ldda);
                magma_scopymatrix( jb, n-j-jb, dA( j, j+jb ), ldda, dWt( 0, j+jb ), nb );
                
                // update the trailing submatrix with D
                magmablas_slascl_diag(MagmaUpper, jb, n-j-jb,
                                      dA(j,    j), ldda,
                                      dA(j, j+jb), ldda,
                                      &iinfo);
                trace_gpu_end( 0, 0 );
                
                // update the trailing submatrix with U and W
                trace_gpu_start( 0, 0, "gemm", "gemm" );
                for (k=j+jb; k<n; k+=nb) {
                    magma_int_t kb = min(nb,n-k);
                    magma_sgemm(MagmaConjTrans, MagmaNoTrans, kb, n-k, jb,
                                mzone, dWt(0, k), nb, 
                                       dA(j, k), ldda,
                                zone,  dA(k, k), ldda);
                    if (k==j+jb)
                        magma_event_record( event, stream[0] );
                }
                trace_gpu_end( 0, 0 );
            }
        }
    } else {
        //=========================================================
        // Compute the LDLt factorization A = L*D*L' without pivoting.
        // main loop
        for (j=0; j<n; j+=nb) {
            jb = min(nb, (n-j));
            
            // copy A(j,j) back to CPU
            trace_gpu_start( 0, 0, "get", "get" );
            //magma_queue_wait_event( stream[0], event );                                                                
            magma_event_sync(event);
            magma_sgetmatrix_async(jb, jb, dA(j, j), ldda, A(j,j), nb, stream[1]);
            trace_gpu_end( 0, 0 );
            
            // factorize the diagonal block
            magma_queue_sync(stream[1]);
            trace_cpu_start( 0, "potrf", "potrf" );
            ssytrf_nopiv_cpu(MagmaLower, jb, ib, A(j, j), nb, info);
            trace_cpu_end( 0 );
            if (*info != 0){
                *info = *info + j;
                break;
            }

            // copy A(j,j) back to GPU
            trace_gpu_start( 0, 0, "set", "set" );
            magma_ssetmatrix_async(jb, jb, A(j, j), nb, dA(j, j), ldda, stream[0]);
            trace_gpu_end( 0, 0 );
            
            if ( (j+jb) < n) {
                // compute the off-diagonal blocks of current block column
                magmablasSetKernelStream( stream[0] );
                trace_gpu_start( 0, 0, "trsm", "trsm" );
                magma_strsm(MagmaRight, MagmaLower, MagmaConjTrans, MagmaUnit, 
                            (n-j-jb), jb, 
                            zone, dA(j,    j), ldda, 
                            dA(j+jb, j), ldda);
                magma_scopymatrix( n-j-jb,jb, dA( j+jb, j ), ldda, dW( j+jb, 0 ), ldda );
                
                // update the trailing submatrix with D
                magmablas_slascl_diag(MagmaLower, n-j-jb, jb,
                                      dA(j,    j), ldda,
                                      dA(j+jb, j), ldda,
                                      &iinfo);
                trace_gpu_end( 0, 0 );
                
                // update the trailing submatrix with L and W
                trace_gpu_start( 0, 0, "gemm", "gemm" );
                for (k=j+jb; k<n; k+=nb) {
                    magma_int_t kb = min(nb,n-k);
                    magma_sgemm(MagmaNoTrans, MagmaConjTrans, n-k, kb, jb,
                                mzone, dA(k, j), ldda, 
                                       dW(k, 0), ldda,
                                zone,  dA(k, k), ldda);
                    if (k==j+jb)
                        magma_event_record( event, stream[0] );
                }
                trace_gpu_end( 0, 0 );
            }
        }
    }
    
    trace_finalize( "ssytrf.svg","trace.css" );
    magma_queue_destroy(stream[0]);
    magma_queue_destroy(stream[1]);
    magma_event_destroy( event );
    magma_free( dW );
    magma_free_pinned( A );
    
    magmablasSetKernelStream( orig_stream );
    return MAGMA_SUCCESS;
} /* magma_ssytrf_nopiv */
示例#30
0
/**
 * Showtime main
 */
int
main(int argc, char **argv)
{
    struct timeval tv;
    const char *settingspath = NULL;
    const char *uiargs[16];
    const char *argv0 = argc > 0 ? argv[0] : "showtime";
    const char *forceview = NULL;
    int nuiargs = 0;
    int can_standby = 0;
    int can_poweroff = 0;
    int r;

    trace_level = TRACE_INFO;

    gettimeofday(&tv, NULL);
    srand(tv.tv_usec);

    arch_set_default_paths(argc, argv);

    /* We read options ourselfs since getopt() is broken on some (nintento wii)
       targets */

    argv++;
    argc--;

    while(argc > 0) {
        if(!strcmp(argv[0], "-h") || !strcmp(argv[0], "--help")) {
            printf("HTS Showtime %s\n"
                   "Copyright (C) 2007-2010 Andreas Öman\n"
                   "\n"
                   "Usage: %s [options] [<url>]\n"
                   "\n"
                   "  Options:\n"
                   "   -h, --help        - This help text.\n"
                   "   -d                - Enable debug output.\n"
                   "   --ffmpeglog       - Print ffmpeg log messages.\n"
                   "   --with-standby    - Enable system standby.\n"
                   "   --with-poweroff   - Enable system power-off.\n"
                   "   -s <path>         - Non-default Showtime settings path.\n"
                   "   --ui <ui>         - Use specified user interface.\n"
                   "   -L <ip:host>      - Send log messages to remote <ip:host>.\n"
                   "   --syslog          - Send log messages to syslog.\n"
#if ENABLE_STDIN
                   "   --stdin           - Listen on stdin for events.\n"
#endif
                   "   -v <view>         - Use specific view for <url>.\n"
                   "   --cache <path>    - Set path for cache [%s].\n"
#if ENABLE_SERDEV
                   "   --serdev          - Probe service ports for devices.\n"
#endif
                   "\n"
                   "  URL is any URL-type supported by Showtime, "
                   "e.g., \"file:///...\"\n"
                   "\n",
                   htsversion_full,
                   argv0,
                   showtime_cache_path);
            exit(0);
            argc--;
            argv++;

        } else if(!strcmp(argv[0], "-d")) {
            trace_level++;
            argc -= 1;
            argv += 1;
            continue;
        } else if(!strcmp(argv[0], "--ffmpeglog")) {
            ffmpeglog = 1;
            argc -= 1;
            argv += 1;
            continue;
        } else if(!strcmp(argv[0], "--syslog")) {
            trace_to_syslog = 1;
            argc -= 1;
            argv += 1;
            continue;
        } else if(!strcmp(argv[0], "--stdin")) {
            listen_on_stdin = 1;
            argc -= 1;
            argv += 1;
            continue;
#if ENABLE_SERDEV
        } else if(!strcmp(argv[0], "--serdev")) {
            enable_serdev = 1;
            argc -= 1;
            argv += 1;
            continue;
#endif
        } else if(!strcmp(argv[0], "--with-standby")) {
            can_standby = 1;
            argc -= 1;
            argv += 1;
            continue;
        } else if(!strcmp(argv[0], "--with-poweroff")) {
            can_poweroff = 1;
            argc -= 1;
            argv += 1;
            continue;
        } else if(!strcmp(argv[0], "-s") && argc > 1) {
            settingspath = argv[1];
            argc -= 2;
            argv += 2;
            continue;
        } else if(!strcmp(argv[0], "--ui") && argc > 1) {
            if(nuiargs < 16)
                uiargs[nuiargs++] = argv[1];
            argc -= 2;
            argv += 2;
            continue;
        } else if(!strcmp(argv[0], "-L") && argc > 1) {
            showtime_logtarget = argv[1];
            argc -= 2;
            argv += 2;
            continue;
        } else if (!strcmp(argv[0], "-v") && argc > 1) {
            forceview = argv[1];
            argc -= 2;
            argv += 2;
        } else if (!strcmp(argv[0], "--cache") && argc > 1) {
            mystrset(&showtime_cache_path, argv[1]);
            argc -= 2;
            argv += 2;
#ifdef __APPLE__
            /* ignore -psn argument, process serial number */
        } else if(!strncmp(argv[0], "-psn", 4)) {
            argc -= 1;
            argv += 1;
            continue;
#endif
        } else
            break;
    }


    unicode_init();

    /* Initialize property tree */
    prop_init();
    init_global_info();

    /* Initiailize logging */
    trace_init();

    /* Callout framework */
    callout_init();

    /* Notification framework */
    notifications_init();

    /* Architecture specific init */
    arch_init();

    htsmsg_store_init();

    /* Try to create cache path */
    if(showtime_cache_path != NULL &&
            (r = makedirs(showtime_cache_path)) != 0) {
        TRACE(TRACE_ERROR, "cache", "Unable to create cache path %s -- %s",
              showtime_cache_path, strerror(r));
        showtime_cache_path = NULL;
    }

    /* Initializte blob cache */
    blobcache_init();

    /* Try to create settings path */
    if(showtime_settings_path != NULL &&
            (r = makedirs(showtime_settings_path)) != 0) {
        TRACE(TRACE_ERROR, "settings", "Unable to create settings path %s -- %s",
              showtime_settings_path, strerror(r));
        showtime_settings_path = NULL;
    }

    /* Initialize keyring */
    keyring_init();

    /* Initialize settings */
    settings_init();

    /* Initialize libavcodec & libavformat */
    av_lockmgr_register(fflockmgr);
    av_log_set_callback(fflog);
    av_register_all();

    /* Freetype keymapper */
#if ENABLE_LIBFREETYPE
    freetype_init();
#endif

    /* Global keymapper */
    keymapper_init();

    /* Initialize media subsystem */
    media_init();

    /* Service handling */
    service_init();

    /* Initialize backend content handlers */
    backend_init();

    /* Initialize navigator */
    nav_init();

    /* Initialize audio subsystem */
    audio_init();

    /* Initialize bookmarks */
    bookmarks_init();

    /* Initialize plugin manager and load plugins */
    plugins_init();

    /* Internationalization */
    i18n_init();


    nav_open(NAV_HOME, NULL);

    /* Open initial page */
    if(argc > 0)
        nav_open(argv[0], forceview);

    /* Various interprocess communication stuff (D-Bus on Linux, etc) */
    ipc_init();

    /* Service discovery. Must be after ipc_init() (d-bus and threads, etc) */
    sd_init();

    /* Initialize various external APIs */
    api_init();

    /* HTTP server and UPNP */
#if ENABLE_HTTPSERVER
    http_server_init();
    upnp_init();
#endif


    /* */
    runcontrol_init(can_standby, can_poweroff);

    TRACE(TRACE_DEBUG, "core", "Starting UI");

    /* Initialize user interfaces */
    ui_start(nuiargs, uiargs, argv0);

    finalize();
}