/*
 * Antisymmetrize a square global array in place: A := 0.5*(A - A').
 */
void FATR ga_antisymmetrize_(Integer *g_a) {

  DoublePrecision alpha = 0.5;
  int i, me = GA_Nodeid();
  extern void * FATR ga_malloc(Integer nelem, int type, char *name);
  extern void FATR ga_free(void *ptr);
  void FATR gai_subtr(int *lo, int *hi, void *a, void *b,
                      DoublePrecision alpha, int type, Integer nelem, int ndim);
  int alo[GA_MAX_DIM], ahi[GA_MAX_DIM], lda[GA_MAX_DIM];
  int blo[GA_MAX_DIM], bhi[GA_MAX_DIM], ldb[GA_MAX_DIM];
  int ndim, dims[GA_MAX_DIM], type;
  Integer nelem = 1;
  Logical have_data;
  void *a_ptr, *b_ptr;

  GA_Sync();
  NGA_Inquire((int)(*g_a), &type, &ndim, dims);
  if (dims[0] != dims[1])
    GA_Error("ga_antisymmetrize: can only antisymmetrize a square matrix", 0L);

  /* Find the local distribution */
  NGA_Distribution((int)(*g_a), me, alo, ahi);

  have_data = ahi[0] >= 0;
  for (i = 1; i < ndim; i++) have_data = have_data && ahi[i] >= 0;

  if (have_data) {
    NGA_Access((int)(*g_a), alo, ahi, &a_ptr, lda);

    for (i = 0; i < ndim; i++) nelem *= ahi[i] - alo[i] + 1;
    b_ptr = (void *) ga_malloc(nelem, MT_C_DBL, "v");

    for (i = 2; i < ndim; i++) { bhi[i] = ahi[i]; blo[i] = alo[i]; }

    /* switch rows and cols */
    blo[1] = alo[0];
    bhi[1] = ahi[0];
    blo[0] = alo[1];
    bhi[0] = ahi[1];
    for (i = 0; i < ndim-1; i++)
      ldb[i] = bhi[i+1] - blo[i+1] + 1;
    NGA_Get((int)(*g_a), blo, bhi, b_ptr, ldb);
  }
  GA_Sync();

  if (have_data) {
    gai_subtr(alo, ahi, a_ptr, b_ptr, alpha, type, nelem, ndim);
    NGA_Release_update((int)(*g_a), alo, ahi);
    ga_free(b_ptr);
  }
  GA_Sync();
}
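/*
 * gai_subtr() is only declared above, not defined in this excerpt. The
 * helper below is a hypothetical 2-D sketch (not the library's
 * implementation) of the element-wise combine it is assumed to perform:
 * given the local patch `a` of A and the matching transposed patch `b`
 * fetched with NGA_Get above, overwrite a with alpha*(A - A'). The name
 * gai_subtr_2d_sketch and the explicit leading-dimension arguments are
 * illustrative assumptions.
 */
static void gai_subtr_2d_sketch(int *lo, int *hi, double *a, double *b,
                                double alpha, int lda0, int ldb0)
{
  int nrow = hi[0] - lo[0] + 1;   /* rows of the local patch of A    */
  int ncol = hi[1] - lo[1] + 1;   /* columns of the local patch of A */
  int i, j;
  /* b holds A[alo[1]..ahi[1], alo[0]..ahi[0]], i.e. the transposed block,
     so element (i,j) of the local patch pairs with b[j][i]. */
  for (i = 0; i < nrow; i++)
    for (j = 0; j < ncol; j++)
      a[i*lda0 + j] = alpha * (a[i*lda0 + j] - b[j*ldb0 + i]);
}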
struct vsource_frame * vsource_frame_init(struct vsource_frame *frame, int maxwidth, int maxheight, int maxstride) {
	int i;
	//
	bzero(frame, sizeof(struct vsource_frame));
	//
	for(i = 0; i < MAX_STRIDE; i++) {
		frame->linesize[i] = maxstride;
	}
	frame->maxstride = maxstride;
	frame->imgbufsize = maxheight * maxstride;
	if(ga_malloc(frame->imgbufsize, (void**) &frame->imgbuf_internal, &frame->alignment) < 0) {
		return NULL;
	}
	frame->imgbuf = frame->imgbuf_internal + frame->alignment;
	bzero(frame->imgbuf, frame->imgbufsize);
	return frame;
}
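/*
 * Hypothetical usage sketch (not part of the original module): shows how a
 * caller might pair vsource_frame_init() with a plain malloc'ed frame
 * (malloc/free from <stdlib.h>). The 1280x720 geometry and 4-byte-per-pixel
 * stride are illustrative assumptions, not values taken from the project.
 */
static struct vsource_frame *
vsource_frame_alloc_example(void) {
	struct vsource_frame *frame =
		(struct vsource_frame*) malloc(sizeof(struct vsource_frame));
	if(frame == NULL)
		return NULL;
	if(vsource_frame_init(frame, 1280, 720, 1280 * 4) == NULL) {
		/* the aligned image buffer could not be allocated */
		free(frame);
		return NULL;
	}
	/* frame->imgbuf now points at a zeroed, aligned buffer of
	 * frame->imgbufsize (= maxheight * maxstride) bytes */
	return frame;
}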
/*
 * Video encoder worker thread: pulls frames from the source pipeline,
 * encodes them with libavcodec (YUV420P input), and forwards the resulting
 * packets via encoder_send_packet_all().
 *
 * NOTE (editorial assumption): rtspconf, outputW, and outputH are used but
 * not declared here; they are assumed to be file-scope variables defined
 * elsewhere in this module.
 */
void *
vencoder_threadproc(void *arg) {
	// arg is pointer to source pipe
	// image info
	int iid;
	int iwidth;
	int iheight;
	int rtp_id;
	struct pooldata *data = NULL;
	struct vsource_frame *frame = NULL;
	pipeline *pipe = (pipeline*) arg;
	AVCodecContext *encoder = NULL;
	//
	AVFrame *pic_in = NULL;
	unsigned char *pic_in_buf = NULL;
	int pic_in_size;
	unsigned char *nalbuf = NULL, *nalbuf_a = NULL;
	int nalbuf_size = 0, nalign = 0;
	long long basePts = -1LL, newpts = 0LL, pts = -1LL, ptsSync = 0LL;
	pthread_mutex_t condMutex = PTHREAD_MUTEX_INITIALIZER;
	pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
	//
	int resolution[2];
	int video_written = 0;
	//
	if(pipe == NULL) {
		ga_error("video encoder: NULL pipeline specified.\n");
		goto video_quit;
	}
	//
	rtspconf = rtspconf_global();
	// init variables
	iid = ((struct vsource_config*) pipe->get_privdata())->id;
	iwidth = video_source_maxwidth(iid);
	iheight = video_source_maxheight(iid);
	rtp_id = ((struct vsource_config*) pipe->get_privdata())->rtp_id;
	//
	outputW = iwidth;	// by default, the same as max resolution
	outputH = iheight;
	if(ga_conf_readints("output-resolution", resolution, 2) == 2) {
		outputW = resolution[0];
		outputH = resolution[1];
	}
	//
	ga_error("video encoder: image source from '%s' (%dx%d) via channel %d, resolution=%dx%d.\n",
		pipe->name(), iwidth, iheight, rtp_id, outputW, outputH);
	//
	encoder = ga_avcodec_vencoder_init(
			NULL,
			rtspconf->video_encoder_codec,
			outputW, outputH,
			rtspconf->video_fps,
			rtspconf->vso);
	if(encoder == NULL) {
		ga_error("video encoder: cannot initialize the encoder.\n");
		goto video_quit;
	}
	//
	nalbuf_size = 100000+12 * outputW * outputH;
	if(ga_malloc(nalbuf_size, (void**) &nalbuf, &nalign) < 0) {
		ga_error("video encoder: buffer allocation failed, terminated.\n");
		goto video_quit;
	}
	nalbuf_a = nalbuf + nalign;
	//
	if((pic_in = avcodec_alloc_frame()) == NULL) {
		ga_error("video encoder: picture allocation failed, terminated.\n");
		goto video_quit;
	}
	pic_in_size = avpicture_get_size(PIX_FMT_YUV420P, outputW, outputH);
	if((pic_in_buf = (unsigned char*) av_malloc(pic_in_size)) == NULL) {
		ga_error("video encoder: picture buffer allocation failed, terminated.\n");
		goto video_quit;
	}
	avpicture_fill((AVPicture*) pic_in,
			pic_in_buf, PIX_FMT_YUV420P, outputW, outputH);
	//ga_error("video encoder: linesize = %d|%d|%d\n", pic_in->linesize[0], pic_in->linesize[1], pic_in->linesize[2]);
	// start encoding
	ga_error("video encoding started: tid=%ld %dx%d@%dfps, nalbuf_size=%d, pic_in_size=%d.\n",
		ga_gettid(),
		iwidth, iheight, rtspconf->video_fps,
		nalbuf_size, pic_in_size);
	//
	pipe->client_register(ga_gettid(), &cond);
	//
	while(encoder_running() > 0) {
		AVPacket pkt;
		int got_packet = 0;
		// wait for notification
		data = pipe->load_data();
		if(data == NULL) {
			int err;
			struct timeval tv;
			struct timespec to;
			gettimeofday(&tv, NULL);
			to.tv_sec = tv.tv_sec+1;
			to.tv_nsec = tv.tv_usec * 1000;
			//
			if((err = pipe->timedwait(&cond, &condMutex, &to)) != 0) {
				ga_error("video encoder: image source timed out.\n");
				continue;
			}
			data = pipe->load_data();
			if(data == NULL) {
				ga_error("video encoder: unexpected NULL frame received (from '%s', data=%d, buf=%d).\n",
					pipe->name(), pipe->data_count(), pipe->buf_count());
				continue;
			}
		}
		frame = (struct vsource_frame*) data->ptr;
		// handle pts
		if(basePts == -1LL) {
			basePts = frame->imgpts;
			ptsSync = encoder_pts_sync(rtspconf->video_fps);
			newpts = ptsSync;
		} else {
			newpts = ptsSync + frame->imgpts - basePts;
		}
		// XXX: assume always YUV420P
		if(pic_in->linesize[0] == frame->linesize[0]
		&& pic_in->linesize[1] == frame->linesize[1]
		&& pic_in->linesize[2] == frame->linesize[2]) {
			bcopy(frame->imgbuf, pic_in_buf, pic_in_size);
		} else {
			ga_error("video encoder: YUV mode failed - mismatched linesize(s) (src:%d,%d,%d; dst:%d,%d,%d)\n",
				frame->linesize[0], frame->linesize[1], frame->linesize[2],
				pic_in->linesize[0], pic_in->linesize[1], pic_in->linesize[2]);
			pipe->release_data(data);
			goto video_quit;
		}
		pipe->release_data(data);
		// pts must be monotonically increasing
		if(newpts > pts) {
			pts = newpts;
		} else {
			pts++;
		}
		// encode
		pic_in->pts = pts;
		av_init_packet(&pkt);
		pkt.data = nalbuf_a;
		pkt.size = nalbuf_size;
		if(avcodec_encode_video2(encoder, &pkt, pic_in, &got_packet) < 0) {
			ga_error("video encoder: encode failed, terminated.\n");
			goto video_quit;
		}
		if(got_packet) {
			if(pkt.pts == (int64_t) AV_NOPTS_VALUE) {
				pkt.pts = pts;
			}
			pkt.stream_index = 0;
			// send the packet
			if(encoder_send_packet_all("video-encoder",
					rtp_id/*rtspconf->video_id*/, &pkt, pkt.pts) < 0) {
				goto video_quit;
			}
			// free unused side-data
			if(pkt.side_data_elems > 0) {
				int i;
				for (i = 0; i < pkt.side_data_elems; i++)
					av_free(pkt.side_data[i].data);
				av_freep(&pkt.side_data);
				pkt.side_data_elems = 0;
			}
			//
			if(video_written == 0) {
				video_written = 1;
				ga_error("first video frame written (pts=%lld)\n", pts);
			}
		}
	}
	//
video_quit:
	if(pipe) {
		pipe->client_unregister(ga_gettid());
		pipe = NULL;
	}
	//
	if(pic_in_buf)	av_free(pic_in_buf);
	if(pic_in)	av_free(pic_in);
	if(nalbuf)	free(nalbuf);
	if(encoder)	ga_avcodec_close(encoder);
	//
	ga_error("video encoder: thread terminated (tid=%ld).\n", ga_gettid());
	//
	return NULL;
}
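/*
 * Hypothetical launch sketch (not part of the original module): the encoder
 * runs as a worker thread fed by a source pipeline. The function name,
 * variable names, and the detach policy are assumptions; only the
 * vencoder_threadproc() signature and ga_error() come from the code above.
 */
static int
vencoder_start_example(pipeline *source_pipe) {
	pthread_t tid;
	if(source_pipe == NULL)
		return -1;
	/* the thread argument is the pointer to the source pipe,
	 * exactly as vencoder_threadproc() expects */
	if(pthread_create(&tid, NULL, vencoder_threadproc, (void*) source_pipe) != 0) {
		ga_error("video encoder: cannot create encoder thread.\n");
		return -1;
	}
	pthread_detach(tid);
	return 0;
}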
/*
 * Symmetrize a square double-precision global array in place:
 * A := 0.5*(A + A').
 */
void FATR ga_symmetrize_(Integer *g_a) {

  DoublePrecision alpha = 0.5;
  Integer i, me = ga_nodeid_();
  Integer alo[GA_MAX_DIM], ahi[GA_MAX_DIM], lda[GA_MAX_DIM], nelem = 1;
  Integer blo[GA_MAX_DIM], bhi[GA_MAX_DIM], ldb[GA_MAX_DIM];
  Integer ndim, dims[GA_MAX_DIM], type;
  Logical have_data;
  Integer g_b;            /* temporary global array (b = A') */
  Integer num_blocks_a;
  Void *a_ptr, *b_ptr;
  int local_sync_begin, local_sync_end;
  char *tempB = "A_transpose";

  local_sync_begin = _ga_sync_begin; local_sync_end = _ga_sync_end;
  _ga_sync_begin = 1; _ga_sync_end = 1; /* remove any previous masking */
  if(local_sync_begin) ga_sync_();

  GA_PUSH_NAME("ga_symmetrize");

  num_blocks_a = ga_total_blocks_(g_a);

  nga_inquire_internal_(g_a, &type, &ndim, dims);
  if (type != C_DBL)
    ga_error("ga_symmetrize: only implemented for double precision", 0);

  if (num_blocks_a < 0) {
    if (dims[ndim-1] != dims[ndim-2])
      ga_error("ga_symmetrize: can only symmetrize a square matrix", 0L);

    /* Find the local distribution */
    nga_distribution_(g_a, &me, alo, ahi);

    have_data = ahi[0] > 0;
    for(i=1; i<ndim; i++) have_data = have_data && ahi[i] > 0;

    if(have_data) {
      nga_access_ptr(g_a, alo, ahi, &a_ptr, lda);

      for(i=0; i<ndim; i++) nelem *= ahi[i]-alo[i] +1;
      b_ptr = (Void *) ga_malloc(nelem, MT_F_DBL, "v");

      for(i=0; i<ndim-2; i++) { bhi[i]=ahi[i]; blo[i]=alo[i]; }

      /* switch rows and cols */
      blo[ndim-1]=alo[ndim-2];
      bhi[ndim-1]=ahi[ndim-2];
      blo[ndim-2]=alo[ndim-1];
      bhi[ndim-2]=ahi[ndim-1];
      for (i=0; i < ndim-1; i++)
        ldb[i] = bhi[i] - blo[i] + 1;
      nga_get_(g_a, blo, bhi, b_ptr, ldb);
    }
    ga_sync_();

    if(have_data) {
      gai_add(alo, ahi, a_ptr, b_ptr, alpha, type, nelem, ndim);
      nga_release_update_(g_a, alo, ahi);
      ga_free(b_ptr);
    }
  } else {
    /* For block-cyclic data, probably the most efficient solution is to
       create a duplicate copy, transpose it, and add the results together */
    DoublePrecision half = 0.5;
    if (!ga_duplicate(g_a, &g_b, tempB))
      ga_error("ga_symmetrize: duplicate failed", 0L);
    ga_transpose_(g_a, &g_b);
    ga_add_(&half, g_a, &half, &g_b, g_a);
    ga_destroy_(&g_b);
  }
  GA_POP_NAME;
  if(local_sync_end) ga_sync_();
}
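/*
 * For reference, the block-cyclic branch above is equivalent to the
 * following sequence expressed with the public C interface. This is a
 * hedged sketch assuming `g_a` is a valid handle to a square,
 * double-precision global array; it is not part of the library source.
 */
static void
ga_symmetrize_via_transpose_sketch(int g_a) {
  double half = 0.5;
  int g_b = GA_Duplicate(g_a, "A_transpose");  /* temporary for A'    */
  GA_Transpose(g_a, g_b);                      /* g_b = A'            */
  GA_Add(&half, g_a, &half, g_b, g_a);         /* A := 0.5*A + 0.5*A' */
  GA_Destroy(g_b);
}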