예제 #1
0
void FATR 
ga_antisymmetrize_(Integer *g_a) {
  
  DoublePrecision alpha = 0.5;
  int i, me = GA_Nodeid();
  extern void * FATR ga_malloc(Integer nelem, int type, char *name);
  extern void FATR ga_free(void *ptr);
  void FATR gai_subtr(int *lo, int *hi, void *a, void *b, DoublePrecision alpha,
                      int type, Integer nelem, int ndim);

  int alo[GA_MAX_DIM], ahi[GA_MAX_DIM], lda[GA_MAX_DIM];
  int blo[GA_MAX_DIM], bhi[GA_MAX_DIM], ldb[GA_MAX_DIM];
  int ndim, dims[GA_MAX_DIM], type;
  Integer nelem=1;
  Logical have_data;
  void *a_ptr, *b_ptr;

  GA_Sync();


  
  NGA_Inquire((int)(*g_a), &type, &ndim, dims);
  
  if (dims[0] != dims[1]) 
    GA_Error("ga_sym: can only sym square matrix", 0L);
  
  /* Find the local distribution */
  NGA_Distribution((int)(*g_a), me, alo, ahi);
 
 
  have_data = ahi[0]>=0;
  for(i=1; i<ndim; i++) have_data = have_data && ahi[i]>=0;
  
  if(have_data) {
    NGA_Access((int)(*g_a), alo, ahi, &a_ptr, lda); 
    
    for(i=0; i<ndim; i++) nelem *= ahi[i]-alo[i] +1;
    b_ptr = (void *) ga_malloc(nelem, MT_C_DBL, "v");
    
    for(i=2; i<ndim; i++) {bhi[i]=ahi[i]; blo[i]=alo[i]; }
    
    /* switch rows and cols */
    blo[1]=alo[0];
    bhi[1]=ahi[0];
    blo[0]=alo[1];
    bhi[0]=ahi[1];

    for (i=0; i < ndim-1; i++) 
      ldb[i] = bhi[i+1] - blo[i+1] + 1; 
    NGA_Get((int)(*g_a), blo, bhi, b_ptr, ldb);
  }
  GA_Sync(); 

  if(have_data) {
    gai_subtr(alo, ahi, a_ptr, b_ptr, alpha, type, nelem, ndim);
    NGA_Release_update((int)(*g_a), alo, ahi);
    ga_free(b_ptr);
  }
  GA_Sync();
}
예제 #2
0
struct vsource_frame *
vsource_frame_init(struct vsource_frame *frame, int maxwidth, int maxheight, int maxstride) {
	int i;
	//
	bzero(frame, sizeof(struct vsource_frame));
	//
	for(i = 0; i < MAX_STRIDE; i++) {
		frame->linesize[i] = maxstride;
	}
	frame->maxstride = maxstride;
	frame->imgbufsize = maxheight * maxstride;
	if(ga_malloc(frame->imgbufsize, (void**) &frame->imgbuf_internal, &frame->alignment) < 0) {
		return NULL;
	}
	frame->imgbuf = frame->imgbuf_internal + frame->alignment;
	bzero(frame->imgbuf, frame->imgbufsize);
	return frame;
}
void *
vencoder_threadproc(void *arg) {
	// arg is pointer to source pipe
	// image info
	int iid;
	int iwidth;
	int iheight;
	int rtp_id;
	struct pooldata *data = NULL;
	struct vsource_frame *frame = NULL;
	pipeline *pipe = (pipeline*) arg;
	AVCodecContext *encoder = NULL;
	//
	AVFrame *pic_in = NULL;
	unsigned char *pic_in_buf = NULL;
	int pic_in_size;
	unsigned char *nalbuf = NULL, *nalbuf_a = NULL;
	int nalbuf_size = 0, nalign = 0;
	long long basePts = -1LL, newpts = 0LL, pts = -1LL, ptsSync = 0LL;
	pthread_mutex_t condMutex = PTHREAD_MUTEX_INITIALIZER;
	pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
	//
	int resolution[2];
	int video_written = 0;
	//
	if(pipe == NULL) {
		ga_error("video encoder: NULL pipeline specified.\n");
		goto video_quit;
	}
	//
	rtspconf = rtspconf_global();
	// init variables
	iid = ((struct vsource_config*) pipe->get_privdata())->id;
	iwidth = video_source_maxwidth(iid);
	iheight = video_source_maxheight(iid);
	rtp_id = ((struct vsource_config*) pipe->get_privdata())->rtp_id;
	//
	outputW = iwidth;	// by default, the same as max resolution
	outputH = iheight;
	if(ga_conf_readints("output-resolution", resolution, 2) == 2) {
		outputW = resolution[0];
		outputH = resolution[1];
	}
	//
	ga_error("video encoder: image source from '%s' (%dx%d) via channel %d, resolution=%dx%d.\n",
		pipe->name(), iwidth, iheight, rtp_id, outputW, outputH);
	//
	encoder = ga_avcodec_vencoder_init(
			NULL,
			rtspconf->video_encoder_codec,
			outputW, outputH,
			rtspconf->video_fps,
			rtspconf->vso);
	if(encoder == NULL) {
		ga_error("video encoder: cannot initialized the encoder.\n");
		goto video_quit;
	}
	//
	nalbuf_size = 100000+12 * outputW * outputH;
	if(ga_malloc(nalbuf_size, (void**) &nalbuf, &nalign) < 0) {
		ga_error("video encoder: buffer allocation failed, terminated.\n");
		goto video_quit;
	}
	nalbuf_a = nalbuf + nalign;
	//
	if((pic_in = avcodec_alloc_frame()) == NULL) {
		ga_error("video encoder: picture allocation failed, terminated.\n");
		goto video_quit;
	}
	pic_in_size = avpicture_get_size(PIX_FMT_YUV420P, outputW, outputH);
	if((pic_in_buf = (unsigned char*) av_malloc(pic_in_size)) == NULL) {
		ga_error("video encoder: picture buffer allocation failed, terminated.\n");
		goto video_quit;
	}
	avpicture_fill((AVPicture*) pic_in, pic_in_buf,
			PIX_FMT_YUV420P, outputW, outputH);
	//ga_error("video encoder: linesize = %d|%d|%d\n", pic_in->linesize[0], pic_in->linesize[1], pic_in->linesize[2]);
	// start encoding
	ga_error("video encoding started: tid=%ld %dx%d@%dfps, nalbuf_size=%d, pic_in_size=%d.\n",
		ga_gettid(),
		iwidth, iheight, rtspconf->video_fps,
		nalbuf_size, pic_in_size);
	//
	pipe->client_register(ga_gettid(), &cond);
	//
	while(encoder_running() > 0) {
		AVPacket pkt;
		int got_packet = 0;
		// wait for notification
		data = pipe->load_data();
		if(data == NULL) {
			int err;
			struct timeval tv;
			struct timespec to;
			gettimeofday(&tv, NULL);
			to.tv_sec = tv.tv_sec+1;
			to.tv_nsec = tv.tv_usec * 1000;
			//
			if((err = pipe->timedwait(&cond, &condMutex, &to)) != 0) {
				ga_error("viedo encoder: image source timed out.\n");
				continue;
			}
			data = pipe->load_data();
			if(data == NULL) {
				ga_error("viedo encoder: unexpected NULL frame received (from '%s', data=%d, buf=%d).\n",
					pipe->name(), pipe->data_count(), pipe->buf_count());
				continue;
			}
		}
		frame = (struct vsource_frame*) data->ptr;
		// handle pts
		if(basePts == -1LL) {
			basePts = frame->imgpts;
			ptsSync = encoder_pts_sync(rtspconf->video_fps);
			newpts = ptsSync;
		} else {
			newpts = ptsSync + frame->imgpts - basePts;
		}
		// XXX: assume always YUV420P
		if(pic_in->linesize[0] == frame->linesize[0]
		&& pic_in->linesize[1] == frame->linesize[1]
		&& pic_in->linesize[2] == frame->linesize[2]) {
			bcopy(frame->imgbuf, pic_in_buf, pic_in_size);
		} else {
			ga_error("video encoder: YUV mode failed - mismatched linesize(s) (src:%d,%d,%d; dst:%d,%d,%d)\n",
				frame->linesize[0], frame->linesize[1], frame->linesize[2],
				pic_in->linesize[0], pic_in->linesize[1], pic_in->linesize[2]);
			pipe->release_data(data);
			goto video_quit;
		}
		pipe->release_data(data);
		// pts must be monotonically increasing
		if(newpts > pts) {
			pts = newpts;
		} else {
			pts++;
		}
		// encode
		pic_in->pts = pts;
		av_init_packet(&pkt);
		pkt.data = nalbuf_a;
		pkt.size = nalbuf_size;
		if(avcodec_encode_video2(encoder, &pkt, pic_in, &got_packet) < 0) {
			ga_error("video encoder: encode failed, terminated.\n");
			goto video_quit;
		}
		if(got_packet) {
			if(pkt.pts == (int64_t) AV_NOPTS_VALUE) {
				pkt.pts = pts;
			}
			pkt.stream_index = 0;
			// send the packet
			if(encoder_send_packet_all("video-encoder",
				rtp_id/*rtspconf->video_id*/, &pkt,
				pkt.pts) < 0) {
				goto video_quit;
			}
			// free unused side-data
			if(pkt.side_data_elems > 0) {
				int i;
				for (i = 0; i < pkt.side_data_elems; i++)
					av_free(pkt.side_data[i].data);
				av_freep(&pkt.side_data);
				pkt.side_data_elems = 0;
			}
			//
			if(video_written == 0) {
				video_written = 1;
				ga_error("first video frame written (pts=%lld)\n", pts);
			}
		}
	}
	//
video_quit:
	if(pipe) {
		pipe->client_unregister(ga_gettid());
		pipe = NULL;
	}
	//
	if(pic_in_buf)	av_free(pic_in_buf);
	if(pic_in)	av_free(pic_in);
	if(nalbuf)	free(nalbuf);
	if(encoder)	ga_avcodec_close(encoder);
	//
	ga_error("video encoder: thread terminated (tid=%ld).\n", ga_gettid());
	//
	return NULL;
}
예제 #4
0
void FATR 
ga_symmetrize_(Integer *g_a) {
  
  DoublePrecision alpha = 0.5;
  Integer i, me = ga_nodeid_();
  Integer alo[GA_MAX_DIM], ahi[GA_MAX_DIM], lda[GA_MAX_DIM], nelem=1;
  Integer blo[GA_MAX_DIM], bhi[GA_MAX_DIM], ldb[GA_MAX_DIM];
  Integer ndim, dims[GA_MAX_DIM], type;
  Logical have_data;
  Integer g_b; /* temporary global array (b = A') */
  Integer num_blocks_a;
  Void *a_ptr, *b_ptr;
  int local_sync_begin,local_sync_end;
  char *tempB = "A_transpose";

  local_sync_begin = _ga_sync_begin; local_sync_end = _ga_sync_end;
  _ga_sync_begin = 1; _ga_sync_end=1; /*remove any previous masking*/
  if(local_sync_begin)ga_sync_();

  GA_PUSH_NAME("ga_symmetrize");
  
  num_blocks_a = ga_total_blocks_(g_a);

  nga_inquire_internal_(g_a, &type, &ndim, dims);

  if (type != C_DBL)
    ga_error("ga_symmetrize: only implemented for double precision",0);

  if (num_blocks_a < 0) {

    if (dims[ndim-1] != dims[ndim-2]) 
      ga_error("ga_sym: can only sym square matrix", 0L);

    /* Find the local distribution */
    nga_distribution_(g_a, &me, alo, ahi);


    have_data = ahi[0]>0;
    for(i=1; i<ndim; i++) have_data = have_data && ahi[i]>0;

    if(have_data) {
      nga_access_ptr(g_a, alo, ahi, &a_ptr, lda); 

      for(i=0; i<ndim; i++) nelem *= ahi[i]-alo[i] +1;
      b_ptr = (Void *) ga_malloc(nelem, MT_F_DBL, "v");

      for(i=0; i<ndim-2; i++) {bhi[i]=ahi[i]; blo[i]=alo[i]; }

      /* switch rows and cols */
      blo[ndim-1]=alo[ndim-2];
      bhi[ndim-1]=ahi[ndim-2];
      blo[ndim-2]=alo[ndim-1];
      bhi[ndim-2]=ahi[ndim-1];

      for (i=0; i < ndim-1; i++) 
        ldb[i] = bhi[i] - blo[i] + 1; 
      nga_get_(g_a, blo, bhi, b_ptr, ldb);
    }
    ga_sync_(); 

    if(have_data) {
      gai_add(alo, ahi, a_ptr, b_ptr, alpha, type, nelem, ndim);
      nga_release_update_(g_a, alo, ahi);
      ga_free(b_ptr);
    }
  } else {
    /* For block-cyclic data, probably most efficient solution is to
       create duplicate copy, transpose it and add the results together */
    DoublePrecision half = 0.5;
    if (!ga_duplicate(g_a, &g_b, tempB))
      ga_error("ga_symmetrize: duplicate failed", 0L);
    ga_transpose_(g_a, &g_b);
    ga_add_(&half, g_a, &half, &g_b, g_a);
    ga_destroy_(&g_b);
  }
  GA_POP_NAME;
  if(local_sync_end)ga_sync_();
}