Пример #1
0
/**
 * Stop tracing.
 *
 * Stops MPI event tracing for the thread executing this function: stamps the
 * end time into the TLS header, calls defer_trace(0), and sends any data
 * still held in the thread's buffer.
 *
 * Takes no arguments and returns nothing.
 *
 * NOTE(review): the TLS lookup below is guarded by USE_EXPLICIT_TLS while the
 * teardown at the bottom is guarded by CBTF_SERVICE_USE_EXPLICIT_TLS --
 * confirm the build defines both together, otherwise the teardown is skipped.
 */
void cbtf_collector_stop()
{
    /* Access our thread-local storage */
#ifdef USE_EXPLICIT_TLS
    TLS* tls = CBTF_GetTLS(TLSKey);
#else
    TLS* tls = &the_tls;
#endif

    Assert(tls != NULL);

#ifndef NDEBUG
    if (IsCollectorDebugEnabled) {
	fprintf(stderr,"[%ld,%d] ENTERED cbtf_collector_stop.\n",tls->header.pid, tls->header.omp_tid);
    }
#endif

    /* Record the end of the interval covered by the data in the buffer */
    tls->header.time_end = CBTF_GetTime();

    /* Stop sampling */
    defer_trace(0);

    /* Are there any unsent samples? */
#if defined(PROFILE)
    /* PROFILE build: pending data is tracked by the count and stack trace lengths */
#ifndef NDEBUG
    if (IsCollectorDebugEnabled) {
	    fprintf(stderr,"[%ld,%d] cbtf_collector_stop count_len:%d stacktraces_len%d\n",tls->header.pid, tls->header.omp_tid,tls->data.count.count_len, tls->data.stacktraces.stacktraces_len);
    }
#endif
    if(tls->data.count.count_len > 0 || tls->data.stacktraces.stacktraces_len > 0) {
	send_samples(tls);
    }
#else
    /* Non-PROFILE build: pending data is tracked by the event and stack trace lengths */
#ifndef NDEBUG
    if (IsCollectorDebugEnabled) {
	    fprintf(stderr,"[%ld,%d] cbtf_collector_stop events_len:%d stacktraces_len%d\n",tls->header.pid, tls->header.omp_tid,tls->data.events.events_len, tls->data.stacktraces.stacktraces_len);
    }
#endif
    if(tls->data.events.events_len > 0 || tls->data.stacktraces.stacktraces_len > 0) {
	send_samples(tls);
    }
#endif

    /* Destroy our thread-local storage */
#ifdef CBTF_SERVICE_USE_EXPLICIT_TLS
    /* Release the block, then clear the key so it no longer refers to freed memory */
    free(tls);
    CBTF_SetTLS(TLSKey, NULL);
#endif
}
Пример #2
0
/**
 * Stop data collection for the calling thread.
 *
 * Invoked by the CBTF collector service. If no event set has been configured
 * yet (EventSet is still PAPI_NULL) the call is a no-op. Otherwise the
 * counters and the sampling timer are stopped, the end time is stamped into
 * the header, any buffered samples are sent, and (in explicit-TLS service
 * builds) the thread-local storage is destroyed.
 */
void cbtf_collector_stop()
{
    /* Locate this thread's collector state. */
#ifdef USE_EXPLICIT_TLS
    TLS* tls = CBTF_GetTLS(TLSKey);
#else
    TLS* tls = &the_tls;
#endif
    Assert(tls != NULL);

    /*
     * We can be called before any events have been set up; in that case
     * there is nothing to stop, flush or destroy.
     */
    if (tls->EventSet != PAPI_NULL) {

        /* Halt the counters first, then the timer that drives sampling. */
        CBTF_Stop(tls->EventSet, evalues);
        CBTF_Timer(0, NULL);

        /* Close the time interval covered by the buffered data. */
        tls->header.time_end = CBTF_GetTime();

        /* Flush whatever is still sitting in the sample buffer. */
        if (tls->buffer.length > 0) {
            send_samples(tls);
        }

        /* Tear down the thread-local storage when it is explicitly managed. */
#ifdef CBTF_SERVICE_USE_EXPLICIT_TLS
        destroy_explicit_tls();
#endif
    }
}
Пример #3
0
/*
 * profile_thread: body of the thread that ships profiling buffers.
 *
 * Marks the current activation non-swappable, initialises prof_queue and then
 * loops: each iteration dequeues one buffer (at splsched) and hands its
 * contents to send_samples() on the owning profile's port. When the queue is
 * empty the thread sleeps on the event `profile_thread`; a wakeup whose
 * wait_result is not THREAD_AWAKENED ends the loop. On exit the remaining
 * queue entries are drained and their waiters woken.
 *
 * The per-buffer handling differs between DCI and non-DCI builds; note that
 * the opening brace of the `else` body lives inside each #if arm while the
 * matching closing brace is shared after the #endif.
 */
void
profile_thread(void)
{
    spl_t	    s;
    buffer_t	    buf_entry;
    queue_entry_t   prof_queue_entry;
    prof_data_t	    pbuf;
    kern_return_t   kr;
    /* j is not referenced anywhere in this function */
    int		    j;

    thread_swappable(current_act(), FALSE);

    /* Initialise the queue header for the prof_queue */
    mpqueue_init(&prof_queue);

    while (TRUE) {

	/* Dequeue the first buffer. */
	s = splsched();
	mpdequeue_head(&prof_queue, &prof_queue_entry);
	splx(s);

	/* Empty queue: sleep until woken; leave the loop on any abnormal wakeup */
	if ((buf_entry = (buffer_t) prof_queue_entry) == NULLPBUF) { 
	    assert_wait((event_t) profile_thread, FALSE);
	    thread_block((void (*)(void)) 0);
	    if (current_thread()->wait_result != THREAD_AWAKENED)
		break;
	} else 
#if DCI
	{
	    register int    sum_samples = 0;
	    int		    i;

	    pbuf = buf_entry->p_prof;
/*
 * sum all the points from all the cpus on the machine.
*/
	    for(i=0;i < NCPUS; i++)
		sum_samples += buf_entry->p_index[i];

	    kr = send_samples(pbuf->prof_port, (void *)buf_entry->p_zone,
			(mach_msg_type_number_t)sum_samples);
	    /* On failure, suspend the profiled task and send the data via send_notices() instead */
	    if (kr != KERN_SUCCESS)
	    {
		task_suspend(pbuf->task); /* suspend task */
		kr = send_notices(pbuf->prof_port, (void *)buf_entry->p_zone,
				  (mach_msg_type_number_t)sum_samples,
				  MACH_SEND_ALWAYS);
	    }
	    /* Clear the whole sample zone */
	    bzero((char *)buf_entry->p_zone, NCPUS*SIZE_PROF_BUFFER);
#else
	{
	    int		    dropped;

	    pbuf = buf_entry->p_prof;
	    kr = send_samples(pbuf->prof_port, (void *)buf_entry->p_zone,
			(mach_msg_type_number_t)buf_entry->p_index);
	    /* The running total is updated whether or not the send succeeded */
	    profile_sample_count += buf_entry->p_index;
	    if (kr != KERN_SUCCESS)
	      printf("send_samples(%x, %x, %d) error %x\n",
			pbuf->prof_port, buf_entry->p_zone, buf_entry->p_index, kr); 
	    /* Report and reset the dropped-sample counter for this buffer */
	    dropped = buf_entry->p_dropped;
	    if (dropped > 0) {
		printf("kernel: profile dropped %d sample%s\n", dropped,
		       dropped == 1 ? "" : "s");
		buf_entry->p_dropped = 0;
	    }

#endif /* DCI */
	    /* Indicate you've finished the dirty job */
#if DCI
	    {
		int i;
		for(i=0;i<NCPUS;i++)
		    buf_entry->p_full[i] = FALSE;
	    }
#else
	    buf_entry->p_full = FALSE;
#endif /* DCI */
	    /* Wake whoever is sleeping on this buffer's p_wakeme field */
	    if (buf_entry->p_wakeme)
	      thread_wakeup((event_t) &buf_entry->p_wakeme);
	}

    }
    /* The profile thread has been signalled to exit.  Any threads waiting
       for the last buffer of samples to be acknowledged should be woken
       up now.  */
    profile_thread_id = THREAD_NULL;
    /* Drain the queue without sending anything; only wake the waiters */
    while (1) {
	s = splsched();
	mpdequeue_head(&prof_queue, &prof_queue_entry);
	splx(s);
	if ((buf_entry = (buffer_t) prof_queue_entry) == NULLPBUF)
	    break;
	if (buf_entry->p_wakeme)
	    thread_wakeup((event_t) &buf_entry->p_wakeme);
    }
    /* thread_halt_self() is compiled out, so reaching this point panics */
#if 0	/* XXXXX */
    thread_halt_self();
#else
	panic("profile_thread(): halt_self");
#endif	/* XXXXX */
}

/*
 *****************************************************************************
 * send_last_sample_buf is the drain mechanism that allows partially filled
 * profile buffers to be sent to the receive_prof thread in the server.
 *****************************************************************************
*/

/*
 * Hand the profile's current buffer to profile_thread and wait for it to be
 * processed.
 *
 * pbuf: profile whose current buffer (prof_area[prof_index]) is to be sent;
 *       NULLPROFDATA makes the call a no-op.
 *
 * If the profile thread has already exited (profile_thread_id is
 * THREAD_NULL) nothing is queued and the function returns immediately.
 */
void
send_last_sample_buf(prof_data_t pbuf)
{
    buffer_t last_buf;
    spl_t    saved_spl;

    if (pbuf == NULLPROFDATA)
        return;

    /* Pick the buffer currently in use and tie it back to its profile. */
    last_buf = &pbuf->prof_area[pbuf->prof_index];
    last_buf->p_prof = pbuf;

    /*
     * The profile thread may exit while we are about to queue work for it,
     * so the check and the enqueue are both done at splsched.
     */
    saved_spl = splsched();
    if (profile_thread_id == THREAD_NULL) {
        splx(saved_spl);
        return;
    }

    /*
     * Request a wakeup once the buffer has been handled, queue it, kick the
     * profile thread, and register our wait before lowering the spl so the
     * wakeup cannot be missed.
     */
    last_buf->p_wakeme = 1;
    mpenqueue_tail(&prof_queue, &last_buf->p_list);
    thread_wakeup((event_t) profile_thread);
    assert_wait((event_t) &last_buf->p_wakeme, TRUE);
    splx(saved_spl);

    thread_block((void (*)(void)) 0);
}
Пример #4
0
/*
 * Poll callback: read one block from the FTDI chip, decompress it and forward
 * the resulting samples, stopping the acquisition once a sample-count or time
 * limit has been reached.
 *
 * fd, revents: unused.
 * cb_data:     the struct sr_dev_inst of the acquiring device.
 *
 * Returns FALSE only when the FTDI read fails; TRUE in every other case.
 */
SR_PRIV int scanaplus_receive_data(int fd, int revents, void *cb_data)
{
	struct sr_dev_inst *sdi = cb_data;
	struct dev_context *devc;
	uint64_t total_samples, time_limit_samples;
	int nread;

	(void)fd;
	(void)revents;

	/* Without a device instance, context or FTDI handle there is nothing to do. */
	if (!sdi)
		return TRUE;
	devc = sdi->priv;
	if (!devc || !devc->ftdic)
		return TRUE;

	/* Fetch the next block of compressed data. */
	nread = ftdi_read_data(devc->ftdic, devc->compressed_buf,
			       COMPRESSED_BUF_SIZE);
	if (nread < 0) {
		sr_err("Failed to read FTDI data (%d): %s.",
		       nread, ftdi_get_error_string(devc->ftdic));
		sdi->driver->dev_acquisition_stop(sdi, sdi);
		return FALSE;
	}
	if (nread == 0) {
		sr_spew("Received 0 bytes, nothing to do.");
		return TRUE;
	}

	/*
	 * Right after an acquisition starts the device returns all-zero
	 * "dummy" samples (probably while the FPGA reconfigures itself), and
	 * the host has no way to tell them apart from genuine all-zero
	 * samples. The first block read is therefore discarded and accounted
	 * for as one full COMPRESSED_BUF_SIZE chunk, which empirically is
	 * far longer than the reconfiguration takes.
	 */
	if (devc->compressed_bytes_ignored < COMPRESSED_BUF_SIZE) {
		sr_spew("Ignoring first 64kB chunk of data.");
		devc->compressed_bytes_ignored += COMPRESSED_BUF_SIZE;
		return TRUE;
	}

	/* TODO: Handle a read length which is not a multiple of 2? */
	scanaplus_uncompress_block(devc, nread);

	/* Two bytes per sample; the time limit is converted to samples at 100MHz. */
	total_samples = devc->samples_sent + (devc->bytes_received / 2);
	time_limit_samples = (SR_MHZ(100) / 1000) * devc->limit_msec;

	/* Sample-count limit takes precedence over the time limit. */
	if (devc->limit_samples && (total_samples >= devc->limit_samples)) {
		send_samples(devc, devc->limit_samples - devc->samples_sent);
		sr_info("Requested number of samples reached.");
		sdi->driver->dev_acquisition_stop(sdi, cb_data);
		return TRUE;
	}

	if (devc->limit_msec && (total_samples >= time_limit_samples)) {
		send_samples(devc, time_limit_samples - devc->samples_sent);
		sr_info("Requested time limit reached.");
		sdi->driver->dev_acquisition_stop(sdi, cb_data);
		return TRUE;
	}

	/* No limit hit: forward everything that was just decompressed. */
	send_samples(devc, devc->bytes_received / 2);

	return TRUE;
}
Пример #5
0
/*
 * Read the remaining lines of the input channel, convert each one into
 * samples and feed them to the session bus.
 *
 * in:       input instance; in->internal holds the parser context and
 *           in->sdi the device instance the packets are sent for.
 * filename: unused (the channel in the context is already open).
 *
 * A header packet is sent first, followed by a META packet carrying the
 * samplerate when one is configured, then one batch of samples per data line
 * and finally an END packet.
 *
 * Returns SR_OK when the end of the file is reached, SR_ERR on a read, parse
 * or send failure. The context is released with free_context() on every exit
 * path.
 */
static int loadfile(struct sr_input *in, const char *filename)
{
	int res;
	struct context *ctx;
	struct sr_datafeed_packet packet;
	struct sr_datafeed_meta meta;
	struct sr_config *cfg;
	GIOStatus status;
	gboolean read_new_line;
	gsize term_pos;
	char **columns;
	gsize num_columns;
	int max_columns;

	(void)filename;

	ctx = in->internal;

	/* Send header packet to the session bus. */
	std_session_send_df_header(in->sdi, LOG_PREFIX);

	if (ctx->samplerate) {
		packet.type = SR_DF_META;
		packet.payload = &meta;
		cfg = sr_config_new(SR_CONF_SAMPLERATE,
			g_variant_new_uint64(ctx->samplerate));
		meta.config = g_slist_append(NULL, cfg);
		sr_session_send(in->sdi, &packet);
		/*
		 * Release the list node as well as the config entry; freeing
		 * only the entry would leak the GSList element.
		 */
		g_slist_free(meta.config);
		sr_config_free(cfg);
	}

	read_new_line = FALSE;

	/* Limit the number of columns to parse. */
	if (ctx->multi_column_mode)
		max_columns = ctx->num_probes;
	else
		max_columns = 1;

	while (TRUE) {
		/*
		 * Skip reading a new line for the first time if the last read
		 * line was not a header because the sample data is not parsed
		 * yet.
		 */
		if (read_new_line || ctx->header) {
			ctx->line_number++;
			status = g_io_channel_read_line_string(ctx->channel,
				ctx->buffer, &term_pos, NULL);

			if (status == G_IO_STATUS_EOF)
				break;

			if (status != G_IO_STATUS_NORMAL) {
				sr_err("Error while reading line %zu.",
					ctx->line_number);
				free_context(ctx);
				return SR_ERR;
			}

			/* Remove line termination character(s). */
			g_string_truncate(ctx->buffer, term_pos);
		}

		/* From now on every iteration consumes a fresh line. */
		read_new_line = TRUE;

		if (!ctx->buffer->len) {
			sr_spew("Blank line %zu skipped.", ctx->line_number);
			continue;
		}

		/* Remove trailing comment. */
		strip_comment(ctx->buffer, ctx->comment);

		if (!ctx->buffer->len) {
			sr_spew("Comment-only line %zu skipped.",
				ctx->line_number);
			continue;
		}

		if (!(columns = parse_line(ctx, max_columns))) {
			sr_err("Error while parsing line %zu.",
				ctx->line_number);
			free_context(ctx);
			return SR_ERR;
		}

		num_columns = g_strv_length(columns);

		/* Ensure that the first column is not out of bounds. */
		if (!num_columns) {
			sr_err("Column %zu in line %zu is out of bounds.",
				ctx->first_column, ctx->line_number);
			g_strfreev(columns);
			free_context(ctx);
			return SR_ERR;
		}

		/*
		 * Ensure that the number of probes does not exceed the number
		 * of columns in multi column mode.
		 */
		if (ctx->multi_column_mode && num_columns < ctx->num_probes) {
			sr_err("Not enough columns for desired number of probes in line %zu.",
				ctx->line_number);
			g_strfreev(columns);
			free_context(ctx);
			return SR_ERR;
		}

		if (ctx->multi_column_mode)
			res = parse_multi_columns(columns, ctx);
		else
			res = parse_single_column(columns[0], ctx);

		/* The column strings are no longer needed, whatever the outcome. */
		g_strfreev(columns);

		if (res != SR_OK) {
			free_context(ctx);
			return SR_ERR;
		}

		/*
		 * TODO: Parse sample numbers / timestamps and use it for
		 * decompression.
		 */

		/* Send sample data to the session bus. */
		res = send_samples(in->sdi, ctx->sample_buffer,
			ctx->sample_buffer_size, 1);

		if (res != SR_OK) {
			sr_err("Sending samples failed.");
			free_context(ctx);
			return SR_ERR;
		}
	}

	/* Send end packet to the session bus. */
	packet.type = SR_DF_END;
	sr_session_send(in->sdi, &packet);

	free_context(ctx);

	return SR_OK;
}
Пример #6
0
/*
 * Parse the data section of a VCD file.
 *
 * file: stream positioned at the start of the value-change section.
 * sdi:  device instance the generated samples are sent for.
 * ctx:  parser context (probe list, downsample/skip/compress settings);
 *       ctx->skip is updated when it was negative on entry.
 *
 * Tokens are read one at a time. Timestamps ("#<n>") emit the previously
 * accumulated probe values for the elapsed interval; scalar value changes
 * update the bit of the matching probe; vector values and unknown $-sections
 * are skipped.
 */
static void parse_contents(FILE *file, const struct sr_dev_inst *sdi, struct context *ctx)
{
	GString *token = g_string_sized_new(32);
	
	uint64_t prev_timestamp = 0;
	uint64_t prev_values = 0;
	
	/* Read one space-delimited token at a time. */
	while (read_until(file, NULL, 'N') && read_until(file, token, 'W'))
	{
		if (token->str[0] == '#' && g_ascii_isdigit(token->str[1]))
		{
			/* Numeric value beginning with # is a new timestamp value */
			uint64_t timestamp;
			timestamp = strtoull(token->str + 1, NULL, 10);
			
			if (ctx->downsample > 1)
				timestamp /= ctx->downsample;
			
			/* Skip < 0 => skip until first timestamp.
			 * Skip = 0 => don't skip
			 * Skip > 0 => skip until timestamp >= skip.
			 */
			if (ctx->skip < 0)
			{
				ctx->skip = timestamp;
				prev_timestamp = timestamp;
			}
			else if (ctx->skip > 0 && timestamp < (uint64_t)ctx->skip)
			{
				prev_timestamp = ctx->skip;
			}
			else if (timestamp == prev_timestamp)
			{
				/* Ignore repeated timestamps (e.g. sigrok outputs these) */
			}
			else
			{
				if (ctx->compress != 0 && timestamp - prev_timestamp > ctx->compress)
				{
					/* Compress long idle periods */
					prev_timestamp = timestamp - ctx->compress;
				}
			
				sr_dbg("New timestamp: %" PRIu64, timestamp);
			
				/* Generate samples from prev_timestamp up to timestamp - 1. */
				send_samples(sdi, prev_values, timestamp - prev_timestamp);
				prev_timestamp = timestamp;
			}
		}
		else if (token->str[0] == '$' && token->len > 1)
		{
			/* This is probably a $dumpvars, $comment or similar.
			 * $dump* contain useful data, but other tags will be skipped until $end. */
			if (g_strcmp0(token->str, "$dumpvars") == 0 ||
			    g_strcmp0(token->str, "$dumpon") == 0 ||
			    g_strcmp0(token->str, "$dumpoff") == 0 ||
			    g_strcmp0(token->str, "$end") == 0)
			{
				/* Ignore, parse contents as normally. */
			}
			else
			{
				/* Skip until $end */
				read_until(file, NULL, '$');
			}
		}
		else if (strchr("bBrR", token->str[0]) != NULL)
		{
			/* A vector value. Skip it and also the following identifier. */
			read_until(file, NULL, 'N');
			read_until(file, NULL, 'W');
		}
		else if (strchr("01xXzZ", token->str[0]) != NULL)
		{
			/* A new 1-bit sample value */
			int i, bit;
			GSList *l;
			struct probe *probe;

			/* Only '1' counts as high; 0, x and z are all stored as low. */
			bit = (token->str[0] == '1');
		
			g_string_erase(token, 0, 1);
			if (token->len == 0)
			{
				/* There was a space between value and identifier.
				 * Read in the rest.
				 */
				read_until(file, NULL, 'N');
				read_until(file, token, 'W');
			}
			
			for (i = 0, l = ctx->probes; i < ctx->probecount && l; i++, l = l->next)
			{
				probe = l->data;

				if (g_strcmp0(token->str, probe->identifier) == 0)
				{
					sr_dbg("Probe %d new value %d.", i, bit);
				
					/*
					 * Found our probe. The mask must be built in
					 * 64 bits: a plain int `1 << i` overflows for
					 * probe index 31 and above even though
					 * prev_values is a uint64_t.
					 */
					if (bit)
						prev_values |= ((uint64_t)1 << i);
					else
						prev_values &= ~((uint64_t)1 << i);
					
					break;
				}
			}
			
			if (i == ctx->probecount)
			{
				sr_dbg("Did not find probe for identifier '%s'.", token->str);
			}
		}
		else
		{
			sr_warn("Skipping unknown token '%s'.", token->str);
		}
		
		g_string_truncate(token, 0);
	}
	
	g_string_free(token, TRUE);
}
Пример #7
0
/**
 * Timer event handler.
 *
 * Called by the timer handler each time a sample is to be taken. Extracts the
 * program counter (PC) address from the signal context, accumulates the
 * hardware counter values into evalues, and records both in the sample
 * buffer. When CBTF_UpdateHWCPCData() reports that the buffer is full, the
 * buffer is handed to send_samples().
 *
 * Does nothing while tls->defer_sampling is set.
 *
 * @param context    Thread context at timer interrupt.
 */
static void hwcsampTimerHandler(const ucontext_t* context)
{
    /* Access our thread-local storage */
#ifdef USE_EXPLICIT_TLS
    TLS* tls = CBTF_GetTLS(TLSKey);
#else
    TLS* tls = &the_tls;
#endif
    Assert(tls != NULL);

    if(tls->defer_sampling == true) {
        return;
    }
 
    /* Obtain the program counter (PC) address from the thread context */
    uint64_t pc = CBTF_GetPCFromContext(context);


#if defined (HAVE_OMPT)
    /*
     * OMPT specific: while the thread is flagged as idle, waiting at a
     * barrier, or in a barrier, the sample is attributed to a synthetic
     * address for that state instead of the real PC (which may be anywhere
     * inside the OpenMP runtime's wait routine or its callees).
     */
    if (tls->thread_idle) {
	pc = CBTF_GetAddressOfFunction(OMPT_THREAD_IDLE);
    }

    else if (tls->thread_wait_barrier) {
	pc = CBTF_GetAddressOfFunction(OMPT_THREAD_WAIT_BARRIER);
    }

    else if (tls->thread_barrier) {
	pc = CBTF_GetAddressOfFunction(OMPT_THREAD_BARRIER);
    }
#endif // if defined (HAVE_OMPT)

    /* This is supposed to reset counters */
    CBTF_HWCAccum(tls->EventSet, evalues);

    /* Update the sampling buffer and check if it has been filled */
    if(CBTF_UpdateHWCPCData(pc, &tls->buffer,evalues)) {
	/* Send these samples */
	send_samples(tls);
    }

    /* reset our values */
    memset(evalues,0,sizeof(evalues));

#ifndef NDEBUG
    if (IsCollectorDetailsDebugEnabled) {
      /*
       * Dump the non-zero counters of the most recent buffer entry. The
       * buffer may be empty at this point (presumably after send_samples()
       * has flushed it -- TODO confirm), in which case there is no
       * "length-1" entry to look at and indexing it would read out of bounds.
       */
      if (tls->buffer.length > 0) {
        int i;
        for (i = 0; i < 6; i++) {
          if (tls->buffer.hwccounts[tls->buffer.length-1][i] > 0) {
              fprintf(stderr,"[%ld,%d] %lx HWC sampTimerHandler %d count %d is %ld\n",tls->header.pid, tls->header.omp_tid,pc,tls->buffer.length-1,i, tls->buffer.hwccounts[tls->buffer.length-1][i]);
          }
        }
      }
    }
#endif
}
Пример #8
0
void mpi_record_event(const CBTF_mpi_event* event, uint64_t function)
#endif
#endif
{
    /* Access our thread-local storage */
#ifdef USE_EXPLICIT_TLS
    TLS* tls = CBTF_GetTLS(TLSKey);
#else
    TLS* tls = &the_tls;
#endif
    Assert(tls != NULL);

    //if (tls->defer_sampling) return;

    tls->do_trace = FALSE;

    uint64_t stacktrace[MaxFramesPerStackTrace];
    unsigned stacktrace_size = 0;
    unsigned entry = 0, start, i;
    unsigned pathindex = 0;

#ifndef NDEBUG
	if (IsCollectorDetailDebugEnabled) {
#if defined(EXTENDEDTRACE)
fprintf(stderr,"[%ld,%d] ENTERED mpi_record_event, sizeof event=%ld, sizeof stacktrace=%ld, NESTING=%d\n",tls->header.pid, tls->header.omp_tid,sizeof(CBTF_mpit_event),sizeof(stacktrace),tls->nesting_depth);
#else
fprintf(stderr,"[%ld,%d] ENTERED mpi_record_event, sizeof event=%ld, sizeof stacktrace=%ld, NESTING=%d\n",tls->header.pid, tls->header.omp_tid,sizeof(CBTF_mpi_event),sizeof(stacktrace),tls->nesting_depth);
#endif
	}
#endif

    /* Decrement the MPI function wrapper nesting depth */
    --tls->nesting_depth;

    /*
     * Don't record events for any recursive calls to our MPI function wrappers.
     * The place where this occurs is when the MPI implemetnation calls itself.
     * We don't record that data here because we are primarily interested in
     * direct calls by the application to the MPI library - not in the internal
     * implementation details of that library.
     */
    if(tls->nesting_depth > 0) {
#ifndef NDEBUG
	if (IsCollectorDebugEnabled) {
	    fprintf(stderr,"[%ld,%d] mpi_record_event RETURNS EARLY DUE TO NESTING\n",tls->header.pid, tls->header.omp_tid);
	}
#endif
	return;
    }
    
    /* Newer versions of libunwind now make io calls (open a file in /proc/<self>/maps)
     * that cause a thread lock in the libunwind dwarf parser. We are not interested in
     * any io done by libunwind while we get the stacktrace for the current context.
     * So we need to bump the nesting_depth before requesting the stacktrace and
     * then decrement nesting_depth after aquiring the stacktrace
     */

    /* Obtain the stack trace from the current thread context */
    CBTF_GetStackTraceFromContext(NULL, FALSE, OverheadFrameCount,
				    MaxFramesPerStackTrace,
				    &stacktrace_size, stacktrace);

#ifndef NDEBUG
	if (IsCollectorDetailDebugEnabled) {
	    fprintf(stderr,"[%ld,%d] mpi_record_event gets stacktrace of size:%d\n",tls->header.pid, tls->header.omp_tid,stacktrace_size);
	}
#endif

#if defined(PROFILE)

    bool_t stack_already_exists = FALSE;

    if(stacktrace_size > 0)
	stacktrace[0] = function;

    int j;
    int stackindex = 0;
    /* search individual stacks via count/indexing array */
    for (i = 0; i < tls->data.count.count_len ; i++ )
    {
	/* a count > 0 indexes the top of stack in the data buffer. */
	/* a count == 255 indicates this stack is at the count limit. */

	if (tls->buffer.count[i] == 0) {
	    continue;
	}
	if (tls->buffer.count[i] == 255) {
	    continue;
	}

	/* see if the stack addresses match */
	for (j = 0; j < stacktrace_size ; j++ )
	{
	    if ( tls->buffer.stacktraces[i+j] != stacktrace[j] ) {
		   break;
	    }
	}

	if ( j == stacktrace_size) {
	    stack_already_exists = TRUE;
	    stackindex = i;
	}
    }

    /* if the stack already exisits in the buffer, update its count
     * and return. If the stack is already at the count limit.
    */
    if (stack_already_exists && tls->buffer.count[stackindex] < 255 ) {
	/* update count for this stack */
	tls->buffer.count[stackindex] = tls->buffer.count[stackindex] + 1;
	tls->buffer.time[stackindex] += event->time;
	// reset do_trace to true.
	tls->do_trace = TRUE;
	return;
    }

    /* sample buffer has no room for these stack frames.*/
    int buflen = tls->data.stacktraces.stacktraces_len + stacktrace_size;
    if ( buflen > StackTraceBufferSize) {
	/* send the current sample buffer. (will init a new buffer) */
#ifndef NDEBUG
	if (IsCollectorDebugEnabled) {
	    fprintf(stderr,"[%ld,%d] StackTraceBufferSize SAMPLE BUFFER FULL. send samples\n",tls->header.pid, tls->header.omp_tid);
	}
#endif
	send_samples(tls);
    }

    /* add frames to sample buffer, compute addresss range */
    for (i = 0; i < stacktrace_size ; i++)
    {
	/* always add address to buffer bt */
	tls->buffer.stacktraces[tls->data.stacktraces.stacktraces_len] = stacktrace[i];

	/* top of stack indicated by a positive count. */
	/* all other elements are 0 */
	if (i > 0 ) {
	    tls->buffer.count[tls->data.count.count_len] = 0;
	} else {
	    tls->buffer.count[tls->data.count.count_len] = 1;
	    tls->buffer.time[tls->data.time.time_len] = event->time;
	}

	if (stacktrace[i] < tls->header.addr_begin ) {
	    tls->header.addr_begin = stacktrace[i];
	}
	if (stacktrace[i] > tls->header.addr_end ) {
	    tls->header.addr_end = stacktrace[i];
	}
	tls->data.stacktraces.stacktraces_len++;
	tls->data.count.count_len++;
	tls->data.time.time_len++;
    }

#else
    /*
     * Replace the first entry in the call stack with the address of the MPI
     * function that is being wrapped. On most platforms, this entry will be
     * the address of the call site of mpi_record_event() within the calling
     * wrapper. On IA64, because OverheadFrameCount is one higher, it will be
     * the mini-tramp for the wrapper that is calling mpi_record_event().
     */
    if(stacktrace_size > 0)
	stacktrace[0] = function;
    
    /*
     * Search the tracing buffer for an existing stack trace matching the stack
     * trace from the current thread context. For now do a simple linear search.
     */
    for(start = 0, i = 0;
	(i < stacktrace_size) &&
	    ((start + i) < tls->data.stacktraces.stacktraces_len);
	++i)
	
	/* Do the i'th frames differ? */
	if(stacktrace[i] != tls->buffer.stacktraces[start + i]) {
	    
	    /* Advance in the tracing buffer to the end of this stack trace */
	    for(start += i;
		(tls->buffer.stacktraces[start] != 0) &&
		    (start < tls->data.stacktraces.stacktraces_len);
		++start);
	    
	    /* Advance in the tracing buffer past the terminating zero */
	    ++start;
	    
	    /* Begin comparing at the zero'th frame again */
	    i = 0;
	    
	}
    
    /* Did we find a match in the tracing buffer? */
    if(i == stacktrace_size)
	entry = start;
    
    /* Otherwise add this stack trace to the tracing buffer */
    else {
	
	/* Send events if there is insufficient room for this stack trace */
	if((tls->data.stacktraces.stacktraces_len + stacktrace_size + 1) >=
	   StackTraceBufferSize) {
#ifndef NDEBUG
	if (IsCollectorDebugEnabled) {
	    fprintf(stderr,"[%ld,%d] StackTraceBufferSize FULL. send samples\n",tls->header.pid, tls->header.omp_tid);
	}
#endif
	    send_samples(tls);
	}
	
	/* Add each frame in the stack trace to the tracing buffer. */	
	entry = tls->data.stacktraces.stacktraces_len;
	for(i = 0; i < stacktrace_size; ++i) {
	    
	    /* Add the i'th frame to the tracing buffer */
	    tls->buffer.stacktraces[entry + i] = stacktrace[i];
	    
	    /* Update the address interval in the data blob's header */
	    if(stacktrace[i] < tls->header.addr_begin)
		tls->header.addr_begin = stacktrace[i];
	    if(stacktrace[i] > tls->header.addr_end)
		tls->header.addr_end = stacktrace[i];
	    
	}
	
	/* Add a terminating zero frame to the tracing buffer */
	tls->buffer.stacktraces[entry + stacktrace_size] = 0;
	
	/* Set the new size of the tracing buffer */
	tls->data.stacktraces.stacktraces_len += (stacktrace_size + 1);
	
    }
    
    /* Add a new entry for this event to the tracing buffer. */
#if defined(EXTENDEDTRACE)
    memcpy(&(tls->buffer.events[tls->data.events.events_len]),
	   event, sizeof(CBTF_mpit_event));
#else
    memcpy(&(tls->buffer.events[tls->data.events.events_len]),
	   event, sizeof(CBTF_mpi_event));
#endif
    tls->buffer.events[tls->data.events.events_len].stacktrace = entry;
    tls->data.events.events_len++;
    
    /* Send events if the tracing buffer is now filled with events */
    if(tls->data.events.events_len == EventBufferSize) {
#ifndef NDEBUG
	if (IsCollectorDebugEnabled) {
	    fprintf(stderr,"[%ld,%d] EventBufferSize FULL. send samples\n",tls->header.pid, tls->header.omp_tid);
	}
#endif
	send_samples(tls);
    }
#endif

    tls->do_trace = TRUE;
}