Example #1
/*
 * given a spool loaded by successive calls to _bt_spool,
 * create an entire btree.
 */
void
_bt_leafbuild(BTSpool *btspool, BTSpool *btspool2)
{
	BTWriteState wstate;

#ifdef BTREE_BUILD_STATS
	if (log_btree_build_stats)
	{
		ShowUsage("BTREE BUILD (Spool) STATISTICS");
		ResetUsage();
	}
#endif   /* BTREE_BUILD_STATS */

	tuplesort_performsort(btspool->sortstate);
	if (btspool2)
		tuplesort_performsort(btspool2->sortstate);

	wstate.heap = btspool->heap;
	wstate.index = btspool->index;

	/*
	 * We need to log index creation in WAL iff WAL archiving/streaming is
 * enabled AND the index is itself WAL-logged.
	 */
	wstate.btws_use_wal = XLogIsNeeded() && RelationNeedsWAL(wstate.index);

	/* reserve the metapage */
	wstate.btws_pages_alloced = BTREE_METAPAGE + 1;
	wstate.btws_pages_written = 0;
	wstate.btws_zeropage = NULL;	/* until needed */

	_bt_load(&wstate, btspool, btspool2);
}
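A caller drives this in three steps: initialize one or two spools, feed tuples into them during the heap scan, then hand them to _bt_leafbuild. A minimal sketch assembled from the signatures shown in Example #4 below; the wrapper name is hypothetical and the per-tuple scan step is elided:

static void
build_btree_sketch(Relation heap, Relation index, IndexInfo *indexInfo)
{
	/* main spool; for unique indexes a second spool collects dead tuples */
	BTSpool    *spool = _bt_spoolinit(heap, index, indexInfo->ii_Unique, false);
	BTSpool    *spool2 = indexInfo->ii_Unique
		? _bt_spoolinit(heap, index, false, true)
		: NULL;

	/* ... the heap scan feeds each index tuple into a spool via _bt_spool ... */

	_bt_leafbuild(spool, spool2);	/* sort the spool(s) and write the btree */
	_bt_spooldestroy(spool);
	if (spool2)
		_bt_spooldestroy(spool2);
}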
Example #2
/*
 * given a spool loaded by successive calls to _bt_spool,
 * create an entire btree.
 */
void
_bt_leafbuild(BTSpool *btspool, BTSpool *btspool2)
{
	BTWriteState wstate;

#ifdef BTREE_BUILD_STATS
	if (log_btree_build_stats)
	{
		ShowUsage("BTREE BUILD (Spool) STATISTICS");
		ResetUsage();
	}
#endif   /* BTREE_BUILD_STATS */

	tuplesort_performsort(btspool->sortstate);
	if (btspool2)
		tuplesort_performsort(btspool2->sortstate);

	wstate.index = btspool->index;

	/*
	 * We need to log index creation in WAL iff WAL archiving/streaming is
	 * enabled AND it's not a temp index.
	 */
	wstate.btws_use_wal = XLogIsNeeded() && !wstate.index->rd_istemp;

	/*
	 * Write an XLOG UNLOGGED record if WAL-logging was skipped because WAL
	 * archiving is not enabled.
	 */
	if (!wstate.btws_use_wal && !wstate.index->rd_istemp)
	{
		char		reason[NAMEDATALEN + 20];

		snprintf(reason, sizeof(reason), "b-tree build on \"%s\"",
				 RelationGetRelationName(wstate.index));
		XLogReportUnloggedStatement(reason);
	}

	/* reserve the metapage */
	wstate.btws_pages_alloced = BTREE_METAPAGE + 1;
	wstate.btws_pages_written = 0;
	wstate.btws_zeropage = NULL;	/* until needed */

	_bt_load(&wstate, btspool, btspool2);
}
Example #3
File: nbtsort.c Project: huor/gpdb
/*
 * given a spool loaded by successive calls to _bt_spool,
 * create an entire btree.
 */
void
_bt_leafbuild(BTSpool *btspool, BTSpool *btspool2)
{
	BTWriteState wstate;

#ifdef BTREE_BUILD_STATS
	if (log_btree_build_stats)
	{
		ShowUsage("BTREE BUILD (Spool) STATISTICS");
		ResetUsage();
	}
#endif   /* BTREE_BUILD_STATS */

	if(gp_enable_mk_sort)
	{
		tuplesort_performsort_mk((Tuplesortstate_mk *) btspool->sortstate);
		if (btspool2)
			tuplesort_performsort_mk((Tuplesortstate_mk *) btspool2->sortstate);
	}
	else
	{
		tuplesort_performsort((Tuplesortstate *) btspool->sortstate);
		if (btspool2)
			tuplesort_performsort((Tuplesortstate *) btspool2->sortstate);
	}


	wstate.index = btspool->index;

	/*
	 * We need to log index creation in WAL iff WAL archiving is enabled AND
	 * it's not a temp index.
	 */
	wstate.btws_use_wal = !XLog_UnconvertedCanBypassWal() && !wstate.index->rd_istemp;

	/* reserve the metapage */
	wstate.btws_pages_alloced = BTREE_METAPAGE + 1;
	wstate.btws_pages_written = 0;
	wstate.btws_zeropage = NULL;	/* until needed */

	_bt_load(&wstate, btspool, btspool2);
}
Example #4
/*
 *	btbuild() -- build a new btree index.
 */
IndexBuildResult *
btbuild(Relation heap, Relation index, IndexInfo *indexInfo)
{
	IndexBuildResult *result;
	double		reltuples;
	BTBuildState buildstate;

	buildstate.isUnique = indexInfo->ii_Unique;
	buildstate.haveDead = false;
	buildstate.heapRel = heap;
	buildstate.spool = NULL;
	buildstate.spool2 = NULL;
	buildstate.indtuples = 0;

#ifdef BTREE_BUILD_STATS
	if (log_btree_build_stats)
		ResetUsage();
#endif							/* BTREE_BUILD_STATS */

	/*
	 * We expect to be called exactly once for any index relation. If that's
	 * not the case, big trouble's what we have.
	 */
	if (RelationGetNumberOfBlocks(index) != 0)
		elog(ERROR, "index \"%s\" already contains data",
			 RelationGetRelationName(index));

	buildstate.spool = _bt_spoolinit(heap, index, indexInfo->ii_Unique, false);

	/*
	 * If building a unique index, put dead tuples in a second spool to keep
	 * them out of the uniqueness check.
	 */
	if (indexInfo->ii_Unique)
		buildstate.spool2 = _bt_spoolinit(heap, index, false, true);

	/* do the heap scan */
	reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
								   btbuildCallback, (void *) &buildstate);

	/* okay, all heap tuples are indexed */
	if (buildstate.spool2 && !buildstate.haveDead)
	{
		/* spool2 turns out to be unnecessary */
		_bt_spooldestroy(buildstate.spool2);
		buildstate.spool2 = NULL;
	}

	/*
	 * Finish the build by (1) completing the sort of the spool file, (2)
	 * inserting the sorted tuples into btree pages and (3) building the upper
	 * levels.
	 */
	_bt_leafbuild(buildstate.spool, buildstate.spool2);
	_bt_spooldestroy(buildstate.spool);
	if (buildstate.spool2)
		_bt_spooldestroy(buildstate.spool2);

#ifdef BTREE_BUILD_STATS
	if (log_btree_build_stats)
	{
		ShowUsage("BTREE BUILD STATS");
		ResetUsage();
	}
#endif							/* BTREE_BUILD_STATS */

	/*
	 * Return statistics
	 */
	result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));

	result->heap_tuples = reltuples;
	result->index_tuples = buildstate.indtuples;

	return result;
}
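The btbuildCallback passed to IndexBuildHeapScan is what routes each heap tuple into spool or spool2 and maintains haveDead and indtuples. A hedged sketch of its shape based on the buildstate fields used above; the exact callback signature and _bt_spool argument order vary across PostgreSQL versions, so treat this as illustrative:

static void
btbuildCallback(Relation index, HeapTuple htup, Datum *values,
				bool *isnull, bool tupleIsAlive, void *state)
{
	BTBuildState *buildstate = (BTBuildState *) state;

	/*
	 * Live tuples go into the main spool; known-dead tuples go into spool2
	 * (when it exists) so they stay out of the uniqueness check.
	 */
	if (tupleIsAlive || buildstate->spool2 == NULL)
		_bt_spool(buildstate->spool, &htup->t_self, values, isnull);
	else
	{
		buildstate->haveDead = true;
		_bt_spool(buildstate->spool2, &htup->t_self, values, isnull);
	}

	buildstate->indtuples += 1;
}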
Example #5
File: nbtree.c Project: LJoNe/gpdb
/*
 *	btbuild() -- build a new btree index.
 */
Datum
btbuild(PG_FUNCTION_ARGS)
{
	MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_DECLARE;

	Relation	heap = (Relation) PG_GETARG_POINTER(0);
	Relation	index = (Relation) PG_GETARG_POINTER(1);
	IndexInfo  *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
	IndexBuildResult *result;
	double		reltuples;
	BTBuildState buildstate;

	MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_ENTER;

	buildstate.isUnique = indexInfo->ii_Unique;
	buildstate.haveDead = false;
	buildstate.heapRel = heap;
	buildstate.spool = NULL;
	buildstate.spool2 = NULL;
	buildstate.indtuples = 0;

#ifdef BTREE_BUILD_STATS
	if (log_btree_build_stats)
		ResetUsage();
#endif   /* BTREE_BUILD_STATS */

	/*
	 * We expect to be called exactly once for any index relation. If that's
	 * not the case, big trouble's what we have.
	 */
	if (RelationGetNumberOfBlocks(index) != 0)
		elog(ERROR, "index \"%s\" already contains data",
			 RelationGetRelationName(index));

	PG_TRY();
	{
		buildstate.spool = _bt_spoolinit(index, indexInfo->ii_Unique, false);

		/*
		 * If building a unique index, put dead tuples in a second spool to keep
		 * them out of the uniqueness check.
		 */
		if (indexInfo->ii_Unique)
			buildstate.spool2 = _bt_spoolinit(index, false, true);

		/* do the heap scan */
		reltuples = IndexBuildScan(heap, index, indexInfo, false,
				btbuildCallback, (void *) &buildstate);

		/* okay, all heap tuples are indexed */
		if (buildstate.spool2 && !buildstate.haveDead)
		{
			/* spool2 turns out to be unnecessary */
			_bt_spooldestroy(buildstate.spool2);
			buildstate.spool2 = NULL;
		}

		/*
		 * Finish the build by (1) completing the sort of the spool file, (2)
		 * inserting the sorted tuples into btree pages and (3) building the upper
		 * levels.
		 */
		_bt_leafbuild(buildstate.spool, buildstate.spool2);
		_bt_spooldestroy(buildstate.spool);
		buildstate.spool = NULL;

		if (buildstate.spool2)
		{
			_bt_spooldestroy(buildstate.spool2);
			buildstate.spool2 = NULL;
		}
	}
	PG_CATCH();
	{
		/* Clean up the sort state on error */
		if (buildstate.spool)
		{
			_bt_spooldestroy(buildstate.spool);
			buildstate.spool = NULL;
		}

		if (buildstate.spool2)
		{
			_bt_spooldestroy(buildstate.spool2);
			buildstate.spool2 = NULL;
		}

		PG_RE_THROW();
	}
	PG_END_TRY();

#ifdef BTREE_BUILD_STATS
	if (log_btree_build_stats)
	{
		ShowUsage("BTREE BUILD STATS");
		ResetUsage();
	}
#endif   /* BTREE_BUILD_STATS */

	/*
	 * If we are reindexing a pre-existing index, it is critical to send out a
	 * relcache invalidation SI message to ensure all backends re-read the
	 * index metapage.	We expect that the caller will ensure that happens
	 * (typically as a side effect of updating index stats, but it must happen
	 * even if the stats don't change!)
	 */

	/*
	 * Return statistics
	 */
	result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));

	result->heap_tuples = reltuples;
	result->index_tuples = buildstate.indtuples;

	MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_EXIT;

	PG_RETURN_POINTER(result);
}
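The relcache-invalidation requirement described in the comment above rests on the caller, not on btbuild() itself. A minimal sketch of the obligation, assuming stock PostgreSQL's CacheInvalidateRelcache(); in practice the invalidation usually arrives as a side effect of the caller updating the index's pg_class stats, and the helper name here is hypothetical:

#include "utils/inval.h"

/*
 * After rebuilding a pre-existing index, broadcast a relcache invalidation
 * so every backend re-reads the index metapage.
 */
static void
signal_index_rebuilt(Relation index)
{
	CacheInvalidateRelcache(index);
}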
Example #6
int q_sig_search(int g_m) {
  int i, j, k;
  double time;
  int m_num;
  //char can_name_buffer[256];
  //char avr_can_name_buffer[256];
  char match_name_buffer[256];
  double temp0, temp1, switch_point, proba;
  double beta[MAX_PARTI];
  //char can_index_name_buffer[256];
  //char avr_can1_name_buffer[256];
  //sprintf(can_name_buffer, "./can_stat_m=%d_q=%d_o=%d.txt", g_m, g_q, order_num);
  //sprintf(can_index_name_buffer, "./index_stat_m=%d_q=%d_o=%d_can.txt", g_m, g_q, order_num);
  sprintf(match_name_buffer, "./match_m=%d_q=%d_o=%d.txt", g_m, g_q, order_num);

  //init power_hashmap
  power_hashmap = init_power_hashmap(N, power_hashmap);

  // init bitmap masks: mask[i] selects the i-th 2-bit field (3 << (2*i))
  for (i = 0; i < U_INT_LEN / 2; i++) {
    mask[i] = 3 << (i << 1);
  }


  /*
    if (g_element_random_order == 1) {
    order_num = 0;
    }
   */
  // link data to stream
  FILE * fp_record;
  if ((fp_record = fopen(data_source, "rt+")) == NULL) {
    fprintf(stderr, "No Document has been found.\n");
    return EXIT_FAILURE;
  }
  // set a buffer to store combining elements as a sig

  // read data
  g_curr_record_num = read_all_documents(fp_record, g_records);
  fclose(fp_record);
  //calculate best Q based on m 
  g_q = calculate_Q(g_records[0].len, g_m);
  //fprintf(stdout, "partition length is %d\n", g_q);
  //if (g_q <= 15){
  //collapse_filtering = 0;
  //}
  //sig_str_buffer = (char *) malloc(sizeof (char) * 2 * g_q * MAX_ELEMENT_LEN);

  if (g_curr_record_num < 0) {
    fprintf(stderr, "Error: Read data error\n");
    return EXIT_FAILURE;
  }

  // init the hashtable
  init_element_index(MAX_ELEMENT_NUM);
  // create space for elements
  for (i = 0; i < g_curr_record_num; i++) {
    if ((g_records[i].element_slots = (element_slot_t *) malloc(sizeof (element_slot_t) * g_records[i].len)) == NULL) {
      fprintf(stderr, "ERROR: Out of memory\n");
      exit(-1);
    }
    // build the element list
    build_record_elements(g_records[i].str, g_records[i].len, g_records[i].element_slots);
  }

  //random the frq to random the element order
  //if (g_element_random_order == 1) {
  if (bit_fv) {
    hash_element_in_dimension(_element_block_head);
    bitwise_record(g_records, g_curr_record_num);
  }

  free(_buffer);
  //sort_all_element_list_by_freq(g_records, g_curr_record_num);
  //}

  // initiate index
  /*
    if(collapse_filtering){
    rand_bucket_num(_element_block_head);
    //fprintf(stdout,"%u\t%u\t%u\t%u\n", g_records[0].element_slots[0].element->bucket_num, g_records[0].element_slots[1].element->bucket_num, 
    //g_records[0].element_slots[2].element->bucket_num, g_records[0].element_slots[3].element->bucket_num );
    rec_bucket_init(g_records, g_curr_record_num);
    //fprintf(stdout,"%u\t%u\t%u\t%u\n", g_records[0].bucket[0], g_records[0].bucket[1], 
    //g_records[0].bucket[3], g_records[0].bucket[4]);
    }
   */

  init_sig_index(MAX_SIG_NUM);
  // create data sigs for each record from their elements
  for (i = 0; i < g_curr_record_num; i++) {

    //calculate the number of sig_prefix for each record
    g_records[i].sig_num = calculate_sig_num(&g_records[i], g_m);
    //set space for those sigs
    if ((g_records[i].sig_slots = (sig_slot *) malloc(sizeof (sig_slot) * g_records[i].sig_num)) == NULL) {
      fprintf(stderr, "ERROR: Out of memory\n");
      exit(-1);
    }
    //create sigs
    build_sigs(&g_records[i], g_q, g_m);
  }


  //random the frq to random the sig order
  //if (g_sig_random_order == 1) {
  //  random_sig_frq(_sig_block_head);
  //}
  //sort those sigs by idf
  //sort_all_sig_list_by_freq(g_records, g_curr_record_num);
  // build index
  build_sig_idf_list(g_records, g_curr_record_num, g_m, g_q);

  fprintf(stdout, "The number of documents:%d\n", g_curr_record_num);
  printf("\n");
  // print out the average length of the documents in data source
  int sum = 0;
  int avg_len = 0;
  for (i = 0; i < g_curr_record_num; i++) {
    sum += g_records[i].len;
  }
  avg_len = sum / g_curr_record_num;
  fprintf(stdout, "The average length of these documents is:%d\n", avg_len);
  fprintf(stdout, "\n");
  //show the information in the index
  fprintf(stdout, "The number of different elements is %d\n", g_element_num);
  fprintf(stdout, "The number of different sigs is %d\n", g_sig_num);


  //search part

#ifndef DEBUG_INDEX
  //output result

  if ((fp_query_match = fopen(match_name_buffer, "w+")) == NULL) {
    fprintf(stderr, "Error: create file error\n");
    return EXIT_FAILURE;
  }
  //output candidate status
  /*
    if ((fp_query_cand = fopen(can_name_buffer, "w+")) == NULL) {
      fprintf(stderr, "Document creating error.\n");
      return EXIT_FAILURE;
    }

    if ((fp_query_cand_index = fopen(can_index_name_buffer, "w+")) == NULL) {
      fprintf(stderr, "Document creating error.\n");
      return EXIT_FAILURE;
    }
   */
  //FILE * fp_query_stat;
  //if ((fp_query_stat = fopen("/Users/xyzhang/Desktop/q_sig/search_stat.txt", "w+")) == NULL) {
  //    fprintf(stderr, "Document creating error.\n");
  //    return EXIT_FAILURE;
  //}

  // data query
  FILE * fp_query;
  if ((fp_query = fopen(query_source, "rt+")) == NULL) {
    fprintf(stderr, "No Document has been found.\n");
    return EXIT_FAILURE;
  }

  //read query data
  g_curr_query_num = read_all_queries(fp_query, g_query);
  fclose(fp_query);
  //reset timer


  if (g_curr_query_num < 0) {
    fprintf(stderr, "Error: Read query error\n");
    return EXIT_FAILURE;
  }

  double average_can0, average_can1, average_can2, average_can3, average_can4, average_can5, average_esti_can1;
  average_can0 = 0;
  average_can1 = 0;
  average_can2 = 0;
  average_can3 = 0;
  average_can4 = 0;
  average_can5 = 0;
  //average_esti_can1 = 0;

  for (i = 0; i < g_curr_query_num; i++) {
    //for (i = 63; i<64 ;i++){
    //init_query_element_head();
    //set space for query elements
    if ((g_query[i].element_slots = (element_slot_t *) malloc(sizeof (element_slot_t) * g_query[i].len)) == NULL) {
      fprintf(stderr, "ERROR: Out of memory\n");
      exit(-1);
    }
    //create query elements
    build_query_elements(g_query[i].str, g_query[i].len, g_query[i].element_slots);

    if (bit_fv) {
      bitwise_query(g_query, i);
    }
    //set order by dimension

    /*
        if(collapse_filtering){
        //init_query_sig_head();
        que_bucket_init(g_query, i);
         }
     */
  }

  ResetUsage();
  mytimer preptimer;
  preptimer.timesum = 0;
  mytimer probetimer;
  probetimer.timesum = 0;

  init_search(g_curr_record_num);
  
  
  
  for (i = 0; i < g_curr_query_num; i++) {
#ifdef DEBUG_OUTPUT
    can0 = 0;
    can1 = 0;
    can2 = 0;
    can3 = 0;
    can4 = 0;
    can5 = 0;
#endif
    
    proba = 0;
    temp0 = 1;
    temp1 = 0;
    //calculate the number of sig_prefix for each query
    g_query[i].sig_num = calculate_query_sig_num(&g_query[i], g_m);

    if ((g_query[i].sig_slots = (sig_slot *) malloc(sizeof (sig_slot) * g_query[i].sig_num)) == NULL) {
      fprintf(stderr, "ERROR: Out of memory\n");
      exit(-1);
    }
    
    //build query sigs
    build_query_sigs(&(g_query[i]), g_q, g_m);
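
    /*
     * Candidate-count estimate (interpretation inferred from the
     * arithmetic; the original code is uncommented): beta[k] approximates
     * the fraction of records matching partition k's signatures. Under an
     * independence assumption, temp0 is the probability that a record
     * matches no partition and temp1 the probability that it matches
     * exactly one; switch_point then compares the observed candidate
     * count can0 against that estimate to drive the scan-vs-index choice.
     */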

    k = 0;
    for (j = 0; j < (N - g_m + 3) / 2; j++) {
      //idf[k]= g_query[i].sig_slots[j].sig->last_idf;
      can0 += partition_can0[j];
      beta[k] = (double) (partition_can0[j] - (partition_len[j] - 1) * partition_exact[j]) / (double) g_curr_record_num;
      temp0 *= (1 - beta[k]);
      k++;
    }

    for (j = 0; j < k; j++) {
      //fprintf(stderr, "%e %e\n", temp0, beta[j]);
      proba = (temp0 / (1 - beta[j])) * beta[j];
      //fprintf(stderr, "%e\n", proba);
      temp1 += proba;
    }

    switch_point = (double) can0 / ((double) g_curr_record_num * (temp0 + temp1));
    //average_esti_can1 += ((double)g_curr_record_num * ((double)1 - temp0 - temp1));
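
    /*
     * Adaptive plan choice: when switch_point exceeds the tuned alpha
     * threshold, verify every record directly with the bitwise check
     * instead of probing the inverted index; even and odd values of
     * (N - g_m) use separate thresholds and index-search routines.
     */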

    if ((N - g_m) % 2 == 0) {
      if ( switch_point > alpha_even) {
        StartTimer(&probetimer);
        can1 = g_curr_record_num;
        for (j = 0; j < g_curr_record_num; j++) {
          m_num = bitwise_check_line_based(&(g_query[i]), &(g_records[j]), g_m);
          if (m_num >= g_m) {
            can4++;
          }
        }
        PauseTImer(&probetimer);
        can0 = g_curr_record_num;
      } else {
        StartTimer(&probetimer);
        search_in_index(g_query, i, g_records, g_curr_record_num, g_m, g_q);
        PauseTImer(&probetimer);
      }
    } else {
      if ( switch_point > alpha_odd) {
        can1 = g_curr_record_num;
        StartTimer(&probetimer);
        for (j = 0; j < g_curr_record_num; j++) {
          m_num = bitwise_check_line_based(&(g_query[i]), &(g_records[j]), g_m);
          if (m_num >= g_m) {
            can4++;
          }
        }
        PauseTImer(&probetimer);
        can0 = g_curr_record_num;
      } else {
        StartTimer(&probetimer);
        search_in_index_odd(g_query, i, g_records, g_curr_record_num, g_m, g_q);
        PauseTImer(&probetimer);
      }
    }
  
    //free malloc space
    //free(g_query[i].element_slots);
    //free(g_query[i].sig_slots);
#ifdef DEBUG_OUTPUT
    average_can0 += can0;
    average_can1 += can1;
    average_can2 += can2;
    average_can3 += can3;
    average_can4 += can4;
    average_can5 += can5;
    //average_m_num += avg_m_num;
#endif
  }
  //  fprintf(fp_query_stat,"query[%d]\t%d\t%d\n", i, g_query[i].can0, g_query[i].pair_num);
  //}

  /*
  #ifdef dump_one_query
    char order_buffer[256];
    sprintf(order_buffer, "./order_m=%d_q=%d_o=%d.txt", g_m, g_q, order_num);
    if ((fp_output_order = fopen(order_buffer, "w+")) == NULL) {
      fprintf(stderr, "Error: create file error\n");
      return EXIT_FAILURE;
    }
    for (i =0; i<order_num;i++){
    dump_element_random_order(_element_block_head, g_m, g_q, i);
    }
  #endif
   */

  //#ifdef DEBUG_OUTPUT
  time = ShowUsage();
  fprintf(stdout, "Usage: %s\n", __usage_information);

  average_can0 /= g_curr_query_num;
  average_can1 /= g_curr_query_num;
  average_can2 /= g_curr_query_num;
  average_can3 /= g_curr_query_num;
  average_can4 /= g_curr_query_num;
  average_can5 /= g_curr_query_num;
  //average_esti_can1 /= g_curr_query_num;

  /*
          FILE * fp_avr_cand1_num;
      if ((fp_avr_cand1_num = fopen(avr_can1_name_buffer, "rt+")) == NULL) {
        fprintf(stderr, "No Document has been found.\n");
        return EXIT_FAILURE;
      }
   */

  //print out the query's name
  int p;
  char * data;
  data = query_source;
  p = strlen(data) - 1;
  while (data[p] != '/') {
    p--;
  }
  p++;
  while (data[p] != '\0' && data[p] != '.') {
    fprintf(stderr, "%c", data[p]);
    p++;
  }
  fprintf(stderr, " ");

  fprintf(stderr, "m %d partition_len %d ", g_m, g_q);
  fprintf(stderr, "time %.6f can0 %f ", time, average_can0);
  fprintf(stderr, "can1 %f ", average_can1);
  //fprintf(stderr, "esti_can1 %f ", average_esti_can1);
  fprintf(stderr, "can2 %f ", average_can2);
  fprintf(stderr, "can3 %f ", average_can3);
  //fprintf(stderr, "can4 %Lf ", average_can5);
  fprintf(stderr, "can4 %f bitnum: %d\n", average_can4, bit_per_d);
  //fprintf(stderr, "avg_m_num_in_pfx\t%Lf\n", average_m_num);
  //dump useful order
  //dump_element_order(_element_block_head, g_m, g_q);
  //dump_sig_order(_sig_block_head, g_m, g_q);
  //dump_sig_order_in_prefix(_sig_block_head, g_m, g_q);

  //free space
  /*
    destroy_element_index();
    destroy_sig_index();
    for (i = 0; i < g_curr_record_num; i++) {
      free(g_records[i].element_slots);
      free(g_records[i].sig_slots);
    }

    //destroy_query_element_blocks();
    //destroy_query_sig_blocks();

    free(sig_str_buffer);
    free(_buffer);
   */
  //#endif
#endif 
  return EXIT_SUCCESS;
}
Example #7
/*
 * cdbdisp_dispatchCommand:
 * Send the strCommand SQL statement to all segdbs in the cluster.
 * cancelOnError indicates whether an error occurring on one of the qExec
 * segdbs should cause all still-executing commands to cancel on the other
 * qExecs; normally this would be true.  The commands are sent over the
 * libpq connections that were established during gang creation and are run
 * inside of threads.  The number of segdbs handled by any one thread is
 * determined by the GUC variable gp_connections_per_thread.
 *
 * The CdbDispatchResults objects allocated for the command
 * are returned in ds->primaryResults.
 * The caller, after calling CdbCheckDispatchResult(), can
 * examine the CdbDispatchResults objects, can keep them as
 * long as needed, and ultimately must free them with
 * cdbdisp_destroyDispatcherState() prior to deallocation
 * of the memory context from which they were allocated.
 *
 * NB: Callers should use PG_TRY()/PG_CATCH() if needed to make
 * certain that the CdbDispatchResults objects are destroyed by
 * cdbdisp_destroyDispatcherState() in case of error.
 * To wait for completion, check for errors, and clean up, it is
 * suggested that the caller use cdbdisp_finishCommand().
 */
void
cdbdisp_dispatchCommand(const char *strCommand,
						char *serializedQuerytree,
						int serializedQuerytreelen,
						bool cancelOnError,
						bool needTwoPhase,
						bool withSnapshot, CdbDispatcherState * ds)
{
	DispatchCommandQueryParms queryParms;
	Gang *primaryGang;
	int	nsegdb = getgpsegmentCount();
	CdbComponentDatabaseInfo *qdinfo;

	if (log_dispatch_stats)
		ResetUsage();

	if (DEBUG5 >= log_min_messages)
		elog(DEBUG3, "cdbdisp_dispatchCommand: %s (needTwoPhase = %s)",
			 strCommand, (needTwoPhase ? "true" : "false"));
	else
		elog((Debug_print_full_dtm ? LOG : DEBUG3),
			 "cdbdisp_dispatchCommand: %.50s (needTwoPhase = %s)", strCommand,
			 (needTwoPhase ? "true" : "false"));

	MemSet(&queryParms, 0, sizeof(queryParms));
	queryParms.strCommand = strCommand;
	queryParms.serializedQuerytree = serializedQuerytree;
	queryParms.serializedQuerytreelen = serializedQuerytreelen;

	/*
	 * Allocate a primary QE for every available segDB in the system.
	 */
	primaryGang = allocateWriterGang();

	Assert(primaryGang);

	/*
	 * Serialize a version of our DTX Context Info
	 */
	queryParms.serializedDtxContextInfo =
		qdSerializeDtxContextInfo(&queryParms.serializedDtxContextInfolen,
								  withSnapshot, false,
								  mppTxnOptions(needTwoPhase),
								  "cdbdisp_dispatchCommand");

	/*
	 * sequence server info
	 */
	qdinfo = &(getComponentDatabases()->entry_db_info[0]);
	Assert(qdinfo != NULL && qdinfo->hostip != NULL);
	queryParms.seqServerHost = pstrdup(qdinfo->hostip);
	queryParms.seqServerHostlen = strlen(qdinfo->hostip) + 1;
	queryParms.seqServerPort = seqServerCtl->seqServerPort;

	/*
	 * Dispatch the command.
	 */
	ds->primaryResults = NULL;
	ds->dispatchThreads = NULL;
	cdbdisp_makeDispatcherState(ds, nsegdb, 0, cancelOnError);
	cdbdisp_queryParmsInit(ds, &queryParms);
	ds->primaryResults->writer_gang = primaryGang;

	cdbdisp_dispatchToGang(ds, primaryGang, -1, DEFAULT_DISP_DIRECT);

	/*
	 * don't pfree serializedSnapshot here, it will be pfree'd when
	 * the first thread is destroyed.
	 */
}
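Per the header comment, a caller is responsible for destroying the dispatcher state on error and normally waits and cleans up via cdbdisp_finishCommand(). A hedged caller sketch; the statement text is a placeholder and cdbdisp_finishCommand's exact signature varies across GPDB versions, so treat this as illustrative:

	CdbDispatcherState ds;

	MemSet(&ds, 0, sizeof(ds));

	PG_TRY();
	{
		cdbdisp_dispatchCommand("SET search_path TO public",
								NULL, 0,	/* no serialized query tree */
								true,		/* cancelOnError */
								true,		/* needTwoPhase */
								true,		/* withSnapshot */
								&ds);

		/* wait for completion, check for errors, and free the results */
		cdbdisp_finishCommand(&ds);
	}
	PG_CATCH();
	{
		/* make sure the CdbDispatchResults are destroyed on error */
		cdbdisp_destroyDispatcherState(&ds);
		PG_RE_THROW();
	}
	PG_END_TRY();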