예제 #1
0
/*
 * Build a SENNA_CHK model from the binary file designated by path/subpath
 * (both are handed to SENNA_fopen as-is).
 *
 * The file is consumed strictly sequentially: window size, the word / caps /
 * pos lookup tables, layer 1 (weight, bias), layer 2 (weight, bias), the
 * Viterbi initial and transition scores, the three padding indices and a
 * trailing float sentinel that must convert to 777.  Every tensor reader
 * also stores the dimensions it finds in the file into the size fields
 * passed to it, so some size fields are written more than once.
 *
 * Returns the newly allocated model.  Only hidden_state is preallocated;
 * input_state, output_state and labels start out NULL.
 */
SENNA_CHK *SENNA_CHK_new(const char *path, const char *subpath) {
  SENNA_CHK *model = SENNA_malloc(sizeof(SENNA_CHK), 1);
  FILE *in;
  float sentinel;

  memset(model, 0, sizeof(SENNA_CHK));

  in = SENNA_fopen(path, subpath, "rb");

  /* header */
  SENNA_fread(&model->window_size, sizeof(int), 1, in);

  /* lookup tables */
  SENNA_fread_tensor_2d(&model->ll_word_weight,
                        &model->ll_word_size, &model->ll_word_max_idx, in);
  SENNA_fread_tensor_2d(&model->ll_caps_weight,
                        &model->ll_caps_size, &model->ll_caps_max_idx, in);
  SENNA_fread_tensor_2d(&model->ll_posl_weight,
                        &model->ll_posl_size, &model->ll_posl_max_idx, in);

  /* network layers */
  SENNA_fread_tensor_2d(&model->l1_weight,
                        &model->input_state_size, &model->hidden_state_size, in);
  SENNA_fread_tensor_1d(&model->l1_bias, &model->hidden_state_size, in);
  SENNA_fread_tensor_2d(&model->l2_weight,
                        &model->hidden_state_size, &model->output_state_size, in);
  SENNA_fread_tensor_1d(&model->l2_bias, &model->output_state_size, in);

  /* Viterbi scores */
  SENNA_fread_tensor_1d(&model->viterbi_score_init,
                        &model->output_state_size, in);
  SENNA_fread_tensor_2d(&model->viterbi_score_trans,
                        &model->output_state_size, &model->output_state_size, in);

  /* padding indices */
  SENNA_fread(&model->ll_word_padding_idx, sizeof(int), 1, in);
  SENNA_fread(&model->ll_caps_padding_idx, sizeof(int), 1, in);
  SENNA_fread(&model->ll_posl_padding_idx, sizeof(int), 1, in);

  /* trailing sentinel, validated once the file is closed */
  SENNA_fread(&sentinel, sizeof(float), 1, in);
  SENNA_fclose(in);

  if (777 != (int)sentinel)
    SENNA_error("chk: data corrupted (or not IEEE floating computer)");

  /* work buffers: only the hidden state has a size known at load time */
  model->input_state = NULL;
  model->output_state = NULL;
  model->labels = NULL;
  model->hidden_state = SENNA_malloc(sizeof(float), model->hidden_state_size);

  /* some info if you want verbose */
  SENNA_message("chk: window size: %d", model->window_size);
  SENNA_message("chk: vector size in word lookup table: %d", model->ll_word_size);
  SENNA_message("chk: word lookup table size: %d", model->ll_word_max_idx);
  SENNA_message("chk: vector size in caps lookup table: %d", model->ll_caps_size);
  SENNA_message("chk: caps lookup table size: %d", model->ll_caps_max_idx);
  SENNA_message("chk: vector size in pos lookup table: %d", model->ll_posl_size);
  SENNA_message("chk: pos lookup table size: %d", model->ll_posl_max_idx);
  SENNA_message("chk: number of hidden units: %d", model->hidden_state_size);
  SENNA_message("chk: number of classes: %d", model->output_state_size);

  return model;
}
예제 #2
0
파일: SENNA_nn.cpp 프로젝트: Averroes/djinn
/*
 * Viterbi decoding: compute the best scoring label sequence.
 *
 *   path       - output, T entries; path[t] receives the label chosen at step t
 *   init       - N initial scores, one per label
 *   transition - N*N scores, read as transition[i + j * N] when going from
 *                label i (previous step) to label j (current step)
 *   emission   - N*T scores, read as emission[j + t * N] for label j at step t
 *   N          - number of labels
 *   T          - number of steps
 *
 * Ties are resolved in favour of the lowest label index (strict '>' test).
 * When T is not positive there is nothing to decode and path is left
 * untouched.
 */
void SENNA_nn_viterbi(int *path, float *init, float *transition,
                      float *emission, int N, int T) {
  float *delta, *deltap;
  int *phi;
  int i, j, t;

  /* without this guard an empty sequence would read emission[0] and
     write path[-1] */
  if (T <= 0) return;

  /* misc allocations */
  delta = SENNA_malloc(sizeof(float), N);
  deltap = SENNA_malloc(sizeof(float), N);
  /* phi[j + t * N]: best predecessor of label j at step t (row 0 unused);
     it holds ints, so it is sized with sizeof(int) */
  phi = SENNA_malloc(sizeof(int), N * T);

  /* init */
  for (i = 0; i < N; i++) deltap[i] = init[i] + emission[i];

  /* recursion: deltap holds the scores of step t-1, delta receives step t */
  for (t = 1; t < T; t++) {
    float *deltan = delta;
    for (j = 0; j < N; j++) {
      float maxValue = -FLT_MAX;
      int maxIndex = 0;
      for (i = 0; i < N; i++) {
        float z = deltap[i] + transition[i + j * N];
        if (z > maxValue) {
          maxValue = z;
          maxIndex = i;
        }
      }
      delta[j] = maxValue + emission[j + t * N];
      phi[j + t * N] = maxIndex;
    }
    /* swap the two score buffers so deltap always is the latest step */
    delta = deltap;
    deltap = deltan;
  }

  /* best final label */
  {
    float maxValue = -FLT_MAX;
    int maxIndex = 0;
    for (j = 0; j < N; j++) {
      if (deltap[j] > maxValue) {
        maxValue = deltap[j];
        maxIndex = j;
      }
    }
    path[T - 1] = maxIndex;
  }

  /* follow the back-pointers from the last step to the first */
  for (t = T - 2; t >= 0; t--) path[t] = phi[path[t + 1] + (t + 1) * N];

  SENNA_free(delta);
  SENNA_free(deltap);
  SENNA_free(phi);
}
예제 #3
0
파일: SENNA_Hash.c 프로젝트: Aureliu/senna
/*
 * Build a SENNA_Hash from a text file holding one key per line.
 *
 * The file (opened through SENNA_fopen with path/filename) is read twice:
 * once to count the lines, once to copy each line into its own buffer.
 * Lines are read with a MAX_KEY_SIZE buffer, so a longer line is split
 * into several keys (identically in both passes).
 *
 * The trailing newline of each line is removed when present; a last line
 * that is not newline-terminated is kept in full.
 *
 * The returned hash owns the key table; is_admissible_key is NULL and
 * is_sorted is 1 only when the keys are strictly increasing per strcmp().
 */
SENNA_Hash* SENNA_Hash_new(const char *path, const char *filename)
{
  FILE *f;
  SENNA_Hash *hash;
  char **keys = NULL;
  int n_keys;
  char key[MAX_KEY_SIZE]; /* line buffer shared by both passes */
  int i;

  SENNA_message("loading hash: %s%s", (path ? path : ""), (filename ? filename : ""));

  /* first pass: count the lines so the key table can be sized */
  f = SENNA_fopen(path, filename, "rt"); /* the t is to comply with Windows */
  n_keys = 0;
  while(fgets(key, MAX_KEY_SIZE, f))
    n_keys++;
  SENNA_fclose(f);
  keys = SENNA_malloc(sizeof(char*), n_keys);

  /* second pass: store a private copy of every line */
  f = SENNA_fopen(path, filename, "rt"); /* the t is to comply with Windows */
  n_keys = 0;
  while(fgets(key, MAX_KEY_SIZE, f))
  {
    /* cut at the newline if there is one; strcspn() returns the string
       length otherwise, so an unterminated last line keeps all its
       characters and an empty string is never indexed at -1 */
    key[strcspn(key, "\n")] = '\0';
    keys[n_keys] = SENNA_malloc(sizeof(char), strlen(key)+1);
    strcpy(keys[n_keys], key);
    n_keys++;
  }
  SENNA_fclose(f);

  hash = SENNA_malloc(sizeof(SENNA_Hash), 1);
  hash->keys = keys;
  hash->size = n_keys;
  hash->is_admissible_key = NULL;

  /* sorted or unsorted hash ? */
  /* (unsorted cannot return an index for a key) */
  hash->is_sorted = 1;
  for(i = 0; i < n_keys-1; i++)
  {
    /* equal neighbours also count as unsorted */
    if(strcmp(keys[i], keys[i+1]) >= 0)
    {
      hash->is_sorted = 0;
      break;
    }
  }

  return hash;
}
예제 #4
0
/*
 * Open a file whose name is given in up to two parts.
 *
 *   path, subpath - at least one must be non-NULL.  When both are given
 *                   they are concatenated as-is (no separator is inserted);
 *                   otherwise the non-NULL one is used alone.
 *   mode          - passed unchanged to fopen().
 *
 * Reports through SENNA_error() when both parts are NULL, when the file
 * cannot be opened, or when char/int/float do not have the sizes (1/4/4)
 * expected for loading binary files.
 *
 * Returns the opened stream.
 */
FILE *SENNA_fopen(const char *path, const char *subpath, const char *mode)
{
    FILE *f;
    char *complete_path = NULL;

    if(!path && !subpath)
        SENNA_error("SENNA_fopen: path or subpath should be non NULL");

    if(path && subpath)
    {
        size_t pathsize = strlen(path);
        size_t subpathsize = strlen(subpath);

        /* +1 for the terminating NUL copied by the second strcpy() */
        complete_path = SENNA_malloc(sizeof(char), pathsize+subpathsize+1);
        strcpy(complete_path, path);
        strcpy(complete_path+pathsize, subpath);
    }

    f = fopen((complete_path ? complete_path : (path ? path : subpath)), mode);
    if(!f)
        SENNA_error("unable to open file <%s%s>", (path ? path : ""), (subpath ? subpath : ""));

    /* sanity checks on the type sizes the binary readers depend on */
    if(sizeof(char) != 1)
        SENNA_error("char size is not 1, sorry can't load binary files");

    if(sizeof(int) != 4)
        SENNA_error("int size is not 4, sorry can't load binary files");

    if(sizeof(float) != 4)
        SENNA_error("float size is not 4, sorry can't load binary files");

    /* the name is only needed for the fopen() call above */
    SENNA_free(complete_path);
    return f;
}
예제 #5
0
/*
 * Reverse, in place, the byte order of each of n_blocks consecutive blocks
 * of block_size bytes starting at ptr_.
 *
 * Blocks of one byte (or a non-positive block_size) need no work and the
 * memory is left untouched; the same holds when n_blocks is not positive.
 * The swap is done directly inside the caller's memory, so no scratch
 * buffer is allocated.
 */
static void buffer_reverse_memory(void *ptr_, int block_size, int n_blocks)
{
    char *block = (char *)ptr_;
    int i;

    if(block_size <= 1)
        return;

    for(i = 0; i < n_blocks; i++)
    {
        char *lo = block;
        char *hi = block + block_size - 1;

        /* swap the outermost bytes and walk towards the middle */
        while(lo < hi)
        {
            char tmp = *lo;
            *lo++ = *hi;
            *hi-- = tmp;
        }

        block += block_size;
    }
}
예제 #6
0
/*
 * Read a 2-D float tensor from stream.
 *
 * Two ints are read first and stored through n_row, then n_column; a buffer
 * of (*n_row)*(*n_column) floats is then allocated, returned through *ptr
 * and filled from the stream.
 *
 * The element count is taken from the pointed-to values after BOTH header
 * reads, so a caller passing the same int for n_row and n_column gets the
 * second value squared.
 */
void SENNA_fread_tensor_2d(float **ptr, int *n_row, int *n_column, FILE *stream)
{
    int n_elements;

    /* header: the two dimensions */
    SENNA_fread(n_row, sizeof(int), 1, stream);
    SENNA_fread(n_column, sizeof(int), 1, stream);

    /* payload */
    n_elements = (*n_row)*(*n_column);
    *ptr = SENNA_malloc(sizeof(float), n_elements);
    SENNA_fread(*ptr, sizeof(float), n_elements, stream);
}
예제 #7
0
SENNA_PSG* SENNA_PSG_new(const char *path, const char *subpath)
{
  SENNA_PSG *psg = SENNA_malloc(sizeof(SENNA_PSG), 1);
  FILE *f;
  float dummy;

  memset(psg, 0, sizeof(SENNA_PSG));

  f = SENNA_fopen(path, subpath, "rb");

  SENNA_fread_tensor_2d(&psg->ll_word_weight, &psg->ll_word_size, &psg->ll_word_max_idx, f);
  SENNA_fread_tensor_2d(&psg->ll_caps_weight, &psg->ll_caps_size, &psg->ll_caps_max_idx, f);
  SENNA_fread_tensor_2d(&psg->ll_posl_weight, &psg->ll_posl_size, &psg->ll_posl_max_idx, f);
  SENNA_fread_tensor_2d(&psg->ll_psgl_weight, &psg->ll_psgl_size, &psg->ll_psgl_max_idx, f);
  SENNA_fread_tensor_2d(&psg->l1_weight, &psg->input_state_size, &psg->l1_state_size, f);
  SENNA_fread_tensor_1d(&psg->l1_bias, &psg->l1_state_size, f);
  SENNA_fread_tensor_2d(&psg->l2_bias, &psg->l1_state_size, &psg->window_size, f);
  SENNA_fread_tensor_2d(&psg->l3_weight, &psg->l2_state_size, &psg->l3_state_size, f);
  SENNA_fread_tensor_1d(&psg->l3_bias, &psg->l3_state_size, f);
  SENNA_fread_tensor_2d(&psg->l4_weight, &psg->l3_state_size, &psg->l4_state_size, f);
  SENNA_fread_tensor_1d(&psg->l4_bias, &psg->l4_state_size, f);

  SENNA_fread_tensor_1d(&psg->viterbi_score_init, &psg->l4_state_size, f);
  SENNA_fread_tensor_2d(&psg->viterbi_score_trans, &psg->l4_state_size, &psg->l4_state_size, f);

  SENNA_fread(&psg->ll_word_padding_idx, sizeof(int), 1, f);
  SENNA_fread(&psg->ll_caps_padding_idx, sizeof(int), 1, f);
  SENNA_fread(&psg->ll_posl_padding_idx, sizeof(int), 1, f);
  SENNA_fread(&psg->ll_psgl_padding_idx, sizeof(int), 1, f);

  SENNA_fread(&dummy, sizeof(float), 1, f);
  SENNA_fclose(f);

  if((int)dummy != 777)
    SENNA_error("psg: data corrupted (or not IEEE floating computer)");

  psg->input_state = NULL;
  psg->l1_state = NULL;
  psg->l2_state = NULL;
  psg->l3_state = NULL;
  psg->l4_state = NULL;
  psg->labels = NULL;
  psg->treillis = SENNA_Treillis_new();

  return psg;
}
예제 #8
0
파일: SENNA_Hash.c 프로젝트: Aureliu/senna
/*
 * Build a hash with SENNA_Hash_new(path, filename) and attach to it the
 * content of the binary file path/admissible_keys_filename.
 *
 * The byte length of that file must equal the number of keys in the hash,
 * otherwise SENNA_error() is called.  The whole file is loaded into a newly
 * allocated buffer stored in hash->is_admissible_key.
 */
SENNA_Hash *SENNA_Hash_new_with_admissible_keys(const char *path, const char *filename, const char *admissible_keys_filename)
{
  SENNA_Hash *hash = SENNA_Hash_new(path, filename);
  FILE *flags_file = SENNA_fopen(path, admissible_keys_filename, "rb");
  int n_flags = 0;

  /* the file size is the stream position once moved to the end */
  SENNA_fseek(flags_file, 0, SEEK_END);
  n_flags = SENNA_ftell(flags_file);

  if(hash->size != n_flags)
    SENNA_error("inconsistent hash and admissible key files");

  /* back to the start, then load every byte */
  SENNA_fseek(flags_file, 0, SEEK_SET);
  hash->is_admissible_key = SENNA_malloc(sizeof(char), n_flags);
  SENNA_fread(hash->is_admissible_key, 1, n_flags, flags_file);
  SENNA_fclose(flags_file);

  return hash;
}
예제 #9
0
/*
 * Run the PSG network on one sentence, producing one row of labels per level.
 *
 *   psg            - model; its state buffers and label buffer are grown
 *                    with SENNA_realloc and reused across calls
 *   sentence_words,
 *   sentence_caps,
 *   sentence_posl  - per-token indices for the word / caps / posl lookup
 *                    tables, sentence_size entries each
 *   sentence_size  - number of tokens
 *   labels_        - output: set to psg->labels, which holds *n_level_ rows
 *                    of sentence_size ints (row k at offset k*sentence_size).
 *                    The buffer belongs to psg.
 *   n_level_       - output: number of levels computed
 *
 * Each iteration feeds the labels found so far back into the network
 * through the psgl lookup table, decodes a new level with the treillis
 * Viterbi, and stops once a level is all zero labels or the whole sentence
 * forms a single segment.
 *
 * NOTE(review): sentence_size is assumed to be at least 1; with 0 the
 * single-segment test below indexes sentence_segl out of bounds - confirm
 * callers never pass an empty sentence.
 */
void SENNA_PSG_forward(SENNA_PSG *psg, const int *sentence_words, const int *sentence_caps, const int *sentence_posl, int sentence_size,
                       int **labels_, int *n_level_)
{
  int *sentence_psgl = SENNA_malloc(sizeof(int), sentence_size);
  int *sentence_segl = SENNA_malloc(sizeof(int), sentence_size);
  /* one extra leading entry: the per-token labels are read from index 1 on */
  int *start_and_sentence_level_label = SENNA_malloc(sizeof(int), sentence_size+1);
  int t;
  int level;

  /* no label history and no segmentation before the first level */
  for(t = 0; t < sentence_size; t++)
  {
    sentence_psgl[t] = 0;
    sentence_segl[t] = 0;
  }

  psg->input_state = SENNA_realloc(psg->input_state, sizeof(float), sentence_size*psg->input_state_size);
  psg->l1_state = SENNA_realloc(psg->l1_state, sizeof(float), sentence_size*psg->l1_state_size);
  psg->l2_state = SENNA_realloc(psg->l2_state, sizeof(float), sentence_size*psg->l2_state_size);
  psg->l3_state = SENNA_realloc(psg->l3_state, sizeof(float), sentence_size*psg->l3_state_size);
  psg->l4_state = SENNA_realloc(psg->l4_state, sizeof(float), sentence_size*psg->l4_state_size);
  
  /* the word, caps and posl features do not change between levels: look them up once */
  SENNA_nn_lookup(psg->input_state,                                                       psg->input_state_size, psg->ll_word_weight, psg->ll_word_size, psg->ll_word_max_idx, sentence_words, sentence_size, 0, 0);
  SENNA_nn_lookup(psg->input_state+psg->ll_word_size,                                     psg->input_state_size, psg->ll_caps_weight, psg->ll_caps_size, psg->ll_caps_max_idx, sentence_caps,  sentence_size, 0, 0);
  SENNA_nn_lookup(psg->input_state+psg->ll_word_size+psg->ll_caps_size,                   psg->input_state_size, psg->ll_posl_weight, psg->ll_posl_size, psg->ll_posl_max_idx, sentence_posl,  sentence_size, 0, 0);

  level = 0;
  while(1)
  {
    int all_tags_are_o;
    int all_in_one_segment;

    /* the psgl feature carries the labels of the previous levels, so it is refreshed every level */
    SENNA_nn_lookup(psg->input_state+psg->ll_word_size+psg->ll_caps_size+psg->ll_posl_size, psg->input_state_size, psg->ll_psgl_weight, psg->ll_psgl_size, psg->ll_psgl_max_idx, sentence_psgl,  sentence_size, 0, 0);
    
    SENNA_nn_temporal_convolution(psg->l1_state, psg->l1_state_size, psg->l1_weight, psg->l1_bias, psg->input_state, psg->input_state_size, sentence_size, 1);
    SENNA_nn_temporal_max_convolution(psg->l2_state, psg->l2_bias, psg->l1_state, psg->l1_state_size, sentence_size, psg->window_size);
    SENNA_nn_temporal_convolution(psg->l3_state, psg->l3_state_size, psg->l3_weight, psg->l3_bias, psg->l2_state, psg->l1_state_size, sentence_size, 1);
    SENNA_nn_hardtanh(psg->l3_state, psg->l3_state, psg->l3_state_size*sentence_size);
    SENNA_nn_temporal_convolution(psg->l4_state, psg->l4_state_size, psg->l4_weight, psg->l4_bias, psg->l3_state, psg->l3_state_size, sentence_size, 1);
    
    SENNA_Treillis_buildfromscorewithsegmentation(psg->treillis, psg->l4_state, psg->viterbi_score_init, psg->viterbi_score_trans, sentence_segl, psg->l4_state_size, sentence_size);    
    SENNA_Treillis_viterbi(psg->treillis, start_and_sentence_level_label);

    /* update history and segmentation */
    all_tags_are_o = 1;
    for(t = 0; t < sentence_size; t++)
    {
      if(start_and_sentence_level_label[t+1])
      {
        sentence_psgl[t] = start_and_sentence_level_label[t+1]; /* note we always keep if something was there */
        /* presumably non-zero labels come in groups of four segment tags
           (compared against SEG_B/SEG_I/SEG_E/SEG_S below) - TODO confirm */
        sentence_segl[t] = (start_and_sentence_level_label[t+1]-1)%4+1;
        
        all_tags_are_o = 0;
      }
    }

    /* check if only one big segment */
    if(sentence_size == 1)
      all_in_one_segment = (sentence_segl[0] == SEG_S);
    else
      all_in_one_segment = (sentence_segl[0] == SEG_B) && (sentence_segl[sentence_size-1] == SEG_E);

    for(t = 1; all_in_one_segment && (t < sentence_size-1); t++)
    {
      if(sentence_segl[t] != SEG_I)
        all_in_one_segment = 0;
    }

    level++;

    /* grow the label buffer only when this level does not fit; it holds
       ints, so it is sized and copied with sizeof(int) */
    if(psg->max_labels_size < sentence_size*level)
    {
      psg->labels = SENNA_realloc(psg->labels, sizeof(int), sentence_size*level);
      psg->max_labels_size = sentence_size*level;
    }
    memcpy(psg->labels+(level-1)*sentence_size, start_and_sentence_level_label+1, sizeof(int)*sentence_size);

    if(all_in_one_segment || all_tags_are_o)
      break;
  }

  /* release with the deallocator matching SENNA_malloc */
  SENNA_free(sentence_psgl);
  SENNA_free(sentence_segl);
  SENNA_free(start_and_sentence_level_label);

  *labels_ = psg->labels;
  *n_level_ = level;
}
예제 #10
0
/*
 * Read a 1-D float tensor from stream.
 *
 * One int is read and stored through n_row; a buffer of *n_row floats is
 * then allocated, returned through *ptr and filled from the stream.
 */
void SENNA_fread_tensor_1d(float **ptr, int *n_row, FILE *stream)
{
    int n_elements;

    /* header: the length */
    SENNA_fread(n_row, sizeof(int), 1, stream);

    /* payload */
    n_elements = *n_row;
    *ptr = SENNA_malloc(sizeof(float), n_elements);
    SENNA_fread(*ptr, sizeof(float), n_elements, stream);
}
예제 #11
0
/*
 * Build a SENNA_SRL model from the binary file designated by path/subpath
 * (both are handed to SENNA_fopen as-is).
 *
 * The file is consumed strictly sequentially: window size, five lookup
 * tables (word, caps, chkl, posv, posw), the three layer-1 weight blocks
 * (wcc, pv, pw) and the layer-1 bias, layers 3 and 4 (weight, bias), the
 * Viterbi initial and transition scores, three padding indices and a
 * trailing float sentinel that must convert to 777.  Every tensor reader
 * also stores the dimensions found in the file into the size fields passed
 * to it, so some size fields are written more than once; the first
 * dimension of the layer-1 weight blocks is read into a local and dropped.
 *
 * Returns the newly allocated model with every work buffer set to NULL and
 * the service/debug flags and counters cleared.
 */
SENNA_SRL *SENNA_SRL_new(const char *path, const char *subpath) {
  SENNA_SRL *model = SENNA_malloc(sizeof(SENNA_SRL), 1);
  FILE *in;
  float sentinel;
  int ignored_rows; /* first dimension of the layer-1 weights, not kept */

  in = SENNA_fopen(path, subpath, "rb");

  /* header */
  SENNA_fread(&model->window_size, sizeof(int), 1, in);

  /* lookup tables */
  SENNA_fread_tensor_2d(&model->ll_word_weight,
                        &model->ll_word_size, &model->ll_word_max_idx, in);
  SENNA_fread_tensor_2d(&model->ll_caps_weight,
                        &model->ll_caps_size, &model->ll_caps_max_idx, in);
  SENNA_fread_tensor_2d(&model->ll_chkl_weight,
                        &model->ll_chkl_size, &model->ll_chkl_max_idx, in);
  SENNA_fread_tensor_2d(&model->ll_posv_weight,
                        &model->ll_posv_size, &model->ll_posv_max_idx, in);
  SENNA_fread_tensor_2d(&model->ll_posw_weight,
                        &model->ll_posw_size, &model->ll_posw_max_idx, in);

  /* layer 1: three weight blocks sharing one bias */
  SENNA_fread_tensor_2d(&model->l1_weight_wcc,
                        &ignored_rows, &model->hidden_state1_size, in);
  SENNA_fread_tensor_2d(&model->l1_weight_pv,
                        &ignored_rows, &model->hidden_state1_size, in);
  SENNA_fread_tensor_2d(&model->l1_weight_pw,
                        &ignored_rows, &model->hidden_state1_size, in);
  SENNA_fread_tensor_1d(&model->l1_bias, &model->hidden_state1_size, in);

  /* layer 3 */
  SENNA_fread_tensor_2d(&model->l3_weight,
                        &model->hidden_state1_size, &model->hidden_state3_size, in);
  SENNA_fread_tensor_1d(&model->l3_bias, &model->hidden_state3_size, in);

  /* layer 4 */
  SENNA_fread_tensor_2d(&model->l4_weight,
                        &model->hidden_state3_size, &model->output_state_size, in);
  SENNA_fread_tensor_1d(&model->l4_bias, &model->output_state_size, in);

  /* Viterbi scores */
  SENNA_fread_tensor_1d(&model->viterbi_score_init,
                        &model->output_state_size, in);
  SENNA_fread_tensor_2d(&model->viterbi_score_trans,
                        &model->output_state_size, &model->output_state_size, in);

  /* padding indices */
  SENNA_fread(&model->ll_word_padding_idx, sizeof(int), 1, in);
  SENNA_fread(&model->ll_caps_padding_idx, sizeof(int), 1, in);
  SENNA_fread(&model->ll_chkl_padding_idx, sizeof(int), 1, in);

  /* trailing sentinel, validated once the file is closed */
  SENNA_fread(&sentinel, sizeof(float), 1, in);
  SENNA_fclose(in);

  if (777 != (int)sentinel)
    SENNA_error("srl: data corrupted (or not IEEE floating computer)");

  /* states */
  model->sentence_posv = NULL;
  model->sentence_posw = NULL;
  model->input_state = NULL;
  model->input_state_wcc = NULL;
  model->input_state_pv = NULL;
  model->input_state_pw = NULL;
  model->hidden_state1 = NULL;
  model->hidden_state1_wcc = NULL;
  model->hidden_state1_pv = NULL;
  model->hidden_state1_pw = NULL;
  model->hidden_state2 = NULL;
  model->hidden_state3 = NULL;
  model->output_state = NULL;
  model->labels = NULL;
  model->labels_size = 0;

  /* flags and counters */
  model->service = false;
  model->debug = false;
  model->calls = 0;
  model->dnntime = 0;
  model->apptime = 0;

  /* some info if you want verbose */
  SENNA_message("srl: window size: %d", model->window_size);
  SENNA_message("srl: vector size in word lookup table: %d", model->ll_word_size);
  SENNA_message("srl: word lookup table size: %d", model->ll_word_max_idx);
  SENNA_message("srl: vector size in caps lookup table: %d", model->ll_caps_size);
  SENNA_message("srl: caps lookup table size: %d", model->ll_caps_max_idx);
  SENNA_message("srl: vector size in verb position lookup table: %d", model->ll_posv_size);
  SENNA_message("srl: verb position lookup table size: %d", model->ll_posv_max_idx);
  SENNA_message("srl: vector size in word position lookup table: %d", model->ll_posw_size);
  SENNA_message("srl: word position lookup table size: %d", model->ll_posw_max_idx);
  SENNA_message("srl: number of hidden units (convolution): %d", model->hidden_state1_size);
  SENNA_message("srl: number of hidden units (hidden layer): %d", model->hidden_state3_size);
  SENNA_message("srl: number of classes: %d", model->output_state_size);

  return model;
}