Ejemplo n.º 1
0
Datum
nseq_concat (PG_FUNCTION_ARGS)
{
    NSEQ *a = PG_GETARG_NSEQ_P (0);
    NSEQ *b = PG_GETARG_NSEQ_P (1);
    NSEQ *retval = NULL;
    int32 totalsize, totalblocks, overflow;
    char *newval = NULL;
    int i;

    if(a->rna != b->rna)
    {
        elog(ERROR,"Cannot concatenate DNA and RNA");
    }

    totalsize = a->size+b->size;

    //elog(INFO, "%i", totalsize);

    overflow = totalsize % BLOCKSIZE;

    //elog(INFO, "%i", overflow);

    totalblocks = totalsize / BLOCKSIZE;

    //elog(INFO, "%i", totalblocks);

    if(overflow > 0)
    {
        newval = nseq_concat_slow(a,b);
        PG_RETURN_NSEQ_P (make_nseq(newval,strlen(newval),a->rna));
    }
    else
    {
        newval = (char *) palloc0 (totalblocks);

        memcpy(newval, a->data, a->compressed_size);
        memcpy(newval+(a->size / BLOCKSIZE), b->data, b->compressed_size);
    }

    retval = palloc(CALCDATASZ(a->compressed_size+b->compressed_size));

    retval->rna = a->rna;
    retval->size = totalsize;
    retval->compressed_size = a->compressed_size+b->compressed_size;

    for(i=0; i<HISTSZ; i++)
    {
        retval->histogram[i] = a->histogram[i]+b->histogram[i];
    }

    memcpy(DATAPTR(retval), newval, retval->compressed_size);

    SET_VARSIZE (retval,CALCDATASZ(retval->compressed_size));

    PG_RETURN_NSEQ_P (retval);
}
Ejemplo n.º 2
0
/*
 * make_nseq - build a packed NSEQ value from a nucleotide string.
 *
 * sequence: nucleotide characters (case-insensitive); at least seqlen
 *           characters must be readable unless a NUL terminator occurs
 *           earlier.
 * seqlen:   number of nucleotides to encode.
 * isRNA:    true for RNA ('U' allowed, 'T' rejected), false for DNA
 *           ('T' allowed, 'U' rejected).
 *
 * Each nucleotide is stored as a 2-bit code (A=0, C=1, G=2, T/U=3),
 * BLOCKSIZE nucleotides per output byte, lowest bits first.  A histogram
 * with the number of occurrences of each code is stored alongside.
 *
 * Raises ERROR (and therefore does not return) when 'U' is found in DNA or
 * 'T' is found in RNA.  Encoding stops at the first NUL character.  Any
 * other character is left as zero bits and is not counted in the
 * histogram.
 *
 * Returns a freshly palloc'ed NSEQ with its varlena size set.
 */
static NSEQ *make_nseq(const char* sequence, const size_t seqlen, const bool isRNA)
{
    NSEQ *retval = NULL;
    char *buffer = NULL;
    char tmp;
    size_t pos;
    uint32 offset, shift;
    uint32 bufsize = seqlen / BLOCKSIZE;
    int32 histogram[HISTSZ] = {0};
    bool run = true;

    /* A trailing partial block still needs a byte of its own. */
    if((seqlen % BLOCKSIZE) != 0)
    {
        bufsize += 1;
    }

    /* Zero-filled, so code 0 ('A') and padding bits need no explicit write. */
    buffer = (char*) palloc0(bufsize*sizeof(char));

    /*
     * Walk the input by nucleotide position and never beyond seqlen, so a
     * partially filled last block does not cause reads past the end of the
     * input.
     */
    for(pos = 0; run && pos < seqlen; pos++)
    {
        offset = pos / BLOCKSIZE;
        shift = (pos % BLOCKSIZE) * 2;

        /* toupper() requires a value representable as unsigned char. */
        tmp = toupper((unsigned char) sequence[pos]);

        switch(tmp)
        {
        case 'A':
            histogram[0]++;
            break;
        case 'C':
            buffer[offset] |= (0x1 << shift);
            histogram[1]++;
            break;
        case 'G':
            buffer[offset] |= (0x2 << shift);
            histogram[2]++;
            break;
        case 'U':
            if(!isRNA)
            {
                /* elog(ERROR) does not return. */
                elog(ERROR, "Unknown nucleotide for DNA: %c\n", tmp);
            }
            buffer[offset] |= (0x3 << shift);
            histogram[3]++;
            break;
        case 'T':
            if(isRNA)
            {
                /* elog(ERROR) does not return. */
                elog(ERROR, "Unknown nucleotide for RNA: %c\n", tmp);
            }
            buffer[offset] |= (0x3 << shift);
            histogram[3]++;
            break;
        case '\0':
            /* Early terminator: stop encoding right here. */
            run = false;
            break;
        default:
            /* Unrecognized characters are skipped (bits stay zero). */
            break;
        }
    }

    retval = palloc(CALCDATASZ(bufsize));

    retval->rna = isRNA;
    retval->size = seqlen;
    retval->compressed_size = bufsize;
    memcpy(retval->histogram, histogram, sizeof(histogram));

    memcpy(DATAPTR(retval), buffer, bufsize);

    SET_VARSIZE (retval,CALCDATASZ(bufsize));

    return retval;
}
Ejemplo n.º 3
0
/*
 * new_molecule - assemble a MOLECULE value from a SMILES string and a
 * molfile.
 *
 * smiles:  NUL-terminated SMILES representation.
 * molfile: NUL-terminated molfile text.
 *
 * The result is palloc'ed and zero-filled, then populated with:
 *   - the SMILES and molfile strings (including their terminators),
 *   - the ancillary data produced by ob_lyophilize_molecule(), whose first
 *     unsigned int holds the length of the payload that follows it,
 *   - the InChI key from ob_smiles_to_inchikey() (exactly INCHIKEYSZ
 *     characters, stored without terminator),
 *   - the fingerprint computed by ob_fp_bin() from the ancillary payload,
 *   - the "disconnected" flag, set when the SMILES contains a '.'.
 *
 * Raises ERROR when lyophilization fails or when no InChI key of the
 * expected length can be generated.  The buffers returned by the ob_*
 * helpers are released with free(); because elog(ERROR) does not return,
 * they are freed before the error is raised so they are not leaked.
 */
MOLECULE *
new_molecule (char *smiles, char *molfile)
{
  unsigned int sizemf;
  unsigned int sizesmi;
  unsigned int anchdr = 0;
  size_t totalsize;
  MOLECULE *result;
  char *inchikey = NULL;
  char *ancillarydata = NULL;
  char *aidata = NULL;
  uint32 ancsize = 0;

  ancillarydata = ob_lyophilize_molecule(smiles);

  if(ancillarydata == NULL) {
    elog (ERROR, "Molecule lyophilization failed! SMILES:\n %s", smiles);
  }

  /* Length prefix first, payload directly behind it. */
  memcpy(&anchdr, ancillarydata, sizeof(anchdr));
  ancsize = anchdr;
  aidata = ancillarydata + sizeof(unsigned int);

  sizemf = strlen (molfile)+1;
  sizesmi = strlen (smiles)+1;
  totalsize = CALCDATASZ (sizemf, sizesmi, ancsize);

  result = (MOLECULE *) palloc (totalsize);
  memset (result, 0x0, totalsize);

  if (strchr (smiles, '.') != NULL)
    result->disconnected = true;

  result->sizemf = sizemf;
  result->sizesmi = sizesmi;

  /* Sizes include the terminating NUL, so these are exact copies. */
  memcpy (SMIPTR(result), smiles, sizesmi);
  memcpy (MFPTR(result), molfile, sizemf);

  memcpy(ANCPTR(result), aidata, ancsize);

  inchikey = ob_smiles_to_inchikey (smiles);

  if(inchikey == NULL || strlen(inchikey) != INCHIKEYSZ) {
    /* Release the malloc'ed buffers first: elog(ERROR) never returns. */
    free (inchikey);
    free (ancillarydata);
    elog (ERROR, "InChI key generation failled ! SMILES:\n %s", smiles);
  }

  memcpy(result->inchikey, inchikey, INCHIKEYSZ);
  free (inchikey);
  inchikey=NULL;

  /* aidata points into ancillarydata, so compute the fingerprint before freeing. */
  ob_fp_bin(aidata, result->fp);

  free(ancillarydata);
  ancillarydata=NULL;

  SET_VARSIZE (result,totalsize);
  return result;
}