Example #1
0
/*
 * Checks getCodeword() against the dictionary `dict` (declared outside this
 * function):
 *   - the one-symbol string {65} is expected to map to code 65;
 *   - the four-symbol string {0, 1, 2, 3} is expected to map to code 256.
 *
 * Returns 1 when the second lookup yields the wrong code, 0 otherwise.
 * A mismatch on the first lookup is reported but is not treated as fatal.
 * Every string created here is released before returning.
 */
int test_getCodeword(){
	int result = 0;

	MyString* S = newString();
	appendString(S, 65);
	int code = getCodeword(dict, S);
	if(code != 65){
		printf("FAIL:\tgetCodeword: string \"65\" should correspond to code 65, was %d.\n", code);
		// not fatal here: carry on so the second lookup is still exercised
	}
	deleteString(S);

	S = newString();
	appendString(S, 0);
	appendString(S, 1);
	appendString(S, 2);
	appendString(S, 3);

	code = getCodeword(dict, S);
	if(code != 256){
		printf("FAIL:\tgetCodeword: string \"0123\" should correspond to code 256, was %d.\n", code);
		result = 1;
	}

	// single exit so the second string is freed on both the passing and failing path
	deleteString(S);
	return result;
}
Example #2
0
// Populate neighborLookup for every word of length wordLength in the query.
// For each query position the word is turned into a codeword; if that
// codeword's entry still has a neighbour count of zero, its neighbours are
// gathered with wordLookupSM_getNeighbours and their codewords are copied
// into a freshly allocated array stored in the entry.
void neighbourLookup_build(struct PSSMatrix PSSMatrix,
        struct scoreMatrix scoreMatrix, int4 wordLength) {
    int4 start;
    int4 found;
    int4 word;
    int4 remaining;
    int4 capacity = proteinLookup_numWords;

    // Scratch buffer reused for every query position, released at the end
    struct neighbour *scratch =
        (struct neighbour *)global_malloc(sizeof(struct neighbour) * capacity);

    for (start = 0; start < PSSMatrix.length - wordLength + 1; start++) {
        word = getCodeword(PSSMatrix.queryCodes + start, wordLength);

        // A non-zero count means this codeword's entry has already been filled
        if (neighborLookup[word].numNeighbours != 0)
            continue;

        found = 0;
        wordLookupSM_getNeighbours(PSSMatrix.queryCodes, scoreMatrix, start,
                &found, scratch);

        neighborLookup[word].numNeighbours = found;
        neighborLookup[word].neighbours =
            (int4 *)global_malloc(sizeof(int4) * found);

        // Copy only the codeword of each neighbour, walking from last to first
        for (remaining = found; remaining > 0; remaining--) {
            neighborLookup[word].neighbours[remaining - 1] =
                scratch[remaining - 1].codeword;
        }
    }

    free(scratch);
}
Example #3
0
// Collect the neighbours of the query word that starts at queryPosition.
// Entries are appended to neighbours[] beginning at index *numNeighbours,
// and *numNeighbours is advanced for each entry added here.
void wordLookupSM_getNeighbours(char *codes, struct scoreMatrix scoreMatrix,
                                int4 queryPosition, int4 *numNeighbours,
                                struct neighbour *neighbours) {
  int4 selfScore = 0, hasWildcard = 0;
  int4 pos;
  struct neighbour *slot;

  // Score of the word aligned against itself; also note whether any code in
  // the window is at or above encoding_numRegularLetters
  for (pos = queryPosition; pos < queryPosition + parameters_wordSize; pos++) {
    if (codes[pos] >= encoding_numRegularLetters)
      hasWildcard = 1;

    selfScore += scoreMatrix.matrix[codes[pos]][codes[pos]];
  }

  // A word containing such a code is only expanded when its self-score
  // reaches parameters_T
  if (hasWildcard && selfScore < parameters_T)
    return;

  // The query word itself is always the first neighbour recorded
  slot = &neighbours[*numNeighbours];
  slot->codeword = getCodeword(codes + queryPosition, parameters_wordSize);
  slot->score = selfScore;
  slot->position = 0;
  (*numNeighbours)++;

  // Remaining neighbours are gathered by the helper
  wordLookupSM_findNeighbours(codes, scoreMatrix, queryPosition,
                              numNeighbours, neighbours);
}
Example #4
0
/*
 * Compresses the file named by argv[1] and writes the result to argv[2].
 *
 * The input is consumed symbol by symbol while a dictionary of already-seen
 * strings is grown; whenever the current string S stops being a known word,
 * the code of its longest known prefix is emitted and S is added to the
 * dictionary. (The clear-code / end-of-information framing looks like LZW;
 * the helpers that define the exact format are not visible here.)
 *
 * Returns 0 on success, 1 when the arguments are wrong or the input could
 * not be loaded.
 */
int main(int argc, char* argv[]){

	// exactly two arguments are required; bail out instead of reading
	// argv[1] / argv[2], which may be NULL when they are missing
	if(argc != 3){
		printf( "usage: %s input_filename output_filename\n", argv[0] );
		return 1;
	}

	//read text file into input array
	OriginalData* orig = readOriginalData(argv[1]);
	if(!orig){
		// guard in case readOriginalData signals failure with a null pointer
		fprintf(stderr, "could not read input file %s\n", argv[1]);
		return 1;
	}

	//allocate the same amount of compressed data (lets hope it is enough)
	CompressedData* compressed = newCompressedData(orig->dataLength, 8);

	//initialize the dictionary and the symbols in the dictionary
	Dictionary* dict = newDictionary(8);
	initDictionary(dict);

	printf("Compressing...\n");
	writeToCompressedData(compressed, dict->clearCode);

	MyString* S = newString();
	int codeword;
	int dictReturn;
	while(hasNextSymbol(orig)){
		appendString(S, (uint8_t) nextSymbol(orig));

		//S is still a known word: keep extending it
		if(getCodeword(dict, S) != -1){
			continue;
		}

		//output S minus the last char (temporarily hide the last symbol)
		S->length--;
		codeword = getCodeword(dict, S);
		writeToCompressedData(compressed, codeword);
		S->length++;

		//add S to the dictionary
		dictReturn = addToDictionary(dict, S);

		if(dictReturn == -2){
			//increase the output bit width by one
			compressed->bitWidth++;
			// NOTE(review): the dictionary's own bitWidth is doubled rather
			// than incremented - confirm against Dictionary's definition
			dict->bitWidth *= 2;
		}
		if(dictReturn == -1){
			//dictionary is full, clear it and write a clearCode
			clearDictionary(dict);
			writeToCompressedData(compressed, dict->clearCode);
			compressed->bitWidth = compressed->rootBitWidth+1;
			//step one symbol back in original data so it is read again
			orig->nextSymbol--;
			orig->dataLeft++;
			//make S empty
			S->length = 0;
		}else{
			//S becomes just its former last symbol, the start of the next word
			S->data[0] = S->data[S->length - 1];
			S->length = 1;
		}
	}

	//write the last codeword
	// NOTE(review): for an empty input S is empty here; what getCodeword
	// returns for an empty string is not visible from this function
	codeword = getCodeword(dict, S);
	writeToCompressedData(compressed, codeword);

	//write EOI
	writeToCompressedData(compressed, dict->endOfInformation);

	writeCompressedDataToFile(compressed, argv[2]);
	printf("File %s compressed succesfully to %s\n", argv[1], argv[2]);
	return 0;
}