/*
 * Decode one message from range coder `c` into `m`.
 *
 * Three encodings are distinguished by leading symbols in the stream:
 *   - raw bytes:    the coder's input bytes are copied verbatim up to the
 *                   first zero byte, the end of the stream, or 1024 bytes;
 *   - packed ASCII: delegated to decodePackedASCII();
 *   - modelled:     non-alpha characters and lower-case alpha/space
 *                   characters are decoded separately, case is restored, and
 *                   the two streams are interleaved back into `m`.
 *
 * m        output buffer of at least 1025 bytes; NUL-terminated on success.
 * len_out  receives the message length, excluding the terminator.
 *
 * Returns 0 on success, or -1 if the decoded length or non-alpha count is
 * outside what the fixed-size buffers can hold (corrupt input).  On the
 * length error `m` is left as an empty string and *len_out is 0.
 */
int stats3_decompress_bits(range_coder *c,unsigned char m[1025],int *len_out)
{
  int i;
  *len_out=0;

  /* Check if message is encoded naturally */
  int notRawASCII=range_decode_equiprobable(c,2);
  if (notRawASCII==0) {
    /* raw bytes -- copy from input to output.
       Both bounds are tested before the byte itself is read, so the loop
       never touches bit_stream[] beyond its last whole byte. */
    for(i=0;
        i<1024&&i<(c->bit_stream_length>>3)&&c->bit_stream[i];
        i++) {
      m[i]=c->bit_stream[i];
    }
    m[i]=0;
    *len_out=i;
    return 0;
  }

  int notPackedASCII=range_decode_symbol(c,&probPackedASCII,2);

  int encodedLength=decodeLength(c);
  if (encodedLength<0||encodedLength>1024) {
    /* A length outside 0..1024 would overrun m[] and the work buffers. */
    m[0]=0;
    return -1;
  }

  /* Pre-fill with placeholders so the buffer is a valid string of the right
     length before the decoders below overwrite it. */
  for(i=0;i<encodedLength;i++) m[i]='?';
  m[i]=0;
  *len_out=encodedLength;

  if (notPackedASCII==0) {
    /* packed ASCII -- copy from input to output */
    decodePackedASCII(c,(char *)m,encodedLength);
    return 0;
  }

  unsigned char nonAlphaValues[1024];
  int nonAlphaPositions[1024];
  int nonAlphaCount=0;

  decodeNonAlpha(c,nonAlphaPositions,nonAlphaValues,&nonAlphaCount,encodedLength);
  if (nonAlphaCount<0||nonAlphaCount>encodedLength) {
    /* Would make alphaCount negative or exceed the message length. */
    return -1;
  }

  int alphaCount=(*len_out)-nonAlphaCount;

  unsigned char lowerCaseAlphaChars[1025];

  decodeLCAlphaSpace(c,lowerCaseAlphaChars,alphaCount);
  lowerCaseAlphaChars[alphaCount]=0;

  decodeCaseModel1(c,lowerCaseAlphaChars);
  mungeCase((char *)lowerCaseAlphaChars);

  /* reintegrate alpha and non-alpha characters: positions listed in
     nonAlphaPositions[] take the next non-alpha value, every other position
     takes the next alpha character. */
  int nonAlphaPointer=0;
  int alphaPointer=0;
  for(i=0;i<(*len_out);i++) {
    if (nonAlphaPointer<nonAlphaCount
        &&nonAlphaPositions[nonAlphaPointer]==i) {
      m[i]=nonAlphaValues[nonAlphaPointer++];
    } else {
      m[i]=lowerCaseAlphaChars[alphaPointer++];
    }
  }
  m[i]=0;

  return 0;
}
/*
 * Case model, pass 1: walk line[0..len-1] and, for every alphabetic
 * character that charInWord() accepts, code whether it is upper case.
 *
 * With ENCODING defined the case of each letter is read from `line` and
 * written to the range coder; otherwise it is read from the range coder and
 * applied to `line` with toupper().
 *
 * The probability used for each letter comes from the tables in `h`:
 *   - first letter of the message:  casestartofmessage
 *   - first letter of a later word: casestartofword2 / casestartofword3,
 *     indexed by the initial case of the previous one / two words
 *   - otherwise for a first letter: caseposn1
 *   - later letters of a word:      caseposn2, indexed by the previous
 *     letter's case and the position in the word (capped at 79)
 *
 * Returns 0.  Calls exit(-1) if a non-initial letter is processed without
 * its case having been determined.
 *
 * NOTE(review): line[] holds unsigned short values that are passed directly
 * to isalpha()/isupper()/toupper(); values not representable as unsigned
 * char are outside what ISO C defines for those functions -- confirm that
 * callers only pass such values when the platform's ctype supports them.
 */
int FUNC(CaseModel1)(range_coder *c,unsigned short *line,int len,stats_handle *h)
{
  int wordNumber=0;
  int wordPosn=-1;
  int lastWordInitialCase=0;
  int lastWordInitialCase2=0;
  int lastCase=0;
  int i;

  for(i=0;i<len;i++) {
    int wordChar=charInWord(line[i]);
    if (!wordChar) {
      /* Word boundary: the next letter starts a new word. */
      wordPosn=-1;
      lastCase=0;
    } else {
      if (isalpha(line[i])) {
        if (wordPosn<0) wordNumber++;
        wordPosn++;
        int upper=-1;

        if (wordPosn==0) {
          /* first letter of word, so can only use 1st-order model */
          unsigned int frequencies[1]={h->caseposn1[0][0]};
          if (i==0) frequencies[0]=h->casestartofmessage[0][0];
          else if (wordNumber>1&&wordPosn==0) {
            /* start of word, so use model that considers initial case of
               previous word */
            frequencies[0]=h->casestartofword2[lastWordInitialCase][0];
            if (wordNumber>2)
              frequencies[0]=
                h->casestartofword3[lastWordInitialCase2][lastWordInitialCase][0];
            if (0)
              printf("last word began with case=%d, p_lower=%f\n",
                     lastWordInitialCase,
                     (frequencies[0]*1.0)/0x1000000
                     );
          }
          if (0) printf("case of first letter of word/message @ %d: p=%f\n",
                        i,(frequencies[0]*1.0)/0x1000000);
#ifdef ENCODING
          /* isupper() only promises a nonzero result for upper case, so
             normalise to the symbol values 0/1 before encoding. */
          upper=isupper(line[i])?1:0;
          range_encode_symbol(c,frequencies,2,upper);
#else
          upper=range_decode_symbol(c,frequencies,2);
#endif
        } else {
          /* subsequent letter, so can use case of previous letter in model */
          if (wordPosn>79) wordPosn=79;
          if (0) {
            printf("case of first letter of word/message @ %d.%d: p=%f\n",
                   i,wordPosn,
                   (h->caseposn2[lastCase][wordPosn][0]*1.0)/0x1000000);
            printf("  lastCase=%d, wordPosn=%d\n",lastCase,wordPosn);
          }
          /* Fall back to the nearest earlier word position whose table
             entry is non-zero (position 0 is used as a last resort). */
          int pos=wordPosn;
          while ((!h->caseposn2[lastCase][pos][0])&&pos) pos--;
#ifdef ENCODING
          /* Normalise isupper()'s nonzero result to symbol 1. */
          upper=isupper(line[i])?1:0;
          range_encode_symbol(c,h->caseposn2[lastCase][pos],2,upper);
#else
          upper=range_decode_symbol(c,h->caseposn2[lastCase][pos],2);
#endif
        }

        if (upper==1) line[i]=toupper(line[i]);

        if (isupper(line[i])) lastCase=1; else lastCase=0;

        if (wordPosn==0) {
          /* Remember the initial case of the last two words. */
          lastWordInitialCase2=lastWordInitialCase;
          lastWordInitialCase=lastCase;
        } else if (upper==-1) {
          fprintf(stderr,"%s(): character processed without determining case.\n",
                  __FUNCTION__);
          exit(-1);
        }
      }
    }
  }
  return 0;
}