/**
 * JNI implementation of CharsetDetector.get_encoding(String file).
 *
 * Reads the file at the given path in 4 KiB chunks, feeds each chunk to a
 * chardet detector and returns the detected charset name.
 *
 * @param env     JNI environment of the calling thread.
 * @param jclazz  The Java class the native method belongs to (unused).
 * @param file    Path of the file to inspect.
 * @return A new Java string holding the charset name, or NULL when `file`
 *         is null, the path cannot be converted, the detector cannot be
 *         created, the file cannot be opened or read, or the detector
 *         does not report CHARDET_RESULT_OK for the charset query.
 */
JNIEXPORT jstring JNICALL Java_org_mozilla_charsetdetector_CharsetDetector_get_1encoding(
    JNIEnv *env, jclass jclazz, jstring file)
{
    (void)jclazz;

    if (file == NULL) {
        return NULL;
    }

    const char *path = env->GetStringUTFChars(file, NULL);
    if (path == NULL) {
        // Conversion failed; nothing has been acquired yet.
        return NULL;
    }

    char encoding[CHARDET_MAX_ENCODING_NAME];
    bool have_encoding = false;
    chardet_t det = NULL;

    if (chardet_create(&det) == CHARDET_RESULT_OK) {
        FILE *fp = fopen(path, "rb");
        if (fp != NULL) {
            char buf[4096];
            bool read_failed = false;
            int res = CHARDET_RESULT_OK;

            do {
                size_t len = fread(buf, 1, sizeof(buf), fp);
                // A short read caused by an I/O error never sets EOF, so
                // without this check the loop would spin forever.
                if (len < sizeof(buf) && ferror(fp)) {
                    read_failed = true;
                    break;
                }
                res = chardet_handle_data(det, buf, len);
            } while (res == CHARDET_RESULT_OK && feof(fp) == 0);

            fclose(fp);

            if (!read_failed) {
                chardet_data_end(det);
                have_encoding =
                    chardet_get_charset(det, encoding, CHARDET_MAX_ENCODING_NAME)
                    == CHARDET_RESULT_OK;
            }
        }
    }

    // Release everything on every path before building the result.
    if (det != NULL) {
        chardet_destroy(det);
    }
    env->ReleaseStringUTFChars(file, path);

    return have_encoding ? env->NewStringUTF(encoding) : NULL;
}
// Builds the private state: the detector handle starts out NULL and is
// only created when isLoaded() reports true (presumably: the chardet
// library is available -- confirm against the CharDet base class).
// If creation does not return CHARDET_RESULT_OK the handle is reset to
// NULL, so a NULL m_det afterwards means no detector was created.
Private()
    : CharDet()
    , m_det(NULL)
{
    if (isLoaded()) {
        const int status = chardet_create(&m_det);
        if (status != CHARDET_RESULT_OK) {
            m_det = NULL;
        }
    }
}
char * GetLocalEncoding(const char* in_str, unsigned int str_len){ chardet_t chardect=NULL; char *out_encode=new char[CHARDET_MAX_ENCODING_NAME]; if(chardet_create(&chardect)==CHARDET_RESULT_OK){ if(chardet_handle_data(chardect, in_str, (unsigned int)str_len) == CHARDET_RESULT_OK){ if(chardet_data_end(chardect) == CHARDET_RESULT_OK){ chardet_get_charset(chardect, out_encode, CHARDET_MAX_ENCODING_NAME); } } } if(chardect){ chardet_destroy(chardect); return out_encode; } else{ return NULL; } }
int main() { int res; chardet_t * detector = chardet_create(); char const * s = "Hello World"; res = chardet_feed(detector, s, strlen(s)); assert(res == 0); chardet_stat_t const ** stats; chardet_get_stats(detector, &stats); printf("Probabilities:\n"); for (int i = 0; stats[i] != NULL; i++) { char const * encoding = chardet_stat_get_encoding(stats[i]); float proba = chardet_stat_get_probability(stats[i]); printf(" - %s: %5.3f\n", encoding, proba); } chardet_destroy(detector); }