コード例 #1
0
/*
 * Command-line driver that times a document parser over one or more files.
 *
 * Arguments are processed left to right. A flag (-e, -p, -h, -s) applies to
 * the next non-flag argument; every other non-flag argument is treated as a
 * file name, which is read fully into memory and handed to time_parser()
 * with the settings in effect at that point.
 *
 *   -e <encoding>  encoding string passed through to time_parser()
 *   -p <parser>    a value starting with "icu" selects parse_doc_icu,
 *                  anything else selects parse_doc_8859_1
 *   -h <value>     a value starting with '0' or 'f' turns hashing off,
 *                  anything else turns it on
 *   -s <value>     same convention, sets doFilterSpaces
 *
 * Files that cannot be opened, sized, or buffered are reported on stderr and
 * skipped. Returns 0 once all arguments are processed and 1 if hashinit()
 * fails; exits with status 1 on a usage error or an unknown flag.
 */
int main(int argc, char *argv[])
{
	char *encoding = NULL;
	bool doHash = false;

	if (argc < 2){
		fprintf(stderr, "Usage: %s [ -e encoding ] filename [ [ -e encoding] filename2 ] ...\n", argv[0]);
		exit(1);
	}
	ucInit();
	if ( ! hashinit() ) {
		log("db: Failed to init hashtable." ); return 1; }
	// . hashinit() calls srand() w/ a fixed number
	// . let's mix it up again
	srand ( time(NULL) );

	int i;
	int flag = flNone;
	// NOTE(review): parser stays NULL unless -p is given; whether
	// time_parser() copes with a NULL parser is not visible here.
	void (*parser)(char*,int,bool,char*) = NULL;
	char *parser_str = "";
	for (i=1;i<argc;i++){

		// Read cmdline args
		if (argv[i][0] == '-'){
			if (gbstrlen(argv[i])<2){
				fprintf(stderr, "Unknown argument: %s\n", 
					argv[i]);
				exit(1);
			}
			switch(argv[i][1]){
			case 'e':
				flag = flEncoding;
				break;
			case 'p':
				flag = flParser;
				break;
			case 'h':
				flag = flHash;
				break;
			case 's':
				flag = flFilterSpaces;
				break;
			default:
				fprintf(stderr, "Unknown flag: %s\n",
					argv[i]);
				exit(1);
			}
			continue; //next arg
		}
		
		// Switch default encoding
		if (flag == flEncoding){
			encoding = argv[i];
			flag=flNone;
			fprintf(stderr, "Using encoding: %s\n", encoding);
			continue;
		}
		// switch parser
		if (flag == flParser){
			if (!strncmp(argv[i], "icu", 3)){
				parser = parse_doc_icu;
				parser_str = "ICU BreakIterator";
			}
			else {
				parser = parse_doc_8859_1;
				parser_str = "iso-8859-1";
			}
			flag=flNone;
			fprintf(stderr, "Using parser: %s\n", parser_str);
			continue;
		}
		// toggle hashing: a leading '0' or 'f' means off
		if (flag == flHash){
			if ((!strncmp(argv[i], "0", 1)) ||
			    (!strncmp(argv[i], "f", 1))){
				doHash = false;
			}
			else{ doHash = true; }
			flag = flNone;
			continue;
		}
		// toggle space filtering: a leading '0' or 'f' means off
		if (flag == flFilterSpaces){
			if ((!strncmp(argv[i], "0", 1)) ||
			    (!strncmp(argv[i], "f", 1))){
				doFilterSpaces = false;
			}
			else{ doFilterSpaces = true; }
			flag = flNone;
			continue;
		}

		// No pending flag: this argument is a file to parse
		char * filename = argv[i];
		fprintf(stderr, "Reading \"%s\"\n", filename);
		FILE *fp = fopen(filename,"r");
		if (!fp){
			fprintf(stderr, "Error: could not open file \"%s\"\n", 
				filename);
			continue;
		}
		// Get File size. ftell() reports failure as -1; casting that
		// straight to size_t would wrap and make the buffer below
		// far too small for the following fread().
		long end_pos = -1L;
		if (fseek(fp, 0L, SEEK_END) == 0)
			end_pos = ftell(fp);
		if (end_pos < 0 || fseek(fp, 0L, SEEK_SET) != 0){
			fprintf(stderr, "Error: could not determine size of file \"%s\"\n",
				filename);
			fclose(fp);
			continue;
		}
		size_t file_size = (size_t)end_pos;
		
		// One extra byte for the terminating NUL
		char *file_buf = (char*)malloc(file_size+1);
		if (!file_buf){
			fprintf(stderr, "Error: out of memory reading file \"%s\"\n",
				filename);
			fclose(fp);
			continue;
		}
		size_t nread = fread(file_buf, (size_t)1,file_size, fp);
		fclose(fp);

		if (nread != file_size){
			// %zu is the conversion for size_t; %d was a type mismatch
			fprintf(stderr, "Warning: wanted %zu chars, but read %zu\n",
				file_size, nread);
		}
		file_buf[nread] = '\0';
	       
		long usec_elapsed;
		
		// Run the buffer (including its NUL) through ucToUnicode()
		// once before timing. The result is not used afterwards.
		// NOTE(review): presumably a warm-up/sanity pass - confirm.
		char ucBuf[128*1024];
		long ucLen = ucToUnicode((UChar*)ucBuf,128*1024,
					 file_buf,nread+1,"utf-8", 10);
		(void)ucLen;

		
		usec_elapsed = time_parser(parser,
					   file_buf,nread,doHash,
					   encoding,
					   NUM_TEST_RUNS);
		fprintf(stderr,"Document parsed (%s, hash=%s, filterSpaces=%s): %ld usec\n", 
			parser_str, 
			doHash?"true":"false",
			doFilterSpaces?"true":"false",
			usec_elapsed);

		// Release the per-file buffer so memory does not grow with
		// the number of files given on the command line
		free(file_buf);
	}
	fprintf(stderr, "Done\n");
	return 0;
}
コード例 #2
0
int main (int argc, char **argv) {
	char * filename = argv[1];
	fprintf(stderr, "Reading \"%s\"\n", filename);
	FILE *fp = fopen(filename,"r");
	if (!fp){
		fprintf(stderr, "Error: could not open file \"%s\"\n", 
			filename);
		exit(1);
	}
	
	//get charset
	char *charset = argv[2];

	// Get File size
	size_t file_size;
	fseek(fp, 0L, SEEK_END);
	file_size = (size_t)ftell(fp);
	fseek(fp, 0L, SEEK_SET);
	
	char *file_buf = (char*)malloc(file_size+1);
	size_t nread = fread(file_buf, (size_t)1,file_size, fp);
	fclose(fp);
	
	if (nread != file_size){
		fprintf(stderr, "Warning: wanted %d chars, but read %d\n",
			file_size, nread);
	}
	file_buf[nread] = '\0';
	
	int32_t ucBufSize = (int32_t)(nread*2.5);
	UChar *ucBuf = (UChar*)malloc(ucBufSize);
	int32_t ucLen = ucToUnicode(ucBuf, ucBufSize, file_buf, nread, 
				 "utf-8", NULL);
	
	struct timeval tv1, tv2;
	struct timezone tz1, tz2;

	int32_t times[test_count];
	int64_t total=0;
	int32_t max_time=-1L;
	int32_t min_time=999999999L;
	int32_t avg_time;

	//int32_t u8size = nread*2;
	//char *u8buf = (char*)malloc(u8size);
	int32_t newsize = 0;
	for (int i=0;i<test_count;i++ ){
		gettimeofday(&tv1, &tz1);
		newsize = ucToUnicode(ucBuf, ucBufSize, file_buf, nread, 
				      charset, NULL) << 1;
		gettimeofday(&tv2, &tz2);
		times[i] = elapsed_usec(&tv1, &tv2);
		total += times[i];
		if (times[i] < min_time) min_time = times[i];
		if (times[i] > max_time) max_time = times[i];
	}
	avg_time = total/test_count;

	fprintf(stderr,"ICU size: %"INT32", count: %"INT32", avg: %"INT32", min: %"INT32", max: %"INT32"\n",
		newsize, test_count, avg_time, min_time, max_time);
	int outfd = open("icu.out", O_CREAT|O_RDWR|O_TRUNC, 00666);
	if (outfd < 0) {printf("Error creating output file: %s\n", 
			       strerror(errno)); exit(1);}
	write(outfd, ucBuf, newsize);
	close(outfd);
#if 0
	total = 0; min_time = 999999999L; max_time = -1L;
	for (int i=0;i<test_count;i++ ){
		gettimeofday(&tv1, &tz1);
		//newsize = utf16ToUtf8_iconv(u8buf, u8size, ucBuf, ucLen);
		newsize = ucToUnicode_iconv(ucBuf, ucBufSize, file_buf, nread, 
					    charset, NULL) << 1;		
		gettimeofday(&tv2, &tz2);
		times[i] = elapsed_usec(&tv1, &tv2);
		total += times[i];
		if (times[i] < min_time) min_time = times[i];
		if (times[i] > max_time) max_time = times[i];
	}
	avg_time = total/test_count;

	fprintf(stderr,"iconv size: %"INT32", count: %"INT32", avg: %"INT32", min: %"INT32", max: %"INT32"\n",
		newsize, test_count, avg_time, min_time, max_time);
	outfd = open("iconv.out", O_CREAT|O_RDWR|O_TRUNC, 00666);
	if (outfd < 0) {printf("Error creating output file: %s\n", 
			       strerror(errno)); exit(1);}
	write(outfd, ucBuf, newsize);
	close(outfd);
#endif
#if 0
	total = 0; min_time = 999999999L; max_time = -1L;
	for (int i=0;i<test_count;i++ ){
		gettimeofday(&tv1, &tz1);
		newsize = utf16ToUtf8_intern(u8buf, u8size, ucBuf, ucLen);
		gettimeofday(&tv2, &tz2);
		times[i] = elapsed_usec(&tv1, &tv2);
		total += times[i];
		if (times[i] < min_time) min_time = times[i];
		if (times[i] > max_time) max_time = times[i];
	}
	avg_time = total/test_count;

	fprintf(stderr,"my size: %"INT32", count: %"INT32", avg: %"INT32", min: %"INT32", max: %"INT32"\n",
		newsize, test_count, avg_time, min_time, max_time);
	outfd = open("my.out", O_CREAT|O_RDWR|O_TRUNC, 00666);
	if (outfd < 0) {printf("Error creating output file: %s\n", 
			       strerror(errno)); exit(1);}
	write(outfd, u8buf, newsize);
	close(outfd);
#endif
	//printf("%s\n", u8buf);

}