Exemple #1
0
/*
 * Transcribe an audio file through an iFlytek ISR session and return the
 * recognised text.
 *
 * klass    - receiver of the Ruby method (unused).
 * fileName - Ruby String holding the path of the audio file to upload.
 *
 * Returns a new Ruby String built from the concatenated result fragments,
 * truncated to kBufferSize - 1 bytes.
 * Raises RuntimeError when the engine cannot be initialised, the session
 * cannot be started, the file cannot be opened, or the closing audio frame is
 * rejected. Each raise path first releases what was acquired so far, because
 * rb_raise() does not return to this function.
 */
static VALUE ifly_asr_iat(VALUE klass, VALUE fileName) {
    /* StringValue() evaluates to a VALUE, not a C string; StringValueCStr()
     * converts the argument and yields a NUL-terminated char pointer. */
    const char *audioFileName = StringValueCStr(fileName);
    const char *sess_id;
    int ret = 0, len, status = 2, ep_status = 0, rec_status = 0, rslt_status = 0, i = 0;
    char buff[kBufferSize], text[kBufferSize];
    size_t text_len = 0;
    FILE *fin;

    /* Both buffers are read later even when nothing was written to them. */
    buff[0] = '\0';
    text[0] = '\0';

    ret = QISRInit("appid=50ebc3c9,vad_enable=0");
    if (ret != 0) {
        rb_raise(rb_eRuntimeError, "ISR Initialize Error");
    }

    sess_id = QISRSessionBegin("", "ssm=1,sub=iat,auf=audio/L16;rate=8000,aue=amr;-1,ent=sms8k,rst=plain,rse=utf8", &ret);
    if (ret != 0) {
        QISRFini();
        rb_raise(rb_eRuntimeError, "ISR Session Begin Error");
    }

    fin = fopen(audioFileName, "rb");
    if (!fin) {
        QISRSessionEnd(sess_id, NULL);
        QISRFini();
        rb_raise(rb_eRuntimeError, "ISR Can not open audio input file");
    }

    /* Stream the file chunk by chunk. Looping on the fread() result (rather
     * than on feof()) also terminates when a read error occurs. */
    while ((len = (int)fread(buff, 1, kBufferSize, fin)) > 0) {
        ret = QISRAudioWrite(sess_id, buff, len, status, &ep_status, &rec_status);
        if (ret != 0) break;

        usleep(200000);
    }
    fclose(fin);

    /* Status 4 marks the closing frame of the audio stream. */
    status = 4;
    ret = QISRAudioWrite(sess_id, buff, 1, status, &ep_status, &rec_status);
    if (ret != 0) {
        QISRSessionEnd(sess_id, NULL);
        QISRFini();
        rb_raise(rb_eRuntimeError, "ISR Audio Last Frame Write Error");
    }

    /* Poll for result fragments, at most 30 times, until status 5 is
     * reported (presumably "recognition complete" - confirm against the SDK
     * status constants). */
    do {
        const char *result = QISRGetResult(sess_id, &rslt_status, 0, &ret);
        if (ret != 0) break;
        if (rslt_status != 1 && result) {
            /* Bounded append: never write past the end of text. */
            size_t room = sizeof(text) - 1 - text_len;
            size_t n = strlen(result);
            if (n > room) n = room;
            memcpy(text + text_len, result, n);
            text_len += n;
            text[text_len] = '\0';
        }
        usleep(500000);
    } while (rslt_status != 5 && ++i < 30);

    QISRSessionEnd(sess_id, NULL);
    QISRFini();

    return rb_str_new(text, (long)text_len);
}
int IflySTT::open()
{
    // Keys used in the session parameter string (see the
    // "iFlytek MSC Reference Manual" for the full description):
    //   sub             - requested service type
    //   domain          - recognition domain
    //   language        - language
    //   accent          - dialect
    //   sample_rate     - audio sample rate
    //   result_type     - format of the recognition result
    //   result_encoding - character encoding of the result

    // Login parameters. The appid is bound to the MSC library build; do not
    // change it casually.
    const char* const kLoginParams = "appid = 5743ed12, work_dir = .";

    // NOTE(review): sample_rate = 1411000 is an unusual value - confirm it is
    // what the service expects.
    const char* const kSessionParams = "sub = iat, domain = iat, language = zh_ch, accent = mandarin, sample_rate = 1411000, result_type = plain, result_encoding = gb2312";

    // Log in. User name and password are both passed as NULL; the third
    // argument carries the login parameters.
    const int login_rc = MSPLogin(NULL, NULL, kLoginParams);
    if (login_rc != MSP_SUCCESS)
        return login_rc;

    // Dictation needs no grammar, so the first argument is NULL.
    int begin_rc = MSP_SUCCESS;
    session_id_ = QISRSessionBegin(NULL, kSessionParams, &begin_rc);
    if (begin_rc != MSP_SUCCESS)
    {
        // Undo the login before reporting the failure.
        MSPLogout();
        return begin_rc;
    }

    // Raise the run flag, then start iflystt_get_text_thread with this
    // object as its argument.
    run_ = true;
    _beginthread(iflystt_get_text_thread, 0, this);

    return 0;
}
Exemple #3
0
void run_iat( const char* session_begin_params)
{
	const char*		session_id					=	NULL;
	char			rec_result[BUFFER_SIZE]		=	{NULL};	
	char			hints[HINTS_SIZE]			=	{NULL}; //hints为结束本次会话的原因描述,由用户自定义
	unsigned int	total_len					=	0; 
	int				aud_stat					=	MSP_AUDIO_SAMPLE_CONTINUE ;		//音频状态
	int				ep_stat						=	MSP_EP_LOOKING_FOR_SPEECH;		//端点检测
	int				rec_stat					=	MSP_REC_STATUS_SUCCESS ;			//识别状态
	int				errcode						=	MSP_SUCCESS ;

	FILE*			f_pcm						=	NULL;
	char*			p_pcm						=	NULL;
	long			pcm_count					=	0;
	long			pcm_size					=	0;
	long			read_size					=	0;

#if 0
	char *file_name = "wav/0003.pcm"; 
	
	if (NULL == file_name)
		goto iat_exit;

	f_pcm = fopen(file_name, "rb");
	if (NULL == f_pcm) 
	{
		printf("\nopen [%s] failed! \n", file_name);
		goto iat_exit;
	}
	
	fseek(f_pcm, 0, SEEK_END);
	pcm_size = ftell(f_pcm); //获取音频文件大小 
	fseek(f_pcm, 0, SEEK_SET);		

	p_pcm = (char *)malloc(pcm_size);
	if (NULL == p_pcm)
	{
		printf("\nout of memory! \n");
		goto iat_exit;
	}

	read_size = fread((void *)p_pcm, 1, pcm_size, f_pcm); //读取音频文件内容
	if (read_size != pcm_size)
	{
		printf("\nread [%s] error!\n", file_name);
		goto iat_exit;
	}
	
	printf("\n开始语音听写 ...\n");
	session_id = QISRSessionBegin(NULL, session_begin_params, &errcode); //听写不需要语法,第一个参数为NULL
	if (MSP_SUCCESS != errcode)
	{
		printf("\nQISRSessionBegin failed! error code:%d\n", errcode);
		goto iat_exit;
	}
	char *p_data = p_pcm;
#else
	WSADATA wsaData;
	WORD sockVersion = MAKEWORD(2,2);
	if(WSAStartup(sockVersion, &wsaData) != 0)
		return;

	SOCKET serSocket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
	if(serSocket == INVALID_SOCKET)
	{
		printf("socket failed \n");
		return ;
	}

	sockaddr_in serAddr;
	serAddr.sin_family = AF_INET;
	serAddr.sin_port = htons(port);
	serAddr.sin_addr.S_un.S_addr = INADDR_ANY;
	if(bind(serSocket, (LPSOCKADDR)&serAddr, sizeof(serAddr)) == SOCKET_ERROR)
	{
		printf("bind error!\n");
	    return ;
	}
	printf("服务端开启\n");
	//开始监听
    if(listen(serSocket, 5) == SOCKET_ERROR)
    {
        printf("listen error !");
        return ;
    }
	
    SOCKET sClient;
	sockaddr_in remoteAddr;
	int nAddrLen = sizeof(remoteAddr);

	sClient = accept(serSocket, (SOCKADDR *)&remoteAddr, &nAddrLen);
    if(sClient == INVALID_SOCKET)
    {
        printf("accept error !");
    }
    printf("接受到一个连接:%s \r\n", inet_ntoa(remoteAddr.sin_addr));

	unsigned int len = 10 * FRAME_LEN; // 每次写入200ms音频(16k,16bit):1帧音频20ms,10帧=200ms。16k采样率的16位音频,一帧的大小为640Byte
	char *space_buffer = (char*)malloc(SPACE_SIZE * len);
#endif
	char *p_data = space_buffer;
	char *p_data2 = space_buffer;
	int rec_number = 0;

	printf("\n开始语音听写 ...\n");
	session_id = QISRSessionBegin(NULL, session_begin_params, &errcode); //听写不需要语法,第一个参数为NULL
	if (MSP_SUCCESS != errcode)
	{
		printf("\nQISRSessionBegin failed! error code:%d\n", errcode);
		goto iat_exit;
	}
	while (1) 
	{
		
		int ret = 0;
#if 0
		if (pcm_size < 2 * len) 
			len = pcm_size;
#endif
		if (len <= 0)
			break;

		aud_stat = MSP_AUDIO_SAMPLE_CONTINUE;
		if (0 == pcm_count)
			aud_stat = MSP_AUDIO_SAMPLE_FIRST;

		printf(">");
		int total_recv = 0;
		int recvlen = 0;
		while (total_recv < len)
		{
			recvlen = recv(sClient, p_data, len - total_recv, 0);
			total_recv += recvlen;
			p_data += recvlen;
		}
		//printf("recv size %d \n", total_recv);
		ret = QISRAudioWrite(session_id, (const void *)p_data2, len, aud_stat, &ep_stat, &rec_stat);
		if (MSP_SUCCESS != ret)
		{
			printf("\nQISRAudioWrite failed! error code:%d\n", ret);
			goto iat_exit;
		}
		if (MSP_REC_STATUS_SUCCESS == rec_stat) //已经有部分听写结果
		{
			printf("识别出结果了 %d\n", rec_number);
			const char *rslt = QISRGetResult(session_id, &rec_stat, 0, &errcode);
			if (MSP_SUCCESS != errcode)
			{
				printf("\nQISRGetResult failed! error code: %d\n", errcode);
				goto iat_exit;
			}
			if (NULL != rslt)
			{
				unsigned int rslt_len = strlen(rslt);
				total_len += rslt_len;
				if (total_len >= BUFFER_SIZE)
				{
					printf("\nno enough buffer for rec_result !\n");
					goto iat_exit;
				}
				strncat(rec_result, rslt, rslt_len);
				printf("%s\n",rec_result);
			}
		}
		else
		{
			printf("还没有听写结果 %d\n", rec_number);
		}
		pcm_count += (long)len;
		//pcm_size  -= (long)len;
		p_data2 += (long)len;
		
		if (MSP_EP_AFTER_SPEECH == ep_stat)
		{
			printf("loop = %d, 检测到后端点\n", rec_number);
			break;
		}
		rec_number++;
		Sleep(200); //模拟人说话时间间隙。200ms对应10帧的音频
	}
	printf("\n语音听写结束\n");
	printf("=============================================================\n");
	printf("%s\n",rec_result);
	printf("=============================================================\n");

iat_exit:
	if (NULL != f_pcm)
	{
		fclose(f_pcm);
		f_pcm = NULL;
	}
	if (NULL != p_pcm)
	{	free(p_pcm);
		p_pcm = NULL;
	}

	QISRSessionEnd(session_id, hints);
}
Exemple #4
0
/*
 * Recognise the PCM audio stored in asrfile against a grammar and print the
 * result.
 *
 * asrfile - path of the audio file; it is loaded into memory in one piece.
 * param   - parameter string passed to QISRSessionBegin.
 * grammar - grammar passed to QISRGrammarActivate.
 *
 * Returns 0 on success, -1 when the audio file cannot be loaded, otherwise
 * the error code of the first SDK call that failed.
 */
int run_asr(const char* asrfile ,  const char* param , const char* grammar)
{
	char rec_result[1024*4] = {0};
	size_t result_len = 0;
	const char *sessionID = NULL;
	FILE *f_pcm = NULL;
	char *pPCM = NULL;
	int audStat = 2;
	int epStatus = 0;
	int recStatus = 0;
	long pcmCount = 0;
	long pcmSize = 0;
	int ret = 0;
	int rc = 0;	/* first error seen; this is the value returned */

	sessionID = QISRSessionBegin(NULL, param, &ret); /* asr */
	if (ret != 0)
	{
		printf("QISRSessionBegin Failed,ret=%d\n",ret);
		return ret;
	}

	/* The grammar may be given as the first argument of QISRSessionBegin or
	 * activated through QISRGrammarActivate; QISRGrammarActivate may be
	 * called several times to activate several grammars. */
	ret = QISRGrammarActivate(sessionID, grammar, NULL, 0);
	if (ret != 0)
	{
		printf("QISRGrammarActivate Failed,ret=%d\n",ret);
		rc = ret;
		goto end_session;
	}

	/* Load the whole audio file into memory. */
	f_pcm = fopen(asrfile, "rb");
	if (NULL == f_pcm)
	{
		printf("open [%s] failed\n", asrfile);
		rc = -1;
		goto end_session;
	}
	fseek(f_pcm, 0, SEEK_END);
	pcmSize = ftell(f_pcm);
	fseek(f_pcm, 0, SEEK_SET);
	if (pcmSize > 0)
		pPCM = (char *)malloc((size_t)pcmSize);
	if (NULL == pPCM || fread((void *)pPCM, 1, (size_t)pcmSize, f_pcm) != (size_t)pcmSize)
	{
		printf("read [%s] failed\n", asrfile);
		fclose(f_pcm);
		rc = -1;
		goto end_session;
	}
	fclose(f_pcm);
	f_pcm = NULL;

	/* Feed the audio in 6400-byte chunks; once fewer than 12800 bytes are
	 * left, send the whole remainder in one call. */
	while (1) {
		unsigned int len = 6400;
		if (pcmSize < 12800)
			len = (unsigned int)pcmSize;
		if (len <= 0)
			break;

		/* Status 1 for the first chunk, 2 for every later one. */
		audStat = (pcmCount == 0) ? 1 : 2;

		/* pcmCount/len has type long; cast it to match the %d conversion. */
		printf("csid=%s,count=%d,aus=%d,",sessionID,(int)(pcmCount/(long)len),audStat);
		ret = QISRAudioWrite(sessionID, (const void *)&pPCM[pcmCount], len, audStat, &epStatus, &recStatus);
		printf("eps=%d,rss=%d,ret=%d\n",epStatus,recStatus,ret);
		if (ret != 0)
		{
			rc = ret;
			goto end_session;
		}
		pcmCount += (long)len;
		pcmSize -= (long)len;

		/* recStatus 0: presumably "a result is available" (other examples
		 * compare against MSP_REC_STATUS_SUCCESS) - confirm. */
		if (recStatus == 0) {
			const char *rslt = QISRGetResult(sessionID, &recStatus, 0, &ret);
			if (ret != 0)
			{
				printf("QISRGetResult Failed,ret=%d\n",ret);
				rc = ret;
				goto end_session;
			}
			if (NULL != rslt)
				printf("%s\n", rslt);
		}
		if (epStatus == MSP_EP_AFTER_SPEECH)
			break;
		usleep(150000);
	}

	/* Status 4 with no data closes the audio stream. */
	ret = QISRAudioWrite(sessionID, (const void *)NULL, 0, 4, &epStatus, &recStatus);
	if (ret != 0)
	{
		printf("QISRAudioWrite Failed,ret=%d\n",ret);
		rc = ret;
		goto end_session;
	}

	/* Collect result fragments until status 5 is reported or a call fails. */
	while (recStatus != 5 && ret == 0) {
		const char *rslt = QISRGetResult(sessionID, &recStatus, 0, &ret);
		if (NULL != rslt)
		{
			/* Bounded append: drop whatever does not fit into rec_result. */
			size_t room = sizeof(rec_result) - 1 - result_len;
			size_t n = strlen(rslt);
			if (n > room)
				n = room;
			memcpy(rec_result + result_len, rslt, n);
			result_len += n;
			rec_result[result_len] = '\0';
		}
		usleep(150000);
	}
	if (ret != 0)
		rc = ret;

end_session:
	free(pPCM);
	pPCM = NULL;

	ret = QISRSessionEnd(sessionID, NULL);
	if (ret != MSP_SUCCESS)
	{
		printf("QISRSessionEnd Failed, ret=%d\n",ret);
		if (rc == 0)
			rc = ret;
	}
	printf("=============================================================\n");
	printf("The result is: %s\n",rec_result);
	printf("=============================================================\n");
	usleep(100000);
	return rc;
}
/*
 * Recognise the audio in asrfile against a grammar given at session start.
 *
 * grammar - grammar passed as the first argument of QISRSessionBegin.
 * asrfile - path of the audio file, streamed in BUFFER_NUM-byte chunks.
 *
 * Returns 0 on success, -1 when the file cannot be opened or read, otherwise
 * the error code of the SDK call that failed.
 */
int run_asr(const char* grammar , const char* asrfile)
{
	int ret = MSP_SUCCESS;
	FILE* fp = NULL;
	char buff[BUFFER_NUM];
	unsigned int len;
	int status = MSP_AUDIO_SAMPLE_CONTINUE, ep_status = -1, rec_status = -1, rslt_status = -1;

	const char* param = "rst=plain,sub=asr,ssm=1,aue=speex,auf=audio/L16;rate=16000,grt=abnf";// note sub=asr, grt=abnf
	const char* sess_id = QISRSessionBegin(grammar, param, &ret);// the grammar is handed to QISRSessionBegin
	if ( MSP_SUCCESS != ret )
	{
		printf("QISRSessionBegin err %d\n", ret);	
		return ret;
	}

	fp = fopen( asrfile , "rb");// several sample audio files are provided; swap the name here when testing
	if ( NULL == fp )
	{
		printf("failed to open file,please check the file.\n");
		QISRSessionEnd(sess_id, "normal");
		return -1;
	}

	printf("writing audio...\n");
	while ( !feof(fp) )
	{
		len = (unsigned int)fread(buff, 1, BUFFER_NUM, fp);
		if ( ferror(fp) )
		{
			// A read error never sets the EOF flag, so stop here instead of spinning.
			printf("\nread audio file failed\n");
			ret = -1;
			break;
		}
		// The final chunk of audio has to be flagged as "last".
		status = feof(fp) ? MSP_AUDIO_SAMPLE_LAST : MSP_AUDIO_SAMPLE_CONTINUE;
		ret = QISRAudioWrite(sess_id, buff, len, status, &ep_status, &rec_status);
		if ( ret != MSP_SUCCESS )
		{
			printf("\nQISRAudioWrite err %d\n", ret);
			break;
		}

		if ( rec_status == MSP_REC_STATUS_SUCCESS )
		{
			const char* result = QISRGetResult(sess_id, &rslt_status, 0, &ret);
			if (ret != MSP_SUCCESS )
			{
				printf("error code: %d\n", ret);
				break;
			}
			else if( rslt_status == MSP_REC_STATUS_NO_MATCH )
				printf("get result nomatch\n");
			else
			{
				// strlen returns size_t; cast it to match the %d conversion.
				if ( result != NULL )
					printf("get result[%d/%d]:len:%d\n %s\n", ret, rslt_status, (int)strlen(result), result);
			}
		}
		printf(".");
		// The recording is simulated: pause 200 ms so the data is not sent
		// fast enough to overflow buffers. Live recording needs no pause.
		Sleep(200);
	}
	printf("\n");

	if (ret == MSP_SUCCESS)
	{	
		printf("get reuslt\n");
		char asr_result[1024] = "";
		unsigned int pos_of_result = 0;
		int loop_count = 0;
		do 
		{
			const char* result = QISRGetResult(sess_id, &rslt_status, 0, &ret);
			if ( ret != 0 )
			{
				printf("QISRGetResult err %d\n", ret);
				break;
			}

			if( rslt_status == MSP_REC_STATUS_NO_MATCH )
			{
				printf("get result nomatch\n");
			}
			else if ( result != NULL )
			{
				printf("[%d]:get result[%d/%d]: %s\n", (loop_count), ret, rslt_status, result);

				// Bounded append: keep only what still fits into asr_result.
				size_t room = sizeof(asr_result) - 1 - pos_of_result;
				size_t n = strlen(result);
				if (n > room)
					n = room;
				memcpy(asr_result + pos_of_result, result, n);
				pos_of_result += (unsigned int)n;
				asr_result[pos_of_result] = '\0';
			}
			else
			{
				printf("[%d]:get result[%d/%d]\n",(loop_count), ret, rslt_status);
			}
			Sleep(500);
		} while (rslt_status != MSP_REC_STATUS_COMPLETE && loop_count++ < 30);
		if (strcmp(asr_result,"")==0)
		{
			printf("no result\n");
		}

	}

	QISRSessionEnd(sess_id, NULL);
	printf("QISRSessionEnd.\n");
	fclose(fp); 

	// Report the failure that ended the run instead of an unconditional 0.
	return ret;
}
Exemple #6
0
/*
 * Record audio, then run offline grammar recognition on the recorded file.
 *
 * udata - supplies grammar_id, which is placed in the session parameters as
 *         local_grammar.
 *
 * Uses names defined outside this function: record(), filename, rec_rslt,
 * tmpt1/tmpt2 and flag1/flag2. flag1 or flag2 is set when the final result
 * contains tmpt1 or tmpt2 respectively.
 *
 * Returns 1 when recording fails, otherwise the last error code: MSP_SUCCESS
 * after a completed run, -1 when the audio file cannot be loaded, or the
 * code of the SDK call that failed.
 */
int run_asr(UserData *udata)
{
	char asr_params[MAX_PARAMS_LEN]    = {0};
	const char *session_id             = NULL;
	FILE *f_pcm                        = NULL;
	char *pcm_data                     = NULL;
	long pcm_count                     = 0;
	long pcm_size                      = 0;
	int last_audio                     = 0;
	int aud_stat                       = MSP_AUDIO_SAMPLE_CONTINUE;
	int ep_status                      = MSP_EP_LOOKING_FOR_SPEECH;
	int rec_status                     = MSP_REC_STATUS_INCOMPLETE;
	int rss_status                     = MSP_REC_STATUS_INCOMPLETE;
	int errcode                        = -1;

	if (record())
		printf("Finish recording!!\n");
	else
	{
		printf("Fail to record!\n");
		return 1;
	}

	/* Load the recorded file into memory. */
	f_pcm = fopen(filename, "rb");
	if (NULL == f_pcm) {
		/* Print the file name here; f_pcm is NULL on this path. */
		printf("打开\"%s\"失败![%s]\n", filename, strerror(errno));
		goto run_error;
	}
	fseek(f_pcm, 0, SEEK_END);
	pcm_size = ftell(f_pcm);
	fseek(f_pcm, 0, SEEK_SET);
	if (pcm_size <= 0)
		goto run_error;
	pcm_data = (char *)malloc((size_t)pcm_size);
	if (NULL == pcm_data)
		goto run_error;
	/* A short read would leave part of the buffer uninitialised. */
	if (fread((void *)pcm_data, 1, (size_t)pcm_size, f_pcm) != (size_t)pcm_size)
		goto run_error;
	fclose(f_pcm);
	f_pcm = NULL;

	/* Parameters for offline grammar recognition. */
	snprintf(asr_params, MAX_PARAMS_LEN - 1, 
		"engine_type = local, \
		asr_res_path = %s, sample_rate = %d, \
		grm_build_path = %s, local_grammar = %s, \
		result_type = xml, result_encoding = utf-8, ",
		ASR_RES_PATH,
		SAMPLE_RATE_16K,
		GRM_BUILD_PATH,
		udata->grammar_id
		);
	session_id = QISRSessionBegin(NULL, asr_params, &errcode);
	if (NULL == session_id)
		goto run_error;
	printf("开始识别...\n");

	/* Feed the audio in 6400-byte chunks; once fewer than 12800 bytes are
	 * left, send the whole remainder in one call. */
	while (1) {
		unsigned int len = 6400;

		if (pcm_size < 12800) {
			len = pcm_size;
			last_audio = 1;
		}

		aud_stat = MSP_AUDIO_SAMPLE_CONTINUE;

		if (0 == pcm_count)
			aud_stat = MSP_AUDIO_SAMPLE_FIRST;

		if (len <= 0)
			break;

		printf(">");
		fflush(stdout);
		errcode = QISRAudioWrite(session_id, (const void *)&pcm_data[pcm_count], len, aud_stat, &ep_status, &rec_status);
		if (MSP_SUCCESS != errcode)
			goto run_error;

		pcm_count += (long)len;
		pcm_size -= (long)len;

		/* The engine detected the end of the speech. */
		if (MSP_EP_AFTER_SPEECH == ep_status)
			break;

		usleep(150 * 1000); /* simulate the pace of human speech */
	}
	/* Explicitly mark the end of the audio; a failure here is reported
	 * instead of being dropped. */
	errcode = QISRAudioWrite(session_id, (const void *)NULL, 0, MSP_AUDIO_SAMPLE_LAST, &ep_status, &rec_status);
	if (MSP_SUCCESS != errcode)
		goto run_error;

	free(pcm_data);
	pcm_data = NULL;

	/* Fetch the recognition result. rec_rslt is cleared first so a value
	 * left over from an earlier run is never reported. */
	rec_rslt = NULL;
	while (MSP_REC_STATUS_COMPLETE != rss_status && MSP_SUCCESS == errcode) {
		rec_rslt = QISRGetResult(session_id, &rss_status, 0, &errcode);
		usleep(150 * 1000);
	}
	printf("\n识别结束:\n");
	printf("=============================================================\n");
	if (NULL != rec_rslt)
	{
		printf("%s\n", rec_rslt);

		if (strstr(rec_rslt,tmpt1))
			flag1=true;
		else if (strstr(rec_rslt,tmpt2))
			flag2=true;
	}
	else
		printf("没有识别结果!\n");
	printf("=============================================================\n");

	goto run_exit;

run_error:
	if (NULL != pcm_data) {
		free(pcm_data);
		pcm_data = NULL;
	}
	if (NULL != f_pcm) {
		fclose(f_pcm);
		f_pcm = NULL;
	}
run_exit:
	QISRSessionEnd(session_id, NULL);
	return errcode;
}
Exemple #7
0
/*
 * Recognise the audio in audio_file and print the recognition result.
 *
 * audio_file - path of the audio file; nothing is done when it is NULL.
 * params     - parameter string passed to QISRSessionBegin.
 * grammar_id - passed as the first argument of QISRSessionBegin.
 *
 * Failures are reported on stdout. On every path the file is closed, the
 * audio buffer is freed and QISRSessionEnd is called.
 */
void run_asr(const char* audio_file, const char* params, char* grammar_id)
{
	const char *session_id = NULL;
	char rec_result[BUFFER_SIZE] = {'\0'};
	char hints[HINTS_SIZE] = {'\0'};	/* reason for ending the session, defined by the caller */
	unsigned int total_len = 0;
	int aud_stat = MSP_AUDIO_SAMPLE_CONTINUE;	/* audio status */
	int ep_stat = MSP_EP_LOOKING_FOR_SPEECH;	/* endpoint detection */
	int rec_stat = MSP_REC_STATUS_SUCCESS;		/* recognition status */
	int errcode = MSP_SUCCESS;

	FILE *audio = NULL;
	char *samples = NULL;
	long offset = 0;		/* bytes already sent */
	long remaining = 0;		/* bytes still to send */
	long got = 0;

	if (NULL == audio_file)
		goto asr_exit;

	audio = fopen(audio_file, "rb");
	if (NULL == audio)
	{
		printf("\nopen [%s] failed!\n", audio_file);
		goto asr_exit;
	}

	/* Determine the size of the audio file. */
	fseek(audio, 0, SEEK_END);
	remaining = ftell(audio);
	fseek(audio, 0, SEEK_SET);

	samples = (char*)malloc(remaining);
	if (NULL == samples)
	{
		printf("\nout of memory!\n");
		goto asr_exit;
	}

	/* Read the whole file into memory. */
	got = fread((void *)samples, 1, remaining, audio);
	if (got != remaining)
	{
		printf("\nread [%s] failed!\n", audio_file);
		goto asr_exit;
	}

	printf("\n开始语音识别 ...\n");
	session_id = QISRSessionBegin(grammar_id, params, &errcode);
	if (MSP_SUCCESS != errcode)
	{
		printf("\nQISRSessionBegin failed, error code:%d\n", errcode);
		goto asr_exit;
	}

	for (;;)
	{
		/* Write 200 ms of audio per call (16 kHz, 16 bit): one frame is
		 * 20 ms, so 10 frames = 200 ms. A frame of 16-bit audio at 16 kHz
		 * is 640 bytes. */
		unsigned int chunk = 10 * FRAME_LEN;
		int write_rc = 0;

		/* Send the whole tail at once when less than two chunks remain. */
		if (remaining < 2 * chunk)
			chunk = remaining;
		if (0 == chunk)
			break;

		aud_stat = (0 == offset) ? MSP_AUDIO_SAMPLE_FIRST : MSP_AUDIO_SAMPLE_CONTINUE;

		printf(">");
		write_rc = QISRAudioWrite(session_id, (const void *)(samples + offset), chunk, aud_stat, &ep_stat, &rec_stat);
		if (MSP_SUCCESS != write_rc)
		{
			printf("\nQISRAudioWrite failed, error code:%d\n",write_rc);
			goto asr_exit;
		}

		offset += (long)chunk;
		remaining -= (long)chunk;

		if (MSP_EP_AFTER_SPEECH == ep_stat)
			break;
		usleep(200*1000);	/* simulate the pace of speech; 10 frames last 200 ms */
	}

	/* Tell the engine that no more audio follows. */
	errcode = QISRAudioWrite(session_id, NULL, 0, MSP_AUDIO_SAMPLE_LAST, &ep_stat, &rec_stat);
	if (MSP_SUCCESS != errcode)
	{
		printf("\nQISRAudioWrite failed, error code:%d\n",errcode);
		goto asr_exit;
	}

	/* Collect result fragments until the engine reports completion. */
	while (rec_stat != MSP_REC_STATUS_COMPLETE)
	{
		const char *fragment = QISRGetResult(session_id, &rec_stat, 0, &errcode);
		if (MSP_SUCCESS != errcode)
		{
			printf("\nQISRGetResult failed, error code: %d\n", errcode);
			goto asr_exit;
		}
		if (NULL != fragment)
		{
			unsigned int fragment_len = strlen(fragment);
			total_len += fragment_len;
			if (total_len >= BUFFER_SIZE)
			{
				printf("\nno enough buffer for rec_result !\n");
				goto asr_exit;
			}
			strncat(rec_result, fragment, fragment_len);
		}
		usleep(150*1000);	/* avoid hogging the CPU */
	}
	printf("\n语音识别结束\n");
	printf("=============================================================\n");
	printf("%s",rec_result);
	printf("=============================================================\n");

asr_exit:
	if (NULL != audio)
	{
		fclose(audio);
		audio = NULL;
	}
	if (NULL != samples)
	{
		free(samples);
		samples = NULL;
	}

	QISRSessionEnd(session_id, hints);
}
Exemple #8
0
/*
 * Dictate the audio in "iat_demo_test.wav" and append the recognised text to
 * "iat_result.txt".
 *
 * The engine is initialised with QISRInit and shut down with QISRFini on
 * every path that follows a successful initialisation. Failures are reported
 * on stdout; the function returns nothing.
 */
void run_iat()
{
	bool error = false;
	int ret = MSP_SUCCESS;
	FILE* fp = NULL;
	FILE* fout = NULL;
	char buff[BUFFER_NUM];
	int len;
	int status = MSP_AUDIO_SAMPLE_CONTINUE, ep_status = -1, rec_status = -1, rslt_status = -1;

	/// Engine initialisation; it only has to be done once.
	/// Do not change the APPID casually.
	ret = QISRInit("appid=510f2d72");
	if ( MSP_SUCCESS != ret )
	{
		printf("QISRInit err %d\n", ret);
		return;
	}

	/// Second argument: session parameters - session mode, speex codec,
	/// 16 kHz 16-bit audio.
	/// Third argument: receives the return code.
	const char* param = "sub=iat,ssm=1,auf=audio/L16;rate=16000,aue=speex,ent=sms16k,rst=plain";
	const char* sess_id = QISRSessionBegin(NULL, param, &ret);
	if ( MSP_SUCCESS != ret )
	{
		printf("QISRSessionBegin err %d\n", ret);	
		error = true;
	}

	/// Simulated recording: open the input audio.
	if (error == false)
	{
		fp = fopen("iat_demo_test.wav", "rb");
		if ( NULL == fp )
		{
			printf("failed to open file,please check the file.\n");
			error = true;
		}
	}

	/// Results are written to a file.
	if (error == false)
	{
		fout = fopen("iat_result.txt", "ab");
		if( NULL == fout )
		{
			printf("failed to open file,please check the file.\n");
			error = true;
		}
	}
	if (error == false)
	{
		printf("writing audio...\n");

		char param_value[32] = "";// parameter value in string form
		size_t value_len = 32;	// string length or buffer length
		int volume = 0;// volume level

		while ( !feof(fp) )
		{
			len = fread(buff, 1, BUFFER_NUM, fp);
			if ( ferror(fp) )
			{
				// A read error never sets the EOF flag, so stop here instead of spinning.
				printf("\nread audio file failed\n");
				error = true;
				break;
			}
			printf(".");
			/// Send the audio data to the server.
			ret = QISRAudioWrite(sess_id, buff, len, status, &ep_status, &rec_status);
			if ( ret != MSP_SUCCESS )
			{
				printf("\nQISRAudioWrite err %d\n", ret);
				error = true;
				break;
			}
			/********* volume of the audio just sent **********/
			value_len = 32;// in/out argument: reset it to the buffer size before every call
			ret = QISRGetParam(sess_id,"volume",param_value,&value_len);// read the volume; see ttsdemo for reading upstream/downstream traffic
			if ( ret != MSP_SUCCESS )
			{
				printf("QISRGetParam: qisr get param failed Error code %d.\n",ret);
				_getch();	// wait for a key press before going on
				break;
			}
			volume = atoi(param_value);// the volume could drive an animated level display in a UI
			//printf("volume== %d \n",volume);
			for (int i=0;i<volume;i++)
			{
				printf(".");
			}
			printf("\n");
			/******* end of volume handling **********/
			if (ep_status == MSP_EP_AFTER_SPEECH)// end of speech detected
			{
				printf("QISRAudioWrite: ep_status == MSP_EP_AFTER_SPEECH.\n");
				break;
			}
			
			/// The server returned a partial result.
			if ( rec_status == MSP_REC_STATUS_SUCCESS )
			{
				const char* result = QISRGetResult(sess_id, &rslt_status, 0, &ret);
				if( rslt_status == MSP_REC_STATUS_NO_MATCH )
					printf("get result nomatch\n");
				else
				{
					if ( result != NULL )
						fwrite(result, 1, strlen(result), fout);
					// Never hand a NULL pointer to %s.
					printf("get result[%d/%d]: %s\n", ret, rslt_status, result != NULL ? result : "(null)");
				}
			}
			Sleep(200);
		}
		printf("\n");
	}

	// Close the input on every path, including the one where the output
	// file could not be opened.
	if ( NULL != fp )
	{
		fclose(fp);
		fp = NULL;
	}

	/// Last block of data.
	if (error == false)
	{	
		status = MSP_AUDIO_SAMPLE_LAST;
		ret = QISRAudioWrite(sess_id, buff, 1, status, &ep_status, &rec_status);
		if ( ret != MSP_SUCCESS )
		{
			printf("QISRAudioWrite write last audio err %d\n", ret);
			error = true;
		}
	}

	/// After the last block has been sent, poll the server for results.
	/// The number of polls is limited to cope with a poor network.
	if (error == false)
	{	
		printf("get reuslt\n");
		int loop_count = 0;
		do 
		{
			const char* result = QISRGetResult(sess_id, &rslt_status, 0, &ret);
			if ( ret != MSP_SUCCESS )
			{
				printf("QISRGetResult err %d\n", ret);
				error = true;
				break;
			}

			if( rslt_status == MSP_REC_STATUS_NO_MATCH )
				printf("get result nomatch\n");
			else
			{
				if ( result != NULL )
					fwrite(result, 1, strlen(result), fout);
				// Never hand a NULL pointer to %s.
				printf("[%d]:get result[%d/%d]: %s\n", (loop_count), ret, rslt_status, result != NULL ? result : "(null)");
			}
			Sleep(500);
		} while (rslt_status != MSP_REC_STATUS_COMPLETE && loop_count++ < 30);
	}

	if( NULL != fout )
	{
		fclose(fout);
	}

	ret = QISRSessionEnd(sess_id, NULL);
	if ( ret != MSP_SUCCESS )
		printf("QISRSessionEnd err %d\n", ret);
	else
		printf("QISRSessionEnd.\n");

	// Shut the engine down even when ending the session failed.
	ret = QISRFini();

	return;
}
Exemple #9
0
/*
 * Dictate the audio in audio_file and print the text recognised so far.
 *
 * audio_file           - path of the audio file; nothing is done when NULL.
 * session_begin_params - parameter string passed to QISRSessionBegin.
 *
 * The audio is sent in chunks until the engine reports MSP_EP_AFTER_SPEECH
 * or the file has been sent completely. Failures are reported on stdout. On
 * every path the file is closed, the audio buffer is freed and
 * QISRSessionEnd is called.
 */
void run_iat(const char* audio_file, const char* session_begin_params)
{
	const char*		session_id					=	NULL;
	char			rec_result[BUFFER_SIZE]		=	{0};
	char			hints[HINTS_SIZE]			=	{0}; /* reason for ending the session, defined by the caller */
	unsigned int	total_len					=	0; 
	int				aud_stat					=	MSP_AUDIO_SAMPLE_CONTINUE ;		/* audio status */
	int				ep_stat						=	MSP_EP_LOOKING_FOR_SPEECH;		/* endpoint detection */
	int				rec_stat					=	MSP_REC_STATUS_SUCCESS ;		/* recognition status */
	int				errcode						=	MSP_SUCCESS ;

	FILE*			f_pcm						=	NULL;
	char*			p_pcm						=	NULL;
	char*			p_data						=	NULL;	/* next byte to send */
	long			pcm_count					=	0;
	long			pcm_size					=	0;		/* bytes still to send */
	long			read_size					=	0;
	int				rec_number					=	0;

	if (NULL == audio_file)
		goto iat_exit;

	f_pcm = fopen(audio_file, "rb");
	if (NULL == f_pcm) 
	{
		printf("\nopen [%s] failed! \n", audio_file);
		goto iat_exit;
	}
	
	fseek(f_pcm, 0, SEEK_END);
	pcm_size = ftell(f_pcm); /* size of the audio file */
	fseek(f_pcm, 0, SEEK_SET);		

	p_pcm = (char *)malloc(pcm_size);
	if (NULL == p_pcm)
	{
		printf("\nout of memory! \n");
		goto iat_exit;
	}

	read_size = fread((void *)p_pcm, 1, pcm_size, f_pcm); /* read the whole audio file */
	if (read_size != pcm_size)
	{
		printf("\nread [%s] error!\n", audio_file);
		goto iat_exit;
	}
	
	printf("\n开始语音听写 ...\n");
	session_id = QISRSessionBegin(NULL, session_begin_params, &errcode); /* dictation needs no grammar: first argument is NULL */
	if (MSP_SUCCESS != errcode)
	{
		printf("\nQISRSessionBegin failed! error code:%d\n", errcode);
		goto iat_exit;
	}

	p_data = p_pcm;
	while (1) 
	{
		/* Write 200 ms of audio per call (16 kHz, 16 bit): one frame is
		 * 20 ms, so 10 frames = 200 ms. A frame of 16-bit audio at 16 kHz
		 * is 640 bytes. */
		unsigned int len = 10 * FRAME_LEN;
		int ret = 0;

		/* Send the whole tail at once when less than two chunks remain;
		 * len becomes 0 once everything has been sent. */
		if (pcm_size < 2 * len) 
			len = pcm_size;
		if (len <= 0)
			break;

		aud_stat = MSP_AUDIO_SAMPLE_CONTINUE;
		if (0 == pcm_count)
			aud_stat = MSP_AUDIO_SAMPLE_FIRST;

		printf(">");

		ret = QISRAudioWrite(session_id, (const void *)p_data, len, aud_stat, &ep_stat, &rec_stat);
		if (MSP_SUCCESS != ret)
		{
			printf("\nQISRAudioWrite failed! error code:%d\n", ret);
			goto iat_exit;
		}
		if (MSP_REC_STATUS_SUCCESS == rec_stat) /* a partial dictation result is available */
		{
			const char *rslt = QISRGetResult(session_id, &rec_stat, 0, &errcode);
			if (MSP_SUCCESS != errcode)
			{
				printf("\nQISRGetResult failed! error code: %d\n", errcode);
				goto iat_exit;
			}
			if (NULL != rslt)
			{
				unsigned int rslt_len = strlen(rslt);
				total_len += rslt_len;
				if (total_len >= BUFFER_SIZE)
				{
					printf("\nno enough buffer for rec_result !\n");
					goto iat_exit;
				}
				strncat(rec_result, rslt, rslt_len);
			}
		}

		pcm_count += (long)len;
		/* Track what is left: without this the loop keeps reading past the
		 * end of p_pcm whenever no endpoint is detected. */
		pcm_size  -= (long)len;
		p_data += (long)len;
		rec_number++;
		if (MSP_EP_AFTER_SPEECH == ep_stat)
		{
			printf("loop = %d, 检测到后端点\n", rec_number);
			break;
		}
		Sleep(200); /* simulate the pace of speech: 200 ms corresponds to 10 frames of audio */
	}
	printf("\n语音听写结束\n");
	printf("=============================================================\n");
	printf("%s\n",rec_result);
	printf("=============================================================\n");

iat_exit:
	if (NULL != f_pcm)
	{
		fclose(f_pcm);
		f_pcm = NULL;
	}
	if (NULL != p_pcm)
	{	free(p_pcm);
		p_pcm = NULL;
	}

	QISRSessionEnd(session_id, hints);
}
/*
 * Dictate the audio in src_wav_filename and append the recognised text,
 * followed by "\r\n", to des_text_filename.
 *
 * src_wav_filename  - path of the input audio file.
 * des_text_filename - path of the text file that results are appended to.
 * param             - parameter string passed to QISRSessionBegin.
 *
 * Failures are reported on stdout; the function returns nothing.
 */
void run_iat(const char* src_wav_filename , const char* des_text_filename , const char* param)
{
	bool error = false;
	int ret = MSP_SUCCESS;
	FILE* fp = NULL;
	FILE* fout = NULL;
	char buff[BUFFER_NUM];
	unsigned int len;
	int status = MSP_AUDIO_SAMPLE_CONTINUE, ep_status = -1, rec_status = -1, rslt_status = -1;

	/// Second argument: session parameters - session mode, speex codec,
	/// 16 kHz 16-bit audio.
	/// Third argument: receives the return code.
	const char* sess_id = QISRSessionBegin(NULL, param, &ret);
	if ( MSP_SUCCESS != ret )
	{
		printf("QISRSessionBegin err %d\n", ret);	
		error = true;
	}

	/// Simulated recording: open the input audio.
	if (error == false)
	{
		fp = fopen( src_wav_filename , "rb");
		if ( NULL == fp )
		{
			printf("failed to open file,please check the file.\n");
			error = true;
		}
	}

	/// Results are written to a file.
	if (error == false)
	{
		fout = fopen( des_text_filename , "ab");
		if( NULL == fout )
		{
			printf("failed to open file,please check the file.\n");
			error = true;
		}
	}
	if (error == false)
	{
		printf("writing audio...\n");

		char param_value[32] = "";// parameter value in string form
		unsigned int value_len = 32;	// string length or buffer length
		int volume = 0;// volume level

		while ( !feof(fp) )
		{
			len = (unsigned int)fread(buff, 1, BUFFER_NUM, fp);
			if ( ferror(fp) )
			{
				// A read error never sets the EOF flag, so stop here instead of spinning.
				printf("\nread audio file failed\n");
				error = true;
				break;
			}
			printf(".");
			// The final chunk of audio has to be flagged as "last".
			status = feof(fp) ? MSP_AUDIO_SAMPLE_LAST : MSP_AUDIO_SAMPLE_CONTINUE;
			/// Send the audio data to the server.
			ret = QISRAudioWrite(sess_id, buff, len, status, &ep_status, &rec_status);
			if ( ret != MSP_SUCCESS )
			{
				printf("\nQISRAudioWrite err %d\n", ret);
				error = true;
				break;
			}
			/********* volume of the audio just sent **********/
			value_len = 32;// in/out argument: reset it to the buffer size before every call
			ret = QISRGetParam(sess_id,"volume",param_value,&value_len);// read the volume; see ttsdemo for reading upstream/downstream traffic
			if ( ret != MSP_SUCCESS )
			{
				printf("QISRGetParam: qisr get param failed Error code %d.\n",ret);
				(void)getchar();	// wait for input before going on
				break;
			}
			volume = atoi(param_value);// the volume could drive an animated level display in a UI
			//printf("volume== %d \n",volume);
			for (int i=0;i<volume;i++)
			{
				printf(".");
			}
			printf("\n");
			/******* end of volume handling **********/
			if (ep_status == MSP_EP_AFTER_SPEECH)// end of speech detected
			{
				printf("QISRAudioWrite: ep_status == MSP_EP_AFTER_SPEECH.\n");
				break;
			}

			/// The server returned a partial result.
			if ( rec_status == MSP_REC_STATUS_SUCCESS )
			{
				const char* result = QISRGetResult(sess_id, &rslt_status, 0, &ret);
				if( rslt_status == MSP_REC_STATUS_NO_MATCH )
					printf("get result nomatch\n");
				else
				{
					if ( result != NULL )
						fwrite(result, 1, strlen(result), fout);
					// Never hand a NULL pointer to %s.
					printf("get result[%d/%d]: %s\n", ret, rslt_status, result != NULL ? result : "(null)");
				}
			}
			// The recording is simulated: pause 200 ms so the data is not
			// sent fast enough to overflow buffers. Live recording needs no pause.
			usleep(200000);
		}
		printf("\n");
	}

	// Close the input on every path, including the one where the output
	// file could not be opened.
	if ( NULL != fp )
	{
		fclose(fp);
		fp = NULL;
	}

	/// After the last block has been sent, poll the server for results.
	/// The number of polls is limited to cope with a poor network.
	if (error == false)
	{	
		printf("get reuslt\n");
		int loop_count = 0;
		do 
		{
			const char* result = QISRGetResult(sess_id, &rslt_status, 0, &ret);
			if ( ret != MSP_SUCCESS )
			{
				printf("QISRGetResult err %d\n", ret);
				error = true;
				break;
			}

			if( rslt_status == MSP_REC_STATUS_NO_MATCH )
				printf("get result nomatch\n");
			else
			{
				if ( result != NULL )
					fwrite(result, 1, strlen(result), fout);
				// Never hand a NULL pointer to %s.
				printf("[%d]:get result[%d/%d]: %s\n", (loop_count), ret, rslt_status, result != NULL ? result : "(null)");
			}
			usleep(500000);
		} while (rslt_status != MSP_REC_STATUS_COMPLETE && loop_count++ < 30);
	}

	if( NULL != fout )
	{
		// Terminate this run's output with a line break.
		const char* result = "\r\n";
		fwrite(result, 1, strlen(result), fout);
		fclose(fout);
	}

	ret = QISRSessionEnd(sess_id, NULL);
	if ( ret != MSP_SUCCESS )
	{
		printf("QISRSessionEnd err %d\n", ret);
		return;
	}
	printf("QISRSessionEnd.\n");
	return;
}
Exemple #11
0
/*!
 * Open the ASR interface.
 *
 * Allocates the per-handle state from the handle's memory pool, stores it in
 * ah->private_info, copies the tuning values from `globals`, resets the
 * iFlytek fields and starts an iFlytek dictation session.
 *
 * \param ah    ASR handle to initialise
 * \param codec ignored; the handle's codec is always set to "L16"
 * \param rate  sample rate; only 8000 and 16000 are stored in ah->rate
 * \param dest  unused
 * \param flags unused
 * \return SWITCH_STATUS_MEMERR when the state cannot be allocated,
 *         SWITCH_STATUS_SUCCESS otherwise
 */
static switch_status_t pocketsphinx_asr_open(switch_asr_handle_t *ah, const char *codec, int rate, const char *dest, switch_asr_flag_t *flags)
{
	/* NOTE(review): sample_rate is fixed at 16000 here even when `rate` is
	 * 8000 - confirm this is intended. */
	const char *ifly_params = "sub = iat, domain = iat, language = zh_ch, accent = mandarin, sample_rate = 16000, result_type = plain, result_encoding = utf8";
	pocketsphinx_t *ps;
	int begin_err;

	/* chen */
	switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, ">>>>>>>>pocketsphinx_asr_open<<<<<<<<<\n");

	ps = (pocketsphinx_t *) switch_core_alloc(ah->memory_pool, sizeof(*ps));
	if (!ps) {
		return SWITCH_STATUS_MEMERR;
	}

	switch_mutex_init(&ps->flag_mutex, SWITCH_MUTEX_NESTED, ah->memory_pool);
	ah->private_info = ps;

	/* An unsupported rate is only logged; ah->rate is left untouched. */
	switch (rate) {
	case 8000:
		ah->rate = 8000;
		break;
	case 16000:
		ah->rate = 16000;
		break;
	default:
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Invalid rate %d. Only 8000 and 16000 are supported.\n", rate);
		break;
	}

	/* The codec requested by the caller is replaced with L16. */
	codec = "L16";
	ah->codec = switch_core_strdup(ah->memory_pool, codec);

	/* Copy the detection settings from the module globals. */
	ps->thresh = globals.thresh;
	ps->silence_hits = globals.silence_hits;
	ps->listen_hits = globals.listen_hits;
	ps->org_silence_hits = ps->silence_hits;
	ps->start_input_timers = globals.start_input_timers;
	ps->no_input_timeout = globals.no_input_timeout;
	ps->speech_timeout = globals.speech_timeout;
	ps->confidence_threshold = globals.confidence_threshold;

	/* Reset the iFlytek state. */
	ps->ifly_session_id = NULL;
	ps->ifly_total_len = 0;
	ps->ifly_aud_stat = MSP_AUDIO_SAMPLE_CONTINUE;	/* audio status */
	ps->ifly_ep_stat = MSP_EP_LOOKING_FOR_SPEECH;	/* endpoint detection */
	ps->ifly_rec_stat = MSP_REC_STATUS_SUCCESS;		/* recognition status */
	ps->ifly_wait_result = SWITCH_FALSE;

	/* Open the iFlytek session.
	 * NOTE(review): a failure is only logged and the function still reports
	 * success - confirm callers cope with an unusable session id. */
	ps->ifly_session_id = QISRSessionBegin(NULL, ifly_params, &begin_err);
	if (MSP_SUCCESS != begin_err) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, ">>>>>>>>QISRSessionBegin fail error !!!<<<<<<<<<\n");
	}

	return SWITCH_STATUS_SUCCESS;
}