/*
 * Transcribe an audio file with the iFlytek ISR dictation ("sub=iat") service.
 *
 * klass    - receiver of the Ruby method (unused).
 * fileName - Ruby String holding the path of the audio file to upload.
 *
 * Returns a new Ruby String with the concatenated recognition text, truncated
 * to kBufferSize - 1 bytes if the service returns more than that.
 *
 * Raises RuntimeError when the engine cannot be initialised, the session
 * cannot be started, the file cannot be opened, or the final frame is
 * rejected.  The session and the engine are released before raising.
 */
static VALUE ifly_asr_iat(VALUE klass, VALUE fileName) {
    /* StringValueCStr yields a NUL-terminated C string.  StringValue only
     * returns the (converted) VALUE and must not be used as a char pointer. */
    const char *audioFileName = StringValueCStr(fileName);
    int ret = 0, status = 2, ep_status = 0, rec_status = 0, rslt_status = 0, i = 0;
    size_t len = 0, text_len = 0;
    char buff[kBufferSize], text[kBufferSize];
    const char *sess_id = NULL;
    FILE *fin = NULL;

    (void)klass;
    buff[0] = '\0'; /* the final frame sends buff[0] even when the file is empty */
    text[0] = '\0'; /* the result buffer must start out as an empty string */

    ret = QISRInit("appid=50ebc3c9,vad_enable=0");
    if (ret != 0) {
        rb_raise(rb_eRuntimeError, "ISR Initialize Error");
    }

    sess_id = QISRSessionBegin("", "ssm=1,sub=iat,auf=audio/L16;rate=8000,aue=amr;-1,ent=sms8k,rst=plain,rse=utf8", &ret);
    if (ret != 0) {
        QISRFini();
        rb_raise(rb_eRuntimeError, "ISR Session Begin Error");
    }

    fin = fopen(audioFileName, "rb");
    if (!fin) {
        /* rb_raise does not return, so release everything first. */
        QISRSessionEnd(sess_id, NULL);
        QISRFini();
        rb_raise(rb_eRuntimeError, "ISR Can not open audio input file");
    }

    /* Looping on the fread result (rather than feof) also terminates on a
     * read error and never uploads an empty chunk. */
    while ((len = fread(buff, 1, kBufferSize, fin)) > 0) {
        ret = QISRAudioWrite(sess_id, buff, (unsigned int)len, status, &ep_status, &rec_status);
        if (ret != 0) break;
        usleep(200000);
    }
    fclose(fin);

    /* Audio status 4 marks the last frame (same value the other call sites
     * pass for the closing write). */
    status = 4;
    ret = QISRAudioWrite(sess_id, buff, 1, status, &ep_status, &rec_status);
    if (ret != 0) {
        QISRSessionEnd(sess_id, NULL);
        QISRFini();
        rb_raise(rb_eRuntimeError, "ISR Audio Last Frame Write Error");
    }

    /* Poll for results: at most 30 rounds, stopping early on status 5. */
    do {
        const char *result = QISRGetResult(sess_id, &rslt_status, 0, &ret);
        if (ret != 0) break;
        if (rslt_status != 1 && result) {
            /* Bounded append: never write past the end of text. */
            size_t room = (size_t)kBufferSize - 1 - text_len;
            size_t n = strlen(result);
            if (n > room) n = room;
            memcpy(text + text_len, result, n);
            text_len += n;
            text[text_len] = '\0';
        }
        usleep(500000);
    } while (rslt_status != 5 && ++i < 30);

    QISRSessionEnd(sess_id, NULL);
    QISRFini();
    return rb_str_new(text, (long)text_len);
}
int IflySTT::open() { // sub: 请求业务类型 // domain: 领域 // language: 语言 // accent: 方言 // sample_rate: 音频采样率 // result_type: 识别结果格式 // result_encoding: 结果编码格式 // 详细参数说明请参阅《iFlytek MSC Reference Manual》 int ret = MSP_SUCCESS; int errcode = MSP_SUCCESS; const char* login_params = "appid = 5743ed12, work_dir = ."; // 登录参数,appid与msc库绑定,请勿随意改动 const char* session_begin_params = "sub = iat, domain = iat, language = zh_ch, accent = mandarin, sample_rate = 1411000, result_type = plain, result_encoding = gb2312"; // 用户登录 ret = MSPLogin(NULL, NULL, login_params); // 第一个参数是用户名,第二个参数是密码,均传NULL即可,第三个参数是登录参数 if (MSP_SUCCESS != ret) { return ret; } session_id_ = QISRSessionBegin(NULL, session_begin_params, &errcode); // 听写不需要语法,第一个参数为NULL if (MSP_SUCCESS != errcode) { MSPLogout(); return errcode; } run_ = true; _beginthread(iflystt_get_text_thread, 0, this); return 0; }
void run_iat( const char* session_begin_params) { const char* session_id = NULL; char rec_result[BUFFER_SIZE] = {NULL}; char hints[HINTS_SIZE] = {NULL}; //hints为结束本次会话的原因描述,由用户自定义 unsigned int total_len = 0; int aud_stat = MSP_AUDIO_SAMPLE_CONTINUE ; //音频状态 int ep_stat = MSP_EP_LOOKING_FOR_SPEECH; //端点检测 int rec_stat = MSP_REC_STATUS_SUCCESS ; //识别状态 int errcode = MSP_SUCCESS ; FILE* f_pcm = NULL; char* p_pcm = NULL; long pcm_count = 0; long pcm_size = 0; long read_size = 0; #if 0 char *file_name = "wav/0003.pcm"; if (NULL == file_name) goto iat_exit; f_pcm = fopen(file_name, "rb"); if (NULL == f_pcm) { printf("\nopen [%s] failed! \n", file_name); goto iat_exit; } fseek(f_pcm, 0, SEEK_END); pcm_size = ftell(f_pcm); //获取音频文件大小 fseek(f_pcm, 0, SEEK_SET); p_pcm = (char *)malloc(pcm_size); if (NULL == p_pcm) { printf("\nout of memory! \n"); goto iat_exit; } read_size = fread((void *)p_pcm, 1, pcm_size, f_pcm); //读取音频文件内容 if (read_size != pcm_size) { printf("\nread [%s] error!\n", file_name); goto iat_exit; } printf("\n开始语音听写 ...\n"); session_id = QISRSessionBegin(NULL, session_begin_params, &errcode); //听写不需要语法,第一个参数为NULL if (MSP_SUCCESS != errcode) { printf("\nQISRSessionBegin failed! 
error code:%d\n", errcode); goto iat_exit; } char *p_data = p_pcm; #else WSADATA wsaData; WORD sockVersion = MAKEWORD(2,2); if(WSAStartup(sockVersion, &wsaData) != 0) return; SOCKET serSocket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); if(serSocket == INVALID_SOCKET) { printf("socket failed \n"); return ; } sockaddr_in serAddr; serAddr.sin_family = AF_INET; serAddr.sin_port = htons(port); serAddr.sin_addr.S_un.S_addr = INADDR_ANY; if(bind(serSocket, (LPSOCKADDR)&serAddr, sizeof(serAddr)) == SOCKET_ERROR) { printf("bind error!\n"); return ; } printf("服务端开启\n"); //开始监听 if(listen(serSocket, 5) == SOCKET_ERROR) { printf("listen error !"); return ; } SOCKET sClient; sockaddr_in remoteAddr; int nAddrLen = sizeof(remoteAddr); sClient = accept(serSocket, (SOCKADDR *)&remoteAddr, &nAddrLen); if(sClient == INVALID_SOCKET) { printf("accept error !"); } printf("接受到一个连接:%s \r\n", inet_ntoa(remoteAddr.sin_addr)); unsigned int len = 10 * FRAME_LEN; // 每次写入200ms音频(16k,16bit):1帧音频20ms,10帧=200ms。16k采样率的16位音频,一帧的大小为640Byte char *space_buffer = (char*)malloc(SPACE_SIZE * len); #endif char *p_data = space_buffer; char *p_data2 = space_buffer; int rec_number = 0; printf("\n开始语音听写 ...\n"); session_id = QISRSessionBegin(NULL, session_begin_params, &errcode); //听写不需要语法,第一个参数为NULL if (MSP_SUCCESS != errcode) { printf("\nQISRSessionBegin failed! error code:%d\n", errcode); goto iat_exit; } while (1) { int ret = 0; #if 0 if (pcm_size < 2 * len) len = pcm_size; #endif if (len <= 0) break; aud_stat = MSP_AUDIO_SAMPLE_CONTINUE; if (0 == pcm_count) aud_stat = MSP_AUDIO_SAMPLE_FIRST; printf(">"); int total_recv = 0; int recvlen = 0; while (total_recv < len) { recvlen = recv(sClient, p_data, len - total_recv, 0); total_recv += recvlen; p_data += recvlen; } //printf("recv size %d \n", total_recv); ret = QISRAudioWrite(session_id, (const void *)p_data2, len, aud_stat, &ep_stat, &rec_stat); if (MSP_SUCCESS != ret) { printf("\nQISRAudioWrite failed! 
error code:%d\n", ret); goto iat_exit; } if (MSP_REC_STATUS_SUCCESS == rec_stat) //已经有部分听写结果 { printf("识别出结果了 %d\n", rec_number); const char *rslt = QISRGetResult(session_id, &rec_stat, 0, &errcode); if (MSP_SUCCESS != errcode) { printf("\nQISRGetResult failed! error code: %d\n", errcode); goto iat_exit; } if (NULL != rslt) { unsigned int rslt_len = strlen(rslt); total_len += rslt_len; if (total_len >= BUFFER_SIZE) { printf("\nno enough buffer for rec_result !\n"); goto iat_exit; } strncat(rec_result, rslt, rslt_len); printf("%s\n",rec_result); } } else { printf("还没有听写结果 %d\n", rec_number); } pcm_count += (long)len; //pcm_size -= (long)len; p_data2 += (long)len; if (MSP_EP_AFTER_SPEECH == ep_stat) { printf("loop = %d, 检测到后端点\n", rec_number); break; } rec_number++; Sleep(200); //模拟人说话时间间隙。200ms对应10帧的音频 } printf("\n语音听写结束\n"); printf("=============================================================\n"); printf("%s\n",rec_result); printf("=============================================================\n"); iat_exit: if (NULL != f_pcm) { fclose(f_pcm); f_pcm = NULL; } if (NULL != p_pcm) { free(p_pcm); p_pcm = NULL; } QISRSessionEnd(session_id, hints); }
int run_asr(const char* asrfile , const char* param , const char* grammar) { char rec_result[1024*4] = {0}; const char *sessionID; FILE *f_pcm = NULL; char *pPCM = NULL; int lastAudio = 0 ; int audStat = 2 ; int epStatus = 0; int recStatus = 0 ; long pcmCount = 0; long pcmSize = 0; int ret = 0 ; sessionID = QISRSessionBegin(NULL, param, &ret); //asr if(ret !=0) { printf("QISRSessionBegin Failed,ret=%d\n",ret); } ret = QISRGrammarActivate(sessionID, grammar, NULL, 0);//可以选择在QISRSessionBegin第一个参数传入grammar,亦可通过QISRGrammarActivate激活语法,可以多次调用QISRGrammarActivate,激活多个语法。 if(ret !=0) { printf("QISRGrammarActivate Failed,ret=%d\n",ret); } f_pcm = fopen(asrfile, "rb"); if (NULL != f_pcm) { fseek(f_pcm, 0, SEEK_END); pcmSize = ftell(f_pcm); fseek(f_pcm, 0, SEEK_SET); pPCM = (char *)malloc(pcmSize); fread((void *)pPCM, pcmSize, 1, f_pcm); fclose(f_pcm); f_pcm = NULL; } while (1) { unsigned int len = 6400; if (pcmSize < 12800) { len = pcmSize; lastAudio = 1; } audStat = 2; if (pcmCount == 0) audStat = 1; if (0) { if (audStat == 1) audStat = 5; else audStat = 4; } if (len<=0) { break; } printf("csid=%s,count=%d,aus=%d,",sessionID,pcmCount/len,audStat); ret = QISRAudioWrite(sessionID, (const void *)&pPCM[pcmCount], len, audStat, &epStatus, &recStatus); printf("eps=%d,rss=%d,ret=%d\n",epStatus,recStatus,ret); if (ret != 0) break; pcmCount += (long)len; pcmSize -= (long)len; if (recStatus == 0) { const char *rslt = QISRGetResult(sessionID, &recStatus, 0, &ret); if(ret !=0) { printf("QISRGetResult Failed,ret=%d\n",ret); break; } if (NULL != rslt) printf("%s\n", rslt); } if (epStatus == MSP_EP_AFTER_SPEECH) break; usleep(150000); } ret=QISRAudioWrite(sessionID, (const void *)NULL, 0, 4, &epStatus, &recStatus); if (ret !=0) { printf("QISRAudioWrite Failed,ret=%d\n",ret); } free(pPCM); pPCM = NULL; while (recStatus != 5 && ret == 0) { const char *rslt = QISRGetResult(sessionID, &recStatus, 0, &ret); if (NULL != rslt) { strcat(rec_result,rslt); } usleep(150000); } 
ret=QISRSessionEnd(sessionID, NULL); if(ret !=MSP_SUCCESS) { printf("QISRSessionEnd Failed, ret=%d\n",ret); } printf("=============================================================\n"); printf("The result is: %s\n",rec_result); printf("=============================================================\n"); usleep(100000); }
int run_asr(const char* grammar , const char* asrfile) { int ret = MSP_SUCCESS; int i = 0; FILE* fp = NULL; char buff[BUFFER_NUM]; unsigned int len; int status = MSP_AUDIO_SAMPLE_CONTINUE, ep_status = -1, rec_status = -1, rslt_status = -1; const char* param = "rst=plain,sub=asr,ssm=1,aue=speex,auf=audio/L16;rate=16000,grt=abnf";//注意sub=asr,grt=abnf const char* sess_id = QISRSessionBegin(grammar, param, &ret);//将语法传入QISRSessionBegin if ( MSP_SUCCESS != ret ) { printf("QISRSessionBegin err %d\n", ret); return ret; } fp = fopen( asrfile , "rb");//我们提供了几个音频文件,测试时根据需要在这里更换 if ( NULL == fp ) { printf("failed to open file,please check the file.\n"); QISRSessionEnd(sess_id, "normal"); return -1; } printf("writing audio...\n"); while ( !feof(fp) ) { len = (unsigned int)fread(buff, 1, BUFFER_NUM, fp); feof(fp) ? status = MSP_AUDIO_SAMPLE_LAST : status = MSP_AUDIO_SAMPLE_CONTINUE;//最后一块音频要使用last ret = QISRAudioWrite(sess_id, buff, len, status, &ep_status, &rec_status); if ( ret != MSP_SUCCESS ) { printf("\nQISRAudioWrite err %d\n", ret); break; } if ( rec_status == MSP_REC_STATUS_SUCCESS ) { const char* result = QISRGetResult(sess_id, &rslt_status, 0, &ret); if (ret != MSP_SUCCESS ) { printf("error code: %d\n", ret); break; } else if( rslt_status == MSP_REC_STATUS_NO_MATCH ) printf("get result nomatch\n"); else { if ( result != NULL ) printf("get result[%d/%d]:len:%d\n %s\n", ret, rslt_status,strlen(result), result); } } printf("."); Sleep(200);//因为是模拟录音,为了避免数据发送太快造成缓冲区溢出,所以这里暂停200ms,如果是实时录音,不必暂停 } printf("\n"); if (ret == MSP_SUCCESS) { printf("get reuslt\n"); char asr_result[1024] = ""; unsigned int pos_of_result = 0; int loop_count = 0; do { const char* result = QISRGetResult(sess_id, &rslt_status, 0, &ret); if ( ret != 0 ) { printf("QISRGetResult err %d\n", ret); break; } if( rslt_status == MSP_REC_STATUS_NO_MATCH ) { printf("get result nomatch\n"); } else if ( result != NULL ) { printf("[%d]:get result[%d/%d]: %s\n", (loop_count), ret, rslt_status, result); 
strcpy(asr_result+pos_of_result,result); pos_of_result += (unsigned int)strlen(result); } else { printf("[%d]:get result[%d/%d]\n",(loop_count), ret, rslt_status); } Sleep(500); } while (rslt_status != MSP_REC_STATUS_COMPLETE && loop_count++ < 30); if (strcmp(asr_result,"")==0) { printf("no result\n"); } } QISRSessionEnd(sess_id, NULL); printf("QISRSessionEnd.\n"); fclose(fp); return 0; }
/*
 * Record audio via record(), run it through the offline ("engine_type =
 * local") grammar recogniser and set flag1 / flag2 depending on which of
 * tmpt1 / tmpt2 occurs in the result.
 *
 * udata - supplies grammar_id, used as the local_grammar session parameter.
 *
 * Uses the file-level names filename, rec_rslt, tmpt1, tmpt2, flag1 and flag2.
 *
 * Returns 1 when recording fails; otherwise the last SDK error code, which is
 * MSP_SUCCESS on success and stays -1 when the failure happened before any
 * SDK call (file open, allocation or read error).
 */
int run_asr(UserData *udata)
{
    char asr_params[MAX_PARAMS_LEN] = {0};
    const char *session_id = NULL;
    FILE *f_pcm = NULL;
    char *pcm_data = NULL;
    long pcm_count = 0;
    long pcm_size = 0;
    int aud_stat = MSP_AUDIO_SAMPLE_CONTINUE;
    int ep_status = MSP_EP_LOOKING_FOR_SPEECH;
    int rec_status = MSP_REC_STATUS_INCOMPLETE;
    int rss_status = MSP_REC_STATUS_INCOMPLETE;
    int errcode = -1;

    if (record()) {
        printf("Finish recording!!\n");
    } else {
        printf("Fail to record!\n");
        return 1;
    }

    f_pcm = fopen(filename, "rb");
    if (NULL == f_pcm) {
        /* Print the file name; the FILE pointer is NULL here and is not a string. */
        printf("打开\"%s\"失败![%s]\n", filename, strerror(errno));
        goto run_error;
    }
    fseek(f_pcm, 0, SEEK_END);
    pcm_size = ftell(f_pcm);
    fseek(f_pcm, 0, SEEK_SET);
    if (pcm_size <= 0)
        goto run_error; /* ftell failed or the recording is empty */
    pcm_data = (char *)malloc((size_t)pcm_size);
    if (NULL == pcm_data)
        goto run_error;
    if (fread((void *)pcm_data, 1, (size_t)pcm_size, f_pcm) != (size_t)pcm_size)
        goto run_error; /* a short read would upload uninitialised bytes */
    fclose(f_pcm);
    f_pcm = NULL;

    /* Parameters for offline grammar recognition. */
    snprintf(asr_params, MAX_PARAMS_LEN - 1,
        "engine_type = local, "
        "asr_res_path = %s, sample_rate = %d, "
        "grm_build_path = %s, local_grammar = %s, "
        "result_type = xml, result_encoding = utf-8, ",
        ASR_RES_PATH,
        SAMPLE_RATE_16K,
        GRM_BUILD_PATH,
        udata->grammar_id
        );
    session_id = QISRSessionBegin(NULL, asr_params, &errcode);
    if (NULL == session_id)
        goto run_error;
    printf("开始识别...\n");

    while (1) {
        /* 6400-byte chunks; a tail shorter than two chunks is sent in one go. */
        unsigned int len = 6400;

        if (pcm_size < 12800) {
            len = (unsigned int)pcm_size;
        }
        if (0 == len)
            break;

        aud_stat = (0 == pcm_count) ? MSP_AUDIO_SAMPLE_FIRST : MSP_AUDIO_SAMPLE_CONTINUE;

        printf(">");
        fflush(stdout);
        errcode = QISRAudioWrite(session_id, (const void *)&pcm_data[pcm_count], len, aud_stat, &ep_status, &rec_status);
        if (MSP_SUCCESS != errcode)
            goto run_error;

        pcm_count += (long)len;
        pcm_size -= (long)len;

        /* end of speech detected */
        if (MSP_EP_AFTER_SPEECH == ep_status)
            break;

        usleep(150 * 1000); /* simulates the pause between utterances */
    }
    /* explicitly signal the end of the audio */
    QISRAudioWrite(session_id, (const void *)NULL, 0, MSP_AUDIO_SAMPLE_LAST, &ep_status, &rec_status);

    free(pcm_data);
    pcm_data = NULL;

    /* Fetch the recognition result.
     * NOTE(review): rec_rslt is overwritten on every poll, so only the value
     * returned by the final call is kept - confirm this is intended. */
    while (MSP_REC_STATUS_COMPLETE != rss_status && MSP_SUCCESS == errcode) {
        rec_rslt = QISRGetResult(session_id, &rss_status, 0, &errcode);
        usleep(150 * 1000);
    }
    printf("\n识别结束:\n");
    printf("=============================================================\n");
    if (NULL != rec_rslt) {
        printf("%s\n", rec_rslt);
        if (strstr(rec_rslt, tmpt1))
            flag1 = true;
        else if (strstr(rec_rslt, tmpt2))
            flag2 = true;
    } else {
        printf("没有识别结果!\n");
    }
    printf("=============================================================\n");

    goto run_exit;

run_error:
    free(pcm_data);
    pcm_data = NULL;
    if (NULL != f_pcm) {
        fclose(f_pcm);
        f_pcm = NULL;
    }
run_exit:
    QISRSessionEnd(session_id, NULL);
    return errcode;
}
void run_asr(const char* audio_file, const char* params, char* grammar_id) { const char* session_id = NULL; char rec_result[BUFFER_SIZE] = {'\0'}; char hints[HINTS_SIZE] = {'\0'}; //hints为结束本次会话的原因描述,由用户自定义 unsigned int total_len = 0; int aud_stat = MSP_AUDIO_SAMPLE_CONTINUE; //音频状态 int ep_stat = MSP_EP_LOOKING_FOR_SPEECH; //端点检测 int rec_stat = MSP_REC_STATUS_SUCCESS; //识别状态 int errcode = MSP_SUCCESS; FILE* f_pcm = NULL; char* p_pcm = NULL; long pcm_count = 0; long pcm_size = 0; long read_size = 0; if (NULL == audio_file) goto asr_exit; f_pcm = fopen(audio_file, "rb"); if (NULL == f_pcm) { printf("\nopen [%s] failed!\n", audio_file); goto asr_exit; } fseek(f_pcm, 0, SEEK_END); pcm_size = ftell(f_pcm); //获取音频文件大小 fseek(f_pcm, 0, SEEK_SET); p_pcm = (char*)malloc(pcm_size); if (NULL == p_pcm) { printf("\nout of memory!\n"); goto asr_exit; } read_size = fread((void *)p_pcm, 1, pcm_size, f_pcm); //读取音频文件内容 if (read_size != pcm_size) { printf("\nread [%s] failed!\n", audio_file); goto asr_exit; } printf("\n开始语音识别 ...\n"); session_id = QISRSessionBegin(grammar_id, params, &errcode); if (MSP_SUCCESS != errcode) { printf("\nQISRSessionBegin failed, error code:%d\n", errcode); goto asr_exit; } while (1) { unsigned int len = 10 * FRAME_LEN; // 每次写入200ms音频(16k,16bit):1帧音频20ms,10帧=200ms。16k采样率的16位音频,一帧的大小为640Byte int ret = 0; if (pcm_size < 2 * len) len = pcm_size; if (len <= 0) break; aud_stat = MSP_AUDIO_SAMPLE_CONTINUE; if (0 == pcm_count) aud_stat = MSP_AUDIO_SAMPLE_FIRST; printf(">"); ret = QISRAudioWrite(session_id, (const void *)&p_pcm[pcm_count], len, aud_stat, &ep_stat, &rec_stat); if (MSP_SUCCESS != ret) { printf("\nQISRAudioWrite failed, error code:%d\n",ret); goto asr_exit; } pcm_count += (long)len; pcm_size -= (long)len; if (MSP_EP_AFTER_SPEECH == ep_stat) break; usleep(200*1000); //模拟人说话时间间隙,10帧的音频长度为200ms } errcode = QISRAudioWrite(session_id, NULL, 0, MSP_AUDIO_SAMPLE_LAST, &ep_stat, &rec_stat); if (MSP_SUCCESS != errcode) { printf("\nQISRAudioWrite failed, 
error code:%d\n",errcode); goto asr_exit; } while (MSP_REC_STATUS_COMPLETE != rec_stat) { const char *rslt = QISRGetResult(session_id, &rec_stat, 0, &errcode); if (MSP_SUCCESS != errcode) { printf("\nQISRGetResult failed, error code: %d\n", errcode); goto asr_exit; } if (NULL != rslt) { unsigned int rslt_len = strlen(rslt); total_len += rslt_len; if (total_len >= BUFFER_SIZE) { printf("\nno enough buffer for rec_result !\n"); goto asr_exit; } strncat(rec_result, rslt, rslt_len); } usleep(150*1000); //防止频繁占用CPU } printf("\n语音识别结束\n"); printf("=============================================================\n"); printf("%s",rec_result); printf("=============================================================\n"); asr_exit: if (NULL != f_pcm) { fclose(f_pcm); f_pcm = NULL; } if (NULL != p_pcm) { free(p_pcm); p_pcm = NULL; } QISRSessionEnd(session_id, hints); }
void run_iat() { bool error = false; int ret = MSP_SUCCESS; int i = 0; FILE* fp = NULL; FILE* fout = NULL; char buff[BUFFER_NUM]; int len; int status = MSP_AUDIO_SAMPLE_CONTINUE, ep_status = -1, rec_status = -1, rslt_status = -1; ///引擎初始化,只需初始化一次 ///APPID请勿随意改动 ret = QISRInit("appid=510f2d72"); ///第二个参数为传递的参数,使用会话模式,使用speex编解码,使用16k16bit的音频数据 ///第三个参数为返回码 const char* param = "sub=iat,ssm=1,auf=audio/L16;rate=16000,aue=speex,ent=sms16k,rst=plain"; const char* sess_id = QISRSessionBegin(NULL, param, &ret); if ( MSP_SUCCESS != ret ) { printf("QISRSessionBegin err %d\n", ret); error = true; } ///模拟录音,输入音频 if (error == false) { fp = fopen("iat_demo_test.wav", "rb"); if ( NULL == fp ) { printf("failed to open file,please check the file.\n"); error = true; } } ///结果输出到文件 if (error == false) { fout = fopen("iat_result.txt", "ab"); if( NULL == fout ) { printf("failed to open file,please check the file.\n"); error = true; } } if (error == false) { printf("writing audio...\n"); char param_value[32] = "";//参数值的字符串形式 size_t value_len = 32; //字符串长度或buffer长度 int volume = 0;//音量数值 while ( !feof(fp) ) { len = fread(buff, 1, BUFFER_NUM, fp); printf("."); ///开始向服务器发送音频数据 ret = QISRAudioWrite(sess_id, buff, len, status, &ep_status, &rec_status); if ( ret != MSP_SUCCESS ) { printf("\nQISRAudioWrite err %d\n", ret); error = true; break; } /*********获取当前发送音频的音量信息**********/ value_len = 32;//value_len既是传入参数,又是传出参数,每次调用QTTSGetParam时要调整为buffer长度 ret = QISRGetParam(sess_id,"volume",param_value,&value_len);//获取音量信息,获取上行流量和下行流量的例子见ttsdemo if ( ret != MSP_SUCCESS ) { printf("QISRGetParam: qisr get param failed Error code %d.\n",ret); char key = _getch(); break; } volume = atoi(param_value);//获取到的音量信息可以用于在界面上用不同的图片展示动态效果 //printf("volume== %d \n",volume); for (int i=0;i<volume;i++) { printf("."); } printf("\n"); /*******获取当前发送音频的音量信息结束**********/ if (ep_status == MSP_EP_AFTER_SPEECH)//检测到音频后端点 { printf("QISRAudioWrite: ep_status == MSP_EP_AFTER_SPEECH.\n"); break; } ///服务器返回部分结果 if ( rec_status 
== MSP_REC_STATUS_SUCCESS ) { const char* result = QISRGetResult(sess_id, &rslt_status, 0, &ret); if( rslt_status == MSP_REC_STATUS_NO_MATCH ) printf("get result nomatch\n"); else { if ( result != NULL ) fwrite(result, 1, strlen(result), fout); printf("get result[%d/%d]: %s\n", ret, rslt_status, result); } } Sleep(200); } printf("\n"); fclose(fp); } ///最后一块数据 if (error == false) { status = MSP_AUDIO_SAMPLE_LAST; ret = QISRAudioWrite(sess_id, buff, 1, status, &ep_status, &rec_status); if ( ret != MSP_SUCCESS ) { printf("QISRAudioWrite write last audio err %d\n", ret); error = true; } } ///最后一块数据发完之后,循环从服务器端获取结果 ///考虑到网络环境不好的情况下,需要对循环次数作限定 if (error == false) { printf("get reuslt\n"); int loop_count = 0; do { const char* result = QISRGetResult(sess_id, &rslt_status, 0, &ret); if ( ret != MSP_SUCCESS ) { printf("QISRGetResult err %d\n", ret); error = true; break; } if( rslt_status == MSP_REC_STATUS_NO_MATCH ) printf("get result nomatch\n"); else { if ( result != NULL ) fwrite(result, 1, strlen(result), fout); printf("[%d]:get result[%d/%d]: %s\n", (loop_count), ret, rslt_status, result); } Sleep(500); } while (rslt_status != MSP_REC_STATUS_COMPLETE && loop_count++ < 30); } if( NULL != fout ) { fclose(fout); } ret = QISRSessionEnd(sess_id, NULL); if ( ret != MSP_SUCCESS ) { printf("QISRSessionEnd err %d\n", ret); return; } printf("QISRSessionEnd.\n"); ret = QISRFini(); return; }
void run_iat(const char* audio_file, const char* session_begin_params) { const char* session_id = NULL; char rec_result[BUFFER_SIZE] = {NULL}; char hints[HINTS_SIZE] = {NULL}; //hints为结束本次会话的原因描述,由用户自定义 unsigned int total_len = 0; int aud_stat = MSP_AUDIO_SAMPLE_CONTINUE ; //音频状态 int ep_stat = MSP_EP_LOOKING_FOR_SPEECH; //端点检测 int rec_stat = MSP_REC_STATUS_SUCCESS ; //识别状态 int errcode = MSP_SUCCESS ; FILE* f_pcm = NULL; char* p_pcm = NULL; long pcm_count = 0; long pcm_size = 0; long read_size = 0; if (NULL == audio_file) goto iat_exit; f_pcm = fopen(audio_file, "rb"); if (NULL == f_pcm) { printf("\nopen [%s] failed! \n", audio_file); goto iat_exit; } fseek(f_pcm, 0, SEEK_END); pcm_size = ftell(f_pcm); //获取音频文件大小 fseek(f_pcm, 0, SEEK_SET); p_pcm = (char *)malloc(pcm_size); if (NULL == p_pcm) { printf("\nout of memory! \n"); goto iat_exit; } read_size = fread((void *)p_pcm, 1, pcm_size, f_pcm); //读取音频文件内容 if (read_size != pcm_size) { printf("\nread [%s] error!\n", audio_file); goto iat_exit; } printf("\n开始语音听写 ...\n"); session_id = QISRSessionBegin(NULL, session_begin_params, &errcode); //听写不需要语法,第一个参数为NULL if (MSP_SUCCESS != errcode) { printf("\nQISRSessionBegin failed! error code:%d\n", errcode); goto iat_exit; } char *p_data = p_pcm; int rec_number = 0; while (1) { unsigned int len = 10 * FRAME_LEN; // 每次写入200ms音频(16k,16bit):1帧音频20ms,10帧=200ms。16k采样率的16位音频,一帧的大小为640Byte int ret = 0; if (pcm_size < 2 * len) len = pcm_size; if (len <= 0) break; aud_stat = MSP_AUDIO_SAMPLE_CONTINUE; if (0 == pcm_count) aud_stat = MSP_AUDIO_SAMPLE_FIRST; printf(">"); ret = QISRAudioWrite(session_id, (const void *)p_data, len, aud_stat, &ep_stat, &rec_stat); if (MSP_SUCCESS != ret) { printf("\nQISRAudioWrite failed! error code:%d\n", ret); goto iat_exit; } if (MSP_REC_STATUS_SUCCESS == rec_stat) //已经有部分听写结果 { const char *rslt = QISRGetResult(session_id, &rec_stat, 0, &errcode); if (MSP_SUCCESS != errcode) { printf("\nQISRGetResult failed! 
error code: %d\n", errcode); goto iat_exit; } if (NULL != rslt) { unsigned int rslt_len = strlen(rslt); total_len += rslt_len; if (total_len >= BUFFER_SIZE) { printf("\nno enough buffer for rec_result !\n"); goto iat_exit; } strncat(rec_result, rslt, rslt_len); } } else { //printf("还没有听写结果\n"); } pcm_count += (long)len; //pcm_size -= (long)len; p_data += (long)len; rec_number++; if (MSP_EP_AFTER_SPEECH == ep_stat) { printf("loop = %d, 检测到后端点\n", rec_number); break; } Sleep(200); //模拟人说话时间间隙。200ms对应10帧的音频 } printf("\n语音听写结束\n"); printf("=============================================================\n"); printf("%s\n",rec_result); printf("=============================================================\n"); iat_exit: if (NULL != f_pcm) { fclose(f_pcm); f_pcm = NULL; } if (NULL != p_pcm) { free(p_pcm); p_pcm = NULL; } QISRSessionEnd(session_id, hints); }
void run_iat(const char* src_wav_filename , const char* des_text_filename , const char* param) { bool error = false; int ret = MSP_SUCCESS; int i = 0; FILE* fp = NULL; FILE* fout = NULL; char buff[BUFFER_NUM]; unsigned int len; int status = MSP_AUDIO_SAMPLE_CONTINUE, ep_status = -1, rec_status = -1, rslt_status = -1; ///第二个参数为传递的参数,使用会话模式,使用speex编解码,使用16k16bit的音频数据 ///第三个参数为返回码 const char* sess_id = QISRSessionBegin(NULL, param, &ret); if ( MSP_SUCCESS != ret ) { printf("QISRSessionBegin err %d\n", ret); error = true; } ///模拟录音,输入音频 if (error == false) { fp = fopen( src_wav_filename , "rb"); if ( NULL == fp ) { printf("failed to open file,please check the file.\n"); error = true; } } ///结果输出到文件 if (error == false) { fout = fopen( des_text_filename , "ab"); if( NULL == fout ) { printf("failed to open file,please check the file.\n"); error = true; } } if (error == false) { printf("writing audio...\n"); char param_value[32] = "";//参数值的字符串形式 unsigned int value_len = 32; //字符串长度或buffer长度 int volume = 0;//音量数值 while ( !feof(fp) ) { len = (unsigned int)fread(buff, 1, BUFFER_NUM, fp); printf("."); feof(fp) ? 
status = MSP_AUDIO_SAMPLE_LAST : status = MSP_AUDIO_SAMPLE_CONTINUE;//最后一块音频要使用last ///开始向服务器发送音频数据 ret = QISRAudioWrite(sess_id, buff, len, status, &ep_status, &rec_status); if ( ret != MSP_SUCCESS ) { printf("\nQISRAudioWrite err %d\n", ret); error = true; break; } /*********获取当前发送音频的音量信息**********/ value_len = 32;//value_len既是传入参数,又是传出参数,每次调用QTTSGetParam时要调整为buffer长度 ret = QISRGetParam(sess_id,"volume",param_value,&value_len);//获取音量信息,获取上行流量和下行流量的例子见ttsdemo if ( ret != MSP_SUCCESS ) { printf("QISRGetParam: qisr get param failed Error code %d.\n",ret); char key = getchar(); break; } volume = atoi(param_value);//获取到的音量信息可以用于在界面上用不同的图片展示动态效果 //printf("volume== %d \n",volume); for (int i=0;i<volume;i++) { printf("."); } printf("\n"); /*******获取当前发送音频的音量信息结束**********/ if (ep_status == MSP_EP_AFTER_SPEECH)//检测到音频后端点 { printf("QISRAudioWrite: ep_status == MSP_EP_AFTER_SPEECH.\n"); break; } ///服务器返回部分结果 if ( rec_status == MSP_REC_STATUS_SUCCESS ) { const char* result = QISRGetResult(sess_id, &rslt_status, 0, &ret); if( rslt_status == MSP_REC_STATUS_NO_MATCH ) printf("get result nomatch\n"); else { if ( result != NULL ) fwrite(result, 1, strlen(result), fout); printf("get result[%d/%d]: %s\n", ret, rslt_status, result); } } usleep(200000);//因为是模拟录音,为了避免数据发送太快造成缓冲区溢出,所以这里暂停200ms,如果是实时录音,不必暂停 } printf("\n"); fclose(fp); } ///最后一块数据发完之后,循环从服务器端获取结果 ///考虑到网络环境不好的情况下,可以对循环次数作限定 if (error == false) { printf("get reuslt\n"); int loop_count = 0; do { const char* result = QISRGetResult(sess_id, &rslt_status, 0, &ret); if ( ret != MSP_SUCCESS ) { printf("QISRGetResult err %d\n", ret); error = true; break; } if( rslt_status == MSP_REC_STATUS_NO_MATCH ) printf("get result nomatch\n"); else { if ( result != NULL ) fwrite(result, 1, strlen(result), fout); printf("[%d]:get result[%d/%d]: %s\n", (loop_count), ret, rslt_status, result); } usleep(500000); } while (rslt_status != MSP_REC_STATUS_COMPLETE && loop_count++ < 30); } if( NULL != fout ) { const char* result = "\r\n"; 
fwrite(result, 1, strlen(result), fout); fclose(fout); } ret = QISRSessionEnd(sess_id, NULL); if ( ret != MSP_SUCCESS ) { printf("QISRSessionEnd err %d\n", ret); return; } printf("QISRSessionEnd.\n"); return; }
/*!
 * function to open the asr interface
 *
 * Allocates the per-handle state from ah->memory_pool, copies the tuning
 * defaults from `globals`, and begins an iFlytek dictation session whose id
 * is stored in ps->ifly_session_id.
 *
 * \param ah     ASR handle; receives private_info, rate and codec
 * \param codec  ignored: the codec is always forced to "L16"
 * \param rate   sample rate; only 8000 and 16000 are stored in ah->rate
 * \param dest   unused
 * \param flags  unused
 * \return SWITCH_STATUS_SUCCESS on success, SWITCH_STATUS_MEMERR when the
 *         state cannot be allocated, SWITCH_STATUS_FALSE when the iFlytek
 *         session cannot be started
 */
static switch_status_t pocketsphinx_asr_open(switch_asr_handle_t *ah, const char *codec, int rate, const char *dest, switch_asr_flag_t *flags)
{
	pocketsphinx_t *ps;
	/* NOTE(review): sample_rate is fixed at 16000 here although ah->rate may be 8000 - confirm. */
	const char *session_begin_params = "sub = iat, domain = iat, language = zh_ch, accent = mandarin, sample_rate = 16000, result_type = plain, result_encoding = utf8";
	int errcode = MSP_SUCCESS;

	/* chen */
	switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, ">>>>>>>>pocketsphinx_asr_open<<<<<<<<<\n");

	if (!(ps = (pocketsphinx_t *) switch_core_alloc(ah->memory_pool, sizeof(*ps)))) {
		return SWITCH_STATUS_MEMERR;
	}

	switch_mutex_init(&ps->flag_mutex, SWITCH_MUTEX_NESTED, ah->memory_pool);
	ah->private_info = ps;

	if (rate == 8000) {
		ah->rate = 8000;
	} else if (rate == 16000) {
		ah->rate = 16000;
	} else {
		/* NOTE(review): an unsupported rate is only logged; ah->rate is left untouched. */
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Invalid rate %d. Only 8000 and 16000 are supported.\n", rate);
	}

	codec = "L16";

	ah->codec = switch_core_strdup(ah->memory_pool, codec);

	ps->thresh = globals.thresh;
	ps->silence_hits = globals.silence_hits;
	ps->listen_hits = globals.listen_hits;
	ps->org_silence_hits = ps->silence_hits;
	ps->start_input_timers = globals.start_input_timers;
	ps->no_input_timeout = globals.no_input_timeout;
	ps->speech_timeout = globals.speech_timeout;
	ps->confidence_threshold = globals.confidence_threshold;

	/* iFlytek session state */
	ps->ifly_session_id = NULL;
	ps->ifly_total_len = 0;
	ps->ifly_aud_stat = MSP_AUDIO_SAMPLE_CONTINUE;  /* audio status */
	ps->ifly_ep_stat = MSP_EP_LOOKING_FOR_SPEECH;   /* endpoint detection */
	ps->ifly_rec_stat = MSP_REC_STATUS_SUCCESS;     /* recognition status */
	ps->ifly_wait_result = SWITCH_FALSE;

	/* ifly open session */
	ps->ifly_session_id = QISRSessionBegin(NULL, session_begin_params, &errcode);
	if (MSP_SUCCESS != errcode || NULL == ps->ifly_session_id) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, ">>>>>>>>QISRSessionBegin fail error !!!<<<<<<<<<\n");
		/* Report the failure so callers do not use a handle without a session. */
		ps->ifly_session_id = NULL;
		return SWITCH_STATUS_FALSE;
	}

	return SWITCH_STATUS_SUCCESS;
}