int IflySTT::stt(void *data, int len) { if (NULL == data || len <= 0) { return -1; } int ret = MSP_SUCCESS; int errcode = MSP_SUCCESS; int ep_stat = MSP_EP_LOOKING_FOR_SPEECH; // 端点检测 int rec_stat = MSP_REC_STATUS_SUCCESS; // 识别状态 ret = QISRAudioWrite(session_id_, data, len, audio_stat_, &ep_stat, &rec_stat); if (MSP_SUCCESS != ret) { QISRSessionEnd(session_id_, "QISRAudioWrite error"); MSPLogout(); return ret; } if (MSP_AUDIO_SAMPLE_FIRST == audio_stat_) { audio_stat_ = MSP_AUDIO_SAMPLE_CONTINUE; } return 0; }
int main(int argc, char* argv[]) { ///APPID请勿随意改动 const char* login_configs = " appid = 5392db98, work_dir = . "; const char* text = "10086"; const char* filename = "text_to_speech_withnumber_test.pcm"; const char* param = "aue = speex-wb;3, vcn=xiaoyan, spd = 50, vol = 50, tte = utf8, rdn = 2"; int ret = 0; char key = 0; //用户登录 ret = MSPLogin(NULL, NULL, login_configs); if ( ret != MSP_SUCCESS ) { printf("MSPLogin failed , Error code %d.\n",ret); } //音频合成 ret = text_to_speech(text,filename,param); if ( ret != MSP_SUCCESS ) { printf("text_to_speech: failed , Error code %d.\n",ret); } //退出登录 MSPLogout(); return 0; }
int main(int argc, char* argv[]) { const char* login_config = "appid = 55dbb5fb,work_dir = . "; const char* param = "rst=plain,rse=utf8,sub=asr,aue=speex-wb,auf=audio/L16;rate=16000,ent=sms16k"; //注意sub=asr,16k音频aue=speex-wb,8k音频识别aue=speex, int ret = 0 ; char key = 0 ; int grammar_flag = 0;//0:不上传词表;1:上传词表 const char* asrfile = get_audio_file();//选择 ret = MSPLogin(NULL, NULL, login_config); if ( ret != MSP_SUCCESS ) { printf("MSPLogin failed , Error code %d.\n",ret); return 0 ; } memset(GrammarID, 0, sizeof(GrammarID)); ret = get_grammar_id(grammar_flag); if(ret != MSP_SUCCESS) { printf("get_grammar_id with errorCode: %d \n", ret); return 0 ; } ret = run_asr(asrfile, param); if(ret != MSP_SUCCESS) { printf("run_asr with errorCode: %d \n", ret); return 0; } MSPLogout(); return 0; }
int main(int argc, char* argv[]) { ///APPID请勿随意改动 const char* login_configs = " appid = 55dbb5fb, work_dir = . "; const char* text = "10086"; const char* filename = "text_to_speech_withnumber_test.pcm"; const char* param = "aue = speex-wb,auf=audio/L16;rate=16000, vcn=xiaoyan, spd = 5, vol = 5, tte = utf8, rdn = 2";//8k音频合成参数:aue=speex,auf=audio/L16;rate=8000,其他参数意义参考参数列表 int ret = 0; char key = 0; //用户登录 ret = MSPLogin(NULL, NULL, login_configs); if ( ret != MSP_SUCCESS ) { printf("MSPLogin failed , Error code %d.\n",ret); } //音频合成 ret = text_to_speech(text,filename,param); if ( ret != MSP_SUCCESS ) { printf("text_to_speech: failed , Error code %d.\n",ret); } //退出登录 MSPLogout(); return 0; }
char *get_from_server(char *file) { const char* login_config = "appid = 55801297,work_dir = . "; const char* param = "rst=plain,rse=utf8,sub=asr,aue=speex-wb,auf=audio/L16;rate=16000,ent=sms16k"; //注意sub=asr,16k音频aue=speex-wb,8k音频识别aue=speex, int ret = 0 ; char *result; char key = 0 ; int grammar_flag = 0;//0:不上传词表;1:上传词表 ret = MSPLogin(NULL, NULL, login_config); if ( ret != MSP_SUCCESS ) { printf("MSPLogin failed , Error code %d.\n",ret); return 0 ; } else { strcpy(GrammarID, "e7eb1a443ee143d5e7ac52cb794810fe"); result = run_asr(file, param); if(result == NULL) { printf("run_asr with errorCode: %d \n", ret); } MSPLogout(); } return result; }
int main(int argc, char* argv[]) { const char* login_config = "appid = 5392db98, work_dir = . "; const char* param = "rst=plain,rse=gb2312,sub=asr,ssm=1,aue=speex,auf=audio/L16;rate=16000,ent=sms16k";//注意sub=asr const char* grammar = NULL; int ret = 0 ; char key = 0 ; const char* asrfile = get_audio_file(); ret = MSPLogin(NULL, NULL, login_config); if ( ret != MSP_SUCCESS ) { printf("MSPLogin failed , Error code %d.\n",ret); return 0 ; } grammar = get_grammar( "gm_continuous_digit.abnf" ); if(ret != MSP_SUCCESS) { printf("getExID with errorCode: %d \n", ret); return 0; } ret = run_asr(asrfile, param, grammar); if(ret != MSP_SUCCESS) { printf("run_asr with errorCode: %d \n", ret); return 0; } release_grammar(&grammar); MSPLogout(); return 0; }
int main(int argc, char* argv[]) { ///APPID请勿随意改动 const char* login_configs = " appid = 55dbb5fb, work_dir = . "; const char* text = "讯飞语音,沟通无限。"; const char* filename = "text_to_speech_test_withspeechmark.pcm"; const char* param = "aue = speex-wb;3, vcn=xiaoyan, spd = 5, vol = 5, tte = utf8, smk=3";//带参数smk=3,获取详细的语音标注,json格式,请自行解析 int ret = 0; char key = 0; //用户登录 ret = MSPLogin(NULL, NULL, login_configs); if ( ret != MSP_SUCCESS ) { printf("MSPLogin failed , Error code %d.\n",ret); } //音频合成 ret = text_to_speech(text,filename,param); if ( ret != MSP_SUCCESS ) { printf("text_to_speech: failed , Error code %d.\n",ret); } //退出登录 MSPLogout(); return 0; }
/**
 * Shut the recognizer down: clear the running flag, end the recognition
 * session with reason "Normal", and log out of the SDK.
 *
 * @return Always 0.
 */
int IflySTT::close()
{
    // The flag is cleared before the session is torn down.
    run_ = false;

    QISRSessionEnd(session_id_, "Normal");

    // Log out.
    MSPLogout();

    return 0;
}
int main(int argc, char* argv[]) { int ret = MSP_SUCCESS; const char* login_params = "appid = 5652758a, work_dir = ."; //登录参数,appid与msc库绑定,请勿随意改动 /* * sub: 请求业务类型 * result_type: 识别结果格式 * result_encoding: 结果编码格式 * * 详细参数说明请参阅《iFlytek MSC Reference Manual》 */ const char* session_begin_params = "sub = asr, result_type = plain, result_encoding = utf8"; char* grammar_id = NULL; /* 用户登录 */ ret = MSPLogin(NULL, NULL, login_params); //第一个参数是用户名,第二个参数是密码,均传NULL即可,第三个参数是登录参数 if (MSP_SUCCESS != ret) { printf("MSPLogin failed, error code: %d.\n",ret); goto exit; //登录失败,退出登录 } printf("\n##################################################\n"); printf("## 语音识别(Automatic Speech Recognition)技术 ##\n"); printf("## 能够从语音中识别出特定的命令词或语句模式。 ##\n"); printf("##################################################\n\n"); grammar_id = (char*)malloc(GRAMID_LEN);//动态分配内存 if (NULL == grammar_id) { printf("out of memory !\n"); goto exit; } memset(grammar_id, 0, GRAMID_LEN); printf("上传语法 ...\n"); ret = get_grammar_id(grammar_id, GRAMID_LEN); if (MSP_SUCCESS != ret) goto exit; printf("上传语法成功\n"); run_asr("wav/iflytek01.wav", session_begin_params, grammar_id); //iflytek01对应的音频内容:“18012345678” exit: if (NULL != grammar_id) { free(grammar_id); grammar_id = NULL; } printf("按任意键退出 ...\n"); getchar(); MSPLogout(); //退出登录 return 0; }
/*
 * Synthesize speech for the given text and send the audio to the caller
 * through write_head/write_exact.
 *
 * buf: text to synthesize (len bytes).
 * len: number of bytes in buf.
 *
 * Returns 0 after the reply (one status byte followed by the audio) has
 * been written. Returns a non-zero code, without writing a reply, when
 * login, session setup, text upload, audio retrieval or buffer growth
 * fails. Every path that logged in also logs out, and every path that
 * opened a session ends it.
 */
int text_to_speech(byte *buf, int len)
{
    const char *sess_id = NULL;
    char *obuf = NULL;             /* accumulated audio */
    unsigned int audio_len = 0;    /* size of the chunk just fetched */
    unsigned int olen = 0;         /* bytes accumulated in obuf */
    int synth_status = 1;
    int ret = 0;
    byte erlret = 0;               /* status byte that prefixes the reply */

    /* Bounded print: buf is length-delimited and may lack a terminating NUL. */
    debug("Texting to speech %d bytes, %.*s", len, len, (const char *)buf);

    ret = MSPLogin(NULL, NULL, login_configs);
    if (ret != MSP_SUCCESS) {
        debug("MSPLogin failed: %d", ret);
        return ret;
    }

    sess_id = QTTSSessionBegin(tts_params, &ret);
    if (ret != MSP_SUCCESS) {
        debug("QTTSSessionBegin failed: %d", ret);
        MSPLogout();
        return ret;
    }

    ret = QTTSTextPut(sess_id, buf, len, NULL);
    if (ret != MSP_SUCCESS) {
        debug("QTTSTextPut failed: %d", ret);
        QTTSSessionEnd(sess_id, "TextPutError");
        MSPLogout();
        return ret;
    }

    for (;;) {
        const void *data = QTTSAudioGet(sess_id, &audio_len, &synth_status, &ret);
        if (ret != MSP_SUCCESS) {
            debug("QTTSAudioGet failed: %d", ret);
            break;
        }
        if (NULL != data && audio_len > 0) {
            /* Grow through a temporary so the old buffer is not lost on failure. */
            char *grown = (char *)realloc(obuf, olen + audio_len);
            if (NULL == grown) {
                debug("out of memory after %d bytes speech", olen);
                ret = -1;
                break;
            }
            obuf = grown;
            memcpy(obuf + olen, data, audio_len);
            olen += audio_len;
        }
        /* 2 is the status the original loop treated as "synthesis finished". */
        if (synth_status == 2)
            break;
        usleep(15000);  /* wait before polling for the next chunk */
    }

    if (ret != MSP_SUCCESS) {
        /* Do not report success with partial audio. */
        free(obuf);
        QTTSSessionEnd(sess_id, "AudioGetError");
        MSPLogout();
        return ret;
    }

    debug("got %d bytes speech", olen);
    write_head(sizeof(erlret) + olen);
    write_exact(&erlret, sizeof(erlret));
    write_exact(obuf, olen);

    free(obuf);
    QTTSSessionEnd(sess_id, NULL);
    MSPLogout();
    return 0;
}
int main(int argc, char* argv[]) { if(argc != 3) { printf("usage: ./tts_sample \"hello world\" hello.wav\n"); return -1; } int ret = MSP_SUCCESS; const char* login_params = "appid = 568a1829, work_dir = .";//登录参数,appid与msc库绑定,请勿随意改动 /* * rdn: 合成音频数字发音方式 * volume: 合成音频的音量 * pitch: 合成音频的音调 * speed: 合成音频对应的语速 * voice_name: 合成发音人 * sample_rate: 合成音频采样率 * text_encoding: 合成文本编码格式 * * 详细参数说明请参阅《iFlytek MSC Reference Manual》 */ const char* session_begin_params = "voice_name = xiaoyan, text_encoding = UTF8, sample_rate = 16000, speed = 30, volume = 50, pitch = 50, rdn = 2"; const char* filename = argv[2]; //合成的语音文件名称 const char* text = argv[1]; //合成文本 /* 用户登录 */ ret = MSPLogin(NULL, NULL, login_params);//第一个参数是用户名,第二个参数是密码,第三个参数是登录参数,用户名和密码可在http://open.voicecloud.cn注册获取 if (MSP_SUCCESS != ret) { printf("MSPLogin failed, error code: %d.\n", ret); goto exit ;//登录失败,退出登录 } printf("\n###########################################################################\n"); printf("## 语音合成(Text To Speech,TTS)技术能够自动将任意文字实时转换为连续的 ##\n"); printf("## 自然语音,是一种能够在任何时间、任何地点,向任何人提供语音信息服务的 ##\n"); printf("## 高效便捷手段,非常符合信息时代海量数据、动态更新和个性化查询的需求。 ##\n"); printf("###########################################################################\n\n"); /* 文本合成 */ printf("开始合成 ...\n"); ret = text_to_speech(text, filename, session_begin_params); if (MSP_SUCCESS != ret) { printf("text_to_speech failed, error code: %d.\n", ret); } printf("合成完毕\n"); exit: printf("按任意键退出 ...\n"); getchar(); MSPLogout(); //退出登录 return 0; }
int main(int argc, char* argv[]) { int ret = MSP_SUCCESS; int upload_on = 1; //是否上传用户词表 const char* login_params = "appid = 5638844a, work_dir = ."; // 登录参数,appid与msc库绑定,请勿随意改动 /* * sub: 请求业务类型 * domain: 领域 * language: 语言 * accent: 方言 * sample_rate: 音频采样率 * result_type: 识别结果格式 * result_encoding: 结果编码格式 * * 详细参数说明请参阅《iFlytek MSC Reference Manual》 */ const char* session_begin_params = "sub = iat, domain = iat, language = zh_ch, accent = mandarin, sample_rate = 16000, result_type = plain, result_encoding = gb2312, audio/L16; rate =1600"; /* 用户登录 */ ret = MSPLogin(NULL, NULL, login_params); //第一个参数是用户名,第二个参数是密码,均传NULL即可,第三个参数是登录参数 if (MSP_SUCCESS != ret) { printf("MSPLogin failed , Error code %d.\n",ret); goto exit; //登录失败,退出登录 } printf("\n########################################################################\n"); printf("## 语音听写(iFly Auto Transform)技术能够实时地将语音转换成对应的文字。##\n"); printf("########################################################################\n\n"); printf("演示示例选择:是否上传用户词表?\n0:不使用\n1:使用\n"); scanf("%d", &upload_on); if (upload_on) { printf("上传用户词表 ...\n"); ret = upload_userwords(); if (MSP_SUCCESS != ret) goto exit; printf("上传用户词表成功\n"); } run_iat(session_begin_params); //iflytek02音频内容为“中美数控”;如果上传了用户词表,识别结果为:“中美速控”。 exit: printf("按任意键退出 ...\n"); _getch(); MSPLogout(); //退出登录 return 0; }
/**
 * Service handler: log in, build the offline grammar network, run offline
 * recognition and copy the outcome into res.command.
 *
 * @param req  Request (not read by this function).
 * @param res  Response; command is only assigned when every step succeeds.
 * @return Always true, including when a step fails.
 *
 * MSPLogout is called on every path.
 */
bool asr(speechrecog::Command::Request &req, speechrecog::Command::Response &res)
{
    const char *login_config = "appid = 55b1aeb0";  // login parameters
    UserData asr_data;
    int ret = 0;

    // One-pass block: each failure breaks out to the shared logout below.
    do {
        // User name and password are passed as NULL; the third argument holds the login parameters.
        ret = MSPLogin(NULL, NULL, login_config);
        if (MSP_SUCCESS != ret) {
            printf("登录失败:%d\n", ret);
            break;
        }

        memset(&asr_data, 0, sizeof(UserData));
        printf("构建离线识别语法网络...\n");
        // The first recognition with a grammar must build the grammar
        // network to obtain its id; later recognitions with that grammar
        // do not need to rebuild it.
        ret = build_grammar(&asr_data);
        if (MSP_SUCCESS != ret) {
            printf("构建语法调用失败!\n");
            break;
        }

        // Poll every 300 ms until build_fini becomes 1.
        // NOTE(review): presumably set by a build callback on another
        // thread - confirm the field is safe to poll like this.
        while (1 != asr_data.build_fini) {
            usleep(300 * 1000);
        }
        if (MSP_SUCCESS != asr_data.errcode) {
            break;  // build reported an error; no message is printed here
        }

        printf("离线识别语法网络构建完成,开始识别...\n");
        ret = run_asr(&asr_data);
        if (MSP_SUCCESS != ret) {
            printf("离线语法识别出错: %d \n", ret);
            break;
        }

        // Pass the recognition result rec_rslt to the trigger; flag1 and
        // flag2 override it with tmpt1 / tmpt2 (flag2 wins if both are set).
        res.command = rec_rslt;
        if (flag1) {
            res.command = tmpt1;
        }
        if (flag2) {
            res.command = tmpt2;
        }
    } while (false);

    MSPLogout();
    return true;
}
void play(const char *string,const char *filename) { const char* login_configs = " appid = 55801297, work_dir = . "; const char* text = "科大讯飞作为中国最大的智能语音技术提供商,在智能语音技术领域有着长期的研究积累,并在中文语音合成、语音识别、口语评测等多项技术上拥有国际领先的成果。"; const char* param = "vcn=xiaoyan,aue = speex-wb,auf=audio/L16;rate=16000,spd = 5,vol = 5,tte = utf8";//8k音频合成参数:aue=speex,auf=audio/L16;rate=8000,其他参数意义参考参数列表 int ret = 0; char key = 0; ret = MSPLogin(NULL, NULL, login_configs); if ( ret != MSP_SUCCESS ) { printf("MSPLogin failed , Error code %d.\n",ret); } ret = text_to_speech(string,filename,param); if ( ret != MSP_SUCCESS ) { printf("text_to_speech: failed , Error code %d.\n",ret); } MSPLogout(); }
int IflySTT::open() { // sub: 请求业务类型 // domain: 领域 // language: 语言 // accent: 方言 // sample_rate: 音频采样率 // result_type: 识别结果格式 // result_encoding: 结果编码格式 // 详细参数说明请参阅《iFlytek MSC Reference Manual》 int ret = MSP_SUCCESS; int errcode = MSP_SUCCESS; const char* login_params = "appid = 5743ed12, work_dir = ."; // 登录参数,appid与msc库绑定,请勿随意改动 const char* session_begin_params = "sub = iat, domain = iat, language = zh_ch, accent = mandarin, sample_rate = 1411000, result_type = plain, result_encoding = gb2312"; // 用户登录 ret = MSPLogin(NULL, NULL, login_params); // 第一个参数是用户名,第二个参数是密码,均传NULL即可,第三个参数是登录参数 if (MSP_SUCCESS != ret) { return ret; } session_id_ = QISRSessionBegin(NULL, session_begin_params, &errcode); // 听写不需要语法,第一个参数为NULL if (MSP_SUCCESS != errcode) { MSPLogout(); return errcode; } run_ = true; _beginthread(iflystt_get_text_thread, 0, this); return 0; }
int main(int argc, char* argv[]) { ///APPID请勿随意改动 const char* login_config = "appid = 5392db98,work_dir = . "; const char* param1 = "sub=iat,ssm=1,auf=audio/L16;rate=16000,aue=speex,ent=sms16k,nbest=5";//最多5个候选项,格式只能为json,编码只能为utf8 const char* param2 = "sub=iat,ssm=1,auf=audio/L16;rate=16000,aue=speex,ent=sms16k,nbest=1";//最多2个候选项,格式只能为json,编码只能为utf8,nbest的取值范围为1~5 const char* output_file = "iat_result.txt"; int ret = 0; char key = 0; //用户登录 ret = MSPLogin(NULL, NULL, login_config); if ( ret != MSP_SUCCESS ) { printf("MSPLogin failed , Error code %d.\n",ret); } //开始一路转写会话 run_iat("wav/iflytek04.wav" , output_file , param1); //iflytek04对应的音频内容"一二三四五六七八九十" run_iat("wav/iflytek04.wav" , output_file , param2); //iflytek04对应的音频内容"一二三四五六七八九十" //退出登录 MSPLogout(); return 0; }
int main(int argc, char* argv[]) { ///APPID请勿随意改动 const char* login_configs = "appid = 5392db98, work_dir = . "; const char* param1 = "sub=iat,ssm=1,auf=audio/L16;rate=16000,aue=speex,ent=sms16k,rst=plain,rse=utf8";//直接转写,默认编码为gb2312,可以通过rse参数指定为utf8或unicode const char* param2 = "sub=iat,ssm=1,auf=audio/L16;rate=16000,aue=speex,ent=sms16k,rst=json,rse=utf8";//转写为json格式,编码只能为utf8 const char* output_file = "iat_result.txt"; int ret = 0; char key = 0; //用户登录 ret = MSPLogin(NULL, NULL, login_configs); if ( ret != MSP_SUCCESS ) { printf("MSPLogin failed , Error code %d.\n",ret); } //开始一路转写会话 run_iat(argv[1] , param1); //iflytek09对应的音频内容“沉舟侧畔千帆过,病树前头万木春。” //退出登录 MSPLogout(); return 0; }
int text2wav(char* text) { int ret = MSP_SUCCESS; const char* login_params = "appid = 573b34ce, work_dir = .";//登录参数,appid与msc库绑定,请勿随意改动 /* * rdn: 合成音频数字发音方式 * volume: 合成音频的音量 * pitch: 合成音频的音调 * speed: 合成音频对应的语速 * voice_name: 合成发音人 * sample_rate: 合成音频采样率 * text_encoding: 合成文本编码格式 * * 详细参数说明请参阅《iFlytek MSC Reference Manual》 */ const char* session_begin_params = "voice_name = xiaoyan, text_encoding = UTF8, sample_rate = 16000, speed = 50, volume = 50, pitch = 50, rdn = 0"; const char* filename = "tts.wav"; //合成的语音文件名称 /* 用户登录 */ ret = MSPLogin(NULL, NULL, login_params);//第一个参数是用户名,第二个参数是密码,第三个参数是登录参数,用户名和密码可在http://open.voicecloud.cn注册获取 if (MSP_SUCCESS != ret) { printf("MSPLogin failed, error code: %d.\n", ret); return 1; } /* 文本合成 */ printf("开始合成 ...\n"); ret = text_to_speech(text, filename, session_begin_params); if (MSP_SUCCESS != ret) { printf("text_to_speech failed, error code: %d.\n", ret); } printf("合成完毕\n"); MSPLogout(); //退出登录 return 0; }
/**
 * Callback for face-detection messages.
 *
 * For each detection in the message: log its label, read one line of text
 * from a fixed file, synthesize it into name.wav, publish "hello!" on the
 * "voice" topic, sleep 5 seconds and log out.
 *
 * A detection is skipped (with a message on stderr) when the text file
 * cannot be opened or read. Synthesis is skipped when login fails; the
 * message is still published in that case.
 *
 * @param face_position_msg_out  Detection array whose labels are logged.
 */
void chatterCallback(const cob_perception_msgs::DetectionArray &face_position_msg_out)
{
    int ret = MSP_SUCCESS;
    // Login parameters; the appid is bound to the msc library, do not change it casually.
    const char* login_params = "appid = 56f0f034, work_dir = .";
    const char* session_begin_params = "voice_name = xiaoyan, text_encoding = UTF8, sample_rate = 16000, speed = 50, volume = 50, pitch = 50, rdn = 2";
    // Name and path of the synthesized audio file.
    const char* filename = "/home/exbot/catkin/src/audio_common/sound_play/sounds/name.wav";
    const char* text_path = "/home/exbot/catkin/src/word/src/1.txt";

    ros::NodeHandle nh;
    ros::Publisher chatter_pub = nh.advertise<std_msgs::String>("voice",10);

    for (size_t i = 0; i < face_position_msg_out.detections.size(); ++i)
    {
        ROS_INFO("I see: %s",face_position_msg_out.detections[i].label.c_str());

        // Read the text to speak. A stack buffer replaces the heap
        // allocation that was never released.
        char text[500];
        FILE* fp = fopen(text_path, "r");
        if (NULL == fp) {
            fprintf(stderr, "chatterCallback: cannot open %s\n", text_path);
            continue;
        }
        const bool have_text = (fgets(text, sizeof(text), fp) != NULL);
        fclose(fp);
        if (!have_text) {
            fprintf(stderr, "chatterCallback: no text could be read from %s\n", text_path);
            continue;
        }

        /* User login: arguments are user name, password and login parameters;
           credentials can be registered at http://open.voicecloud.cn */
        ret = MSPLogin(NULL, NULL, login_params);
        const bool logged_in = (MSP_SUCCESS == ret);
        if (!logged_in) {
            fprintf(stderr, "MSPLogin failed, error code: %d.\n", ret);
        } else {
            ret = text_to_speech(text, filename, session_begin_params);
            if (MSP_SUCCESS != ret) {
                fprintf(stderr, "text_to_speech failed, error code: %d.\n", ret);
            }
        }

        std_msgs::String msg;
        msg.data = "hello!";
        ROS_INFO("%s",msg.data.c_str());
        chatter_pub.publish(msg);  // publish on the "voice" topic
        ros::Duration(5.0).sleep();

        if (logged_in) {
            MSPLogout();  // log out
        }
    }
}
int main(int argc, char* argv[]) { int ret = MSP_SUCCESS; //const char* login_params = "appid = 574e69b2, work_dir = .";//登录参数,appid与msc库绑定,请勿随意改动 /* * rdn: 合成音频数字发音方式 * volume: 合成音频的音量 * pitch: 合成音频的音调 * speed: 合成音频对应的语速 * voice_name: 合成发音人 * sample_rate: 合成音频采样率 * text_encoding: 合成文本编码格式 * * 详细参数说明请参阅《iFlytek MSC Reference Manual》 */ // const char* session_begin_params = "voice_name = xiaoyan, text_encoding = UTF8, sample_rate = 16000, speed = 50, volume = 50, pitch = 50, rdn = 2"; // const char* filename = "tts_sample.wav"; //合成的语音文件名称 // const char* text = "亲爱的用户,您好,这是一个语音合成示例,感谢您对科大讯飞语音技术的支持!科大讯飞是亚太地区最大的语音上市公司,股票代码:002230"; //合成文本 char login_params[1024]; char session_begin_params[1024]; char filename[512]; char text[4096]; int i; for (i = 0; i < argc; i++) { printf("%s\n", argv[i]); } if (argc<8) { printf("usage: THE_CMD appid speaker speed volume pitch filename \"text\"\n"); return 1; } sprintf(login_params, "appid = %s, work_dir = .", argv[1]); sprintf(session_begin_params, "voice_name = %s, text_encoding = UTF8, sample_rate = 16000, speed = %s, volume = %s, pitch = %s, rdn = 2", argv[2], argv[3], argv[4], argv[5]); sprintf(filename, "%s", argv[6]); sprintf(text, "%s", argv[7]); printf("login %s\n", login_params); printf("params %s\n", session_begin_params); printf("filename %s\n", filename); printf("text %s\n", text); /* 用户登录 */ ret = MSPLogin(NULL, NULL, login_params);//第一个参数是用户名,第二个参数是密码,第三个参数是登录参数,用户名和密码可在http://open.voicecloud.cn注册获取 if (MSP_SUCCESS != ret) { printf("MSPLogin failed, error code: %d.\n", ret); goto exit ;//登录失败,退出登录 } /* 文本合成 */ printf("开始合成 ...\n"); ret = text_to_speech(text, filename, session_begin_params); if (MSP_SUCCESS != ret) { printf("text_to_speech failed, error code: %d.\n", ret); } printf("合成完毕\n"); exit: MSPLogout(); //退出登录 return 0; }