// Copyright 2020-2021 Alpha Cephei Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/* This header contains the C API for Vosk speech recognition system */
#ifndef VOSK_API_H
#define VOSK_API_H
#ifdef __cplusplus
extern "C" {
#endif
/** Model stores all the data required for recognition
* it contains static data and can be shared across processing
* threads. */
typedef struct VoskModel VoskModel;
/** Speaker model is the same as model but contains the data
* for speaker identification. */
typedef struct VoskSpkModel VoskSpkModel;
/** Recognizer object is the main object which processes data.
* Each recognizer usually runs in own thread and takes audio as input.
* Once audio is processed recognizer returns JSON object as a string
* which represent decoded information - words, confidences, times, n-best lists,
* speaker information and so on */
typedef struct VoskRecognizer VoskRecognizer;
/**
* Batch model object
*/
typedef struct VoskBatchModel VoskBatchModel;
/**
* Batch recognizer object
*/
typedef struct VoskBatchRecognizer VoskBatchRecognizer;
/** Loads model data from the file and returns the model object
*
* @param model_path: the path of the model on the filesystem
* @returns model object or NULL if problem occured */
VoskModel *vosk_model_new(const char *model_path);
/** Releases the model memory
*
* The model object is reference-counted so if some recognizer
* depends on this model, model might still stay alive. When
* last recognizer is released, model will be released too. */
void vosk_model_free(VoskModel *model);
/** Check if a word can be recognized by the model
* @param word: the word
* @returns the word symbol if @param word exists inside the model
* or -1 otherwise.
* Reminding that word symbol 0 is for <epsilon> */
int vosk_model_find_word(VoskModel *model, const char *word);
/** Loads speaker model data from the file and returns the model object
*
* @param model_path: the path of the model on the filesystem
* @returns model object or NULL if problem occured */
VoskSpkModel *vosk_spk_model_new(const char *model_path);
/** Releases the model memory
*
* The model object is reference-counted so if some recognizer
* depends on this model, model might still stay alive. When
* last recognizer is released, model will be released too. */
void vosk_spk_model_free(VoskSpkModel *model);
/** Creates the recognizer object
*
* The recognizers process the speech and return text using shared model data
* @param model VoskModel containing static data for recognizer. Model can be
* shared across recognizers, even running in different threads.
* @param sample_rate The sample rate of the audio you going to feed into the recognizer.
* Make sure this rate matches the audio content, it is a common
* issue causing accuracy problems.
* @returns recognizer object or NULL if problem occured */
VoskRecognizer *vosk_recognizer_new(VoskModel *model, float sample_rate);
/** Creates the recognizer object with speaker recognition
*
* With the speaker recognition mode the recognizer not just recognize
* text but also return speaker vectors one can use for speaker identification
*
* @param model VoskModel containing static data for recognizer. Model can be
* shared across recognizers, even running in different threads.
* @param sample_rate The sample rate of the audio you going to feed into the recognizer.
* Make sure this rate matches the audio content, it is a common
* issue causing accuracy problems.
* @param spk_model speaker model for speaker identification
* @returns recognizer object or NULL if problem occured */
VoskRecognizer *vosk_recognizer_new_spk(VoskModel *model, float sample_rate, VoskSpkModel *spk_model);
/** Creates the recognizer object with the phrase list
*
* Sometimes when you want to improve recognition accuracy and when you don't need
* to recognize large vocabulary you can specify a list of phrases to recognize. This
* will improve recognizer speed and accuracy but might return [unk] if user said
* something different.
*
* Only recognizers with lookahead models support this type of quick configuration.
* Precompiled HCLG graph models are not supported.
*
* @param model VoskModel containing static data for recognizer. Model can be
* shared across recognizers, even running in different threads.
* @param sample_rate The sample rate of the audio you going to feed into the recognizer.
* Make sure this rate matches the audio content, it is a common
* issue causing accuracy problems.
* @param grammar The string with the list of phrases to recognize as JSON array of strings,
* for example "["one two three four five", "[unk]"]".
*
* @returns recognizer object or NULL if problem occured */
VoskRecognizer *vosk_recognizer_new_grm(VoskModel *model, float sample_rate, const char *grammar);
/** Adds speaker model to already initialized recognizer
*
* Can add speaker recognition model to already created recognizer. Helps to initialize
* speaker recognition for grammar-based recognizer.
*
* @param spk_model Speaker recognition model */
void vosk_recognizer_set_spk_model(VoskRecognizer *recognizer, VoskSpkModel *spk_model);
/** Configures recognizer to output n-best results
*
* <pre>
* {
* "alternatives": [
* { "text": "one two three four five", "confidence": 0.97 },
* { "text": "one two three for five", "confidence": 0.03 },
* ]
* }
* </pre>
*
* @param max_alternatives - maximum alternatives to return from recognition results
*/
void vosk_recognizer_set_max_alternatives(VoskRecognizer *recognizer, int max_alternatives);
/** Enables words with times in the output
*
* <pre>
* "result" : [{
* "conf" : 1.000000,
* "end" : 1.110000,
* "start" : 0.870000,
* "word" : "what"
* }, {
* "conf" : 1.000000,
* "end" : 1.530000,
* "start" : 1.110000,
* "word" : "zero"
* }, {
* "conf" : 1.000000,
* "end" : 1.950000,
* "start" : 1.530000,
* "word" : "zero"
* }, {
* "conf" : 1.000000,
* "end" : 2.340000,
* "start" : 1.950000,
* "word" : "zero"
* }, {
* "conf" : 1.000000,
* "end" : 2.610000,
* "start" : 2.340000,
* "word" : "one"
* }],
* </pre>
*
* @param words - boolean value
*/
void vosk_recognizer_set_words(VoskRecognizer *recognizer, int words);
/** Like above return words and confidences in partial results
*
* @param partial_words - boolean value
*/
void vosk_recognizer_set_partial_words(VoskRecognizer *recognizer, int partial_words);
/** Set NLSML output
* @param nlsml - boolean value
*/
void vosk_recognizer_set_nlsml(VoskRecognizer *recognizer, int nlsml);
/** Accept voice data
*
* accept and process new chunk of voice data
*
* @param data - audio data in PCM 16-bit mono format
* @param length - length of the audio data
* @returns 1 if silence is occured and you can retrieve a new utterance with result method
* 0 if decoding continues
* -1 if exception occured */
int vosk_recognizer_accept_waveform(VoskRecognizer *recognizer, const char *data, int length);
/** Sa
我喜欢就喜欢
- 粉丝: 502
- 资源: 57
最新资源
- 畅捷通 应用商店应用 通过 API 生成销售订单和付款单
- 党员教育和管理-JAVA-基于springBoot党员教育和管理系统设计与实现(毕业论文)
- 科研工作量管理-JAVA-基于springBoot科研工作量管理系统的设计与实现(毕业论文)
- 没有黑色的盒子和篮球检测3-YOLO(v5至v9)、COCO、CreateML、Darknet、Paligemma、TFRecord数据集合集.rar
- 基于 ChatGPT API 的划词翻译浏览器插件和跨平台桌面端应用、全部资料+详细文档+源码.zip
- 纺织品企业财务-JAVA-基于spring boot的纺织品企业财务管理系统设计与实现(毕业论文)
- 基于ExcelDNA开发、全部资料+详细文档+源码.zip
- 基于JDK8 AI 聊天机器人、微信公众号 Midjourney画图、卡密兑换、web 支持ChatGPT,卡密兑换,易支付,公众号引流,邮件注册、全部资料+详
- 医院后台管理-JAVA-基于springboot的医院后台管理系统的设计与实现(毕业论文)
- 疫情隔离管理-JAVA-基于springboot + vue的疫情隔离管理系统设计与实现(毕业论文)
- IMG_20241223_165547.jpg
- 图书电子商务网站-JAVA-基于springBoot图书电子商务网站的设计与实现(毕业论文+PPT)
- 蜡笔小新-去掉动效.zip
- 游戏人物分数检测7-YOLO(v5至v11)、COCO、CreateML、Paligemma、TFRecord、VOC数据集合集.rar
- 二手车交易系统-JAVA-基于springboot的二手车交易系统的设计与实现(毕业论文+PPT+开题+任务书)
- VBA视频教程 0004
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈