I'm very close to completing a personal project for modifying the Windows speech dictionary via C# (SAPI 5.4). The last bit I'm working on is how to get the SAPI phone set for a given word. I've found a way to do this via a C# form and spoken recognition obtained through
SpSharedRecoContext. However, I'm trying to get the recognition to work with a voice file (*.wav) as the input. I understand that this needs to be done via an
SpInProcRecoContext.
Nearly every example from Microsoft I've found regarding SAPI 5.4 recognition (like
this one for VB) is for SpSharedRecoContext and not SpInprocRecoContext (and I believe I've seen comments that some of these examples are missing details). Additionally, I've found multiple topics on Stack Overflow (mostly answered by Eric Brown, see
topic 1,
topic 2,
topic 3) that mention that using an SpInProcRecoContext requires more setup than an SpSharedRecoContext, but
I have yet to find a definitive answer for how to capture voice recognition events when using SpInProcRecoContext in C#.
How can I proceed on this? Any help would be extremely appreciated!!
What I have tried:
Here is my code so far:
using SpeechLib;
using System;
namespace SpeechTest
{
    /// <summary>
    /// Console experiment: synthesizes a word to a .wav file, hands that file to an
    /// in-process SAPI recognizer as its input stream, and reports the SAPI phonemes
    /// of whatever the recognizer heard.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Phonemes gathered by <see cref="RecoContext_Recognition"/>; stays null until
        /// that handler has run at least once.
        /// </summary>
        public static string SAPIPhonemes = null;

        /// <summary>
        /// Sets up the in-process recognizer with a dictation grammar, synthesizes the
        /// test word to a wave file, feeds the file to the recognizer and prints the
        /// phonemes collected by the Recognition handler.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string MyText = "dolphins";
            string WaveFile = @"C:\Reco\MYWAVE.wav";

            SpInProcRecoContext RecoContext = new SpInProcRecoContext();
            SpInprocRecognizer Recognizer = (SpInprocRecognizer)RecoContext.Recognizer;
            RecoContext.Recognition += new _ISpeechRecoContextEvents_RecognitionEventHandler(RecoContext_Recognition);

            // The COM call order below is kept exactly as before; SAPI setup is order-sensitive.
            ISpeechRecoGrammar grammar = RecoContext.CreateGrammar();
            grammar.DictationLoad("", SpeechLoadOption.SLOStatic);
            grammar.DictationSetState(SpeechRuleState.SGDSActive);

            // Point the recognizer at the default audio-input token first.
            SpObjectTokenCategory Category = new SpObjectTokenCategory();
            Category.SetId(SpeechStringConstants.SpeechCategoryAudioIn);
            SpObjectToken AudioToken = new SpObjectToken();
            AudioToken.SetId(Category.Default);
            Recognizer.AudioInput = AudioToken;

            ISpeechFileStream MyFileStream = new SpFileStream();
            TextToWave(MyText, WaveFile);
            MyFileStream.Open(WaveFile, SpeechStreamFileMode.SSFMOpenForRead, true);
            try
            {
                RecoContext.State = SpeechRecoContextState.SRCS_Enabled;
                Recognizer.State = SpeechRecognizerState.SRSActive;
                Recognizer.AudioInputStream = MyFileStream;

                // The phonemes are produced only inside the Recognition handler, not by the
                // assignment above, so the stream has to stay open and the result must not be
                // printed until the handler has had a chance to run.
                // NOTE(review): SAPI documents its notifications as being delivered through
                // window messages; a console thread blocked in ReadLine may never receive
                // them. Confirm whether a message loop is required here.
                Console.WriteLine("Recognizing " + WaveFile + "; press Enter once recognition has finished.");
                Console.ReadLine();
                Console.WriteLine(MyText + " = " + SAPIPhonemes);
            }
            finally
            {
                // Release the wave file even if the recognizer setup throws.
                MyFileStream.Close();
            }

            // Keep the console window open until the user dismisses it.
            Console.ReadLine();
        }

        /// <summary>
        /// Speaks <paramref name="text"/> into a newly created wave file.
        /// </summary>
        /// <param name="text">Text passed to the voice's Speak call.</param>
        /// <param name="file">Path of the wave file to create for writing.</param>
        static void TextToWave(string text, string file)
        {
            SpFileStream fileStream = new SpFileStream();
            SpVoice voice = new SpVoice();
            fileStream.Open(file, SpeechStreamFileMode.SSFMCreateForWrite, true);
            try
            {
                voice.AudioOutputStream = fileStream;
                voice.Speak(text);
            }
            finally
            {
                // Close the file even when Speak fails, so the path is not left locked.
                fileStream.Close();
            }
        }

        /// <summary>
        /// Recognition event handler: prints the recognized text and appends the SAPI
        /// phonemes of every phrase element to <see cref="SAPIPhonemes"/>.
        /// </summary>
        /// <param name="StreamNumber">Stream number supplied by the event (unused).</param>
        /// <param name="StreamPosition">Stream position supplied by the event (unused).</param>
        /// <param name="RecognitionType">Recognition type supplied by the event (unused).</param>
        /// <param name="Result">Recognition result whose phrase elements are converted.</param>
        public static void RecoContext_Recognition(int StreamNumber, object StreamPosition, SpeechRecognitionType RecognitionType, ISpeechRecoResult Result)
        {
            // Print the recognized text itself; ToString() on the result object does not return it.
            Console.WriteLine(Result.PhraseInfo.GetText(0, -1, true));

            SpPhoneConverter MyPhoneConverter = new SpPhoneConverter();
            MyPhoneConverter.LanguageId = 1033; // LCID for English (United States)

            string phonemes = null;
            foreach (ISpeechPhraseElement MyPhrase in Result.PhraseInfo.Elements)
            {
                phonemes += " " + MyPhoneConverter.IdToPhone(MyPhrase.Pronunciation);
            }

            // Write to the static field. The previous code filled a same-named local
            // variable, so the value Main printed was always null.
            SAPIPhonemes += phonemes;
        }
    }
}
For reference here is the form-based SpSharedRecoContext code that works:
using SpeechLib;
using System;
using System.Windows.Forms;
namespace RecoForm
{
    /// <summary>
    /// Form that toggles shared-recognizer dictation from a button and shows both the
    /// recognized text and its SAPI phonemes in two text boxes.
    /// </summary>
    public partial class Form1 : Form
    {
        SpSharedRecoContext listener;
        ISpeechRecoGrammar grammar;

        public Form1()
        {
            InitializeComponent();
        }

        private void Form1_Load(object sender, EventArgs e)
        {
        }

        // Pause/resume marker. Within this class it is only ever flipped between "1"
        // and "0"; nothing here gives it its first value.
        public string ps;

        /// <summary>
        /// Toggles listening based on the button caption: "Start Listening" builds a new
        /// shared recognition context with an active dictation grammar, "Stop Listening"
        /// pauses the current context.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            if (btnListen.Text == "Start Listening")
            {
                try
                {
                    listener = new SpSharedRecoContext();
                    listener.Recognition += listener_Reco;

                    grammar = listener.CreateGrammar(0);
                    grammar.DictationLoad("", SpeechLoadOption.SLOStatic);
                    grammar.DictationSetState(SpeechRuleState.SGDSActive);

                    // Caption changes only after the setup above succeeded.
                    btnListen.Text = "Stop Listening";

                    if (ps == "1")
                    {
                        listener.Resume();
                        ps = "0";
                    }
                }
                catch (Exception ex)
                {
                    MessageBox.Show(ex.Message);
                }

                return;
            }

            if (btnListen.Text != "Stop Listening")
            {
                return;
            }

            listener.Pause();
            btnListen.Text = "Start Listening";
            if (ps == "0")
            {
                ps = "1";
            }
        }

        /// <summary>
        /// Recognition event handler: appends the recognized text to textBox1 and the
        /// phonemes of each phrase element to textBox2.
        /// </summary>
        public void listener_Reco(int StreamNumber, object StreamPosition, SpeechRecognitionType RecognitionType, ISpeechRecoResult Result)
        {
            string recognizedText = Result.PhraseInfo.GetText(0, -1, true);
            textBox1.Text += " " + recognizedText;

            SpPhoneConverter phoneConverter = new SpPhoneConverter();
            phoneConverter.LanguageId = 1033;

            foreach (ISpeechPhraseElement element in Result.PhraseInfo.Elements)
            {
                textBox2.Text += " " + phoneConverter.IdToPhone(element.Pronunciation);
            }
        }
    }
}
Here is another example (in VB) that combines the Microsoft examples (
here and
here) which STILL doesn't work (see the comments in Command1_Click to find the location where I encounter a runtime error):
Imports SpeechLib
' Form that synthesizes the text in TextBox1 to a wave file and hands that file to an
' in-process SAPI recognizer; recognized text is written to TextBox2.
Public Class Form1
    Const WaveFile = "C:\Reco\MYWAVE.wav"

    Dim WithEvents RC As SpInProcRecoContext
    Dim Recognizer As SpInprocRecognizer
    Dim myGrammar As ISpeechRecoGrammar
    Dim MyFileStream As SpeechLib.SpFileStream
    Dim MyVoice As SpeechLib.SpVoice
    Dim MyText As String

    ' Creates the recognition context, activates dictation, picks a voice and points
    ' the recognizer at the default audio-input token.
    Private Sub Form1_Load(sender As Object, e As EventArgs) Handles MyBase.Load
        On Error GoTo EH
        RC = New SpInProcRecoContext
        Recognizer = RC.Recognizer
        myGrammar = RC.CreateGrammar
        myGrammar.DictationSetState(SpeechRuleState.SGDSActive)
        MyVoice = New SpVoice
        MyVoice.Voice = MyVoice.GetVoices("gender=male").Item(0)
        Dim Category As SpObjectTokenCategory
        Category = New SpObjectTokenCategory
        Category.SetId(SpeechStringConstants.SpeechCategoryAudioIn)
        Dim Token As SpObjectToken
        Token = New SpObjectToken
        Token.SetId(Category.Default)
        Recognizer.AudioInput = Token
        TextBox1.Text = "play the eight of clubs"
EH:
        If Err.Number Then ShowErrMsg()
    End Sub

    ' Writes the text box contents to the wave file, reopens the file and assigns it
    ' as the recognizer's input stream.
    Private Sub Command1_Click(sender As Object, e As EventArgs) Handles Command1.Click
        MyFileStream = MakeWAVFileFromText(TextBox1.Text, WaveFile)
        MyFileStream.Open(WaveFile)
        ' NOTE(review): the cause of the error below is not established from this code.
        ' The recognizer already has AudioInput set and a dictation grammar active by
        ' the time the stream is assigned; confirm whether that ordering is the trigger.
        Recognizer.AudioInputStream = MyFileStream ' ==> produces a runtime error!!!
    End Sub

    ' Recognition event handler: shows the recognized text in TextBox2.
    ' The Handles clause is required in VB.NET; without it this Sub is never attached
    ' to the WithEvents field RC. StreamNumber is Integer to match the 32-bit value
    ' the event passes.
    Private Sub RC_Recognition(ByVal StreamNumber As Integer, ByVal StreamPosition As Object, ByVal RecognitionType As SpeechLib.SpeechRecognitionType, ByVal Result As SpeechLib.ISpeechRecoResult) Handles RC.Recognition
        On Error GoTo EH
        TextBox2.Text = Result.PhraseInfo.GetText
EH:
        If Err.Number Then ShowErrMsg()
    End Sub

    ' Shows the current Err description and number in a message box, then ends the program.
    Private Sub ShowErrMsg()
        ' Declare identifiers:
        Const NL = vbNewLine
        Dim T As String
        T = "Desc: " & Err.Description & NL
        T = T & "Err #: " & Err.Number
        MsgBox(T, vbExclamation, "Run-Time Error")
        End
    End Sub

    ' Speaks strText into the wave file strFName and returns the (already closed)
    ' SpFileStream object that was used for writing.
    Private Function MakeWAVFileFromText(ByVal strText As String, ByVal strFName As String) As SpFileStream
        On Error GoTo EH
        ' Declare identifiers:
        Dim FileStream As SpFileStream
        Dim Voice As SpVoice
        ' Instantiate Voice and FileStream objects:
        Voice = New SpVoice
        FileStream = New SpFileStream
        ' Open specified .wav file, set voice output
        ' to file, and speak synchronously:
        FileStream.Open(strFName, SpeechStreamFileMode.SSFMCreateForWrite, True)
        Voice.AudioOutputStream = FileStream
        Voice.Speak(strText, SpeechVoiceSpeakFlags.SVSFIsXML)
        ' Close file and return reference to FileStream object:
        FileStream.Close()
        MakeWAVFileFromText = FileStream
EH:
        If Err.Number Then ShowErrMsg()
    End Function
End Class
' https:
' https://msdn.microsoft.com/en-us/library/ee125344(v=vs.85).aspx