Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 62 additions & 60 deletions samples/main.go
Original file line number Diff line number Diff line change
@@ -1,60 +1,62 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.

// Package main demonstrates usage of the speech recognizer, conversation transcriber, dialog service connector and speech synthesizer samples.
package main

import (
"fmt"
"os"

"github.com/Microsoft/cognitive-services-speech-sdk-go/samples/conversation_transcriber"
"github.com/Microsoft/cognitive-services-speech-sdk-go/samples/dialog_service_connector"
"github.com/Microsoft/cognitive-services-speech-sdk-go/samples/recognizer"
"github.com/Microsoft/cognitive-services-speech-sdk-go/samples/synthesizer"
)

// functionMap associates a "<scenario>:<sample>" identifier with the sample
// function to run; each function receives three string arguments.
type functionMap = map[string]func(string, string, string)

// printHelp writes the usage text to standard output, followed by one line per
// key of samples. Keys are listed in map iteration order.
func printHelp(executableName string, samples functionMap) {
	header := []string{
		"Input not valid",
		"Usage: ",
		// Same text Println would produce for the two operands: joined by one space.
		executableName + " " + " <subscription> <region> <file> <sample>",
		"Where sample is of the format <scenario>:<sample>",
		"Available samples:",
	}
	for _, line := range header {
		fmt.Println(line)
	}
	for name := range samples {
		fmt.Printf("%s %s\n", " -- ", name)
	}
}

// main maps each "<scenario>:<sample>" identifier to its sample function and
// runs the one named on the command line.
//
// Exactly four arguments are expected: <subscription> <region> <file> <sample>.
// On a wrong argument count or an unknown sample identifier the help text is
// printed and main returns without running anything.
func main() {
	registry := functionMap{
		"speech_recognizer:RecognizeOnceFromWavFile": recognizer.RecognizeOnceFromWavFile,
		"speech_recognizer:RecognizeOnceFromCompressedFile": recognizer.RecognizeOnceFromCompressedFile,
		"speech_recognizer:RecognizeOnceFromALAWFile": recognizer.RecognizeOnceFromALAWFile,
		"speech_recognizer:ContinuousFromMicrophone": recognizer.ContinuousFromMicrophone,
		"speech_recognizer:RecognizeContinuousUsingWrapper": recognizer.RecognizeContinuousUsingWrapper,
		"conversation_transcriber:ContinuousFromMicrophone": conversation_transcriber.ContinuousFromMicrophone,
		"conversation_transcriber:TranscribeFromFile": conversation_transcriber.TranscribeFromFile,
		"dialog_service_connector:ListenOnce": dialog_service_connector.ListenOnce,
		"dialog_service_connector:KWS": dialog_service_connector.KWS,
		"dialog_service_connector:ListenOnceFromStream": dialog_service_connector.ListenOnceFromStream,
		"speech_synthesizer:SynthesisToSpeaker": synthesizer.SynthesisToSpeaker,
		"speech_synthesizer:SynthesisToAudioDataStream": synthesizer.SynthesisToAudioDataStream,
	}

	cliArgs := os.Args[1:]
	if len(cliArgs) != 4 {
		printHelp(os.Args[0], registry)
		return
	}

	// Every registered value is a declared function, so a missing key is the
	// only way the lookup can fail.
	run, known := registry[cliArgs[3]]
	if !known {
		printHelp(os.Args[0], registry)
		return
	}
	run(cliArgs[0], cliArgs[1], cliArgs[2])
}
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.

// Package main demonstrates usage of the speech recognizer, conversation transcriber, dialog service connector and speech synthesizer samples.
package main

import (
	"fmt"
	"os"
	"sort"

	"github.com/Microsoft/cognitive-services-speech-sdk-go/samples/conversation_transcriber"
	"github.com/Microsoft/cognitive-services-speech-sdk-go/samples/dialog_service_connector"
	"github.com/Microsoft/cognitive-services-speech-sdk-go/samples/recognizer"
	"github.com/Microsoft/cognitive-services-speech-sdk-go/samples/synthesizer"
)

// functionMap associates a "<scenario>:<sample>" identifier with the sample
// function to run; each function receives three string arguments.
type functionMap = map[string]func(string, string, string)

// printHelp writes the usage text to standard output, followed by the
// identifiers of all entries in samples, one per line.
//
// The identifiers are printed in ascending lexical order so the help text is
// the same on every run; ranging over the map directly would list them in a
// different order each time.
func printHelp(executableName string, samples functionMap) {
	fmt.Println("Input not valid")
	fmt.Println("Usage: ")
	fmt.Println(executableName, " <subscription> <region> <file> <sample>")
	fmt.Println("Where sample is of the format <scenario>:<sample>")
	fmt.Println("Available samples:")

	ids := make([]string, 0, len(samples))
	for id := range samples {
		ids = append(ids, id)
	}
	sort.Strings(ids)
	for _, id := range ids {
		fmt.Println(" -- ", id)
	}
}

// main maps each "<scenario>:<sample>" identifier to its sample function and
// runs the one named on the command line.
//
// Exactly four arguments are expected: <subscription> <region> <file> <sample>.
// On a wrong argument count or an unknown sample identifier the help text is
// printed and main returns without running anything.
func main() {
	registry := functionMap{
		"speech_recognizer:RecognizeOnceFromWavFile": recognizer.RecognizeOnceFromWavFile,
		"speech_recognizer:RecognizeOnceFromCompressedFile": recognizer.RecognizeOnceFromCompressedFile,
		"speech_recognizer:RecognizeOnceFromALAWFile": recognizer.RecognizeOnceFromALAWFile,
		"speech_recognizer:RecognizeOnceFromAutoDetectSourceLangConfig": recognizer.RecognizeOnceFromAutoDetectSourceLangConfig,
		"speech_recognizer:ContinuousFromMicrophone": recognizer.ContinuousFromMicrophone,
		"speech_recognizer:RecognizeContinuousUsingWrapper": recognizer.RecognizeContinuousUsingWrapper,
		"conversation_transcriber:ContinuousFromMicrophone": conversation_transcriber.ContinuousFromMicrophone,
		"conversation_transcriber:TranscribeFromFile": conversation_transcriber.TranscribeFromFile,
		"dialog_service_connector:ListenOnce": dialog_service_connector.ListenOnce,
		"dialog_service_connector:KWS": dialog_service_connector.KWS,
		"dialog_service_connector:ListenOnceFromStream": dialog_service_connector.ListenOnceFromStream,
		"speech_synthesizer:SynthesisToSpeaker": synthesizer.SynthesisToSpeaker,
		"speech_synthesizer:SynthesisToAudioDataStream": synthesizer.SynthesisToAudioDataStream,
		"speech_synthesizer:SynthesisFromAutoDetectSourceLangConfig": synthesizer.SynthesisFromAutoDetectSourceLangConfig,
	}

	cliArgs := os.Args[1:]
	if len(cliArgs) != 4 {
		printHelp(os.Args[0], registry)
		return
	}

	// Every registered value is a declared function, so a missing key is the
	// only way the lookup can fail.
	run, known := registry[cliArgs[3]]
	if !known {
		printHelp(os.Args[0], registry)
		return
	}
	run(cliArgs[0], cliArgs[1], cliArgs[2])
}
54 changes: 54 additions & 0 deletions samples/recognizer/auto_detect.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.

package recognizer

import (
"fmt"
"time"

"github.com/Microsoft/cognitive-services-speech-sdk-go/audio"
"github.com/Microsoft/cognitive-services-speech-sdk-go/speech"
)

// RecognizeOnceFromAutoDetectSourceLangConfig runs a single recognition on a
// WAV file using an auto-detect source language config built from the
// candidate languages "en-US" and "de-DE".
//
// subscription and region are used to create the speech config; file is the
// path handed to audio.NewAudioConfigFromWavFileInput. Any error is printed to
// standard output and ends the sample. If no outcome arrives within 10
// seconds, "Timed out" is printed instead of a result.
func RecognizeOnceFromAutoDetectSourceLangConfig(subscription string, region string, file string) {
	wavInput, err := audio.NewAudioConfigFromWavFileInput(file)
	if err != nil {
		fmt.Println("Got an error: ", err)
		return
	}
	defer wavInput.Close()

	speechConfig, err := speech.NewSpeechConfigFromSubscription(subscription, region)
	if err != nil {
		fmt.Println("Got an error: ", err)
		return
	}
	defer speechConfig.Close()

	candidates := []string{"en-US", "de-DE"}
	autoDetect, err := speech.NewAutoDetectSourceLanguageConfigFromLanguages(candidates)
	if err != nil {
		fmt.Println("Got an error: ", err)
		return
	}
	defer autoDetect.Close()

	rec, err := speech.NewSpeechRecognizerFromAutoDetectSourceLangConfig(speechConfig, autoDetect, wavInput)
	if err != nil {
		fmt.Println("Got an error: ", err)
		return
	}
	defer rec.Close()

	// Start the recognition first, then wait for either its outcome or the timeout.
	pending := rec.RecognizeOnceAsync()
	select {
	case <-time.After(10 * time.Second):
		fmt.Println("Timed out")
	case outcome := <-pending:
		// Deferred to function exit, so the outcome stays valid while it is printed.
		defer outcome.Close()
		if outcome.Error != nil {
			fmt.Println("Got an error: ", outcome.Error)
			return
		}
		fmt.Println("Got a recognition!")
		fmt.Println(outcome.Result.Text)
	}
}
52 changes: 52 additions & 0 deletions samples/synthesizer/auto_detect.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.

package synthesizer

import (
"fmt"
"time"

"github.com/Microsoft/cognitive-services-speech-sdk-go/common"
"github.com/Microsoft/cognitive-services-speech-sdk-go/speech"
)

// SynthesisFromAutoDetectSourceLangConfig synthesizes the text "Hello, world."
// with a synthesizer created from an open-range auto-detect source language
// config and a nil audio config.
//
// subscription and region are used to create the speech config. file is not
// used by this sample (presumably it exists only so the signature matches the
// other samples; confirm against the caller). Any error is printed to standard
// output and ends the sample. If no outcome arrives within 30 seconds,
// "Timed out" is printed. A result whose reason is not
// common.SynthesizingAudioCompleted is reported as a failure.
func SynthesisFromAutoDetectSourceLangConfig(subscription string, region string, file string) {
	_ = file

	speechConfig, err := speech.NewSpeechConfigFromSubscription(subscription, region)
	if err != nil {
		fmt.Println("Got an error: ", err)
		return
	}
	defer speechConfig.Close()

	autoDetect, err := speech.NewAutoDetectSourceLanguageConfigFromOpenRange()
	if err != nil {
		fmt.Println("Got an error: ", err)
		return
	}
	defer autoDetect.Close()

	synth, err := speech.NewSpeechSynthesizerFromAutoDetectSourceLangConfig(speechConfig, autoDetect, nil)
	if err != nil {
		fmt.Println("Got an error: ", err)
		return
	}
	defer synth.Close()

	// Start the synthesis first, then wait for either its outcome or the timeout.
	pending := synth.SpeakTextAsync("Hello, world.")
	select {
	case <-time.After(30 * time.Second):
		fmt.Println("Timed out")
	case outcome := <-pending:
		// Deferred to function exit, so the outcome stays valid while it is inspected.
		defer outcome.Close()
		if outcome.Error != nil {
			fmt.Println("Got an error: ", outcome.Error)
			return
		}
		if reason := outcome.Result.Reason; reason != common.SynthesizingAudioCompleted {
			fmt.Println("Synthesis failed with reason:", reason)
			return
		}
		fmt.Println("Speech synthesized successfully with auto-detect source language config.")
	}
}
10 changes: 8 additions & 2 deletions speech/speech_recognizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ func NewSpeechRecognizerFromConfig(config *SpeechConfig, audioConfig *audio.Audi
return newSpeechRecognizerFromHandle(handle)
}

// NewSpeechRecognizerFomAutoDetectSourceLangConfig creates a speech recognizer from a speech config, auto detection source language config and audio config
func NewSpeechRecognizerFomAutoDetectSourceLangConfig(config *SpeechConfig, langConfig *AutoDetectSourceLanguageConfig, audioConfig *audio.AudioConfig) (*SpeechRecognizer, error) {
// NewSpeechRecognizerFromAutoDetectSourceLangConfig creates a speech recognizer from a speech config, auto detection source language config and audio config
func NewSpeechRecognizerFromAutoDetectSourceLangConfig(config *SpeechConfig, langConfig *AutoDetectSourceLanguageConfig, audioConfig *audio.AudioConfig) (*SpeechRecognizer, error) {
var handle C.SPXHANDLE
if config == nil {
return nil, common.NewCarbonError(uintptr(C.SPXERR_INVALID_ARG))
Expand All @@ -97,6 +97,12 @@ func NewSpeechRecognizerFomAutoDetectSourceLangConfig(config *SpeechConfig, lang
return newSpeechRecognizerFromHandle(handle)
}

// NewSpeechRecognizerFomAutoDetectSourceLangConfig is the old, misspelled name of
// NewSpeechRecognizerFromAutoDetectSourceLangConfig. It forwards its arguments
// unchanged and returns that function's results.
//
// Deprecated: Use NewSpeechRecognizerFromAutoDetectSourceLangConfig instead.
func NewSpeechRecognizerFomAutoDetectSourceLangConfig(config *SpeechConfig, langConfig *AutoDetectSourceLanguageConfig, audioConfig *audio.AudioConfig) (*SpeechRecognizer, error) {
return NewSpeechRecognizerFromAutoDetectSourceLangConfig(config, langConfig, audioConfig)
}

// NewSpeechRecognizerFromSourceLanguageConfig creates a speech recognizer from a speech config, source language config and audio config
func NewSpeechRecognizerFromSourceLanguageConfig(config *SpeechConfig, sourceLanguageConfig *SourceLanguageConfig, audioConfig *audio.AudioConfig) (*SpeechRecognizer, error) {
var handle C.SPXHANDLE
Expand Down
51 changes: 51 additions & 0 deletions speech/speech_recognizer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"time"

"github.com/Microsoft/cognitive-services-speech-sdk-go/audio"
"github.com/Microsoft/cognitive-services-speech-sdk-go/common"
)

func createSpeechRecognizerFromSubscriptionRegionAndAudioConfig(t *testing.T, subscription string, region string, audioConfig *audio.AudioConfig) *SpeechRecognizer {
Expand Down Expand Up @@ -274,3 +275,53 @@ func TestPhraseListGrammarWithoutGrammar(t *testing.T) {
// TestPhraseListGrammarWithGrammar runs the shared testPhraseList helper with
// its boolean argument set to true.
func TestPhraseListGrammarWithGrammar(t *testing.T) {
testPhraseList(t, true)
}

// TestRecognitionWithLanguageAutoDetection recognizes
// ../test_files/turn_on_the_lamp.wav once with a recognizer created from an
// auto-detect source language config for "en-US" and "de-DE".
//
// The subscription key and region are read from the SPEECH_SUBSCRIPTION_KEY and
// SPEECH_SUBSCRIPTION_REGION environment variables. The test expects a
// RecognizedSpeech result with non-empty text within 5 seconds.
func TestRecognitionWithLanguageAutoDetection(t *testing.T) {
	key := os.Getenv("SPEECH_SUBSCRIPTION_KEY")
	location := os.Getenv("SPEECH_SUBSCRIPTION_REGION")

	speechConfig, err := NewSpeechConfigFromSubscription(key, location)
	if err != nil {
		t.Fatal("Got an error: ", err)
	}
	defer speechConfig.Close()

	autoDetect, err := NewAutoDetectSourceLanguageConfigFromLanguages([]string{"en-US", "de-DE"})
	if err != nil {
		t.Fatal("Got an error: ", err)
	}
	defer autoDetect.Close()

	wavInput, err := audio.NewAudioConfigFromWavFileInput("../test_files/turn_on_the_lamp.wav")
	if err != nil {
		t.Fatal("Got an error: ", err)
	}
	defer wavInput.Close()

	rec, err := NewSpeechRecognizerFromAutoDetectSourceLangConfig(speechConfig, autoDetect, wavInput)
	if err != nil {
		t.Fatal("Got an error: ", err)
	}
	if rec == nil {
		t.Fatal("Recognizer creation failed")
	}
	defer rec.Close()

	// Start the recognition first, then wait for either its outcome or the timeout.
	pending := rec.RecognizeOnceAsync()
	select {
	case <-time.After(5 * time.Second):
		t.Error("Timeout waiting for recognition result.")
	case outcome := <-pending:
		if outcome.Error != nil {
			t.Fatal("Got an error: ", outcome.Error)
		}
		defer outcome.Result.Close()

		result := outcome.Result
		if result.Reason != common.RecognizedSpeech {
			t.Error("Expected RecognizedSpeech reason, got: ", result.Reason)
		}
		if result.Text == "" {
			t.Error("Expected non-empty recognition result")
		}
		t.Log("Recognized: ", result.Text)
	}
}
10 changes: 8 additions & 2 deletions speech/speech_synthesizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ func NewSpeechSynthesizerFromConfig(config *SpeechConfig, audioConfig *audio.Aud
return newSpeechSynthesizerFromHandle(handle)
}

// NewSpeechSynthesizerFomAutoDetectSourceLangConfig creates a speech synthesizer from a speech config, auto detection source language config and audio config
func NewSpeechSynthesizerFomAutoDetectSourceLangConfig(config *SpeechConfig, langConfig *AutoDetectSourceLanguageConfig, audioConfig *audio.AudioConfig) (*SpeechSynthesizer, error) {
// NewSpeechSynthesizerFromAutoDetectSourceLangConfig creates a speech synthesizer from a speech config, auto detection source language config and audio config
func NewSpeechSynthesizerFromAutoDetectSourceLangConfig(config *SpeechConfig, langConfig *AutoDetectSourceLanguageConfig, audioConfig *audio.AudioConfig) (*SpeechSynthesizer, error) {
var handle C.SPXHANDLE
if config == nil {
return nil, common.NewCarbonError(uintptr(C.SPXERR_INVALID_ARG))
Expand All @@ -87,6 +87,12 @@ func NewSpeechSynthesizerFomAutoDetectSourceLangConfig(config *SpeechConfig, lan
return newSpeechSynthesizerFromHandle(handle)
}

// NewSpeechSynthesizerFomAutoDetectSourceLangConfig is the old, misspelled name of
// NewSpeechSynthesizerFromAutoDetectSourceLangConfig. It forwards its arguments
// unchanged and returns that function's results.
//
// Deprecated: Use NewSpeechSynthesizerFromAutoDetectSourceLangConfig instead.
func NewSpeechSynthesizerFomAutoDetectSourceLangConfig(config *SpeechConfig, langConfig *AutoDetectSourceLanguageConfig, audioConfig *audio.AudioConfig) (*SpeechSynthesizer, error) {
return NewSpeechSynthesizerFromAutoDetectSourceLangConfig(config, langConfig, audioConfig)
}

// SpeakTextAsync executes the speech synthesis on plain text, asynchronously.
func (synthesizer SpeechSynthesizer) SpeakTextAsync(text string) chan SpeechSynthesisOutcome {
outcome := make(chan SpeechSynthesisOutcome)
Expand Down
6 changes: 3 additions & 3 deletions speech/speech_synthesizer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,8 @@ func TestSynthesizerEvents(t *testing.T) {
defer event.Close()
t.Logf("SynthesisCompleted, audio length %d", len(event.Result.AudioData))
checkSynthesisResult(t, &event.Result, common.SynthesizingAudioCompleted)
durationFromProperty := (float64)(event.Result.AudioDuration/time.Millisecond)
durationFromAudioBuffer := (float64)(len(event.Result.AudioData)/32)
durationFromProperty := (float64)(event.Result.AudioDuration / time.Millisecond)
durationFromAudioBuffer := (float64)(len(event.Result.AudioData) / 32)
if !almostEqual(durationFromProperty, durationFromAudioBuffer, 150) {
t.Errorf("Synthesis duration incorrect (%.2f vs %.2f)", durationFromProperty, durationFromAudioBuffer)
}
Expand Down Expand Up @@ -461,7 +461,7 @@ func TestSynthesisWithLanguageAutoDetection(t *testing.T) {
t.Error("Got an error: ", err)
}
defer languageConfig.Close()
synthesizer, err := NewSpeechSynthesizerFomAutoDetectSourceLangConfig(config, languageConfig, nil)
synthesizer, err := NewSpeechSynthesizerFromAutoDetectSourceLangConfig(config, languageConfig, nil)
if err != nil {
t.Error("Got an error: ", err)
}
Expand Down
Loading