How to change this app to desable input from command line?

Question

1.00/5 (1 vote)

See more:

This is the Original code:

#include <stdio.h>
#include <string.h>
#include <assert.h>

#if defined(_WIN32) && !defined(__CYGWIN__)
#include <windows.h>
#else
#include <sys/select.h>
#endif

#include <sphinxbase/err.h>
#include <sphinxbase/ad.h>

#include "pocketsphinx.h"

static const arg_t cont_args_def[] = {
    POCKETSPHINX_OPTIONS,
    /* Argument file. */
    {"-argfile",
     ARG_STRING,
     NULL,
     "Argument file giving extra arguments."},
    {"-adcdev",
     ARG_STRING,
     NULL,
     "Name of audio device to use for input."},
    {"-infile",
     ARG_STRING,
     NULL,
     "Audio file to transcribe."},
    {"-inmic",
     ARG_BOOLEAN,
     "no",
     "Transcribe audio from microphone."},
    {"-time",
     ARG_BOOLEAN,
     "no",
     "Print word times in file transcription."},
    CMDLN_EMPTY_OPTION
};

static ps_decoder_t *ps;
static cmd_ln_t *config;
static FILE *rawfd;

static void
print_word_times()
{
    int frame_rate = cmd_ln_int32_r(config, "-frate");
    ps_seg_t *iter = ps_seg_iter(ps);
    while (iter != NULL) {
        int32 sf, ef, pprob;
        float conf;

        ps_seg_frames(iter, &sf, &ef);
        pprob = ps_seg_prob(iter, NULL, NULL, NULL);
        conf = logmath_exp(ps_get_logmath(ps), pprob);
        printf("%s %.3f %.3f %f\n", ps_seg_word(iter), ((float)sf / frame_rate),
               ((float) ef / frame_rate), conf);
        iter = ps_seg_next(iter);
    }
}

static int
check_wav_header(char *header, int expected_sr)
{
    int sr;

    if (header[34] != 0x10) {
        E_ERROR("Input audio file has [%d] bits per sample instead of 16\n", header[34]);
        return 0;
    }
    if (header[20] != 0x1) {
        E_ERROR("Input audio file has compression [%d] and not required PCM\n", header[20]);
        return 0;
    }
    if (header[22] != 0x1) {
        E_ERROR("Input audio file has [%d] channels, expected single channel mono\n", header[22]);
        return 0;
    }
    sr = ((header[24] & 0xFF) | ((header[25] & 0xFF) << 8) | ((header[26] & 0xFF) << 16) | ((header[27] & 0xFF) << 24));
    if (sr != expected_sr) {
        E_ERROR("Input audio file has sample rate [%d], but decoder expects [%d]\n", sr, expected_sr);
        return 0;
    }
    return 1;
}

/*
 * Continuous recognition from a file
 */
static void
recognize_from_file()
{
    int16 adbuf[2048];
    const char *fname;
    const char *hyp;
    int32 k;
    uint8 utt_started, in_speech;
    int32 print_times = cmd_ln_boolean_r(config, "-time");

    fname = cmd_ln_str_r(config, "-infile");
    if ((rawfd = fopen(fname, "rb")) == NULL) {
        E_FATAL_SYSTEM("Failed to open file '%s' for reading",
                       fname);
    }

    if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".wav") == 0) {
        char waveheader[44];
    fread(waveheader, 1, 44, rawfd);
    if (!check_wav_header(waveheader, (int)cmd_ln_float32_r(config, "-samprate")))
            E_FATAL("Failed to process file '%s' due to format mismatch.\n", fname);
    }

    if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".mp3") == 0) {
    E_FATAL("Can not decode mp3 files, convert input file to WAV 16kHz 16-bit mono before decoding.\n");
    }

    ps_start_utt(ps);
    utt_started = FALSE;

    while ((k = fread(adbuf, sizeof(int16), 2048, rawfd)) > 0) {
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
        } 
        if (!in_speech && utt_started) {
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL);
            if (hyp != NULL)
            printf("%s\n", hyp);
            if (print_times)
            print_word_times();
            fflush(stdout);

            ps_start_utt(ps);
            utt_started = FALSE;
        }
    }
    ps_end_utt(ps);
    if (utt_started) {
        hyp = ps_get_hyp(ps, NULL);
        if (hyp != NULL) {
            printf("%s\n", hyp);
            if (print_times) {
            print_word_times();
        }
    }
    }

    fclose(rawfd);
}

/* Sleep for specified msec */
static void
sleep_msec(int32 ms)
{
#if (defined(_WIN32) && !defined(GNUWINCE)) || defined(_WIN32_WCE)
    Sleep(ms);
#else
    /* ------------------- Unix ------------------ */
    struct timeval tmo;

    tmo.tv_sec = 0;
    tmo.tv_usec = ms * 1000;

    select(0, NULL, NULL, NULL, &tmo);
#endif
}

/*
 * Main utterance processing loop:
 *     for (;;) {
 *        start utterance and wait for speech to process
 *        decoding till end-of-utterance silence will be detected
 *        print utterance result;
 *     }
 */
static void
recognize_from_microphone()
{
    ad_rec_t *ad;
    int16 adbuf[2048];
    uint8 utt_started, in_speech;
    int32 k;
    char const *hyp;

    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                          (int) cmd_ln_float32_r(config,
                                                 "-samprate"))) == NULL)
        E_FATAL("Failed to open audio device\n");
    if (ad_start_rec(ad) < 0)
        E_FATAL("Failed to start recording\n");

    if (ps_start_utt(ps) < 0)
        E_FATAL("Failed to start utterance\n");
    utt_started = FALSE;
    E_INFO("Ready....\n");

    for (;;) {
        if ((k = ad_read(ad, adbuf, 2048)) < 0)
            E_FATAL("Failed to read audio\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
            E_INFO("Listening...\n");
        }
        if (!in_speech && utt_started) {
            /* speech -> silence transition, time to start new utterance  */
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL );
            if (hyp != NULL) {
                printf("%s\n", hyp);
                fflush(stdout);
            }

            if (ps_start_utt(ps) < 0)
                E_FATAL("Failed to start utterance\n");
            utt_started = FALSE;
            E_INFO("Ready....\n");
        }
        sleep_msec(100);
    }
    ad_close(ad);
}

int
main(int argc, char *argv[])
{
    char const *cfg;

    config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, TRUE);

    /* Handle argument file as -argfile. */
    if (config && (cfg = cmd_ln_str_r(config, "-argfile")) != NULL) {
        config = cmd_ln_parse_file_r(config, cont_args_def, cfg, FALSE);
    }

    if (config == NULL || (cmd_ln_str_r(config, "-infile") == NULL && cmd_ln_boolean_r(config, "-inmic") == FALSE)) {
    E_INFO("Specify '-infile <file.wav>' to recognize from file or '-inmic yes' to recognize from microphone.\n");
        cmd_ln_free_r(config);
    return 1;
    }

    ps_default_search_args(config);
    ps = ps_init(config);
    if (ps == NULL) {
        cmd_ln_free_r(config);
        return 1;
    }

    E_INFO("%s COMPILED ON: %s, AT: %s\n\n", argv[0], __DATE__, __TIME__);

    if (cmd_ln_str_r(config, "-infile") != NULL) {
        recognize_from_file();
    } else if (cmd_ln_boolean_r(config, "-inmic")) {
        recognize_from_microphone();
    }

    ps_free(ps);
    cmd_ln_free_r(config);

    return 0;
}

#if defined(_WIN32_WCE)
#pragma comment(linker,"/entry:mainWCRTStartup")
#include <windows.h>
//Windows Mobile has the Unicode main only
int
wmain(int32 argc, wchar_t * wargv[])
{
    char **argv;
    size_t wlen;
    size_t len;
    int i;

    argv = malloc(argc * sizeof(char *));
    for (i = 0; i < argc; i++) {
        wlen = lstrlenW(wargv[i]);
        len = wcstombs(NULL, wargv[i], wlen);
        argv[i] = malloc(len + 1);
        wcstombs(argv[i], wargv[i], wlen);
    }

    //assuming ASCII parameters
    return main(argc, argv);
}
#endif

I can compile it by this command:

g++ -o output continuous.cpp         -DMODELDIR=\"`pkg-config --variable=modeldir pocketsphinx`\"     `pkg-config --cflags --libs pocketsphinx sphinxbase`

And run it by this command :

output -inmic yes

What I have tried:

But I like to convert the code as it has no need to get inmic yes and it automatically starts the program from microphone. But I got segmentation fault(core dumped) error when I changed these parts:

static const arg_t cont_args_def= {"-inmic",
     ARG_BOOLEAN,
     "no",
     "Transcribe audio from microphone."};

int main(int argc, char *argv[])
{
    config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, TRUE);

 if (cmd_ln_boolean_r(config, "-inmic")) {
        recognize_from_microphone();
    }



   // recognize_from_microphone();
    ps_free(ps);
    cmd_ln_free_r(config);



    return 0;

}

I searched a lot and red the documentation but couldn't understand what's the problem?

EDIT: I changed the code like this:

static const arg_t cont_args_def[] = {
    POCKETSPHINX_OPTIONS,

    {"-inmic",
     ARG_BOOLEAN,
     "no",
     "Transcribe audio from microphone."},

    CMDLN_EMPTY_OPTION
};

int main(int argc, char *argv[])
{
    config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, TRUE);

// if (cmd_ln_boolean_r(config, "-inmic")) {
        recognize_from_microphone();
//    }



   // recognize_from_microphone();
    ps_free(ps);
    cmd_ln_free_r(config);



    return 0;

}

But the result is:

Arguments list definition:
[NAME]			[DEFLT]		[DESCR]
-agc			none		Automatic gain control for c0 ('max', 'emax', 'noise', or 'none')
-agcthresh		2.0		Initial threshold for automatic gain control
-allphone				Perform phoneme decoding with phonetic lm
-allphone_ci		no		Perform phoneme decoding with phonetic lm and context-independent units only
-alpha			0.97		Preemphasis parameter
-ascale			20.0		Inverse of acoustic model scale for confidence score calculation
-aw			1		Inverse weight applied to acoustic scores.
-backtrace		no		Print results and backtraces to log.
-beam			1e-48		Beam width applied to every frame in Viterbi search (smaller values mean wider beam)
-bestpath		yes		Run bestpath (Dijkstra) search over word lattice (3rd pass)
-bestpathlw		9.5		Language model probability weight for bestpath search
-ceplen			13		Number of components in the input feature vector
-cmn			live		Cepstral mean normalization scheme ('live', 'batch', or 'none')
-cmninit		40,3,-1		Initial values (comma-separated) for cepstral mean when 'live' is used
-compallsen		no		Compute all senone scores in every frame (can be faster when there are many senones)
-debug					Verbosity level for debugging messages
-dict					Main pronunciation dictionary (lexicon) input file
-dictcase		no		Dictionary is case sensitive (NOTE: case insensitivity applies to ASCII characters only)
-dither			no		Add 1/2-bit noise
-doublebw		no		Use double bandwidth filters (same center freq)
-ds			1		Frame GMM computation downsampling ratio
-fdict					Noise word pronunciation dictionary input file
-feat			1s_c_d_dd	Feature stream type, depends on the acoustic model
-featparams				File containing feature extraction parameters.
-fillprob		1e-8		Filler word transition probability
-frate			100		Frame rate
-fsg					Sphinx format finite state grammar file
-fsgusealtpron		yes		Add alternate pronunciations to FSG
-fsgusefiller		yes		Insert filler words at each state.
-fwdflat		yes		Run forward flat-lexicon search over word lattice (2nd pass)
-fwdflatbeam		1e-64		Beam width applied to every frame in second-pass flat search
-fwdflatefwid		4		Minimum number of end frames for a word to be searched in fwdflat search
-fwdflatlw		8.5		Language model probability weight for flat lexicon (2nd pass) decoding
-fwdflatsfwin		25		Window of frames in lattice to search for successor words in fwdflat search 
-fwdflatwbeam		7e-29		Beam width applied to word exits in second-pass flat search
-fwdtree		yes		Run forward lexicon-tree search (1st pass)
-hmm					Directory containing acoustic model files.
-inmic			no		Transcribe audio from microphone.
-input_endian		little		Endianness of input data, big or little, ignored if NIST or MS Wav
-jsgf					JSGF grammar file
-keyphrase				Keyphrase to spot
-kws					A file with keyphrases to spot, one per line
-kws_delay		10		Delay to wait for best detection score
-kws_plp		1e-1		Phone loop probability for keyphrase spotting
-kws_threshold		1		Threshold for p(hyp)/p(alternatives) ratio
-latsize		5000		Initial backpointer table size
-lda					File containing transformation matrix to be applied to features (single-stream features only)
-ldadim			0		Dimensionality of output of feature transformation (0 to use entire matrix)
-lifter			0		Length of sin-curve for liftering, or 0 for no liftering.
-lm					Word trigram language model input file
-lmctl					Specify a set of language model
-lmname					Which language model in -lmctl to use by default
-logbase		1.0001		Base in which all log-likelihoods calculated
-logfn					File to write log messages in
-logspec		no		Write out logspectral files instead of cepstra
-lowerf			133.33334	Lower edge of filters
-lpbeam			1e-40		Beam width applied to last phone in words
-lponlybeam		7e-29		Beam width applied to last phone in single-phone words
-lw			6.5		Language model probability weight
-maxhmmpf		30000		Maximum number of active HMMs to maintain at each frame (or -1 for no pruning)
-maxwpf			-1		Maximum number of distinct word exits at each frame (or -1 for no pruning)
-mdef					Model definition input file
-mean					Mixture gaussian means input file
-mfclogdir				Directory to log feature files to
-min_endfr		0		Nodes ignored in lattice construction if they persist for fewer than N frames
-mixw					Senone mixture weights input file (uncompressed)
-mixwfloor		0.0000001	Senone mixture weights floor (applied to data from -mixw file)
-mllr					MLLR transformation to apply to means and variances
-mmap			yes		Use memory-mapped I/O (if possible) for model files
-ncep			13		Number of cep coefficients
-nfft			512		Size of FFT
-nfilt			40		Number of filter banks
-nwpen			1.0		New word transition penalty
-pbeam			1e-48		Beam width applied to phone transitions
-pip			1.0		Phone insertion penalty
-pl_beam		1e-10		Beam width applied to phone loop search for lookahead
-pl_pbeam		1e-10		Beam width applied to phone loop transitions for lookahead
-pl_pip			1.0		Phone insertion penalty for phone loop
-pl_weight		3.0		Weight for phoneme lookahead penalties
-pl_window		5		Phoneme lookahead window size, in frames
-rawlogdir				Directory to log raw audio files to
-remove_dc		no		Remove DC offset from each frame
-remove_noise		yes		Remove noise with spectral subtraction in mel-energies
-remove_silence		yes		Enables VAD, removes silence frames from processing
-round_filters		yes		Round mel filter frequencies to DFT points
-samprate		16000		Sampling rate
-seed			-1		Seed for random number generator; if less than zero, pick our own
-sendump				Senone dump (compressed mixture weights) input file
-senlogdir				Directory to log senone score files to
-senmgau				Senone to codebook mapping input file (usually not needed)
-silprob		0.005		Silence word transition probability
-smoothspec		no		Write out cepstral-smoothed logspectral files
-svspec					Subvector specification (e.g., 24,0-11/25,12-23/26-38 or 0-12/13-25/26-38)
-tmat					HMM state transition matrix input file
-tmatfloor		0.0001		HMM state transition probability floor (applied to -tmat file)
-topn			4		Maximum number of top Gaussians to use in scoring.
-topn_beam		0		Beam width used to determine top-N Gaussians (or a list, per-feature)
-toprule				Start rule for JSGF (first public rule is default)
-transform		legacy		Which type of transform to use to calculate cepstra (legacy, dct, or htk)
-unit_area		yes		Normalize mel filters to unit area
-upperf			6855.4976	Upper edge of filters
-uw			1.0		Unigram weight
-vad_postspeech		50		Num of silence frames to keep after from speech to silence.
-vad_prespeech		20		Num of speech frames to keep before silence to speech.
-vad_startspeech	10		Num of speech frames to trigger vad from silence to speech.
-vad_threshold		2.0		Threshold for decision between noise and silence frames. Log-ratio between signal level and noise level.
-var					Mixture gaussian variances input file
-varfloor		0.0001		Mixture gaussian variance floor (applied to data from -var file)
-varnorm		no		Variance normalize each utterance (only if CMN == current)
-verbose		no		Show input filenames
-warp_params				Parameters defining the warping function
-warp_type		inverse_linear	Warping function type (or shape)
-wbeam			7e-29		Beam width applied to word exits
-wip			0.65		Word insertion penalty
-wlen			0.025625	Hamming window length

Segmentation fault (core dumped)

Posted 29-Nov-17 17:50pm

Member 13376650

Updated 30-Nov-17 8:36am

v2

Add a Solution

1 solution

Add a Solution

Add your solution here

Treat my content as plain text, not as HTML

Preview 0

…

Existing Members

Sign in to your account

...or Join us

Download, Vote, Comment, Publish.

Your Email
Password
Forgot your password?

Your Email
This email is in use. Do you need your password?
Optional Password

I have read and agree to the Terms of Service and Privacy Policy
Please subscribe me to the CodeProject newsletters

When answering a question please:

Read the question carefully.
Understand that English isn't everyone's first language so be lenient of bad spelling and grammar.
If a question is poorly phrased then either ask for clarification, ignore it, or edit the question and fix the problem. Insults are not welcome.
Don't tell someone to read the manual. Chances are they have and don't get it. Provide an answer or move on to the next question.

Let's work to help developers, not make them feel stupid.

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

Jochen Arndt · Answer 1 · 2017-11-29T21:39:00

Compare your arg_t variable with those of the original code and read the documentation of the cmd_ln_parse_r() function or have a look at the implementation if there is no documentation.

arg_t must be an array with a terminating entry so that the function knows when to stop parsing the supported arguments:

// Must be an array!
//static const arg_t cont_args_def= {"-inmic",
static const arg_t cont_args_def[] = {
    {"-inmic",
     ARG_BOOLEAN,
     "no",
     "Transcribe audio from microphone."},
    // This must be always the last array entry
    CMDLN_EMPTY_OPTION
};

[EDIT]
To always use the microphone, remove the -inmic and -infile options from the command list and always call recognize_from_microphone().

But before doing so, you have to prepare the command options:

ps_default_search_args(config);
ps = ps_init(config);
if (ps == NULL) {
    cmd_ln_free_r(config);
    return 1;
}
/* Always use microphone */
recognize_from_microphone();

If you still get a seg fault, that is sourced somewhere else.

When modifying existing code to adapt it for your own needs, it is essential to understand what the existing code is doing.

If you for example does not include the above block which assigns a value to ps, ps is indeterminate. Then don't call free(ps) too.
[/EDIT]