ref: 397265f5791291defa76487c2388e1898e2e433c
dir: /main/flite_main.c/
/*************************************************************************/ /* */ /* Language Technologies Institute */ /* Carnegie Mellon University */ /* Copyright (c) 2001 */ /* All Rights Reserved. */ /* */ /* Permission is hereby granted, free of charge, to use and distribute */ /* this software and its documentation without restriction, including */ /* without limitation the rights to use, copy, modify, merge, publish, */ /* distribute, sublicense, and/or sell copies of this work, and to */ /* permit persons to whom this work is furnished to do so, subject to */ /* the following conditions: */ /* 1. The code must retain the above copyright notice, this list of */ /* conditions and the following disclaimer. */ /* 2. Any modifications must be clearly marked as such. */ /* 3. Original authors' names are not deleted. */ /* 4. The authors' names are not used to endorse or promote products */ /* derived from this software without specific prior written */ /* permission. */ /* */ /* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */ /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */ /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */ /* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */ /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */ /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */ /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */ /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */ /* THIS SOFTWARE. */ /* */ /*************************************************************************/ /* Author: Alan W Black ([email protected]) */ /* Date: January 2001 */ /*************************************************************************/ /* */ /* Simple top level program */ /* */ /*************************************************************************/ #include <stdio.h> #include <string.h> #include <sys/time.h> #include <unistd.h> #include "flite.h" #include "flite_version.h" cst_val *flite_set_voice_list(const char *voxdir); #ifdef WASM32_WASI void flite_set_lang_list(void); #else void *flite_set_lang_list(void); #endif void cst_alloc_debug_summary(); /* Its not very appropriate that these are declared here */ void usenglish_init(cst_voice *v); cst_lexicon *cmu_lex_init(void); static void flite_version() { printf(" Carnegie Mellon University, Copyright (c) 1999-2016, all rights reserved\n"); printf(" version: %s-%s-%s %s (http://cmuflite.org)\n", FLITE_PROJECT_PREFIX, FLITE_PROJECT_VERSION, FLITE_PROJECT_STATE, FLITE_PROJECT_DATE); } static void flite_usage() { printf("flite: a small simple speech synthesizer\n"); flite_version(); printf("usage: flite TEXT/FILE [WAVEFILE]\n" " Converts text in TEXTFILE to a waveform in WAVEFILE\n" " If text contains a space the it is treated as a literal\n" " textstring and spoken, and not as a file name\n" " if WAVEFILE is unspecified or \"play\" the result is\n" " played on the current systems audio device. If WAVEFILE\n" " is \"none\" the waveform is discarded (good for benchmarking)\n" " Other options must appear before these options\n" " --version Output flite version number\n" " --help Output usage string\n" " -o WAVEFILE Explicitly set output filename\n" " -f TEXTFILE Explicitly set input filename\n" " -t TEXT Explicitly set input textstring\n" " -p PHONES Explicitly set input textstring and synthesize as phones\n" " --set F=V Set feature (guesses type)\n" " -s F=V Set feature (guesses type)\n" " --seti F=V Set int feature\n" " --setf F=V Set float feature\n" " --sets F=V Set string feature\n" " -ssml Read input text/file in ssml mode\n" " -b Benchmark mode\n" " -l Loop endlessly\n" " -voice NAME Use voice NAME (NAME can be pathname/url to flitevox file)\n" " -voicedir NAME Directory containing (clunit) voice data\n" " -lv List voices available\n" " -add_lex FILENAME add lex addenda from FILENAME\n" " -pw Print words\n" " -ps Print segments\n" " -psdur Print segments and their durations (end-time)\n" " -pr RelName Print relation RelName\n" " -voicedump FILENAME Dump selected (cg) voice to FILENAME\n" " -v Verbose mode\n"); exit(0); } static void flite_voice_list_print(void) { cst_voice *voice; const cst_val *v; printf("Voices available: "); for (v=flite_voice_list; v; v=val_cdr(v)) { voice = val_voice(val_car(v)); printf("%s ",voice->name); } printf("\n"); return; } static cst_utterance *print_info(cst_utterance *u) { cst_item *item; const char *relname; int printEndTime = 0; int printStress = 0; relname = utt_feat_string(u,"print_info_relation"); if (cst_streq(relname, "SegmentEndTime")) { relname = "Segment"; printEndTime = 1; } if (cst_streq(relname, "SegmentStress")) { relname = "Segment"; printStress = 1; } for (item=relation_head(utt_relation(u,relname)); item; item=item_next(item)) { if (!printEndTime) printf("%s",item_feat_string(item,"name")); else printf("%s:%1.3f",item_feat_string(item,"name"), item_feat_float(item,"end")); if (printStress == 1) { if (cst_streq("+",ffeature_string(item,"ph_vc"))) printf("%s",ffeature_string(item,"R:SylStructure.parent.stress")); } printf(" "); } printf("\n"); return u; } static void ef_set(cst_features *f,const char *fv,const char *type) { /* set feature from fv (F=V), guesses type if not explicit type given */ const char *val; char *feat; const char *fname; if ((val = strchr(fv,'=')) == 0) { fprintf(stderr, "flite: can't find '=' in featval \"%s\", ignoring it\n", fv); } else { feat = cst_strdup(fv); feat[cst_strlen(fv)-cst_strlen(val)] = '\0'; fname=feat_own_string(f,feat); val = val+1; if ((type && cst_streq("int",type)) || ((type == 0) && (cst_regex_match(cst_rx_int,val)))) feat_set_int(f,fname,atoi(val)); else if ((type && cst_streq("float",type)) || ((type == 0) && (cst_regex_match(cst_rx_double,val)))) feat_set_float(f,fname,atof(val)); else feat_set_string(f,fname,val); cst_free(feat); } } int main(int argc, char **argv) { struct timeval tv; cst_voice *v; const char *filename; const char *outtype; cst_voice *desired_voice = 0; const char *voicedir = NULL; int i; float durs; double time_start, time_end; int flite_verbose, flite_loop, flite_bench; int explicit_filename, explicit_text, explicit_phones, ssml_mode; #define ITER_MAX 3 int bench_iter = 0; cst_features *extra_feats; const char *lex_addenda_file = NULL; const char *voicedumpfile = NULL; cst_audio_streaming_info *asi; filename = 0; outtype = "play"; /* default is to play */ flite_verbose = FALSE; flite_loop = FALSE; flite_bench = FALSE; explicit_text = explicit_filename = explicit_phones = FALSE; ssml_mode = FALSE; extra_feats = new_features(); flite_init(); flite_set_lang_list(); /* defined at compilation time */ for (i=1; i<argc; i++) { if (cst_streq(argv[i],"--version")) { flite_version(); return 1; } else if (cst_streq(argv[i],"-h") || cst_streq(argv[i],"--help") || cst_streq(argv[i],"-?")) flite_usage(); else if (cst_streq(argv[i],"-v")) flite_verbose = TRUE; else if (cst_streq(argv[i],"-lv")) { if (flite_voice_list == NULL) flite_set_voice_list(voicedir); flite_voice_list_print(); exit(0); } else if (cst_streq(argv[i],"-l")) flite_loop = TRUE; else if (cst_streq(argv[i],"-b")) { flite_bench = TRUE; break; /* ignore other arguments */ } else if ((cst_streq(argv[i],"-o")) && (i+1 < argc)) { outtype = argv[i+1]; i++; } else if ((cst_streq(argv[i],"-voice")) && (i+1 < argc)) { if (flite_voice_list == NULL) flite_set_voice_list(voicedir); desired_voice = flite_voice_select(argv[i+1]); i++; } else if ((cst_streq(argv[i],"-voicedir")) && (i+1 < argc)) { voicedir = argv[i+1]; if (flite_voice_list == NULL) flite_set_voice_list(voicedir); i++; } else if ((cst_streq(argv[i],"-add_lex")) && (i+1 < argc)) { lex_addenda_file = argv[i+1]; i++; } else if (cst_streq(argv[i],"-f") && (i+1 < argc)) { filename = argv[i+1]; explicit_filename = TRUE; i++; } else if (cst_streq(argv[i],"-pw")) { feat_set_string(extra_feats,"print_info_relation","Word"); feat_set(extra_feats,"post_synth_hook_func", uttfunc_val(&print_info)); } else if (cst_streq(argv[i],"-ps")) { feat_set_string(extra_feats,"print_info_relation","Segment"); feat_set(extra_feats,"post_synth_hook_func", uttfunc_val(&print_info)); } else if (cst_streq(argv[i],"-psdur")) { // Added by AUP Mar 2013 for extracting durations (end-time) of segments // (useful in talking heads, etc.) feat_set_string(extra_feats,"print_info_relation","SegmentEndTime"); feat_set(extra_feats,"post_synth_hook_func", uttfunc_val(&print_info)); } else if (cst_streq(argv[i],"-psstress")) { feat_set_string(extra_feats,"print_info_relation","SegmentStress"); feat_set(extra_feats,"post_synth_hook_func", uttfunc_val(&print_info)); } else if (cst_streq(argv[i],"-ssml")) { ssml_mode = TRUE; } else if (cst_streq(argv[i],"-pr") && (i+1 < argc)) { feat_set_string(extra_feats,"print_info_relation",argv[i+1]); feat_set(extra_feats,"post_synth_hook_func", uttfunc_val(&print_info)); i++; } else if (cst_streq(argv[i],"-voicedump") && (i+1 < argc)) { voicedumpfile = argv[i+1]; i++; } else if ((cst_streq(argv[i],"-set") || cst_streq(argv[i],"-s")) && (i+1 < argc)) { ef_set(extra_feats,argv[i+1],0); i++; } else if (cst_streq(argv[i],"--seti") && (i+1 < argc)) { ef_set(extra_feats,argv[i+1],"int"); i++; } else if (cst_streq(argv[i],"--setf") && (i+1 < argc)) { ef_set(extra_feats,argv[i+1],"float"); i++; } else if (cst_streq(argv[i],"--sets") && (i+1 < argc)) { ef_set(extra_feats,argv[i+1],"string"); i++; } else if (cst_streq(argv[i],"-p") && (i+1 < argc)) { filename = argv[i+1]; explicit_phones = TRUE; i++; } else if (cst_streq(argv[i],"-t") && (i+1 < argc)) { filename = argv[i+1]; explicit_text = TRUE; i++; } else if (filename) outtype = argv[i]; else filename = argv[i]; } if (filename == NULL) filename = "-"; /* stdin */ if (flite_voice_list == NULL) flite_set_voice_list(voicedir); if (desired_voice == 0) desired_voice = flite_voice_select(NULL); v = desired_voice; feat_copy_into(extra_feats,v->features); durs = 0.0; if (voicedumpfile != NULL) { flite_voice_dump(v,voicedumpfile); exit(0); } if (lex_addenda_file) flite_voice_add_lex_addenda(v,lex_addenda_file); if (cst_streq("stream",outtype)) { asi = new_audio_streaming_info(); asi->asc = audio_stream_chunk; feat_set(v->features,"streaming_info",audio_streaming_info_val(asi)); } if (flite_bench) { outtype = "none"; filename = "A whole joy was reaping, but they've gone south, you should fetch azure mike."; explicit_text = TRUE; } loop: gettimeofday(&tv,NULL); time_start = (double)(tv.tv_sec)+(((double)tv.tv_usec)/1000000.0); if (explicit_phones) durs = flite_phones_to_speech(filename,v,outtype); else if ((strchr(filename,' ') && !explicit_filename) || explicit_text) { if (ssml_mode) durs = flite_ssml_text_to_speech(filename,v,outtype); else durs = flite_text_to_speech(filename,v,outtype); } else { if (ssml_mode) durs = flite_ssml_file_to_speech(filename,v,outtype); else durs = flite_file_to_speech(filename,v,outtype); } gettimeofday(&tv,NULL); time_end = ((double)(tv.tv_sec))+((double)tv.tv_usec/1000000.0); if (flite_verbose || (flite_bench && bench_iter == ITER_MAX)) printf("times faster than real-time: %f\n(%f seconds of speech synthesized in %f)\n", durs/(float)(time_end-time_start), durs, (float)(time_end-time_start)); if (flite_loop || (flite_bench && bench_iter++ < ITER_MAX)) goto loop; delete_features(extra_feats); delete_val(flite_voice_list); flite_voice_list=0; /* cst_alloc_debug_summary(); */ return 0; }