Path: blob/master/examples/cppwin/TensorflowTTSCppInference/TensorflowTTSCppInference.cpp
1559 views
1#include <iostream>2#include "Voice.h"3#define LOGF(txt) std::cout << txt << "\n"4#include "phonemizer.h"5#include "ext/ZCharScanner.h"6#include <algorithm>7#include <cctype>8#include <string>9#include "ext/cxxopts.hpp"1011std::vector<std::string> GetTxtFile(const std::string& InFn) {1213std::vector<std::string> Ret;14std::ifstream InFile(InFn);1516if (!InFile.good())17return Ret;181920std::string Line;21while (std::getline(InFile, Line))22{23Ret.push_back(Line);24252627}28InFile.close();2930return Ret;3132333435}3637std::vector<std::string> SuperWordSplit(const std::string& InStr, int MaxLen)38{39ZStringDelimiter Del1(InStr);40Del1.AddDelimiter(" ");4142std::vector<std::string> RawWords = Del1.GetTokens();43int AmtWords = RawWords.size();4445int Idx = 0;46std::string CurrentStr = "";4748std::vector<std::string> SplitStrs;4950while (Idx < AmtWords)51{52if (CurrentStr.size() > 0)53CurrentStr.append(" ");5455std::string CuWord = RawWords[Idx];56// phonetic input has to be uppercase57if (CuWord.find("@") == std::string::npos)58{59std::transform(CuWord.begin(), CuWord.end(), CuWord.begin(),60[](unsigned char c) { return std::tolower(c); });61}626364CurrentStr.append(CuWord);6566if (CurrentStr.length() > MaxLen) {67SplitStrs.push_back(CurrentStr);68CurrentStr = "";6970}717273Idx += 1;7475// Add the last string76if (Idx == AmtWords)77SplitStrs.push_back(CurrentStr);78798081828384}8586return SplitStrs;8788}8990int main(int argc, char* argv[])91{92cxxopts::Options options("TFTTSInfer", "Inference with TensorflowTTS models in command line");93options.add_options()94("v,voice", "Path to the voice folder", cxxopts::value<std::string>()->default_value("LJ")) // a bool parameter95("l,language", "Path to the language folder for G2P", cxxopts::value<std::string>()->default_value("g2p/English"))96("o,output", "Name of .wav file output of all infers", cxxopts::value<std::string>()->default_value("AllAud.wav"))97("m,maxlen", "Optional, max length of split for TTS. Default is 180", cxxopts::value<int>()->default_value("180"))98;99100auto Args = options.parse(argc, argv);101102std::string Name = Args["voice"].as<std::string>();103std::string Lang = Args["language"].as<std::string>();104std::string OutputFileName = Args["output"].as<std::string>();105int MaxLen = Args["maxlen"].as<int>();106107if (OutputFileName.find(".wav") == std::string::npos)108OutputFileName += ".wav";109110111112LOGF("Loading voice...");113114// Load phonemizer115Phonemizer StdPh;116117bool G2pInit = StdPh.Initialize(Lang);118if (!G2pInit) {119LOGF("Could not initialize language and/or G2P model! See if the path is correct and try again!");120return -2;121122}123124// Load the voice itself125Voice CurrentVox(Name,Name,&StdPh);126std::vector<float> AllAud;127128// Begin interactive console129bool Running = true;130while (Running)131{132std::string Prompt = "";133134LOGF("Type a prompt, or type EXIT to exit ");135136std::getline(std::cin, Prompt);137if (Prompt == "EXIT") {138Running = false;139break;140}141std::vector<float> Audata;142143// Split the prompt into chunks (if the user inputs like that)144for (const auto& Spli : SuperWordSplit(Prompt, MaxLen)) {145std::vector<float> ImmediateAudata = CurrentVox.Vocalize(Prompt + CurrentVox.GetInfo().EndPadding);146// Insert the audio data to the end of the mid-level audata vector147Audata.insert(Audata.end(), ImmediateAudata.begin(), ImmediateAudata.end());148149150}151152153154155std::string Filename = Prompt.substr(0, std::min(16, (int)Prompt.size())) + ".wav";156157VoxUtil::ExportWAV(Filename, Audata, CurrentVox.GetInfo().SampleRate);158159// Insert the audio into the AllAud vector160AllAud.insert(AllAud.end(), Audata.begin(), Audata.end());161162LOGF("Saved to " + Filename);163164165166167}168169170// Export all the audio171VoxUtil::ExportWAV(OutputFileName, AllAud, CurrentVox.GetInfo().SampleRate);172LOGF("Saved ALL to " + OutputFileName);173174std::cout << "Hello TensorflowTTS!\n";175return 0;176177}178179180