You can use the code below to convert text of any length into audio with pico2wave. The script splits the input text into segments of at most 3500 characters, converts each segment into audio, and finally concatenates the segments into the final audio.mp3 file. Enjoy! (Note: you should meet the requirements of the pyrubberband library, especially if you want to tune the output by time-stretching, pitch-shifting, etc.)
import os
import subprocess
from pydub import AudioSegment
import soundfile as sf
import pyrubberband as pyrb
import subprocess
# Note the requirements of pyrb (sudo apt install libttspico-utils sox)
# pip install numpy Audiosegment pysox sox sndfile
def process_text_segment(text_segment, output_folder, segment_index):
    """Synthesize one text segment with pico2wave and save it as an MP3.

    The segment is spoken by ``pico2wave`` (en-GB voice) into a temporary WAV
    file, time-stretched and pitch-shifted with pyrubberband, boosted by
    12 dB with pydub and finally exported as ``audio<segment_index>.mp3``
    inside ``output_folder``.

    Args:
        text_segment: The text to speak.
        output_folder: Existing directory that receives the temporary WAV
            files and the resulting MP3.
        segment_index: Number used to build the file names.

    Returns:
        The path of the generated MP3 file.

    Raises:
        subprocess.CalledProcessError: If ``pico2wave`` exits with an error.
    """
    raw_wav_path = os.path.join(output_folder, f"audio{segment_index}.wav")
    # The processed audio needs its own file name. Previously both temporary
    # paths were identical, so the second os.remove() always raised
    # FileNotFoundError and the function never returned.
    processed_wav_path = os.path.join(
        output_folder, f"audio{segment_index}_processed.wav"
    )
    mp3_output_path = os.path.join(output_folder, f"audio{segment_index}.mp3")

    try:
        # Use pico2wave for audio generation; check=True surfaces a failed
        # synthesis here instead of as a confusing "file not found" later.
        subprocess.run(
            ['pico2wave', '-w', raw_wav_path, '-l', 'en-GB', text_segment],
            check=True,
        )

        samples, sample_rate = sf.read(raw_wav_path)
        # A rate below 1 slows the speech down.
        stretched = pyrb.time_stretch(samples, sample_rate, 0.73)
        # Shift the pitch by 1.5 steps, applied directly to the stretched
        # samples (no intermediate write/read round trip is needed).
        shifted = pyrb.pitch_shift(stretched, sample_rate, 1.5)
        sf.write(processed_wav_path, shifted, sample_rate, format='wav')

        # "+ 12" on an AudioSegment raises the volume by 12 dB.
        sound = AudioSegment.from_wav(processed_wav_path) + 12

        # Convert the processed audio to MP3
        sound.export(mp3_output_path, format="mp3")
    finally:
        # Clean up temporary WAV files, even when a step above failed.
        for temp_path in (raw_wav_path, processed_wav_path):
            if os.path.exists(temp_path):
                os.remove(temp_path)

    return mp3_output_path
def concatenate_audio_segments(output_folder, segment_count, mp3_output_path):
    """Join the per-segment MP3 files into one MP3 and delete the pieces.

    Reads ``audio1.mp3`` .. ``audio<segment_count>.mp3`` from
    ``output_folder``, concatenates them in order, exports the result to
    ``mp3_output_path`` and then removes the intermediate segment files.

    Args:
        output_folder: Directory containing the numbered segment files.
        segment_count: Number of segments to join (numbering starts at 1).
        mp3_output_path: Destination path of the combined MP3.

    Raises:
        ValueError: If ``segment_count`` is smaller than 1.
    """
    # With no segments, sum() would return the integer 0 and the export call
    # would fail with an unhelpful AttributeError, so reject that up front.
    if segment_count < 1:
        raise ValueError(
            "segment_count must be at least 1; there is nothing to concatenate"
        )

    segment_paths = [
        os.path.join(output_folder, f"audio{i}.mp3")
        for i in range(1, segment_count + 1)
    ]

    # Concatenate audio segments (adding AudioSegments appends them).
    final_audio = sum(AudioSegment.from_mp3(path) for path in segment_paths)

    # Export the final concatenated audio to MP3
    final_audio.export(mp3_output_path, format="mp3")

    # Clean up intermediate audio segments
    for path in segment_paths:
        os.remove(path)
def process_large_text(input_file_path, output_folder, max_chars=3500):
    """Convert a text file of any length into a single ``audio.mp3``.

    The text is split into segments of at most ``max_chars`` characters,
    each segment is synthesized with ``process_text_segment`` and the
    resulting files are merged by ``concatenate_audio_segments``.

    Args:
        input_file_path: Path of the UTF-8 text file to read.
        output_folder: Directory for the intermediate files and the final
            ``audio.mp3``; it is created if it does not exist.
        max_chars: Maximum number of characters per segment (default 3500,
            the limit that was previously hard-coded).

    Raises:
        ValueError: If the file contains no text to convert.
    """
    # Local import keeps this function self-contained.
    import textwrap

    with open(input_file_path, "r", encoding="utf-8") as file:
        text_content = file.read()

    # Split on whitespace so that no word is cut in half at a segment
    # boundary (a plain character slice would be spoken as two fragments).
    # Only words longer than max_chars are broken.
    text_segments = textwrap.wrap(
        text_content,
        width=max_chars,
        replace_whitespace=False,
        break_on_hyphens=False,
    )
    if not text_segments:
        raise ValueError(f"No text to convert in {input_file_path}")

    os.makedirs(output_folder, exist_ok=True)

    for i, text_segment in enumerate(text_segments, start=1):
        process_text_segment(text_segment, output_folder, i)

    # Concatenate audio segments
    mp3_output_path = os.path.join(output_folder, "audio.mp3")
    concatenate_audio_segments(output_folder, len(text_segments), mp3_output_path)
    print(f"Audio generated and saved to {mp3_output_path}")
# Example usage: replace the placeholder paths below, then run this file as
# a script. The guard keeps the conversion from starting when the module is
# merely imported.
if __name__ == "__main__":
    input_file_path = "path/to/your/input/file.txt"
    output_folder = "path/to/your/output/folder"
    process_large_text(input_file_path, output_folder)
cat Why-Did-John-Nash-Stop-His-Medication.txt | pico2wave --lang=en_GB --wave=Why-Did-John-Nash-Stop-His-Medication.wav
? – Jos Dec 05 '22 at 13:12
txt file contains non-ASCII characters? – Jos Dec 05 '22 at 13:39