From c25be210c894cd79cbe8d40b33c73c154ab39eed Mon Sep 17 00:00:00 2001 From: Andy Date: Sun, 17 Aug 2025 08:51:53 -0400 Subject: [PATCH] first commit --- .gitignore | 49 +++++++++++++++++ README.md | 0 podcast-tts-generator/README.md | 52 +++++++++++++++++++ .../prompts/sample_prompt.txt | 3 ++ podcast-tts-generator/src/chatgpt_api.py | 17 ++++++ podcast-tts-generator/src/main.py | 33 ++++++++++++ .../src/outline_generator.py | 7 +++ podcast-tts-generator/src/script_creator.py | 4 ++ podcast-tts-generator/src/tts_engine.py | 16 ++++++ podcast-tts-generator/src/utils.py | 7 +++ prompts/sample_prompt.txt | 1 + requirements.txt | 4 ++ src/chatgpt_api.py | 15 ++++++ src/main.py | 34 ++++++++++++ src/script_creator.py | 15 ++++++ src/tts_engine.py | 5 ++ src/utils.py | 1 + 17 files changed, 263 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 podcast-tts-generator/README.md create mode 100644 podcast-tts-generator/prompts/sample_prompt.txt create mode 100644 podcast-tts-generator/src/chatgpt_api.py create mode 100644 podcast-tts-generator/src/main.py create mode 100644 podcast-tts-generator/src/outline_generator.py create mode 100644 podcast-tts-generator/src/script_creator.py create mode 100644 podcast-tts-generator/src/tts_engine.py create mode 100644 podcast-tts-generator/src/utils.py create mode 100644 prompts/sample_prompt.txt create mode 100644 requirements.txt create mode 100644 src/chatgpt_api.py create mode 100644 src/main.py create mode 100644 src/script_creator.py create mode 100644 src/tts_engine.py create mode 100644 src/utils.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..554b577 --- /dev/null +++ b/.gitignore @@ -0,0 +1,49 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ 
+.installed.cfg +*.egg + +# Virtual environments +.venv/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Jupyter Notebook checkpoints +.ipynb_checkpoints + +# VS Code settings +.vscode/ + +# Output folders +outputs/ + +# OS files +.DS_Store +Thumbs.db + +# API keys and environment variables +.env +.env.* diff --git a/README.md b/README.md new file mode 100644 index 0000000..e69de29 diff --git a/podcast-tts-generator/README.md b/podcast-tts-generator/README.md new file mode 100644 index 0000000..0c0ad58 --- /dev/null +++ b/podcast-tts-generator/README.md @@ -0,0 +1,52 @@ +# Podcast TTS Generator + +## Overview +The Podcast TTS Generator is a Python application that automates the process of generating text-to-speech output for podcasts. It takes prompts from text files, generates detailed outlines using the ChatGPT API, creates scripts from those outlines, and finally synthesizes speech to produce audio files. + +## Project Structure +``` +podcast-tts-generator +├── src +│ ├── main.py # Entry point of the application +│ ├── chatgpt_api.py # Functions to interact with the ChatGPT API +│ ├── outline_generator.py # Class to create outlines from prompts +│ ├── script_creator.py # Class to generate scripts from outlines +│ ├── tts_engine.py # Class to synthesize speech from scripts +│ └── utils.py # Utility functions for file handling +├── prompts +│ └── sample_prompt.txt # Sample text prompt for testing +├── outputs +│ ├── outlines # Directory for generated outlines +│ ├── scripts # Directory for generated scripts +│ └── audio # Directory for generated audio files +├── requirements.txt # Project dependencies +└── README.md # Project documentation +``` + +## Installation +1. Clone the repository: + ``` + git clone <repository-url> + cd podcast-tts-generator + ``` + +2. Install the required dependencies: + ``` + pip install -r requirements.txt + ``` + +## Usage +1. Place your text prompts in the `prompts` directory. You can use the provided `sample_prompt.txt` as a template. + +2. 
Run the application: + ``` + python src/main.py + ``` + +3. The generated outlines, scripts, and audio files will be saved in the respective directories under `outputs`. + +## Contributing +Contributions are welcome! Please feel free to submit a pull request or open an issue for any enhancements or bug fixes. + +## License +This project is licensed under the MIT License. See the LICENSE file for more details. \ No newline at end of file diff --git a/podcast-tts-generator/prompts/sample_prompt.txt b/podcast-tts-generator/prompts/sample_prompt.txt new file mode 100644 index 0000000..f4a63be --- /dev/null +++ b/podcast-tts-generator/prompts/sample_prompt.txt @@ -0,0 +1,3 @@ +This file contains a sample text prompt that will be used to test the functionality of the application. + +Welcome to our podcast! In this episode, we will explore the fascinating world of artificial intelligence and its impact on our daily lives. We'll discuss the latest advancements, ethical considerations, and what the future holds for AI technology. Join us as we dive deep into this exciting topic! 
\ No newline at end of file
diff --git a/podcast-tts-generator/src/chatgpt_api.py b/podcast-tts-generator/src/chatgpt_api.py
new file mode 100644
index 0000000..4ee48f9
--- /dev/null
+++ b/podcast-tts-generator/src/chatgpt_api.py
@@ -0,0 +1,31 @@
+import openai
+import os
+
+# Load your OpenAI API key from an environment variable or directly
+API_KEY = os.getenv("OPENAI_API_KEY")
+
+openai.api_key = API_KEY
+
+def generate_outline(prompt: str) -> str:
+    """Send ``prompt`` to the chat model and return its reply as the outline.
+
+    Uses the client interface of openai>=1.0; the legacy
+    ``openai.ChatCompletion`` entry point was removed in that release.
+
+    Args:
+        prompt: Text sent verbatim as the only user message.
+
+    Returns:
+        The first choice's text with surrounding whitespace stripped, or
+        an empty string if the reply carries no text.
+    """
+    client = openai.OpenAI(api_key=API_KEY)
+    response = client.chat.completions.create(
+        model="gpt-3.5-turbo",
+        messages=[
+            {"role": "user", "content": prompt}
+        ]
+    )
+    # 1.x responses are objects, not dicts: read .content as an attribute.
+    outline = response.choices[0].message.content
+    return (outline or "").strip()
\ No newline at end of file
diff --git a/podcast-tts-generator/src/main.py b/podcast-tts-generator/src/main.py
new file mode 100644
index 0000000..e388db4
--- /dev/null
+++ b/podcast-tts-generator/src/main.py
@@ -0,0 +1,43 @@
+import os
+import chatgpt_api
+from chatgpt_api import generate_outline
+from outline_generator import OutlineGenerator
+from script_creator import ScriptCreator
+from tts_engine import TTSEngine
+from utils import read_prompt, save_output
+
+def main():
+    """Run the pipeline once: prompt -> outline -> script -> audio.
+
+    All paths are relative to the current working directory.
+    """
+    # Define paths
+    prompt_file_path = 'prompts/sample_prompt.txt'
+    outline_output_path = 'outputs/outlines/outline.txt'
+    script_output_path = 'outputs/scripts/script.txt'
+    audio_output_path = 'outputs/audio/output.mp3'
+
+    # This commit ships no outputs/ tree (it is git-ignored), so create the
+    # target directories before anything is written into them.
+    for output_path in (outline_output_path, script_output_path, audio_output_path):
+        os.makedirs(os.path.dirname(output_path), exist_ok=True)
+
+    # Read prompt from file
+    prompt = read_prompt(prompt_file_path)
+
+    # Generate outline (the generator needs the object providing generate_outline)
+    outline_generator = OutlineGenerator(chatgpt_api)
+    outline = outline_generator.create_outline(prompt)
+    save_output(outline_output_path, outline)
+
+    # Generate script from outline
+    script_creator = ScriptCreator()
+    script = script_creator.generate_script(outline)
+    save_output(script_output_path, script)
+
+    # Convert script to speech
+    tts_engine = TTSEngine()
+    tts_engine.synthesize_speech(script, audio_output_path)
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/podcast-tts-generator/src/outline_generator.py
b/podcast-tts-generator/src/outline_generator.py
new file mode 100644
index 0000000..20c7b01
--- /dev/null
+++ b/podcast-tts-generator/src/outline_generator.py
@@ -0,0 +1,20 @@
+class OutlineGenerator:
+    """Create podcast outlines by delegating to a ChatGPT API wrapper."""
+
+    def __init__(self, chatgpt_api=None):
+        """Store the API wrapper used by ``create_outline``.
+
+        Args:
+            chatgpt_api: Object exposing ``generate_outline(prompt) -> str``.
+                When omitted, the sibling ``chatgpt_api`` module is used, so
+                ``OutlineGenerator()`` with no arguments also works.
+        """
+        if chatgpt_api is None:
+            # Imported lazily so an injected wrapper never pulls in openai.
+            import chatgpt_api as default_api
+            chatgpt_api = default_api
+        self.chatgpt_api = chatgpt_api
+
+    def create_outline(self, prompt: str) -> str:
+        """Return the outline the wrapper generates for ``prompt``."""
+        return self.chatgpt_api.generate_outline(prompt)
\ No newline at end of file
diff --git a/podcast-tts-generator/src/script_creator.py b/podcast-tts-generator/src/script_creator.py
new file mode 100644
index 0000000..3f5db5b
--- /dev/null
+++ b/podcast-tts-generator/src/script_creator.py
@@ -0,0 +1,9 @@
+class ScriptCreator:
+    """Turn an outline into a script to be read aloud."""
+
+    def generate_script(self, outline: str) -> str:
+        """Wrap ``outline`` in a fixed greeting and sign-off.
+
+        No model call is made here; the outline is embedded verbatim.
+        """
+        return f"Welcome to our podcast! Today, we will discuss the following topics:\n\n{outline}\n\nThank you for listening!"
\ No newline at end of file
diff --git a/podcast-tts-generator/src/tts_engine.py b/podcast-tts-generator/src/tts_engine.py
new file mode 100644
index 0000000..f5c21e0
--- /dev/null
+++ b/podcast-tts-generator/src/tts_engine.py
@@ -0,0 +1,25 @@
+class TTSEngine:
+    """Text-to-speech backed by pyttsx3."""
+
+    def synthesize_speech(self, script: str, output_file: str) -> None:
+        """Queue ``script`` for rendering to ``output_file`` and run the engine.
+
+        Args:
+            script: Text to speak.
+            output_file: Destination path handed to pyttsx3's ``save_to_file``.
+        """
+        # Function-scope import: pyttsx3 is only needed when speech is synthesized.
+        import pyttsx3
+
+        # Initialize the text-to-speech engine
+        engine = pyttsx3.init()
+
+        # Set properties before adding anything to speak
+        engine.setProperty('rate', 150)  # Speaking rate in words per minute
+        engine.setProperty('volume', 1)  # Volume 0-1
+
+        # Save the speech to a file
+        engine.save_to_file(script, output_file)
+
+        # Wait for the speech to finish
+        engine.runAndWait()
\ No newline at end of file
diff --git a/podcast-tts-generator/src/utils.py b/podcast-tts-generator/src/utils.py
new file mode 100644
index 0000000..3d89eff
--- /dev/null
+++ b/podcast-tts-generator/src/utils.py
@@ -0,0 +1,19 @@
+import os
+
+def read_prompt(file_path: str) -> str:
+    """Return the full text of the prompt file at ``file_path``."""
+    # Explicit UTF-8: the platform default encoding is not portable.
+    with open(file_path, 'r', encoding='utf-8') as file:
+        return file.read()
+
+def save_output(file_path: str, content: str) -> None:
+    """Write ``content`` to ``file_path``, replacing any existing file.
+
+    Missing parent directories are created first, because the outputs/
+    tree is git-ignored and not shipped with the repository.
+    """
+    parent_dir = os.path.dirname(file_path)
+    if parent_dir:  # '' means the current directory; makedirs('') would raise
+        os.makedirs(parent_dir, exist_ok=True)
+    with open(file_path, 'w', encoding='utf-8') as file:
+        file.write(content)
\ No newline at end of file
diff --git a/prompts/sample_prompt.txt b/prompts/sample_prompt.txt
new file mode 100644
index 0000000..ca57e38
--- /dev/null
+++ b/prompts/sample_prompt.txt
@@ -0,0 +1 @@
+Explain the Physical Layer (Layer 1) of the OSI Model in detail. Include its purpose, examples of technologies that operate at this layer, the common challenges or misunderstandings, what devices might be used at this layer and lastly, what tools, if any would be used to troubleshoot this layer
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..e5f84c1
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+openai>=1.0.0
+gTTS==2.2.3
+pydub==0.25.1
+numpy>=1.23,<2.0
diff --git a/src/chatgpt_api.py b/src/chatgpt_api.py
new file mode 100644
index 0000000..bbc7eb3
--- /dev/null
+++ b/src/chatgpt_api.py
@@ -0,0 +1,26 @@
+import openai
+import os
+
+def generate_outline(prompt: str) -> str:
+    """Ask the chat model for a podcast-episode outline based on ``prompt``.
+
+    Uses the client interface of openai>=1.0 (the version requirements.txt
+    asks for); the legacy ``openai.ChatCompletion`` entry point was removed
+    in that release.
+
+    Returns:
+        The reply text with surrounding whitespace stripped, or an empty
+        string if the reply carries no text. Replies are capped at 500 tokens.
+    """
+    client = openai.OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
+    response = client.chat.completions.create(
+        model="gpt-3.5-turbo",
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant that creates detailed podcast outlines."},
+            {"role": "user", "content": f"Create a detailed outline for a podcast episode based on this prompt: {prompt}"}
+        ],
+        max_tokens=500
+    )
+    # 1.x responses are objects, not dicts: read .content as an attribute.
+    outline = (response.choices[0].message.content or "").strip()
+    return outline
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000..dc3c5ad
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,34 @@
+import os
+from src.chatgpt_api import generate_outline
+from src.script_creator import generate_script
+from src.tts_engine import text_to_speech
+
+PROMPT_PATH = 'prompts/sample_prompt.txt'
+OUTLINE_PATH = 'outputs/outlines/outline.txt'
+SCRIPT_PATH = 'outputs/scripts/script.txt'
+AUDIO_PATH = 'outputs/audio/podcast.mp3'
+
+def main():
+    # Step 1: Read prompt
+    with open(PROMPT_PATH, 'r', encoding='utf-8') as f:
+        prompt = f.read()
+
+    # Step 2: Generate outline
+
outline = generate_outline(prompt)
+    os.makedirs(os.path.dirname(OUTLINE_PATH), exist_ok=True)
+    with open(OUTLINE_PATH, 'w', encoding='utf-8') as f:
+        f.write(outline)
+
+    # Step 3: Generate script
+    script = generate_script(outline)
+    os.makedirs(os.path.dirname(SCRIPT_PATH), exist_ok=True)
+    with open(SCRIPT_PATH, 'w', encoding='utf-8') as f:
+        f.write(script)
+
+    # Step 4: Generate audio
+    os.makedirs(os.path.dirname(AUDIO_PATH), exist_ok=True)
+    text_to_speech(script, AUDIO_PATH)
+    print(f'Podcast audio saved to {AUDIO_PATH}')
+
+if __name__ == '__main__':
+    main()
diff --git a/src/script_creator.py b/src/script_creator.py
new file mode 100644
index 0000000..213ef70
--- /dev/null
+++ b/src/script_creator.py
@@ -0,0 +1,26 @@
+import openai
+import os
+
+def generate_script(outline: str) -> str:
+    """Ask the chat model to expand ``outline`` into a full podcast script.
+
+    Uses the client interface of openai>=1.0; the legacy
+    ``openai.ChatCompletion`` entry point was removed in that release.
+
+    Returns:
+        The reply text with surrounding whitespace stripped, or an empty
+        string if the reply carries no text. Replies are capped at 1500
+        tokens.
+    """
+    client = openai.OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
+    response = client.chat.completions.create(
+        model="gpt-3.5-turbo",
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant that writes podcast scripts based on outlines."},
+            {"role": "user", "content": f"Write a detailed podcast script based on this outline: {outline}"}
+        ],
+        max_tokens=1500
+    )
+    # 1.x responses are objects, not dicts: read .content as an attribute.
+    script = (response.choices[0].message.content or "").strip()
+    return script
diff --git a/src/tts_engine.py b/src/tts_engine.py
new file mode 100644
index 0000000..ed3b621
--- /dev/null
+++ b/src/tts_engine.py
@@ -0,0 +1,6 @@
+from gtts import gTTS
+
+def text_to_speech(text: str, output_path: str) -> None:
+    """Synthesize ``text`` with gTTS and save the audio to ``output_path``."""
+    tts = gTTS(text)
+    tts.save(output_path)
diff --git a/src/utils.py b/src/utils.py
new file mode 100644
index 0000000..9506290
--- /dev/null
+++ b/src/utils.py
@@ -0,0 +1 @@
+# Utility functions can be added here as needed