initial ai slop code

This commit is contained in:
2025-08-08 18:04:01 +02:00
parent a31d79bb09
commit 512521e9d0
15 changed files with 1855 additions and 0 deletions

6
.gitignore vendored Normal file
View File

@@ -0,0 +1,6 @@
node_modules/
.env
stories/sample_story/

1266
package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

37
package.json Normal file
View File

@@ -0,0 +1,37 @@
{
"name": "project-noctivus",
"version": "1.0.0",
"description": "An orchestrator for creating audiobooks from text.",
"type": "module",
"main": "dist/orchestrator.js",
"scripts": {
"start": "ts-node src/orchestrator.ts",
"build": "tsc -p tsconfig.json",
"start:prod": "node dist/orchestrator.js",
"lint": "eslint 'src/**/*.{ts,tsx}' --max-warnings=0",
"format": "prettier --write ."
},
"dependencies": {
"dotenv": "^16.4.5",
"ffmpeg-static": "^5.2.0",
"ffprobe-static": "^3.1.0",
"googleapis": "^140.0.0",
"js-yaml": "^4.1.0",
"openai": "^4.47.1",
"p-limit": "^6.1.0",
"zod": "^3.23.8"
},
"devDependencies": {
"@types/js-yaml": "^4.0.9",
"@types/node": "^20.12.12",
"eslint": "^9.7.0",
"eslint-config-prettier": "^9.1.0",
"eslint-plugin-import": "^2.29.1",
"prettier": "^3.3.3",
"ts-node": "^10.9.2",
"typescript": "^5.4.5"
},
"engines": {
"node": ">=18.18"
}
}

120
src/audio.ts Normal file
View File

@@ -0,0 +1,120 @@
import { spawn } from "child_process";
import { StoryConfig } from "./config";
import * as path from "path";
const ffmpeg = require("ffmpeg-static");
const ffprobe = require("ffprobe-static");
/**
 * Reads the duration of a media file by running ffprobe on it.
 *
 * @param file - Path of the media file handed to ffprobe.
 * @returns The `format=duration` value printed by ffprobe, parsed as a number of seconds.
 * @throws Rejects when ffprobe cannot be started, exits with a non-zero code,
 *         or prints something that is not a finite number.
 */
export function getDuration(file: string): Promise<number> {
  const args = ["-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", file];
  return new Promise<number>((resolve, reject) => {
    // Spawning inside the executor turns synchronous spawn failures into rejections.
    const probe = spawn(ffprobe.path, args);
    let stdoutText = "";
    let stderrText = "";
    probe.stdout.on("data", (data: { toString(): string }) => {
      stdoutText += data.toString();
    });
    probe.stderr.on("data", (data: { toString(): string }) => {
      stderrText += data.toString();
    });
    // Without this listener a missing or non-executable binary raises an unhandled 'error' event.
    probe.on("error", (err: Error) => {
      reject(new Error(`Failed to start ffprobe: ${err.message}`));
    });
    probe.on("close", (code: number | null) => {
      if (code !== 0) {
        const detail = stderrText.trim();
        reject(new Error(`ffprobe process exited with code ${code}${detail ? `: ${detail}` : ""}`));
        return;
      }
      const seconds = parseFloat(stdoutText);
      if (!Number.isFinite(seconds)) {
        // e.g. ffprobe prints "N/A" when the container carries no duration.
        reject(new Error(`ffprobe did not report a numeric duration for ${file}: "${stdoutText.trim()}"`));
        return;
      }
      resolve(seconds);
    });
  });
}
/**
 * Returns the duration of one generated chunk audio file.
 *
 * Thin alias over `getDuration`, kept so callers can express intent.
 *
 * @param chunkPath - Path of the chunk audio file.
 * @returns Whatever `getDuration` resolves to for that file.
 */
export async function getChunkDuration(chunkPath: string): Promise<number> {
  const seconds = await getDuration(chunkPath);
  return seconds;
}
/**
 * Writes a silent stereo 44.1 kHz audio file of the given length using ffmpeg's
 * `anullsrc` source. An existing file at `outputFile` is overwritten (`-y`).
 *
 * @param duration - Length of the silence in seconds; must be a finite, non-negative number.
 * @param outputFile - Destination path; ffmpeg picks the format from the extension.
 * @throws Rejects when `duration` is invalid, the ffmpeg binary is unavailable or
 *         cannot be started, or ffmpeg exits with a non-zero code.
 */
export async function generateSilence(duration: number, outputFile: string): Promise<void> {
  if (!Number.isFinite(duration) || duration < 0) {
    throw new Error(`Invalid silence duration: ${duration}`);
  }
  const ffmpegPath = ffmpeg;
  if (!ffmpegPath) {
    throw new Error("ffmpeg binary path is not available");
  }
  const args = ["-y", "-f", "lavfi", "-i", `anullsrc=r=44100:cl=stereo:d=${duration}`, outputFile];
  await new Promise<void>((resolve, reject) => {
    const ffmpegProcess = spawn(ffmpegPath, args);
    ffmpegProcess.stdout.on("data", (data: { toString(): string }) => {
      console.log(`stdout: ${data}`);
    });
    ffmpegProcess.stderr.on("data", (data: { toString(): string }) => {
      console.error(`stderr: ${data}`);
    });
    // Without this listener a missing or non-executable binary raises an unhandled 'error' event.
    ffmpegProcess.on("error", (err: Error) => {
      reject(new Error(`Failed to start ffmpeg: ${err.message}`));
    });
    ffmpegProcess.on("close", (code: number | null) => {
      if (code === 0) {
        resolve();
      } else {
        reject(new Error(`ffmpeg process exited with code ${code}`));
      }
    });
  });
}
/**
 * Runs one ffmpeg invocation and resolves when it exits with code 0.
 * Both output pipes are consumed so ffmpeg can never stall on a full pipe buffer.
 */
function runFfmpegStep(label: string, args: string[]): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    const child = spawn(ffmpeg, args);
    let stderrTail = "";
    child.stdout.on("data", () => {
      // Drained on purpose; nothing on stdout is needed.
    });
    child.stderr.on("data", (data: { toString(): string }) => {
      // Keep only the tail: ffmpeg prints progress here and the last lines carry the error.
      stderrTail = (stderrTail + data.toString()).slice(-2000);
    });
    child.on("error", (err: Error) => {
      reject(new Error(`${label} could not be started: ${err.message}`));
    });
    child.on("close", (code: number | null) => {
      if (code === 0) {
        resolve();
        return;
      }
      const detail = stderrTail.trim();
      reject(new Error(`${label} exited with code ${code}${detail ? `\n${detail}` : ""}`));
    });
  });
}

/**
 * Builds `stories/<storyName>/final_audio/final.mp3` from the intro, the chunk
 * audio files (in the given order) and the outro.
 *
 * Steps: write an ffmpeg concat list, stream-copy everything into `temp.mp3`,
 * measure its duration, generate a silent background track of that length, and
 * merge narration and background into the final MP3.
 *
 * @param storyName - Story directory name under `stories/`.
 * @param storyConfig - Supplies the intro, outro and background file names
 *                      (resolved relative to the story directory).
 * @param audioFiles - Chunk audio files to place between intro and outro.
 * @throws Rejects when any ffmpeg/ffprobe step cannot be started or fails.
 */
export async function combineAudio(storyName: string, storyConfig: StoryConfig, audioFiles: string[]): Promise<void> {
  const storyDir = path.resolve("stories", storyName);
  const introFile = path.resolve(storyDir, storyConfig.config.intro_audio_file);
  const outroFile = path.resolve(storyDir, storyConfig.config.outro_audio_file);
  const tempAudioFile = path.resolve(storyDir, "final_audio", "temp.mp3");
  const finalAudioFile = path.resolve(storyDir, "final_audio", "final.mp3");
  const backgroundMusicFile = path.resolve(storyDir, storyConfig.config.background_music_file);
  const listFile = path.resolve(storyDir, "filelist.txt");

  const fs = require("fs");
  // ffmpeg does not create missing output directories.
  fs.mkdirSync(path.dirname(tempAudioFile), { recursive: true });

  // First, concatenate the main audio files.
  const allFiles = [introFile, ...audioFiles.map((f) => path.resolve(f)), outroFile];
  // The concat demuxer expects single-quoted paths; embedded quotes are written as '\''.
  const fileList = allFiles.map((f) => `file '${f.replace(/'/g, "'\\''")}'`).join("\n");
  fs.writeFileSync(listFile, fileList);
  await runFfmpegStep("ffmpeg concat process", [
    "-y",
    "-f",
    "concat",
    "-safe",
    "0",
    "-i",
    listFile,
    "-c",
    "copy",
    tempAudioFile,
  ]);

  // Then, get the duration of the concatenated audio.
  const duration = await getDuration(tempAudioFile);

  // Generate silence for the background track.
  // NOTE(review): this overwrites whatever file `background_music_file` points to,
  // so real background music placed there is replaced by silence — confirm this is intended.
  await generateSilence(duration, backgroundMusicFile);

  // Finally, mix the main audio with the background track.
  await runFfmpegStep("ffmpeg mix process", [
    "-y",
    "-i",
    tempAudioFile,
    "-i",
    backgroundMusicFile,
    "-filter_complex",
    "[0:a][1:a]amerge=inputs=2[a]",
    "-map",
    "[a]",
    "-c:a",
    "libmp3lame",
    "-q:a",
    "4",
    finalAudioFile,
  ]);
}

11
src/chunker.ts Normal file
View File

@@ -0,0 +1,11 @@
/**
 * Splits text into chunks of at most `chunkSize` whitespace-separated words.
 * Words inside a chunk are re-joined with a single space, so original line
 * breaks and repeated spaces are not preserved.
 *
 * @param text - Text to split; leading/trailing whitespace is ignored.
 * @param chunkSize - Maximum words per chunk; fractional values are floored.
 * @returns The chunks in order; an empty array when `text` contains no words.
 * @throws RangeError when `chunkSize` is not at least 1 (a step of zero or less
 *         would otherwise never terminate).
 */
export function chunkText(text: string, chunkSize: number): string[] {
  const size = Math.floor(chunkSize);
  if (!(size >= 1)) {
    throw new RangeError(`chunkSize must be a number >= 1, received ${chunkSize}`);
  }
  // Filtering drops the empty strings that split() yields for blank or padded input.
  const words = text.split(/\s+/).filter((word) => word.length > 0);
  const chunks: string[] = [];
  for (let start = 0; start < words.length; start += size) {
    chunks.push(words.slice(start, start + size).join(" "));
  }
  return chunks;
}

36
src/config.ts Normal file
View File

@@ -0,0 +1,36 @@
import * as yaml from "js-yaml";
import * as fs from "fs";
import * as path from "path";
/**
 * Shape of a story's `stories/<storyName>/config.yaml`, as returned by
 * `loadStoryConfig`.
 */
export interface StoryConfig {
/** Facts about the source work. */
metadata: {
/** Title of the work; used in the generated YouTube title, description and tags. */
title: string;
/** Author of the work; used in the generated YouTube description and tags. */
author: string;
/** Year of first publication; the public-domain check compares it against a cutoff year. */
publication_year: number;
/** URL included in the generated YouTube description as public-domain proof. */
public_domain_proof_url: string;
/** Not read by any code visible in this commit — presumably informational; TODO confirm. */
reading_level: string;
};
/** Settings that drive the pipeline stages. */
config: {
/** Number of whitespace-separated words per text chunk (passed to `chunkText`). */
chunk_size: number;
/** Sent as the `voice` field of the text-to-speech request. */
tts_voice_id: string;
/** Sent as the `instructions` field of the text-to-speech request. */
tts_instructions: string;
/** Not read by the image generator visible in this commit — presumably a style prompt; TODO confirm. */
image_style_prompts: string;
/** Intro audio file path, resolved relative to `stories/<storyName>/`. */
intro_audio_file: string;
/** Outro audio file path, resolved relative to `stories/<storyName>/`. */
outro_audio_file: string;
/** Background track file path, resolved relative to `stories/<storyName>/`. */
background_music_file: string;
/** Not read by any code visible in this commit — presumably output format hints; TODO confirm. */
export_settings: {
format: string;
resolution: string;
};
};
}
/** Returns `value` as a plain object record, or `undefined` when it is not a non-array object. */
function asConfigRecord(value: unknown): Record<string, unknown> | undefined {
  if (typeof value !== "object" || value === null || Array.isArray(value)) {
    return undefined;
  }
  return value as Record<string, unknown>;
}

/**
 * Loads and parses `stories/<storyName>/config.yaml`.
 *
 * Only the top-level structure is checked (the document must be a mapping with
 * `metadata` and `config` mappings); individual fields are not validated.
 *
 * @param storyName - Story directory name under `stories/`.
 * @returns The parsed configuration.
 * @throws Error when the file does not exist, or when the YAML is empty or
 *         lacks the `metadata` / `config` sections.
 */
export function loadStoryConfig(storyName: string): StoryConfig {
  const configPath = path.join("stories", storyName, "config.yaml");
  if (!fs.existsSync(configPath)) {
    throw new Error(`Configuration file not found for story: ${storyName}`);
  }
  const parsed: unknown = yaml.load(fs.readFileSync(configPath, "utf8"));
  // An empty file parses to undefined and a scalar to a string/number; fail here
  // with a clear message instead of letting a later stage crash on `config.x`.
  const root = asConfigRecord(parsed);
  if (!root || !asConfigRecord(root.metadata) || !asConfigRecord(root.config)) {
    throw new Error(
      `Invalid configuration for story: ${storyName} (expected "metadata" and "config" sections in ${configPath})`
    );
  }
  return parsed as StoryConfig;
}

37
src/images.ts Normal file
View File

@@ -0,0 +1,37 @@
import OpenAI from "openai";
import * as fs from "fs";
import * as path from "path";
import { StoryConfig } from "./config";
// OpenAI client used for image generation in this module. The API key is read
// from the OPENAI_API_KEY environment variable when this module is first loaded.
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});
/**
 * Generates one illustration through the OpenAI images API and stores it as
 * `stories/<storyName>/images/chunk_<chunkIndex>_img<imageIndex>.png`.
 *
 * @param storyName - Story directory name under `stories/`.
 * @param storyConfig - Story configuration (currently unused, see note below).
 * @param chunk - Text of the chunk being illustrated (currently unused, see note below).
 * @param chunkIndex - Index of the chunk; part of the file name.
 * @param imageIndex - Index of the image within the chunk; part of the file name.
 * @returns The relative path of the written PNG.
 * @throws Error when the response carries no base64 image data; API and
 *         file-system errors propagate unchanged.
 */
export async function generateImage(
  storyName: string,
  storyConfig: StoryConfig,
  chunk: string,
  chunkIndex: number,
  imageIndex: number
): Promise<string> {
  const imagePath = path.join("stories", storyName, "images", `chunk_${chunkIndex}_img${imageIndex}.png`);
  // NOTE(review): the prompt is a fixed placeholder, so every chunk gets the same
  // subject; `chunk` and `storyConfig.config.image_style_prompts` are never used.
  const prompt = "A cartoon cat.";
  const response = await openai.images.generate({
    model: "dall-e-3", // Downgrading to dall-e-3 as gpt-image-1 is not available
    prompt,
    n: 1,
    size: "1024x1024",
    response_format: "b64_json",
  });
  // Optional chaining on the element too: an empty `data` array must produce the
  // error below, not a TypeError on `undefined.b64_json`.
  const imageBase64 = response.data?.[0]?.b64_json;
  if (!imageBase64) {
    throw new Error("Image data not found in response");
  }
  // The images directory is not guaranteed to exist on a fresh story.
  await fs.promises.mkdir(path.dirname(imagePath), { recursive: true });
  await fs.promises.writeFile(imagePath, Buffer.from(imageBase64, "base64"));
  return imagePath;
}

95
src/orchestrator.ts Normal file
View File

@@ -0,0 +1,95 @@
import * as dotenv from "dotenv";
dotenv.config();
import { loadStoryConfig } from "./config";
import { validatePublicDomain } from "./validator";
import { sanitizeText } from "./sanitizer";
import { chunkText } from "./chunker";
import { generateAudio, generateSingleAudio } from "./tts";
import { combineAudio, getChunkDuration } from "./audio";
import { generateImage } from "./images";
import { createVideo } from "./video";
import { createSrt } from "./subtitles";
import { generateYouTubeMetadata, uploadToYouTube } from "./uploader";
import * as path from "path";
/**
 * Runs the whole pipeline for the story named by the first CLI argument:
 * load config, public-domain check, sanitize, chunk, synthesize intro/outro and
 * per-chunk audio, generate one image per chunk, write subtitles, combine
 * audio, render the video, and hand the result to the uploader.
 *
 * Exits with code 1 when no story name is given or the work fails the
 * public-domain check. Any other failure rejects the returned promise.
 */
async function main(): Promise<void> {
  const storyName = process.argv[2];
  if (!storyName) {
    console.error("Please provide a story name.");
    process.exit(1);
  }
  console.log(`Starting pipeline for story: ${storyName}`);

  const storyConfig = loadStoryConfig(storyName);
  console.log("Story configuration:");
  console.log(storyConfig);

  const validationResult = validatePublicDomain(storyConfig);
  console.log(`Public domain status: ${validationResult.message}`);
  if (!validationResult.is_public_domain) {
    process.exit(1);
  }

  console.log("Sanitizing text...");
  const sanitizedText = sanitizeText(storyName);
  console.log("Sanitized text:");
  console.log(sanitizedText);

  console.log("Chunking text...");
  const chunks = chunkText(sanitizedText, storyConfig.config.chunk_size);
  console.log("Text chunks:");
  console.log(chunks);

  console.log("Generating intro/outro audio...");
  const introFile = path.join("stories", storyName, storyConfig.config.intro_audio_file);
  const outroFile = path.join("stories", storyName, storyConfig.config.outro_audio_file);
  await generateSingleAudio(storyConfig, "This is the intro.", introFile);
  await generateSingleAudio(storyConfig, "This is the outro.", outroFile);
  console.log("Generated intro/outro audio successfully.");

  console.log("Generating audio...");
  const audioFiles: string[] = [];
  const chunkDurations: number[] = [];
  for (let i = 0; i < chunks.length; i++) {
    console.log(`Generating audio for chunk ${i}...`);
    const audioFile = await generateAudio(storyConfig, storyName, chunks[i], i);
    audioFiles.push(audioFile);
    // Durations are measured per chunk because subtitles and video timing are built from them.
    const duration = await getChunkDuration(audioFile);
    chunkDurations.push(duration);
    console.log(`Generated audio file: ${audioFile}, duration: ${duration}`);
  }

  console.log("Generating images...");
  const imageFiles: string[] = [];
  for (let i = 0; i < chunks.length; i++) {
    console.log(`Generating image for chunk ${i}...`);
    const imageFile = await generateImage(storyName, storyConfig, chunks[i], i, 0);
    imageFiles.push(imageFile);
    console.log(`Generated image file: ${imageFile}`);
  }

  console.log("Creating subtitles...");
  const srtPath = createSrt(storyName, chunks, chunkDurations);
  console.log(`Created subtitles file: ${srtPath}`);

  console.log("Combining audio files...");
  await combineAudio(storyName, storyConfig, audioFiles);
  console.log("Combined audio files successfully.");

  console.log("Creating video...");
  await createVideo(storyName, storyConfig, imageFiles, chunkDurations, srtPath);
  console.log("Created video successfully.");

  console.log("Generating YouTube metadata...");
  const metadata = generateYouTubeMetadata(storyConfig);
  console.log("YouTube metadata:");
  console.log(metadata);

  console.log("Uploading to YouTube...");
  const videoPath = path.resolve("stories", storyName, "video", "final.mp4");
  await uploadToYouTube(videoPath, metadata);
  console.log("YouTube upload placeholder complete.");
}

// Handle the rejection explicitly so a failed stage is reported once and the
// process ends with a non-zero exit code instead of a floating promise.
main().catch((error: unknown) => {
  console.error("Pipeline failed:", error);
  process.exit(1);
});

20
src/sanitizer.ts Normal file
View File

@@ -0,0 +1,20 @@
import * as fs from "fs";
import * as path from "path";
const FORBIDDEN_WORDS = ["darn", "heck", "gosh"]; // Example list

/**
 * Reads `stories/<storyName>/source.txt` and masks every forbidden word.
 *
 * Matching is case-insensitive and limited to whole words; each match is
 * replaced with `***`.
 *
 * @param storyName - Story directory name under `stories/`.
 * @returns The source text with forbidden words masked.
 * @throws Error when the source file does not exist.
 */
export function sanitizeText(storyName: string): string {
  const sourcePath = path.join("stories", storyName, "source.txt");
  if (fs.existsSync(sourcePath)) {
    const rawText = fs.readFileSync(sourcePath, "utf8");
    return FORBIDDEN_WORDS.reduce(
      (masked, word) => masked.replace(new RegExp(`\\b${word}\\b`, "gi"), "***"),
      rawText
    );
  }
  throw new Error(`Source text not found for story: ${storyName}`);
}

31
src/subtitles.ts Normal file
View File

@@ -0,0 +1,31 @@
import * as fs from "fs";
import * as path from "path";
/**
 * Formats a time offset as an SRT timestamp, `HH:MM:SS,mmm`.
 *
 * The value is rounded to the nearest millisecond. Hours are not wrapped at 24,
 * so long recordings keep counting up. Negative or non-finite input is
 * formatted as zero.
 *
 * @param seconds - Offset from the start, in seconds (may be fractional).
 * @returns The SRT timestamp string.
 */
function toSrtTime(seconds: number): string {
  const totalMs = Number.isFinite(seconds) && seconds > 0 ? Math.round(seconds * 1000) : 0;
  const ms = totalMs % 1000;
  const wholeSeconds = Math.floor(totalMs / 1000);
  const secs = wholeSeconds % 60;
  const wholeMinutes = Math.floor(wholeSeconds / 60);
  const mins = wholeMinutes % 60;
  const hours = Math.floor(wholeMinutes / 60);
  // Manual left-padding keeps the function usable under the project's es6 target.
  const pad = (value: number, width: number): string => {
    let text = String(value);
    while (text.length < width) {
      text = "0" + text;
    }
    return text;
  };
  return `${pad(hours, 2)}:${pad(mins, 2)}:${pad(secs, 2)},${pad(ms, 3)}`;
}
/**
 * Writes `stories/<storyName>/subtitles.srt` with one cue per chunk.
 *
 * Cue `i` starts where cue `i - 1` ended (the first starts at 0) and lasts
 * `chunkDurations[i]` seconds; its text is the chunk itself.
 *
 * @param storyName - Story directory name under `stories/`.
 * @param chunks - Cue texts, in playback order.
 * @param chunkDurations - Duration in seconds of each chunk, index-aligned with `chunks`.
 * @returns The absolute path of the written subtitle file.
 */
export function createSrt(storyName: string, chunks: string[], chunkDurations: number[]): string {
  const srtPath = path.resolve("stories", storyName, "subtitles.srt");
  let elapsed = 0;
  const cues = chunks.map((text, index) => {
    const length = chunkDurations[index];
    const from = toSrtTime(elapsed);
    const to = toSrtTime(elapsed + length);
    elapsed += length;
    return `${index + 1}\n${from} --> ${to}\n${text}\n\n`;
  });
  fs.writeFileSync(srtPath, cues.join(""));
  return srtPath;
}

43
src/tts.ts Normal file
View File

@@ -0,0 +1,43 @@
import OpenAI from "openai";
import * as fs from "fs";
import * as path from "path";
import { StoryConfig } from "./config";
// OpenAI client used for text-to-speech in this module. The API key is read
// from the OPENAI_API_KEY environment variable when this module is first loaded.
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});
/**
 * Synthesizes speech for one text chunk and stores it as
 * `stories/<storyName>/audio/chunk_<index>.mp3`.
 *
 * The request itself is delegated to `generateSingleAudio`, so chunk audio and
 * intro/outro audio share one code path.
 *
 * @param storyConfig - Supplies the voice and instructions for the request.
 * @param storyName - Story directory name under `stories/`.
 * @param chunk - Text to speak.
 * @param index - Chunk index; part of the file name.
 * @returns The relative path of the written MP3.
 */
export async function generateAudio(
  storyConfig: StoryConfig,
  storyName: string,
  chunk: string,
  index: number
): Promise<string> {
  const speechFile = path.join("stories", storyName, "audio", `chunk_${index}.mp3`);
  // The audio directory is not guaranteed to exist on a fresh story.
  await fs.promises.mkdir(path.dirname(speechFile), { recursive: true });
  return generateSingleAudio(storyConfig, chunk, speechFile);
}
/**
 * Synthesizes speech for `text` with the story's voice settings and writes the
 * audio bytes to `outputFile`, creating its parent directory when needed.
 *
 * @param storyConfig - Supplies `tts_voice_id` (sent as `voice`) and
 *                      `tts_instructions` (sent as `instructions`).
 * @param text - Text to speak.
 * @param outputFile - Destination path of the audio file.
 * @returns `outputFile`, for convenience.
 */
export async function generateSingleAudio(storyConfig: StoryConfig, text: string, outputFile: string): Promise<string> {
  const mp3 = await openai.audio.speech.create({
    model: "gpt-4o-mini-tts",
    // The voice id comes from free-form YAML, so it cannot be typed as the SDK's voice union here.
    voice: storyConfig.config.tts_voice_id as any,
    input: text,
    instructions: storyConfig.config.tts_instructions,
  });
  const buffer = Buffer.from(await mp3.arrayBuffer());
  // The configured intro/outro path may point into a directory that does not exist yet.
  await fs.promises.mkdir(path.dirname(outputFile), { recursive: true });
  await fs.promises.writeFile(outputFile, buffer);
  return outputFile;
}

37
src/uploader.ts Normal file
View File

@@ -0,0 +1,37 @@
import { StoryConfig } from "./config";
import { google } from "googleapis";
/** Listing details produced by `generateYouTubeMetadata` and handed to `uploadToYouTube`. */
export interface YouTubeMetadata {
/** Video title. */
title: string;
/** Video description text (may span several lines). */
description: string;
/** Search tags, in the order they were generated. */
tags: string[];
}
/**
 * Derives the YouTube title, description and tags from the story metadata.
 *
 * @param storyConfig - Supplies the work's title, author and public-domain proof URL.
 * @returns The title, a three-line description, and the tag list (fixed tags
 *          plus the work's title and author).
 */
export function generateYouTubeMetadata(storyConfig: StoryConfig): YouTubeMetadata {
  const { title: storyTitle, author, public_domain_proof_url: proofUrl } = storyConfig.metadata;
  const descriptionLines = [
    `A kid-friendly, clean version of the classic story, "${storyTitle}" by ${author}. This read-aloud audiobook features an AI-generated voice and illustrations, with on-screen text to help children read along.`,
    "This content is made for kids and is COPPA-compliant.",
    `Public domain proof: ${proofUrl}`,
  ];
  return {
    title: `Bedtime Story • ${storyTitle} (Clean Kids Version) | Read-Along Text + AI Voice`,
    description: descriptionLines.join("\n"),
    tags: [
      "bedtime story",
      "read along",
      "audiobook",
      "kids story",
      storyTitle,
      author,
      "clean version",
      "AI voice",
      "AI generated",
    ],
  };
}
/**
 * Placeholder for the YouTube upload: logs the video path and metadata and
 * returns without contacting any service.
 *
 * @param videoPath - Path of the rendered video.
 * @param metadata - Title, description and tags that would accompany the upload.
 */
export async function uploadToYouTube(videoPath: string, metadata: YouTubeMetadata): Promise<void> {
  const logLines: unknown[][] = [
    ["Uploading to YouTube..."],
    ["Video path:", videoPath],
    ["Metadata:", metadata],
    ["This is a placeholder. YouTube upload is not yet implemented."],
  ];
  for (const parts of logLines) {
    console.log(...parts);
  }
}

31
src/validator.ts Normal file
View File

@@ -0,0 +1,31 @@
import { StoryConfig } from "./config";
/** Outcome of the public-domain check returned by `validatePublicDomain`. */
export interface ValidationResult {
/** True when the work passed the check. */
is_public_domain: boolean;
/** Human-readable explanation of the outcome. */
message: string;
}
/**
 * Checks the work's publication year against the US 95-year publication term.
 *
 * The term runs through the end of the 95th calendar year after publication,
 * so a work published in year Y enters the public domain on 1 January of
 * Y + 96. The newest qualifying year is therefore `currentYear - 96`.
 *
 * NOTE(review): this is a year-only heuristic; it does not consider renewal,
 * notice, or works whose term is based on the author's death.
 *
 * @param storyConfig - Supplies `metadata.publication_year`.
 * @returns Whether the work passes, plus an explanatory message.
 */
export function validatePublicDomain(storyConfig: StoryConfig): ValidationResult {
  const rawYear = storyConfig?.metadata?.publication_year;
  // YAML may deliver the year as a string; coerce once and reject anything non-numeric.
  const publicationYear = Number(rawYear);
  if (!rawYear || !Number.isFinite(publicationYear)) {
    return {
      is_public_domain: false,
      message: "Publication year not found in metadata.",
    };
  }
  const currentYear = new Date().getFullYear();
  const cutoffYear = currentYear - 96;
  if (publicationYear > cutoffYear) {
    return {
      is_public_domain: false,
      message: `Work published in ${publicationYear} is not yet in the US public domain (cutoff: \u2264 ${cutoffYear}).`,
    };
  }
  return {
    is_public_domain: true,
    message: "Work is in the public domain in the US (95-year rule).",
  };
}

73
src/video.ts Normal file
View File

@@ -0,0 +1,73 @@
import { spawn } from "child_process";
import { StoryConfig } from "./config";
import * as path from "path";
const ffmpeg = require("ffmpeg-static");
import { getDuration } from "./audio";
/**
 * Escapes a value for use as a filter option inside a `-filter_complex` graph.
 * Two passes are needed: one for the option parser (`\`, `'`, `:`) and one for
 * the graph parser (`\`, `'`, `[`, `]`, `,`, `;`).
 */
function escapeFilterGraphValue(value: string): string {
  const optionLevel = value.replace(/[\\':]/g, "\\$&");
  return optionLevel.replace(/[\\'\[\],;]/g, "\\$&");
}

/**
 * Renders `stories/<storyName>/video/final.mp4`: one slow-zoom segment per
 * image (each as long as its chunk, fading out over the last second),
 * concatenated, with subtitles burned in and
 * `stories/<storyName>/final_audio/final.mp3` as the sound track.
 *
 * NOTE(review): the output is cut to the sum of `chunkDurations`, while the
 * audio track built by `combineAudio` also contains the intro and outro — so
 * picture/subtitle timing and audio are likely offset; confirm intended design.
 *
 * @param storyName - Story directory name under `stories/`.
 * @param storyConfig - Story configuration (not used by this function).
 * @param imageFiles - One image per chunk, in playback order.
 * @param chunkDurations - Duration in seconds of each chunk, index-aligned with `imageFiles`.
 * @param srtPath - Subtitle file to burn into the picture.
 * @throws Error when no images are given or the two arrays differ in length;
 *         rejects when ffmpeg cannot be started or exits with a non-zero code.
 */
export async function createVideo(
  storyName: string,
  storyConfig: StoryConfig,
  imageFiles: string[],
  chunkDurations: number[],
  srtPath: string
): Promise<void> {
  if (imageFiles.length === 0) {
    throw new Error("createVideo requires at least one image file");
  }
  if (imageFiles.length !== chunkDurations.length) {
    throw new Error(
      `createVideo received ${imageFiles.length} image file(s) but ${chunkDurations.length} chunk duration(s)`
    );
  }

  const audioPath = path.resolve("stories", storyName, "final_audio", "final.mp3");
  const videoPath = path.resolve("stories", storyName, "video", "final.mp4");
  const totalDuration = chunkDurations.reduce((a, b) => a + b, 0);

  // ffmpeg does not create missing output directories.
  const fs = require("fs");
  fs.mkdirSync(path.dirname(videoPath), { recursive: true });

  // Each image is fed as a single frame (no `-loop 1`): zoompan expands one
  // input frame into `d` output frames, so the segment ends after `d` frames.
  // A looped input never ends, so concat would never advance past the first image.
  const inputs: string[] = [];
  for (const file of imageFiles) {
    inputs.push("-i", file);
  }
  inputs.push("-i", audioPath);

  const framesPerSecond = 25; // zoompan's default output rate
  const segmentFilters = imageFiles.map((_, i) => {
    const duration = chunkDurations[i];
    const frames = Math.max(1, Math.round(framesPerSecond * duration));
    // Clips shorter than one second start fading immediately instead of at a negative time.
    const fadeStart = Math.max(0, duration - 1);
    const zoompan = `zoompan=z='min(zoom+0.0015,1.5)':d=${frames}:x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':s=1024x1024`;
    return `[${i}:v]${zoompan},fade=t=out:st=${fadeStart}:d=1[v${i}]`;
  });
  const segmentLabels = imageFiles.map((_, i) => `[v${i}]`).join("");
  const finalFilterGraph =
    `${segmentFilters.join(";")};` +
    `${segmentLabels}concat=n=${imageFiles.length}:v=1:a=0,format=yuv420p[vcat];` +
    `[vcat]subtitles=${escapeFilterGraphValue(srtPath)}[v]`;

  const args = [
    "-y",
    ...inputs,
    "-filter_complex",
    finalFilterGraph,
    "-map",
    "[v]",
    "-map",
    `${imageFiles.length}:a`,
    "-c:v",
    "libx264",
    "-tune",
    "stillimage",
    "-c:a",
    "aac",
    "-b:a",
    "192k",
    "-pix_fmt",
    "yuv420p",
    "-t",
    totalDuration.toString(),
    videoPath,
  ];

  await new Promise<void>((resolve, reject) => {
    const ffmpegProcess = spawn(ffmpeg, args);
    let stderrTail = "";
    ffmpegProcess.stdout.on("data", () => {
      // Drained on purpose so ffmpeg never blocks on a full pipe.
    });
    ffmpegProcess.stderr.on("data", (data: { toString(): string }) => {
      // Keep only the tail: the last lines carry the actual error.
      stderrTail = (stderrTail + data.toString()).slice(-2000);
    });
    // Without this listener a missing or non-executable binary raises an unhandled 'error' event.
    ffmpegProcess.on("error", (err: Error) => {
      reject(new Error(`Failed to start ffmpeg: ${err.message}`));
    });
    ffmpegProcess.on("close", (code: number | null) => {
      if (code === 0) {
        resolve();
        return;
      }
      const detail = stderrTail.trim();
      reject(new Error(`ffmpeg process exited with code ${code}${detail ? `\n${detail}` : ""}`));
    });
  });
}

12
tsconfig.json Normal file
View File

@@ -0,0 +1,12 @@
{
"compilerOptions": {
"target": "es6",
"module": "commonjs",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"outDir": "./dist"
},
"include": ["src/**/*.ts"]
}