migrate to root dir

2025-08-08 19:26:21 +02:00
parent cf8219691b
commit 8720500442
41 changed files with 2478 additions and 4440 deletions

lib/pipeline/audio.ts (new file)

@@ -0,0 +1,125 @@
import { spawn } from "child_process";
import { StoryConfig } from "./config";
import * as path from "path";
import * as fs from "fs";
import ffmpeg from "ffmpeg-static";
import ffprobe from "ffprobe-static";
export function getDuration(file: string): Promise<number> {
const ffprobePath: string = (ffprobe as unknown as { path: string }).path;
const args = ["-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", file];
const ffprobeProcess = spawn(ffprobePath, args);
let duration = "";
ffprobeProcess.stdout.on("data", (data: Buffer) => {
duration += data.toString();
});
return new Promise<number>((resolve, reject) => {
ffprobeProcess.on("close", (code: number | null) => {
if (code === 0) {
resolve(parseFloat(duration));
} else {
reject(new Error(`ffprobe process exited with code ${code}`));
}
});
});
}
export async function getChunkDuration(chunkPath: string): Promise<number> {
return getDuration(chunkPath);
}
export async function generateSilence(duration: number, outputFile: string): Promise<void> {
const ffmpegPath: string = (ffmpeg as unknown as string) || "ffmpeg";
const args = ["-y", "-f", "lavfi", "-i", `anullsrc=r=44100:cl=stereo:d=${duration}`, outputFile];
const ffmpegProcess = spawn(ffmpegPath, args);
return new Promise<void>((resolve, reject) => {
ffmpegProcess.on("close", (code: number | null) => {
if (code === 0) {
resolve();
} else {
reject(new Error(`ffmpeg process exited with code ${code}`));
}
});
});
}
export async function combineAudio(storyName: string, storyConfig: StoryConfig, audioFiles: string[]): Promise<void> {
const ffmpegPath: string = (ffmpeg as unknown as string) || "ffmpeg";
const introFile = path.resolve("stories", storyName, storyConfig.config.intro_audio_file);
const outroFile = path.resolve("stories", storyName, storyConfig.config.outro_audio_file);
const finalAudioDir = path.resolve("stories", storyName, "final_audio");
const tempAudioFile = path.join(finalAudioDir, "temp.mp3");
const finalAudioFile = path.join(finalAudioDir, "final.mp3");
const backgroundMusicFile = path.resolve("stories", storyName, storyConfig.config.background_music_file);
fs.mkdirSync(finalAudioDir, { recursive: true });
const allFiles = [introFile, ...audioFiles.map((f) => path.resolve(f)), outroFile];
const fileList = allFiles.map((f) => `file '${f.replace(/'/g, "'\\''")}'`).join("\n");
const listFile = path.resolve("stories", storyName, "filelist.txt");
fs.writeFileSync(listFile, fileList);
const concatArgs = ["-y", "-f", "concat", "-safe", "0", "-i", listFile, "-c", "copy", tempAudioFile];
const concatProcess = spawn(ffmpegPath, concatArgs);
await new Promise<void>((resolve, reject) => {
concatProcess.on("close", (code: number | null) => {
if (code === 0) {
resolve();
} else {
reject(new Error(`ffmpeg concat process exited with code ${code}`));
}
});
});
const duration = await getDuration(tempAudioFile);
if (!fs.existsSync(backgroundMusicFile)) {
await new Promise<void>((resolve, reject) => {
const args = ["-y", "-i", tempAudioFile, "-c:a", "libmp3lame", "-q:a", "4", finalAudioFile];
const p = spawn(ffmpegPath, args);
p.on("close", (code: number | null) =>
code === 0 ? resolve() : reject(new Error(`ffmpeg copy failed ${code}`))
);
});
return;
}
const bgVolume = "0.2";
const mixArgs = [
"-y",
"-i",
tempAudioFile,
"-stream_loop",
"-1",
"-i",
backgroundMusicFile,
"-filter_complex",
`[1:a]volume=${bgVolume},atrim=0:${duration},asetpts=N/SR/TB[bg];[0:a][bg]amix=inputs=2:duration=first:dropout_transition=0[a]`,
"-map",
"[a]",
"-c:a",
"libmp3lame",
"-q:a",
"4",
finalAudioFile,
];
const mixProcess = spawn(ffmpegPath, mixArgs);
return new Promise<void>((resolve, reject) => {
mixProcess.on("close", (code: number | null) => {
if (code === 0) {
resolve();
} else {
reject(new Error(`ffmpeg mix process exited with code ${code}`));
}
});
});
}
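Not part of this commit: a minimal usage sketch for getDuration and generateSilence. The story name, paths, and the 1.5-second pause are hypothetical, and the import path assumes a script run from the repository root.

import { getDuration, generateSilence } from "./lib/pipeline/audio";

async function main() {
  // Any existing mp3 works as input; this path is only an example.
  const seconds = await getDuration("stories/example-story/audio/chunk_0.mp3");
  console.log(`chunk_0.mp3 runs ${seconds.toFixed(2)}s`);
  // Render a short stretch of silence next to it.
  await generateSilence(1.5, "stories/example-story/audio/pause.mp3");
}

main().catch(console.error);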

lib/pipeline/chunker.ts (new file)

@@ -0,0 +1,11 @@
export function chunkText(text: string, chunkSize: number): string[] {
const words = text.split(/\s+/).filter(Boolean); // drop empty tokens from leading/trailing whitespace
const chunks: string[] = [];
for (let i = 0; i < words.length; i += chunkSize) {
const chunk = words.slice(i, i + chunkSize).join(" ");
chunks.push(chunk);
}
return chunks;
}
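Not part of this commit: a quick sketch of how chunkText slices a source text; the 120-word chunk size is an arbitrary example value.

import { chunkText } from "./lib/pipeline/chunker";

// Splits on whitespace and groups every 120 words into one chunk.
const chunks = chunkText("It was a dark and stormy night ...", 120);
console.log(`${chunks.length} chunk(s)`);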

lib/pipeline/config.ts (new file)

@@ -0,0 +1,67 @@
import * as yaml from "js-yaml";
import * as fs from "fs";
import * as path from "path";
import { z } from "zod";
export interface StoryConfig {
metadata: {
title: string;
author: string;
publication_year: number;
public_domain_proof_url: string;
reading_level: string;
};
config: {
chunk_size: number;
tts_voice_id: string;
tts_instructions?: string;
image_style_prompts?: string;
intro_audio_file: string;
outro_audio_file: string;
background_music_file: string;
export_settings: {
format?: string;
resolution: string;
};
};
}
const StoryConfigSchema = z.object({
metadata: z.object({
title: z.string().min(1),
author: z.string().min(1),
publication_year: z.number().int(),
public_domain_proof_url: z.string().min(1),
reading_level: z.string().min(1),
}),
config: z.object({
chunk_size: z.number().int().positive(),
tts_voice_id: z.string().min(1),
tts_instructions: z.string().optional().default(""),
image_style_prompts: z.string().optional().default(""),
intro_audio_file: z.string().min(1),
outro_audio_file: z.string().min(1),
background_music_file: z.string().min(1),
export_settings: z
.object({
format: z.string().optional().default("mp4"),
resolution: z
.string()
.regex(/^\d+x\d+$/)
.default("1024x1024"),
})
.default({ format: "mp4", resolution: "1024x1024" }),
}),
});
export function loadStoryConfig(storyName: string): StoryConfig {
const configPath = path.join("stories", storyName, "config.yaml");
if (!fs.existsSync(configPath)) {
throw new Error(`Configuration file not found for story: ${storyName}`);
}
const fileContents = fs.readFileSync(configPath, "utf8");
const loaded = yaml.load(fileContents);
const parsed = StoryConfigSchema.parse(loaded);
return parsed as unknown as StoryConfig;
}
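Not part of this commit: a sketch of loading and validating a config, assuming a hypothetical stories/example-story/config.yaml relative to the working directory.

import { ZodError } from "zod";
import { loadStoryConfig } from "./lib/pipeline/config";

try {
  // Throws if config.yaml is missing or fails the zod schema.
  const storyConfig = loadStoryConfig("example-story");
  console.log(storyConfig.metadata.title, storyConfig.config.chunk_size);
} catch (err) {
  if (err instanceof ZodError) {
    console.error("config.yaml is invalid:", err.issues);
  } else {
    throw err;
  }
}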

lib/pipeline/images.ts (new file)

@@ -0,0 +1,70 @@
import OpenAI from "openai";
import * as fs from "fs";
import * as path from "path";
import { StoryConfig } from "./config";
let openaiClient: OpenAI | null = null;
function getOpenAI(): OpenAI {
if (!openaiClient) {
openaiClient = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
}
return openaiClient;
}
const allowedSizesValues = [
"256x256",
"512x512",
"1024x1024",
"1536x1024",
"1024x1536",
"1792x1024",
"1024x1792",
] as const;
type AllowedSize = (typeof allowedSizesValues)[number];
function pickImageSize(resolution?: string): AllowedSize {
if (!resolution) return "1024x1024";
const match = resolution.match(/^(\d+)x(\d+)$/);
if (!match) return "1024x1024";
const width = parseInt(match[1], 10);
const height = parseInt(match[2], 10);
if (!Number.isFinite(width) || !Number.isFinite(height)) return "1024x1024";
if (width === height) return "1024x1024";
// dall-e-3 only accepts 1024x1024, 1792x1024 and 1024x1792; the 1536x sizes belong to other image models.
return width > height ? "1792x1024" : "1024x1792";
}
export async function generateImage(
storyName: string,
storyConfig: StoryConfig,
chunk: string,
chunkIndex: number,
imageIndex: number
): Promise<string> {
const imagePath = path.join("stories", storyName, "images", `chunk_${chunkIndex}_img${imageIndex}.png`);
const prompt = `${(storyConfig.config.image_style_prompts || "").trim()}
Illustration for the following passage:
"${chunk.slice(0, 500)}"`;
const size = pickImageSize(storyConfig.config.export_settings?.resolution);
const response = await getOpenAI().images.generate({
model: "dall-e-3",
prompt,
n: 1,
size,
response_format: "b64_json",
});
if (!response.data?.[0]?.b64_json) {
throw new Error("Image data not found in response");
}
const imageBase64 = response.data[0].b64_json;
const imageBuffer = Buffer.from(imageBase64, "base64");
fs.writeFileSync(imagePath, imageBuffer);
return imagePath;
}
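Not part of this commit: a usage sketch for generateImage. It assumes OPENAI_API_KEY is set, the hypothetical story folder stories/example-story/images already exists, and the passage text is only illustrative.

import { loadStoryConfig } from "./lib/pipeline/config";
import { generateImage } from "./lib/pipeline/images";

async function main() {
  const storyConfig = loadStoryConfig("example-story");
  // chunkIndex 0, imageIndex 0 -> stories/example-story/images/chunk_0_img0.png
  const file = await generateImage("example-story", storyConfig, "The rabbit waited quietly in the nursery.", 0, 0);
  console.log("wrote", file);
}

main().catch(console.error);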

lib/pipeline/pipeline.ts (new file)

@@ -0,0 +1,151 @@
import { loadStoryConfig } from "./config";
import { validatePublicDomain } from "./validator";
import { sanitizeText } from "./sanitizer";
import { chunkText } from "./chunker";
import { generateAudio, generateSingleAudio } from "./tts";
import { combineAudio, getChunkDuration } from "./audio";
import { generateImage } from "./images";
import { createVideo } from "./video";
import { createSrt } from "./subtitles";
import { generateYouTubeMetadata, uploadToYouTube, YouTubeMetadata } from "./uploader";
import * as path from "path";
import * as fs from "fs";
export interface RunPipelineOptions {
force?: boolean;
skipUpload?: boolean;
concurrency?: number;
baseDir?: string;
}
export interface RunPipelineResult {
storyName: string;
audioFiles: string[];
imageFiles: string[];
srtPath: string;
finalAudioPath: string;
videoPath: string;
metadata: YouTubeMetadata;
}
async function mapWithConcurrency<T>(
items: T[],
limit: number,
mapper: (item: T, index: number) => Promise<void>
): Promise<void> {
if (items.length === 0) return;
let nextIndex = 0;
const inFlight: Promise<void>[] = [];
const launchNext = () => {
if (nextIndex >= items.length) return;
const current = nextIndex++;
const p = mapper(items[current], current).finally(() => {
const idx = inFlight.indexOf(p);
if (idx >= 0) inFlight.splice(idx, 1);
});
inFlight.push(p);
};
for (let i = 0; i < Math.min(limit, items.length); i++) {
launchNext();
}
while (inFlight.length > 0 || nextIndex < items.length) {
while (inFlight.length < limit && nextIndex < items.length) {
launchNext();
}
await Promise.race(inFlight);
}
}
export async function runStoryPipeline(
storyName: string,
options: RunPipelineOptions = {}
): Promise<RunPipelineResult> {
const force = !!options.force;
const skipUpload = !!options.skipUpload;
const concurrency = Math.max(1, options.concurrency ?? 3);
const originalCwd = process.cwd();
const targetCwd = options.baseDir || originalCwd;
if (targetCwd && targetCwd !== originalCwd) {
process.chdir(targetCwd);
}
try {
const storyConfig = loadStoryConfig(storyName);
const validationResult = validatePublicDomain(storyConfig);
if (!validationResult.is_public_domain) {
throw new Error(validationResult.message);
}
const storyRoot = path.resolve("stories", storyName);
["audio", "images", "final_audio", "video"].forEach((d) =>
fs.mkdirSync(path.join(storyRoot, d), { recursive: true })
);
const sanitizedText = sanitizeText(storyName);
const chunks = chunkText(sanitizedText, storyConfig.config.chunk_size);
const introFile = path.join("stories", storyName, storyConfig.config.intro_audio_file);
const outroFile = path.join("stories", storyName, storyConfig.config.outro_audio_file);
if (!fs.existsSync(introFile) || force) {
await generateSingleAudio(storyConfig, "This is the intro.", introFile);
}
if (!fs.existsSync(outroFile) || force) {
await generateSingleAudio(storyConfig, "This is the outro.", outroFile);
}
const audioFiles: string[] = new Array(chunks.length);
const chunkDurations: number[] = new Array(chunks.length);
await mapWithConcurrency(chunks, concurrency, async (chunk, i) => {
const audioPath = path.join("stories", storyName, "audio", `chunk_${i}.mp3`);
if (!fs.existsSync(audioPath) || force) {
await generateAudio(storyConfig, storyName, chunk, i);
}
const duration = await getChunkDuration(audioPath);
audioFiles[i] = audioPath;
chunkDurations[i] = duration;
});
const imageFiles: string[] = new Array(chunks.length);
await mapWithConcurrency(chunks, concurrency, async (chunk, i) => {
const imagePath = path.join("stories", storyName, "images", `chunk_${i}_img0.png`);
if (!fs.existsSync(imagePath) || force) {
const generated = await generateImage(storyName, storyConfig, chunk, i, 0);
imageFiles[i] = generated;
} else {
imageFiles[i] = imagePath;
}
});
const srtPath = createSrt(storyName, chunks, chunkDurations);
await combineAudio(storyName, storyConfig, audioFiles);
const finalAudioPath = path.resolve("stories", storyName, "final_audio", "final.mp3");
await createVideo(storyName, storyConfig, imageFiles, chunkDurations, srtPath);
const videoPath = path.resolve("stories", storyName, "video", "final.mp4");
const metadata = generateYouTubeMetadata(storyConfig);
if (!skipUpload) {
await uploadToYouTube(videoPath, metadata);
}
return {
storyName,
audioFiles,
imageFiles,
srtPath,
finalAudioPath,
videoPath,
metadata,
};
} finally {
if (targetCwd !== originalCwd) {
process.chdir(originalCwd);
}
}
}
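Not part of this commit: a sketch of driving the whole pipeline from a script. The story name is hypothetical; skipUpload avoids the (placeholder) YouTube step and concurrency caps parallel TTS/image calls.

import { runStoryPipeline } from "./lib/pipeline/pipeline";

runStoryPipeline("example-story", { skipUpload: true, concurrency: 2 })
  .then((result) => console.log("final video:", result.videoPath))
  .catch((err) => {
    console.error(err);
    process.exit(1);
  });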

lib/pipeline/sanitizer.ts (new file)

@@ -0,0 +1,20 @@
import * as fs from "fs";
import * as path from "path";
const FORBIDDEN_WORDS = ["darn", "heck", "gosh"];
export function sanitizeText(storyName: string): string {
const sourcePath = path.join("stories", storyName, "source.txt");
if (!fs.existsSync(sourcePath)) {
throw new Error(`Source text not found for story: ${storyName}`);
}
let text = fs.readFileSync(sourcePath, "utf8");
FORBIDDEN_WORDS.forEach((word) => {
const regex = new RegExp(`\\b${word}\\b`, "gi");
text = text.replace(regex, "***");
});
return text;
}

lib/pipeline/subtitles.ts (new file)

@@ -0,0 +1,34 @@
import * as fs from "fs";
import * as path from "path";
function toSrtTime(secondsFloat: number): string {
const totalMs = Math.max(0, Math.round(secondsFloat * 1000));
const hours = Math.floor(totalMs / 3600000);
const minutes = Math.floor((totalMs % 3600000) / 60000);
const seconds = Math.floor((totalMs % 60000) / 1000);
const ms = totalMs % 1000;
const pad = (n: number, w: number) => n.toString().padStart(w, "0");
return `${pad(hours, 2)}:${pad(minutes, 2)}:${pad(seconds, 2)},${pad(ms, 3)}`;
}
export function createSrt(storyName: string, chunks: string[], chunkDurations: number[]): string {
const srtPath = path.resolve("stories", storyName, "subtitles.srt");
let srtContent = "";
let currentTime = 0;
for (let i = 0; i < chunks.length; i++) {
const chunk = chunks[i];
const duration = chunkDurations[i];
const startTime = toSrtTime(currentTime);
const endTime = toSrtTime(currentTime + duration);
srtContent += `${i + 1}\n`;
srtContent += `${startTime} --> ${endTime}\n`;
srtContent += `${chunk}\n\n`;
currentTime += duration;
}
fs.writeFileSync(srtPath, srtContent);
return srtPath;
}
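Not part of this commit: a worked example for createSrt, assuming a hypothetical stories/example-story folder exists for the output file.

import { createSrt } from "./lib/pipeline/subtitles";

// Two chunks of 3.2s and 4.5s yield cues 00:00:00,000 --> 00:00:03,200 and 00:00:03,200 --> 00:00:07,700.
const srtPath = createSrt("example-story", ["First passage.", "Second passage."], [3.2, 4.5]);
console.log("subtitles written to", srtPath);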

lib/pipeline/tts.ts (new file)

@@ -0,0 +1,47 @@
import OpenAI from "openai";
import * as fs from "fs";
import * as path from "path";
import { StoryConfig } from "./config";
let openaiClient: OpenAI | null = null;
function getOpenAI(): OpenAI {
if (!openaiClient) {
openaiClient = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
}
return openaiClient;
}
export async function generateAudio(
storyConfig: StoryConfig,
storyName: string,
chunk: string,
index: number
): Promise<string> {
const speechFile = path.join("stories", storyName, "audio", `chunk_${index}.mp3`);
const mp3 = await getOpenAI().audio.speech.create({
model: "gpt-4o-mini-tts",
voice: storyConfig.config.tts_voice_id as unknown as string,
input: chunk,
instructions: storyConfig.config.tts_instructions,
});
const buffer = Buffer.from(await mp3.arrayBuffer());
await fs.promises.writeFile(speechFile, buffer);
return speechFile;
}
export async function generateSingleAudio(storyConfig: StoryConfig, text: string, outputFile: string): Promise<string> {
const mp3 = await getOpenAI().audio.speech.create({
model: "gpt-4o-mini-tts",
voice: storyConfig.config.tts_voice_id as unknown as string,
input: text,
instructions: storyConfig.config.tts_instructions,
});
const buffer = Buffer.from(await mp3.arrayBuffer());
await fs.promises.writeFile(outputFile, buffer);
return outputFile;
}
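Not part of this commit: a sketch of generating a one-off narration with generateSingleAudio. It assumes OPENAI_API_KEY is set; the story name, text, and output path are placeholders.

import { loadStoryConfig } from "./lib/pipeline/config";
import { generateSingleAudio } from "./lib/pipeline/tts";

async function main() {
  const storyConfig = loadStoryConfig("example-story");
  // Uses the voice and instructions from the story's config.yaml.
  await generateSingleAudio(storyConfig, "Welcome to tonight's story.", "stories/example-story/audio/intro.mp3");
}

main().catch(console.error);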

lib/pipeline/uploader.ts (new file)

@@ -0,0 +1,36 @@
import { StoryConfig } from "./config";
export interface YouTubeMetadata {
title: string;
description: string;
tags: string[];
}
export function generateYouTubeMetadata(storyConfig: StoryConfig): YouTubeMetadata {
const title = `Bedtime Story • ${storyConfig.metadata.title} (Clean Kids Version) | Read-Along Text + AI Voice`;
const description = `A kid-friendly, clean version of the classic story, "${storyConfig.metadata.title}" by ${storyConfig.metadata.author}. This read-aloud audiobook features an AI-generated voice and illustrations, with on-screen text to help children read along.
This content is made for kids and is COPPA-compliant.
Public domain proof: ${storyConfig.metadata.public_domain_proof_url}`;
const tags = [
"bedtime story",
"read along",
"audiobook",
"kids story",
storyConfig.metadata.title,
storyConfig.metadata.author,
"clean version",
"AI voice",
"AI generated",
];
return { title, description, tags };
}
export async function uploadToYouTube(videoPath: string, metadata: YouTubeMetadata): Promise<void> {
console.log("Uploading to YouTube...");
console.log("Video path:", videoPath);
console.log("Metadata:", metadata);
console.log("This is a placeholder. YouTube upload is not yet implemented.");
}

lib/pipeline/validator.ts (new file)

@@ -0,0 +1,31 @@
import { StoryConfig } from "./config";
export interface ValidationResult {
is_public_domain: boolean;
message: string;
}
export function validatePublicDomain(storyConfig: StoryConfig): ValidationResult {
const publicationYear = storyConfig.metadata.publication_year;
if (!publicationYear) {
return {
is_public_domain: false,
message: "Publication year not found in metadata.",
};
}
const currentYear = new Date().getFullYear();
const cutoffYear = currentYear - 95;
if (publicationYear > cutoffYear) {
return {
is_public_domain: false,
message: `Work published in ${publicationYear} is not yet in the US public domain (cutoff: ≤ ${cutoffYear}).`,
};
}
return {
is_public_domain: true,
message: "Work is in the public domain in the US (95-year rule).",
};
}
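Not part of this commit: a sketch of the 95-year check in use, with a hypothetical story config. With a current year of 2025 the cutoff is 1930, so a publication_year of 1922 passes while 1940 is rejected.

import { loadStoryConfig } from "./lib/pipeline/config";
import { validatePublicDomain } from "./lib/pipeline/validator";

const result = validatePublicDomain(loadStoryConfig("example-story"));
if (!result.is_public_domain) {
  // Abort early rather than producing a video for a work still under copyright.
  throw new Error(result.message);
}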

lib/pipeline/video.ts (new file)

@@ -0,0 +1,74 @@
import { spawn } from "child_process";
import { StoryConfig } from "./config";
import * as path from "path";
import ffmpeg from "ffmpeg-static";
function escapeForFilter(filePath: string): string {
return filePath.replace(/\\/g, "\\\\").replace(/:/g, "\\:").replace(/,/g, "\\,").replace(/'/g, "\\'");
}
export async function createVideo(
storyName: string,
storyConfig: StoryConfig,
imageFiles: string[],
chunkDurations: number[],
srtPath: string
): Promise<void> {
const ffmpegPath: string = (ffmpeg as unknown as string) || "ffmpeg";
const audioPath = path.resolve("stories", storyName, "final_audio", "final.mp3");
const videoPath = path.resolve("stories", storyName, "video", "final.mp4");
const totalDuration = chunkDurations.reduce((a, b) => a + b, 0);
const resolution = storyConfig.config.export_settings?.resolution || "1024x1024";
const inputs = imageFiles.map((file) => ["-loop", "1", "-i", file]).flat();
inputs.push("-i", audioPath);
const filterGraph = imageFiles
.map((_, i) => {
const duration = chunkDurations[i];
// zoompan wants a whole number of frames (25 fps here), and the fade start must not go negative for chunks shorter than one second.
const frames = Math.max(1, Math.round(25 * duration));
const fadeStart = Math.max(0, duration - 1);
const zoompan = `zoompan=z='min(zoom+0.0015,1.5)':d=${frames}:x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':s=${resolution}`;
return `[${i}:v]${zoompan},fade=t=out:st=${fadeStart}:d=1[v${i}]`;
})
.join(";");
const streamSpecifiers = imageFiles.map((_, i) => `[v${i}]`).join("");
const escapedSrt = escapeForFilter(srtPath);
const concatGraph = `${filterGraph};${streamSpecifiers}concat=n=${imageFiles.length}:v=1:a=0,format=yuv420p[v0]`;
const finalFilterGraph = `${concatGraph};[v0]subtitles='${escapedSrt}'[v]`;
const args = [
"-y",
...inputs,
"-filter_complex",
finalFilterGraph,
"-map",
"[v]",
"-map",
`${imageFiles.length}:a`,
"-c:v",
"libx264",
"-tune",
"stillimage",
"-c:a",
"aac",
"-b:a",
"192k",
"-pix_fmt",
"yuv420p",
"-t",
totalDuration.toString(),
videoPath,
];
const ffmpegProcess = spawn(ffmpegPath, args);
return new Promise<void>((resolve, reject) => {
ffmpegProcess.on("close", (code: number | null) => {
if (code === 0) {
resolve();
} else {
reject(new Error(`ffmpeg process exited with code ${code}`));
}
});
});
}

lib/utils.ts (new file)

@@ -0,0 +1,6 @@
import { clsx, type ClassValue } from "clsx"
import { twMerge } from "tailwind-merge"
export function cn(...inputs: ClassValue[]) {
return twMerge(clsx(inputs))
}
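Not part of this commit: a small sketch of the cn helper. The class names and the isActive flag are arbitrary examples; the expected output assumes tailwind-merge's usual conflict resolution.

import { cn } from "./lib/utils";

const isActive = true;
// twMerge resolves the Tailwind conflict, so the later p-4 wins: "text-sm p-4".
console.log(cn("p-2", "text-sm", isActive && "p-4"));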