ai iteration
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -4,3 +4,5 @@ node_modules/
|
||||
.env
|
||||
|
||||
stories/sample_story/
|
||||
|
||||
dist/
|
||||
|
2687
package-lock.json
generated
2687
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -2,7 +2,6 @@
|
||||
"name": "project-noctivus",
|
||||
"version": "1.0.0",
|
||||
"description": "An orchestrator for creating audiobooks from text.",
|
||||
"type": "module",
|
||||
"main": "dist/orchestrator.js",
|
||||
"scripts": {
|
||||
"start": "ts-node src/orchestrator.ts",
|
||||
|
26
src/audio.ts
26
src/audio.ts
@@ -1,6 +1,7 @@
|
||||
import { spawn } from "child_process";
|
||||
import { StoryConfig } from "./config";
|
||||
import * as path from "path";
|
||||
import * as fs from "fs";
|
||||
const ffmpeg = require("ffmpeg-static");
|
||||
const ffprobe = require("ffprobe-static");
|
||||
|
||||
@@ -58,10 +59,13 @@ export async function generateSilence(duration: number, outputFile: string): Pro
|
||||
export async function combineAudio(storyName: string, storyConfig: StoryConfig, audioFiles: string[]): Promise<void> {
|
||||
const introFile = path.resolve("stories", storyName, storyConfig.config.intro_audio_file);
|
||||
const outroFile = path.resolve("stories", storyName, storyConfig.config.outro_audio_file);
|
||||
const tempAudioFile = path.resolve("stories", storyName, "final_audio", "temp.mp3");
|
||||
const finalAudioFile = path.resolve("stories", storyName, "final_audio", "final.mp3");
|
||||
const finalAudioDir = path.resolve("stories", storyName, "final_audio");
|
||||
const tempAudioFile = path.join(finalAudioDir, "temp.mp3");
|
||||
const finalAudioFile = path.join(finalAudioDir, "final.mp3");
|
||||
const backgroundMusicFile = path.resolve("stories", storyName, storyConfig.config.background_music_file);
|
||||
|
||||
fs.mkdirSync(finalAudioDir, { recursive: true });
|
||||
|
||||
// First, concatenate the main audio files
|
||||
const allFiles = [introFile, ...audioFiles.map((f) => path.resolve(f)), outroFile];
|
||||
const fileList = allFiles.map((f) => `file '${f.replace(/'/g, "'\\''")}'`).join("\n");
|
||||
@@ -85,18 +89,28 @@ export async function combineAudio(storyName: string, storyConfig: StoryConfig,
|
||||
// Then, get the duration of the concatenated audio
|
||||
const duration = await getDuration(tempAudioFile);
|
||||
|
||||
// Generate silence for the background track
|
||||
await generateSilence(duration, backgroundMusicFile);
|
||||
if (!fs.existsSync(backgroundMusicFile)) {
|
||||
// If background music is missing, just copy the narration
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
const args = ["-y", "-i", tempAudioFile, "-c:a", "libmp3lame", "-q:a", "4", finalAudioFile];
|
||||
const p = spawn(ffmpeg, args);
|
||||
p.on("close", (code: any) => (code === 0 ? resolve() : reject(new Error(`ffmpeg copy failed ${code}`))));
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// Finally, mix the main audio with the background music
|
||||
// Finally, mix the main audio with the looped/trimmed background music at a lower volume
|
||||
const bgVolume = "0.2";
|
||||
const mixArgs = [
|
||||
"-y",
|
||||
"-i",
|
||||
tempAudioFile,
|
||||
"-stream_loop",
|
||||
"-1",
|
||||
"-i",
|
||||
backgroundMusicFile,
|
||||
"-filter_complex",
|
||||
"[0:a][1:a]amerge=inputs=2[a]",
|
||||
`[1:a]volume=${bgVolume},atrim=0:${duration},asetpts=N/SR/TB[bg];[0:a][bg]amix=inputs=2:duration=first:dropout_transition=0[a]`,
|
||||
"-map",
|
||||
"[a]",
|
||||
"-c:a",
|
||||
|
@@ -1,6 +1,7 @@
|
||||
import * as yaml from "js-yaml";
|
||||
import * as fs from "fs";
|
||||
import * as path from "path";
|
||||
import { z } from "zod";
|
||||
|
||||
export interface StoryConfig {
|
||||
metadata: {
|
||||
@@ -13,18 +14,43 @@ export interface StoryConfig {
|
||||
config: {
|
||||
chunk_size: number;
|
||||
tts_voice_id: string;
|
||||
tts_instructions: string;
|
||||
image_style_prompts: string;
|
||||
tts_instructions?: string;
|
||||
image_style_prompts?: string;
|
||||
intro_audio_file: string;
|
||||
outro_audio_file: string;
|
||||
background_music_file: string;
|
||||
export_settings: {
|
||||
format: string;
|
||||
format?: string;
|
||||
resolution: string;
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
// Runtime validation schema mirroring the StoryConfig interface.
// Parsed with zod in loadStoryConfig so a malformed config.yaml fails
// loudly at startup instead of surfacing as undefined fields later.
const StoryConfigSchema = z.object({
  metadata: z.object({
    // All metadata strings must be non-empty; publication_year must be an integer.
    title: z.string().min(1),
    author: z.string().min(1),
    publication_year: z.number().int(),
    public_domain_proof_url: z.string().min(1),
    reading_level: z.string().min(1),
  }),
  config: z.object({
    chunk_size: z.number().int().positive(),
    tts_voice_id: z.string().min(1),
    // Optional prompt fields default to "" so older configs keep loading.
    tts_instructions: z.string().optional().default(""),
    image_style_prompts: z.string().optional().default(""),
    intro_audio_file: z.string().min(1),
    outro_audio_file: z.string().min(1),
    background_music_file: z.string().min(1),
    export_settings: z
      .object({
        format: z.string().optional().default("mp4"),
        // Resolution must be "<width>x<height>" digits, e.g. "1024x1024".
        resolution: z.string().regex(/^\d+x\d+$/).default("1024x1024"),
      })
      // Whole export_settings block may be omitted; defaults to mp4 @ 1024x1024.
      .default({ format: "mp4", resolution: "1024x1024" }),
  }),
});
|
||||
|
||||
export function loadStoryConfig(storyName: string): StoryConfig {
|
||||
const configPath = path.join("stories", storyName, "config.yaml");
|
||||
if (!fs.existsSync(configPath)) {
|
||||
@@ -32,5 +58,7 @@ export function loadStoryConfig(storyName: string): StoryConfig {
|
||||
}
|
||||
|
||||
const fileContents = fs.readFileSync(configPath, "utf8");
|
||||
return yaml.load(fileContents) as StoryConfig;
|
||||
const loaded = yaml.load(fileContents);
|
||||
const parsed = StoryConfigSchema.parse(loaded);
|
||||
return parsed as unknown as StoryConfig;
|
||||
}
|
||||
|
@@ -7,6 +7,31 @@ const openai = new OpenAI({
|
||||
apiKey: process.env.OPENAI_API_KEY,
|
||||
});
|
||||
|
||||
const allowedSizes = [
|
||||
"256x256",
|
||||
"512x512",
|
||||
"1024x1024",
|
||||
"1536x1024",
|
||||
"1024x1536",
|
||||
"1792x1024",
|
||||
"1024x1792",
|
||||
] as const;
|
||||
type AllowedSize = (typeof allowedSizes)[number];
|
||||
|
||||
function pickImageSize(resolution?: string): AllowedSize {
|
||||
// Default square for simplicity
|
||||
if (!resolution) return "1024x1024";
|
||||
const match = resolution.match(/^(\d+)x(\d+)$/);
|
||||
if (!match) return "1024x1024";
|
||||
const width = parseInt(match[1], 10);
|
||||
const height = parseInt(match[2], 10);
|
||||
if (!Number.isFinite(width) || !Number.isFinite(height)) return "1024x1024";
|
||||
if (width === height) return "1024x1024";
|
||||
const landscapeCandidates: AllowedSize[] = ["1536x1024", "1792x1024"];
|
||||
const portraitCandidates: AllowedSize[] = ["1024x1536", "1024x1792"];
|
||||
return width > height ? landscapeCandidates[0] : portraitCandidates[0];
|
||||
}
|
||||
|
||||
export async function generateImage(
|
||||
storyName: string,
|
||||
storyConfig: StoryConfig,
|
||||
@@ -15,13 +40,18 @@ export async function generateImage(
|
||||
imageIndex: number
|
||||
): Promise<string> {
|
||||
const imagePath = path.join("stories", storyName, "images", `chunk_${chunkIndex}_img${imageIndex}.png`);
|
||||
const prompt = "A cartoon cat.";
|
||||
const prompt = `${(storyConfig.config.image_style_prompts || "").trim()}
|
||||
|
||||
Illustration for the following passage:
|
||||
"${chunk.slice(0, 500)}"`;
|
||||
|
||||
const size = pickImageSize(storyConfig.config.export_settings?.resolution);
|
||||
|
||||
const response = await openai.images.generate({
|
||||
model: "dall-e-3", // Downgrading to dall-e-3 as gpt-image-1 is not available
|
||||
model: "dall-e-3",
|
||||
prompt,
|
||||
n: 1,
|
||||
size: "1024x1024",
|
||||
size,
|
||||
response_format: "b64_json",
|
||||
});
|
||||
|
||||
|
@@ -12,11 +12,46 @@ import { createVideo } from "./video";
|
||||
import { createSrt } from "./subtitles";
|
||||
import { generateYouTubeMetadata, uploadToYouTube } from "./uploader";
|
||||
import * as path from "path";
|
||||
import * as fs from "fs";
|
||||
|
||||
async function mapWithConcurrency<T>(
|
||||
items: T[],
|
||||
limit: number,
|
||||
mapper: (item: T, index: number) => Promise<void>
|
||||
): Promise<void> {
|
||||
if (items.length === 0) return;
|
||||
let nextIndex = 0;
|
||||
const inFlight: Promise<void>[] = [];
|
||||
const launchNext = () => {
|
||||
if (nextIndex >= items.length) return;
|
||||
const current = nextIndex++;
|
||||
const p = mapper(items[current], current).finally(() => {
|
||||
const idx = inFlight.indexOf(p);
|
||||
if (idx >= 0) inFlight.splice(idx, 1);
|
||||
});
|
||||
inFlight.push(p);
|
||||
};
|
||||
for (let i = 0; i < Math.min(limit, items.length); i++) {
|
||||
launchNext();
|
||||
}
|
||||
while (inFlight.length > 0 || nextIndex < items.length) {
|
||||
while (inFlight.length < limit && nextIndex < items.length) {
|
||||
launchNext();
|
||||
}
|
||||
await Promise.race(inFlight);
|
||||
}
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const storyName = process.argv[2];
|
||||
const rawArgs = process.argv.slice(2);
|
||||
const storyName = rawArgs.find((a) => !a.startsWith("--"));
|
||||
const force = rawArgs.includes("--force");
|
||||
const skipUpload = rawArgs.includes("--skip-upload");
|
||||
const concurrencyArg = rawArgs.find((a) => a.startsWith("--concurrency="));
|
||||
const concurrency = concurrencyArg ? Math.max(1, parseInt(concurrencyArg.split("=")[1], 10) || 3) : 3;
|
||||
|
||||
if (!storyName) {
|
||||
console.error("Please provide a story name.");
|
||||
console.error("Usage: ts-node src/orchestrator.ts <storyName> [--force] [--skip-upload] [--concurrency=N]");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
@@ -31,6 +66,9 @@ async function main() {
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const storyRoot = path.resolve("stories", storyName);
|
||||
["audio", "images", "final_audio", "video"].forEach((d) => fs.mkdirSync(path.join(storyRoot, d), { recursive: true }));
|
||||
|
||||
console.log("Sanitizing text...");
|
||||
const sanitizedText = sanitizeText(storyName);
|
||||
console.log("Sanitized text:");
|
||||
@@ -44,30 +82,49 @@ async function main() {
|
||||
console.log("Generating intro/outro audio...");
|
||||
const introFile = path.join("stories", storyName, storyConfig.config.intro_audio_file);
|
||||
const outroFile = path.join("stories", storyName, storyConfig.config.outro_audio_file);
|
||||
await generateSingleAudio(storyConfig, "This is the intro.", introFile);
|
||||
await generateSingleAudio(storyConfig, "This is the outro.", outroFile);
|
||||
console.log("Generated intro/outro audio successfully.");
|
||||
|
||||
console.log("Generating audio...");
|
||||
const audioFiles: string[] = [];
|
||||
const chunkDurations: number[] = [];
|
||||
for (let i = 0; i < chunks.length; i++) {
|
||||
console.log(`Generating audio for chunk ${i}...`);
|
||||
const audioFile = await generateAudio(storyConfig, storyName, chunks[i], i);
|
||||
audioFiles.push(audioFile);
|
||||
const duration = await getChunkDuration(audioFile);
|
||||
chunkDurations.push(duration);
|
||||
console.log(`Generated audio file: ${audioFile}, duration: ${duration}`);
|
||||
if (!fs.existsSync(introFile) || force) {
|
||||
await generateSingleAudio(storyConfig, "This is the intro.", introFile);
|
||||
} else {
|
||||
console.log(`Skipping intro generation, exists: ${introFile}`);
|
||||
}
|
||||
|
||||
console.log("Generating images...");
|
||||
const imageFiles: string[] = [];
|
||||
for (let i = 0; i < chunks.length; i++) {
|
||||
console.log(`Generating image for chunk ${i}...`);
|
||||
const imageFile = await generateImage(storyName, storyConfig, chunks[i], i, 0);
|
||||
imageFiles.push(imageFile);
|
||||
console.log(`Generated image file: ${imageFile}`);
|
||||
if (!fs.existsSync(outroFile) || force) {
|
||||
await generateSingleAudio(storyConfig, "This is the outro.", outroFile);
|
||||
} else {
|
||||
console.log(`Skipping outro generation, exists: ${outroFile}`);
|
||||
}
|
||||
console.log("Intro/outro audio ready.");
|
||||
|
||||
console.log(`Generating ${chunks.length} audio chunks with concurrency=${concurrency}...`);
|
||||
const audioFiles: string[] = new Array(chunks.length);
|
||||
const chunkDurations: number[] = new Array(chunks.length);
|
||||
await mapWithConcurrency(chunks, concurrency, async (chunk, i) => {
|
||||
const audioPath = path.join("stories", storyName, "audio", `chunk_${i}.mp3`);
|
||||
if (!fs.existsSync(audioPath) || force) {
|
||||
console.log(`Generating audio for chunk ${i}...`);
|
||||
await generateAudio(storyConfig, storyName, chunk, i);
|
||||
} else {
|
||||
console.log(`Skipping audio for chunk ${i}, exists.`);
|
||||
}
|
||||
const duration = await getChunkDuration(audioPath);
|
||||
audioFiles[i] = audioPath;
|
||||
chunkDurations[i] = duration;
|
||||
console.log(`Audio chunk ${i} ready: ${audioPath}, duration: ${duration}`);
|
||||
});
|
||||
|
||||
console.log(`Generating ${chunks.length} images with concurrency=${concurrency}...`);
|
||||
const imageFiles: string[] = new Array(chunks.length);
|
||||
await mapWithConcurrency(chunks, concurrency, async (chunk, i) => {
|
||||
const imagePath = path.join("stories", storyName, "images", `chunk_${i}_img0.png`);
|
||||
if (!fs.existsSync(imagePath) || force) {
|
||||
console.log(`Generating image for chunk ${i}...`);
|
||||
const generated = await generateImage(storyName, storyConfig, chunk, i, 0);
|
||||
imageFiles[i] = generated;
|
||||
} else {
|
||||
console.log(`Skipping image for chunk ${i}, exists.`);
|
||||
imageFiles[i] = imagePath;
|
||||
}
|
||||
console.log(`Image ${i} ready: ${imageFiles[i]}`);
|
||||
});
|
||||
|
||||
console.log("Creating subtitles...");
|
||||
const srtPath = createSrt(storyName, chunks, chunkDurations);
|
||||
@@ -81,6 +138,11 @@ async function main() {
|
||||
await createVideo(storyName, storyConfig, imageFiles, chunkDurations, srtPath);
|
||||
console.log("Created video successfully.");
|
||||
|
||||
if (skipUpload) {
|
||||
console.log("Skipping upload step (--skip-upload).");
|
||||
return;
|
||||
}
|
||||
|
||||
console.log("Generating YouTube metadata...");
|
||||
const metadata = generateYouTubeMetadata(storyConfig);
|
||||
console.log("YouTube metadata:");
|
||||
|
@@ -1,11 +1,14 @@
|
||||
import * as fs from "fs";
|
||||
import * as path from "path";
|
||||
|
||||
function toSrtTime(seconds: number): string {
|
||||
const date = new Date(0);
|
||||
date.setSeconds(seconds);
|
||||
const timeString = date.toISOString().substr(11, 12);
|
||||
return timeString.replace(".", ",");
|
||||
function toSrtTime(secondsFloat: number): string {
|
||||
const totalMs = Math.max(0, Math.round(secondsFloat * 1000));
|
||||
const hours = Math.floor(totalMs / 3600000);
|
||||
const minutes = Math.floor((totalMs % 3600000) / 60000);
|
||||
const seconds = Math.floor((totalMs % 60000) / 1000);
|
||||
const ms = totalMs % 1000;
|
||||
const pad = (n: number, w: number) => n.toString().padStart(w, "0");
|
||||
return `${pad(hours, 2)}:${pad(minutes, 2)}:${pad(seconds, 2)},${pad(ms, 3)}`;
|
||||
}
|
||||
|
||||
export function createSrt(storyName: string, chunks: string[], chunkDurations: number[]): string {
|
||||
|
18
src/video.ts
18
src/video.ts
@@ -2,7 +2,14 @@ import { spawn } from "child_process";
|
||||
import { StoryConfig } from "./config";
|
||||
import * as path from "path";
|
||||
const ffmpeg = require("ffmpeg-static");
|
||||
import { getDuration } from "./audio";
|
||||
|
||||
function escapeForFilter(filePath: string): string {
|
||||
return filePath
|
||||
.replace(/\\/g, "\\\\")
|
||||
.replace(/:/g, "\\:")
|
||||
.replace(/,/g, "\\,")
|
||||
.replace(/'/g, "\\'");
|
||||
}
|
||||
|
||||
export async function createVideo(
|
||||
storyName: string,
|
||||
@@ -14,6 +21,7 @@ export async function createVideo(
|
||||
const audioPath = path.resolve("stories", storyName, "final_audio", "final.mp3");
|
||||
const videoPath = path.resolve("stories", storyName, "video", "final.mp4");
|
||||
const totalDuration = chunkDurations.reduce((a, b) => a + b, 0);
|
||||
const resolution = storyConfig.config.export_settings?.resolution || "1024x1024";
|
||||
|
||||
const inputs = imageFiles.map((file) => ["-loop", "1", "-i", file]).flat();
|
||||
inputs.push("-i", audioPath);
|
||||
@@ -21,15 +29,15 @@ export async function createVideo(
|
||||
const filterGraph = imageFiles
|
||||
.map((_, i) => {
|
||||
const duration = chunkDurations[i];
|
||||
const zoompan = `zoompan=z='min(zoom+0.0015,1.5)':d=${
|
||||
25 * duration
|
||||
}:x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':s=1024x1024`;
|
||||
const zoompan = `zoompan=z='min(zoom+0.0015,1.5)':d=${25 * duration}:x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':s=${resolution}`;
|
||||
return `[${i}:v]${zoompan},fade=t=out:st=${duration - 1}:d=1[v${i}]`;
|
||||
})
|
||||
.join(";");
|
||||
|
||||
const streamSpecifiers = imageFiles.map((_, i) => `[v${i}]`).join("");
|
||||
const finalFilterGraph = `${filterGraph};${streamSpecifiers}concat=n=${imageFiles.length}:v=1:a=0,format=yuv420p[v];[v]subtitles=${srtPath}[v]`;
|
||||
const escapedSrt = escapeForFilter(srtPath);
|
||||
const concatGraph = `${filterGraph};${streamSpecifiers}concat=n=${imageFiles.length}:v=1:a=0,format=yuv420p[v0]`;
|
||||
const finalFilterGraph = `${concatGraph};[v0]subtitles='${escapedSrt}'[v]`;
|
||||
|
||||
const args = [
|
||||
"-y",
|
||||
|
Reference in New Issue
Block a user