From 049ee0235e7f1450484d0a009602743e8fda8091 Mon Sep 17 00:00:00 2001 From: Mikael Finstad Date: Thu, 29 Oct 2020 00:11:27 +0100 Subject: [PATCH] Audio improvements #62 #30 - Implement audio normalization #30 - Implement ducking - Implement arbitrary audio tracks with offset #86 #62 #10 --- README.md | 102 ++++++++++---- audio.js | 239 +++++++++++++++++++++----------- examples/audio-transition.json5 | 24 +++- examples/audio2.json5 | 26 ++-- examples/audio3.json5 | 15 ++ examples/audioLoop.json5 | 1 + index.js | 34 ++--- parseConfig.js | 89 +++++++++++- 8 files changed, 380 insertions(+), 150 deletions(-) create mode 100644 examples/audio3.json5 diff --git a/README.md b/README.md index 04c7b2a..c94826c 100644 --- a/README.md +++ b/README.md @@ -23,11 +23,12 @@ Inspired by [ffmpeg-concat](https://github.com/transitive-bullshit/ffmpeg-concat - Accepts custom HTML5 Canvas / Fabric.js JavaScript code for custom screens or dynamic overlays - Render custom GL shaders (for example from [shadertoy](https://www.shadertoy.com/)) - Can output GIF -- Preserve audio sources or mix multiple - Overlay transparent images or even videos with alpha channel - Show different sub-clips for parts of a clips duration (B-roll) -- Automatic audio crossfading - Picture-in-picture +- Preserve/mix multiple audio sources +- Automatic audio crossfading +- Automatic audio ducking and normalization ## Use cases @@ -35,20 +36,19 @@ Inspired by [ffmpeg-concat](https://github.com/transitive-bullshit/ffmpeg-concat - Create a fast-paced trailer or promo video - Create a tutorial video with help text - Create news stories -- Simply convert a video to a GIF +- Create an animated GIF - Resize video to any size or framerate and with automatic letterboxing/cropping (e.g. 
if you need to upload a video somewhere but the site complains `Video must be 1337x1000 30fps`) +- Create a podcast with multiple mixed tracks See [examples](https://github.com/mifi/editly/tree/master/examples) ## Requirements -- [Node.js installed](https://nodejs.org/en/) (Use of the latest stable version is recommended) -- Should work on Windows, MacOS and Linux. Needs at least Node.js v12.16.2 on MacOS ([see issue](https://github.com/sindresorhus/meow/issues/144)). - See also: https://github.com/stackgl/headless-gl#system-dependencies +- Windows, MacOS or Linux +- [Node.js installed](https://nodejs.org/en/) (Use of the latest LTS version is recommended, [v12.16.2 or newer on MacOS](https://github.com/sindresorhus/meow/issues/144).) +- `ffmpeg` (and `ffprobe`) [installed](http://ffmpeg.org/) and available in `PATH` - (Linux) may require some extra steps. See [headless-gl](https://github.com/stackgl/headless-gl#system-dependencies). -Make sure you have `ffmpeg` and `ffprobe` installed and available in `PATH` - ## Installing `npm i -g editly` @@ -72,21 +72,18 @@ editly \ --audio-file-path /path/to/music.mp3 ``` -Or create an MP4 (or GIF) from a JSON or JSON5 edit spec *(JSON5 is just a more friendly JSON format)*: +Or create an MP4 (or GIF) from a JSON or JSON5 edit spec *(JSON5 is just a more user friendly JSON format)*: ```sh -editly my-editly.json5 --fast --out output.gif +editly my-spec.json5 --fast --keep-source-audio --out output.gif ``` -For examples of how to make a JSON edit spec, see below or https://github.com/mifi/editly/tree/master/examples - -When you run with `--fast` or `fast: true`, it will render a much quicker low-resolution preview ⏩ - -Without `--fast` it will default to using the **width**, **height** and **frame rate** from the **first** input video. **All other clips will be converted to these dimensions.** You can of course override any or all of these parameters. 
+For examples of how to make a JSON edit spec, see below or [examples](https://github.com/mifi/editly/tree/master/examples). -**TIP:** Use this tool in conjunction with [LosslessCut](https://github.com/mifi/lossless-cut) +Without `--fast`, it will default to using the **width**, **height** and **frame rate** from the **first** input video. **All other clips will be converted to these dimensions.** You can of course override any or all of these parameters. -**TIP:** If you need catchy music for your video, have a look at [this YouTube](https://www.youtube.com/channel/UCht8qITGkBvXKsR1Byln-wA) or the [YouTube audio library](https://www.youtube.com/audiolibrary/music?nv=1). Then use [youtube-dl](https://github.com/ytdl-org/youtube-dl) to download the video, and then point `--audio-file-path` at the video file. *Be sure to respect their license!* +- **TIP:** Use this tool in conjunction with [LosslessCut](https://github.com/mifi/lossless-cut) +- **TIP:** If you need catchy music for your video, have a look at [this YouTube](https://www.youtube.com/channel/UCht8qITGkBvXKsR1Byln-wA) or the [YouTube audio library](https://www.youtube.com/audiolibrary/music?nv=1). Then use [youtube-dl](https://github.com/ytdl-org/youtube-dl) to download the video, and then point `--audio-file-path` at the video file. 
*Be sure to respect their license!* ## JavaScript library @@ -108,6 +105,7 @@ Edit specs are JavaScript / JSON objects describing the whole edit operation wit width, height, fps, + allowRemoteRequests: false, defaults: { duration: 4, transition: { @@ -127,10 +125,6 @@ Edit specs are JavaScript / JSON objects describing the whole edit operation wit // ...more per-layer-type defaults }, }, - audioFilePath, - loopAudio: false, - keepSourceAudio: false, - allowRemoteRequests: false, clips: [ { transition, @@ -145,6 +139,25 @@ Edit specs are JavaScript / JSON objects describing the whole edit operation wit } // ...more clips ], + audioFilePath, + loopAudio: false, + keepSourceAudio: false, + clipsAudioVolume: 1, + audioTracks: [ + { + path, + mixVolume: 1, + cutFrom: 0, + cutTo, + start: 0, + }, + // ...more audio tracks + ], + audioNorm: { + enable: false, + gaussSize: 5, + maxGain: 30, + }, // Testing options: enableFfmpegLog: false, @@ -161,11 +174,8 @@ Edit specs are JavaScript / JSON objects describing the whole edit operation wit | `width` | `--width` | Width which all media will be converted to | `640` | | | `height` | `--height` | Height which all media will be converted to | auto based on `width` and aspect ratio of **first video** | | | `fps` | `--fps` | FPS which all videos will be converted to | First video FPS or `25` | | -| `audioFilePath` | `--audio-file-path` | Set an audio track for the whole video | | | -| `loopAudio` | `--loop-audio` | Loop the audio track if it is shorter than video? 
| `false` | | -| `keepSourceAudio` | `--keep-source-audio` | Keep audio from source files | `false` | | | `allowRemoteRequests` | `--allow-remote-requests` | Allow remote URLs as paths | `false` | | -| `fast` | `--fast`, `-f` | Fast mode (low resolution and FPS, useful for getting a quick preview) | `false` | | +| `fast` | `--fast`, `-f` | Fast mode (low resolution and FPS, useful for getting a quick preview ⏩) | `false` | | | `defaults.layer.fontPath` | `--font-path` | Set default font to a .ttf | System font | | | `defaults.layer.*` | | Set any layer parameter that all layers will inherit | | | | `defaults.duration` | `--clip-duration` | Set default clip duration for clips that don't have an own duration | `4` | sec | @@ -181,6 +191,14 @@ Edit specs are JavaScript / JSON objects describing the whole edit operation wit | `clips[].layers[].type` | | Layer type, see below | | | | `clips[].layers[].visibleFrom` | | What time into the clip should this layer start | | sec | | `clips[].layers[].visibleUntil` | | What time into the clip should this layer stop | | sec | +| `audioTracks[]` | | List of arbitrary audio tracks. See [audio tracks](#arbitrary-audio-tracks). | `[]` | | +| `audioFilePath` | `--audio-file-path` | Set an audio track for the whole video. See also [audio tracks](#arbitrary-audio-tracks) | | | +| `loopAudio` | `--loop-audio` | Loop the audio track if it is shorter than video? | `false` | | +| `keepSourceAudio` | `--keep-source-audio` | Keep source audio from `clips`? | `false` | | +| `clipsAudioVolume` | | Volume of audio from `clips` relative to `audioTracks`. See [audio tracks](#arbitrary-audio-tracks). | `1` | | +| `audioNorm.enable` | | Enable audio normalization? See [audio normalization](#audio-normalization). | `false` | | +| `audioNorm.gaussSize` | | Audio normalization gauss size. See [audio normalization](#audio-normalization). | `5` | | +| `audioNorm.maxGain` | | Audio normalization max gain. See [audio normalization](#audio-normalization). 
| `30` | | ### Transition types @@ -220,6 +238,14 @@ Audio layers will be mixed together. If `cutFrom`/`cutTo` is set, the resulting | `cutTo` | Time value to cut to | `clip.duration` | sec | | `mixVolume` | Relative volume when mixing this audio track with others | `1` | | +#### Layer type 'detached-audio' + +This is a special case of `audioTracks` that makes it easier to start the audio relative to `clips` start times without having to calculate global start times. + +`detached-audio` has the exact same properties as [audioTracks](#arbitrary-audio-tracks), except `start` time is relative to the clip's start. + +[Example of detached audio tracks](https://github.com/mifi/editly/blob/master/examples/audio3.json5) + #### Layer type 'image' Full screen image @@ -313,6 +339,32 @@ Loads a GLSL shader. See [gl.json5](https://github.com/mifi/editly/blob/master/e - `fragmentPath` - `vertexPath` (optional) +#### Arbitrary audio tracks + +`audioTracks` property can optionally contain a list of objects which specify audio tracks that can be started at arbitrary times in the final video. These tracks will be mixed (`mixVolume` specifying a relative number for how loud each track is compared to the other tracks). `clipsAudioVolume` specifies the volume of **all** the audio from `clips` relative to the volume of **all** the `audioTracks`. + +| Parameter | Description | Default | | +|-|-|-|-| +| `audioTracks[].path` | File path for this track | | | +| `audioTracks[].mixVolume` | Relative volume for this track | `1` | | +| `audioTracks[].cutFrom` | Time value to cut source file **from** | `0` | sec | +| `audioTracks[].cutTo` | Time value to cut source file **to** | | sec | +| `audioTracks[].start` | How many seconds into video to start this audio track | `0` | sec | + +The difference between `audioTracks` and **Layer type 'audio'** is that `audioTracks` will continue to play across multiple `clips` and can start and stop whenever needed. 
+ +See `audioTracks` [example](https://github.com/mifi/editly/blob/master/examples/audio2.json5) + +See also **Layer type 'detached-audio'**. + +#### Audio normalization + +You can enable audio normalization of the final output audio. This is useful if you want to achieve Audio Ducking (e.g. automatically lower volume of all other tracks when voice-over speaks). + +`audioNorm` parameters are [documented here.](https://ffmpeg.org/ffmpeg-filters.html#dynaudnorm) + +[Example of audio ducking](https://github.com/mifi/editly/blob/master/examples/audio2.json5) + ### Resize modes `resizeMode` - How to fit image to screen. Can be one of: diff --git a/audio.js b/audio.js index ffd859d..5133729 100644 --- a/audio.js +++ b/audio.js @@ -2,44 +2,44 @@ const pMap = require('p-map'); const { join, basename, resolve } = require('path'); const execa = require('execa'); const flatMap = require('lodash/flatMap'); -const fs = require('fs-extra'); const { getFfmpegCommonArgs, getCutFromArgs } = require('./ffmpeg'); const { readFileStreams } = require('./util'); -module.exports = ({ ffmpegPath, ffprobePath, enableFfmpegLog, verbose }) => { - async function editAudio({ clips, tmpDir }) { - if (clips.length === 0) return undefined; +module.exports = ({ ffmpegPath, ffprobePath, enableFfmpegLog, verbose, tmpDir }) => { + async function createMixedAudioClips({ clips, keepSourceAudio }) { + return pMap(clips, async (clip, i) => { + const { duration, layers, transition } = clip; - console.log('Extracting audio or creating silence from all clips'); + async function runInner() { + const clipAudioPath = join(tmpDir, `clip${i}-audio.flac`); - const mergedAudioPath = join(tmpDir, 'audio-merged.flac'); + async function createSilence() { + if (verbose) console.log('create silence', duration); + const args = [ + '-f', 'lavfi', '-i', 'anullsrc=channel_layout=stereo:sample_rate=44100', + '-sample_fmt', 's32', + '-ar', '48000', + '-t', duration, + '-c:a', 'flac', + '-y', + clipAudioPath, + ]; + await 
execa(ffmpegPath, args); - const clipsOut = await pMap(clips, async (clip, i) => { - const clipAudioPath = join(tmpDir, `clip${i}-audio.flac`); + return { silent: true, clipAudioPath }; + } - const { duration, layers, transition } = clip; + // Has user enabled keep source audio? + if (!keepSourceAudio) return createSilence(); - const audioLayers = layers.filter(({ type, visibleFrom, visibleUntil }) => ( - ['audio', 'video'].includes(type) - // TODO We don't support audio for visibleFrom/visibleUntil layers - && !visibleFrom && visibleUntil == null)); + const audioLayers = layers.filter(({ type, visibleFrom, visibleUntil }) => ( + ['audio', 'video'].includes(type) + // TODO: We don't support audio for visibleFrom/visibleUntil layers + && !visibleFrom && visibleUntil == null)); - async function createSilence(outPath) { - if (verbose) console.log('create silence', duration); - const args = [ - '-f', 'lavfi', '-i', 'anullsrc=channel_layout=stereo:sample_rate=44100', - '-sample_fmt', 's32', - '-ar', '48000', - '-t', duration, - '-c:a', 'flac', - '-y', - outPath, - ]; - await execa(ffmpegPath, args); - } + if (audioLayers.length === 0) return createSilence(); - if (audioLayers.length > 0) { const processedAudioLayersRaw = await pMap(audioLayers, async (audioLayer, j) => { const { path, cutFrom, cutTo, speedFactor } = audioLayer; @@ -77,78 +77,159 @@ module.exports = ({ ffmpegPath, ffprobePath, enableFfmpegLog, verbose }) => { // console.log(args); await execa(ffmpegPath, args); + + return { + layerAudioPath, + audioLayer, + }; } catch (err) { if (verbose) console.error('Cannot extract audio from video', path, err); // Fall back to silence - await createSilence(layerAudioPath); + return undefined; } - - return { layerAudioPath, audioLayer }; }, { concurrency: 4 }); const processedAudioLayers = processedAudioLayersRaw.filter((p) => p); - if (processedAudioLayers.length > 1) { - // Merge/mix all layer's audio + if (processedAudioLayers.length < 1) return createSilence(); - 
const weights = processedAudioLayers.map(({ audioLayer }) => (audioLayer.mixVolume != null ? audioLayer.mixVolume : 1)); - const args = [ - ...getFfmpegCommonArgs({ enableFfmpegLog }), - ...flatMap(processedAudioLayers, ({ layerAudioPath }) => ['-i', layerAudioPath]), - '-filter_complex', `amix=inputs=${processedAudioLayers.length}:duration=longest:weights=${weights.join(' ')}`, - '-c:a', 'flac', - '-y', - clipAudioPath, - ]; + if (processedAudioLayers.length === 1) return { clipAudioPath: processedAudioLayers[0].layerAudioPath }; - await execa(ffmpegPath, args); - } else if (processedAudioLayers.length > 0) { - await fs.rename(processedAudioLayers[0].layerAudioPath, clipAudioPath); - } else { - await createSilence(clipAudioPath); - } - } else { - await createSilence(clipAudioPath); + // Merge/mix all layer's audio + const weights = processedAudioLayers.map(({ audioLayer }) => (audioLayer.mixVolume != null ? audioLayer.mixVolume : 1)); + const args = [ + ...getFfmpegCommonArgs({ enableFfmpegLog }), + ...flatMap(processedAudioLayers, ({ layerAudioPath }) => ['-i', layerAudioPath]), + '-filter_complex', `amix=inputs=${processedAudioLayers.length}:duration=longest:weights=${weights.join(' ')}`, + '-c:a', 'flac', + '-y', + clipAudioPath, + ]; + + await execa(ffmpegPath, args); + return { clipAudioPath }; } + const { clipAudioPath, silent } = await runInner(); + return { path: resolve(clipAudioPath), // https://superuser.com/a/853262/658247 transition, + silent, }; }, { concurrency: 4 }); + } - if (clipsOut.length < 2) { - await fs.rename(clipsOut[0].path, mergedAudioPath); - } else { - console.log('Combining audio', clipsOut.map(({ path }) => basename(path))); - - let inStream = '[0:a]'; - const filterGraph = clipsOut.slice(0, -1).map(({ transition }, i) => { - const outStream = `[concat${i}]`; - - const epsilon = 0.0001; // If duration is 0, ffmpeg seems to default to 1 sec instead, hence epsilon. 
- let ret = `${inStream}[${i + 1}:a]acrossfade=d=${Math.max(epsilon, transition.duration)}:c1=${transition.audioOutCurve || 'tri'}:c2=${transition.audioInCurve || 'tri'}`; - - inStream = outStream; - - if (i < clipsOut.length - 2) ret += outStream; - return ret; - }).join(','); - - const args = [ - ...getFfmpegCommonArgs({ enableFfmpegLog }), - ...(flatMap(clipsOut, ({ path }) => ['-i', path])), - '-filter_complex', - filterGraph, - '-c', 'flac', - '-y', - mergedAudioPath, - ]; - await execa(ffmpegPath, args); + async function mergeFadeClipAudio(clipAudio) { + if (clipAudio.length < 2) { + return clipAudio[0].path; } - // TODO don't return audio if only silence? - return mergedAudioPath; + const mergedClipAudioPath = join(tmpDir, 'audio-merged.flac'); + + if (verbose) console.log('Combining audio', clipAudio.map(({ path }) => basename(path))); + + let inStream = '[0:a]'; + const filterGraph = clipAudio.slice(0, -1).map(({ transition }, i) => { + const outStream = `[concat${i}]`; + + const epsilon = 0.0001; // If duration is 0, ffmpeg seems to default to 1 sec instead, hence epsilon. 
+ let ret = `${inStream}[${i + 1}:a]acrossfade=d=${Math.max(epsilon, transition.duration)}:c1=${transition.audioOutCurve || 'tri'}:c2=${transition.audioInCurve || 'tri'}`; + + inStream = outStream; + + if (i < clipAudio.length - 2) ret += outStream; + return ret; + }).join(','); + + const args = [ + ...getFfmpegCommonArgs({ enableFfmpegLog }), + ...(flatMap(clipAudio, ({ path }) => ['-i', path])), + '-filter_complex', + filterGraph, + '-c', 'flac', + '-y', + mergedClipAudioPath, + ]; + await execa(ffmpegPath, args); + + return mergedClipAudioPath; + } + + async function mixArbitraryAudio({ streams, audioNorm }) { + let maxGain = 30; + let gaussSize = 5; + if (audioNorm) { + if (audioNorm.gaussSize != null) gaussSize = audioNorm.gaussSize; + if (audioNorm.maxGain != null) maxGain = audioNorm.maxGain; + } + const enableAudioNorm = audioNorm && audioNorm.enable; + + // https://stackoverflow.com/questions/35509147/ffmpeg-amix-filter-volume-issue-with-inputs-of-different-duration + let filterComplex = streams.map(({ start, cutFrom, cutTo }, i) => { + const cutToArg = (cutTo != null ? `:end=${cutTo}` : ''); + const apadArg = i > 0 ? ',apad' : ''; // Don't pad the first track (audio from video clips with correct duration) + + return `[${i}]atrim=start=${cutFrom || 0}${cutToArg},adelay=delays=${Math.floor((start || 0) * 1000)}:all=1${apadArg}[a${i}]`; + }).join(';'); + + const audioNormArg = enableAudioNorm ? `,dynaudnorm=g=${gaussSize}:maxgain=${maxGain}` : ''; + filterComplex += `;${streams.map((s, i) => `[a${i}]`).join('')}amix=inputs=${streams.length}:duration=first:dropout_transition=0:weights=${streams.map((s) => (s.mixVolume != null ? 
s.mixVolume : 1)).join(' ')}${audioNormArg}`; + + const mixedAudioPath = join(tmpDir, 'audio-mixed.flac'); + + const args = [ + ...getFfmpegCommonArgs({ enableFfmpegLog }), + ...(flatMap(streams, ({ path, loop }) => ([ + '-stream_loop', (loop || 0), + '-i', path, + ]))), + '-filter_complex', filterComplex, + '-c:a', 'flac', + '-y', + mixedAudioPath, + ]; + + if (verbose) console.log(args.join(' ')); + + await execa(ffmpegPath, args); + + return mixedAudioPath; + } + + + async function editAudio({ keepSourceAudio, clips, arbitraryAudio, clipsAudioVolume, audioNorm }) { + // We need clips to process audio, because we need to know duration + if (clips.length === 0) return undefined; + + // No need to process audio if none of these are satisfied + if (!(keepSourceAudio || arbitraryAudio.length > 0)) return undefined; + + console.log('Extracting audio/silence from all clips'); + + // Mix audio from each clip as separate files (or silent audio of appropriate length for clips with no audio) + const clipAudio = await createMixedAudioClips({ clips, keepSourceAudio }); + + // Return no audio if only silent clips and no arbitrary audio + if (clipAudio.every((ca) => ca.silent) && arbitraryAudio.length === 0) return undefined; + + // Merge & fade the clip audio files + const mergedClipAudioPath = await mergeFadeClipAudio(clipAudio); + + const streams = [ + // The first stream is required, and it determines the length of the output audio. 
+ // All other streams will be truncated to this length + { path: mergedClipAudioPath, mixVolume: clipsAudioVolume }, + + ...arbitraryAudio, + ]; + + console.log('Mixing clip audio with arbitrary audio'); + + if (streams.length < 2) return mergedClipAudioPath; + + const mixedFile = await mixArbitraryAudio({ streams, audioNorm }); + return mixedFile; } return { diff --git a/examples/audio-transition.json5 b/examples/audio-transition.json5 index 01fd8f9..b696de7 100644 --- a/examples/audio-transition.json5 +++ b/examples/audio-transition.json5 @@ -8,27 +8,39 @@ }, clips: [ { layers: [ - { type: 'title-background', text: 'Clip 1' }, + { type: 'title-background', text: 'Default transition' }, { type: 'audio', path: './assets/sample1.m4a' } ] }, { transition: { duration: 0.2 }, layers: [ - { type: 'title-background', text: 'Clip 2' }, + { type: 'title-background', text: 'Fast transition' }, { type: 'audio', path: './assets/sample2.m4a' } ] }, { transition: { duration: 0 }, layers: [ - { type: 'title-background', text: 'Clip 3' }, + { type: 'title-background', text: 'No transition' }, { type: 'audio', path: './assets/sample1.m4a' } ] }, { transition: { audioInCurve: 'exp', audioOutCurve: 'exp' }, layers: [ - { type: 'title-background', text: 'Clip 4' }, + { type: 'title-background', text: 'Exp curve' }, { type: 'audio', path: './assets/sample2.m4a' } ] }, { transition: { name: 'dummy' }, layers: [ - { type: 'title-background', text: 'Clip 5' }, + { type: 'title-background', text: 'Dummy' }, { type: 'audio', path: './assets/sample1.m4a' } ] }, + { transition: { duration: 2 }, layers: [ + { type: 'title-background', text: 'Too short' }, + { type: 'audio', path: './assets/sample2.m4a' } + ] }, + { duration: 1, transition: { duration: 2 }, layers: [ + { type: 'title-background', text: 'Too short' }, + { type: 'audio', path: './assets/sample2.m4a' } + ] }, + { duration: 1, transition: { duration: 2 }, layers: [ + { type: 'title-background', text: 'Too short' }, + { type: 
'audio', path: './assets/sample2.m4a' } + ] }, { layers: [ - { type: 'title-background', text: 'Clip 6' }, + { type: 'title-background', text: 'THE END' }, { type: 'audio', path: './assets/sample2.m4a' } ] }, ], diff --git a/examples/audio2.json5 b/examples/audio2.json5 index bebd44c..ed886aa 100644 --- a/examples/audio2.json5 +++ b/examples/audio2.json5 @@ -1,22 +1,16 @@ { // enableFfmpegLog: true, outPath: './audio2.mp4', - keepSourceAudio: true, + width: 200, height: 200, clips: [ - { duration: 0.5, layers: [{ type: 'video', path: './assets/lofoten.mp4', cutFrom: 0.4, cutTo: 2 }] }, - - { layers: [ - { type: 'title-background', text: 'Audio track' }, - { type: 'audio', path: './assets/High [NCS Release] - JPB (No Copyright Music)-R8ZRCXy5vhA.m4a' }] }, - - { layers: [ - { type: 'video', path: './assets/lofoten.mp4', cutFrom: 0, cutTo: 2, mixVolume: 0.7 }, - { type: 'audio', path: './assets/High [NCS Release] - JPB (No Copyright Music)-R8ZRCXy5vhA.m4a', mixVolume: 0.3 }] }, - - { layers: [ - { type: 'video', path: './assets/lofoten.mp4', cutFrom: 0.4, cutTo: 2 }, - { type: 'audio', path: './assets/High [NCS Release] - JPB (No Copyright Music)-R8ZRCXy5vhA.m4a' }] }, - { layers: [{ type: 'video', path: './assets/lofoten.mp4', cutFrom: 1, cutTo: 2 }] }, + { duration: 15, layers: { type: 'title-background', text: 'Audio track' } }, ], -} + audioNorm: { enable: true, gaussSize: 3, maxGain: 100 }, + clipsAudioVolume: 50, + audioTracks: [ + { path: './assets/High [NCS Release] - JPB (No Copyright Music)-R8ZRCXy5vhA.m4a', cutFrom: 18 }, + { path: './assets/winxp.mp3', mixVolume: 10, cutFrom: 1, cutTo: 2, start: 2 }, + { path: './assets/Julen_ribas.m4a', mixVolume: 50, cutTo: 7, start: 5 }, + ], +} \ No newline at end of file diff --git a/examples/audio3.json5 b/examples/audio3.json5 new file mode 100644 index 0000000..7a2294e --- /dev/null +++ b/examples/audio3.json5 @@ -0,0 +1,15 @@ +{ + outPath: './audio3.mp4', + width: 200, height: 200, + clips: [ + { layers: [{ type: 
'video', path: './assets/lofoten.mp4', cutTo: 2 }, { type: 'title', text: 'Arbitrary audio' }] }, + { duration: 3, layers: [{ type: 'title-background', text: 'Voice starts in 1 sec' }, { type: 'detached-audio', path: './assets/Julen_ribas.m4a', mixVolume: 50, cutFrom: 2, start: 1 }] }, + { duration: 1, layers: [{ type: 'title-background', text: 'Voice continues over clip 2' }] }, + { duration: 3, layers: [{ type: 'title-background', text: 'Voice continues over clip 3' }] }, + { duration: 2, layers: [{ type: 'title-background', text: 'XP sound starts' }, { type: 'detached-audio', path: './assets/winxp.mp3', mixVolume: 10, cutFrom: 0.5 }] }, + ], + audioNorm: { enable: true, gaussSize: 3, maxGain: 100 }, + audioTracks: [ + { path: './assets/High [NCS Release] - JPB (No Copyright Music)-R8ZRCXy5vhA.m4a', cutFrom: 18 }, + ], +} \ No newline at end of file diff --git a/examples/audioLoop.json5 b/examples/audioLoop.json5 index 56c0505..c258101 100644 --- a/examples/audioLoop.json5 +++ b/examples/audioLoop.json5 @@ -1,5 +1,6 @@ { outPath: './audioLoop.mp4', + width: 200, height: 200, audioFilePath: './assets/winxp.mp3', loopAudio: true, // Should properly cut off and not crash with EPIPE if loopAudio=false and audio duration is shorter than total duration diff --git a/index.js b/index.js index 42b306a..1542b99 100644 --- a/index.js +++ b/index.js @@ -8,7 +8,7 @@ const { nanoid } = require('nanoid'); const { parseFps, multipleOf2 } = require('./util'); const { createFabricCanvas, rgbaToFabricImage, getNodeCanvasFromFabricCanvas } = require('./sources/fabric'); const { createFrameSource } = require('./sources/frameSource'); -const parseConfig = require('./parseConfig'); +const { parseConfig } = require('./parseConfig'); const GlTransitions = require('./glTransitions'); const Audio = require('./audio'); const { assertFileValid, checkTransition } = require('./util'); @@ -22,18 +22,22 @@ const Editly = async (config = {}) => { enableFfmpegLog = false, verbose = false, logTimes 
= false, + keepTmp = false, fast, outPath, clips: clipsIn, + clipsAudioVolume = 1, + audioTracks: arbitraryAudioIn = [], width: requestedWidth, height: requestedHeight, fps: requestedFps, defaults = {}, - audioFilePath: audioFilePathIn, + audioFilePath: backgroundAudioPath, loopAudio, keepSourceAudio, allowRemoteRequests, + audioNorm, ffmpegPath = 'ffmpeg', ffprobePath = 'ffprobe', @@ -41,10 +45,7 @@ const Editly = async (config = {}) => { const isGif = outPath.toLowerCase().endsWith('.gif'); - let audioFilePath; - if (!isGif) audioFilePath = audioFilePathIn; - - if (audioFilePath) await assertFileValid(audioFilePath, allowRemoteRequests); + if (backgroundAudioPath) await assertFileValid(backgroundAudioPath, allowRemoteRequests); checkTransition(defaults.transition); @@ -53,21 +54,17 @@ const Editly = async (config = {}) => { assert(outPath, 'Please provide an output path'); assert(clipsIn.length > 0, 'Please provide at least 1 clip'); - const clips = await parseConfig({ defaults, clips: clipsIn, allowRemoteRequests, ffprobePath }); - - const { editAudio } = Audio({ ffmpegPath, ffprobePath, enableFfmpegLog, verbose }); + const { clips, arbitraryAudio } = await parseConfig({ defaults, clips: clipsIn, arbitraryAudio: arbitraryAudioIn, backgroundAudioPath, loopAudio, allowRemoteRequests, ffprobePath }); + if (verbose) console.log('Calculated', JSON5.stringify({ clips, arbitraryAudio }, null, 2)); const outDir = dirname(outPath); const tmpDir = join(outDir, `editly-tmp-${nanoid()}`); if (verbose) console.log({ tmpDir }); - await fs.remove(tmpDir); await fs.mkdirp(tmpDir); - if (!audioFilePath && keepSourceAudio) { - audioFilePath = await editAudio({ clips, tmpDir }); - } + const { editAudio } = Audio({ ffmpegPath, ffprobePath, enableFfmpegLog, verbose, tmpDir }); - if (verbose) console.log(JSON5.stringify(clips, null, 2)); + const audioFilePath = !isGif ? 
await editAudio({ keepSourceAudio, arbitraryAudio, clipsAudioVolume, clips, audioNorm }) : undefined; // Try to detect parameters from first video let firstVideoWidth; @@ -193,8 +190,6 @@ const Editly = async (config = {}) => { '-y', outPath, ]; - const loopAudioArgs = loopAudio ? ['-stream_loop', '-1'] : []; - const args = [ ...(enableFfmpegLog ? [] : ['-hide_banner', '-loglevel', 'error']), @@ -205,7 +200,7 @@ const Editly = async (config = {}) => { '-r', framerateStr, '-i', '-', - ...(audioFilePath ? [...loopAudioArgs, '-i', audioFilePath, '-shortest'] : []), + ...(audioFilePath ? ['-i', audioFilePath] : []), ...(!isGif ? ['-map', '0:v:0'] : []), ...(audioFilePath ? ['-map', '1:a:0'] : []), @@ -374,7 +369,7 @@ const Editly = async (config = {}) => { if (verbose) console.log('Cleanup'); if (frameSource1) await frameSource1.close(); if (frameSource2) await frameSource2.close(); - await fs.remove(tmpDir); + if (!keepTmp) await fs.remove(tmpDir); } try { @@ -389,7 +384,8 @@ const Editly = async (config = {}) => { console.log(outPath); }; -// Pure function to get a frame at a certain time (excluding transitions) +// Pure function to get a frame at a certain time +// TODO I think this does not respect transition durations async function renderSingleFrame({ time = 0, defaults, diff --git a/parseConfig.js b/parseConfig.js index 6fde470..e121e08 100644 --- a/parseConfig.js +++ b/parseConfig.js @@ -13,7 +13,23 @@ const { assertFileValid, checkTransition } = require('./util'); const loadedFonts = []; -async function parseConfig({ defaults: defaultsIn = {}, clips, allowRemoteRequests, ffprobePath }) { +async function validateArbitraryAudio(audio) { + assert(audio === undefined || Array.isArray(audio)); + + if (audio) { + // eslint-disable-next-line no-restricted-syntax + for (const { path, cutFrom, cutTo, start } of audio) { + await assertFileValid(path, false); + + if (cutFrom != null && cutTo != null) assert(cutTo > cutFrom); + if (cutFrom != null) assert(cutFrom >= 0); + 
if (cutTo != null) assert(cutTo >= 0); + assert(start == null || start >= 0, `Invalid "start" ${start}`); + } + } +} + +async function parseConfig({ defaults: defaultsIn = {}, clips, arbitraryAudio: arbitraryAudioIn, backgroundAudioPath, loopAudio, allowRemoteRequests, ffprobePath }) { const defaults = { duration: 4, ...defaultsIn, @@ -88,7 +104,9 @@ async function parseConfig({ defaults: defaultsIn = {}, clips, allowRemoteReques throw new Error(`Invalid layer type ${type}`); } - return pMap(clips, async (clip, clipIndex) => { + const detachedAudioByClip = {}; + + let clipsOut = await pMap(clips, async (clip, clipIndex) => { assert(typeof clip === 'object', '"clips" must contain objects with one or more layers'); const { transition: userTransition, duration: userClipDuration, layers: layersIn } = clip; @@ -136,7 +154,7 @@ async function parseConfig({ defaults: defaultsIn = {}, clips, allowRemoteReques } // Audio is handled later - if (type === 'audio') return layer; + if (['audio', 'detached-audio'].includes(type)) return layer; return handleLayer(layer); }, { concurrency: 1 })); @@ -181,7 +199,7 @@ async function parseConfig({ defaults: defaultsIn = {}, clips, allowRemoteReques return { ...layer, cutFrom, cutTo, speedFactor }; } - if (layer.type === 'video') { + if (type === 'video') { const { inputDuration } = layer; let speedFactor; @@ -197,15 +215,76 @@ async function parseConfig({ defaults: defaultsIn = {}, clips, allowRemoteReques return { ...layer, speedFactor }; } + // These audio tracks are detached from the clips (can run over multiple clips) + // This is useful so we can have audio start relative to clip start time + if (type === 'detached-audio') { + const { cutFrom, cutTo, mixVolume, start } = layer; + if (!detachedAudioByClip[clipIndex]) detachedAudioByClip[clipIndex] = []; + detachedAudioByClip[clipIndex].push({ path, cutFrom, cutTo, mixVolume, start }); + return undefined; + } + return layer; }); + layersOut = layersOut.filter((l) => l); + return { 
transition, duration: clipDuration, layers: layersOut, }; }, { concurrency: 1 }); + + + let totalClipDuration = 0; + const clipDetachedAudio = []; + + // Need to map again because now we know all clip durations + clipsOut = await pMap(clipsOut, async (clip, i) => { + const nextClip = clipsOut[i + 1]; + + // We clamp all transitions to half the length of every clip + // NOTE: similar logic is duplicated in index.js + let safeTransitionDuration = 0; + if (nextClip) { + // Each clip can have two transitions, make sure we leave enough room: + safeTransitionDuration = Math.min(clip.duration / 2, nextClip.duration / 2, clip.transition.duration); + } + + // We now know all clip durations so we can calculate the offset for detached audio tracks + // eslint-disable-next-line no-restricted-syntax + for (const { start, ...rest } of (detachedAudioByClip[i] || [])) { + clipDetachedAudio.push({ ...rest, start: totalClipDuration + (start || 0) }); + } + + totalClipDuration += clip.duration - safeTransitionDuration; + + return { + ...clip, + transition: { + ...clip.transition, + duration: safeTransitionDuration, + }, + }; + }); + + // Audio can either come from `audioFilePath`, `audio` or from "detached" audio layers in clips + const arbitraryAudio = [ + // Background audio is treated just like arbitrary audio + ...(backgroundAudioPath ? [{ path: backgroundAudioPath, mixVolume: 1, loop: loopAudio ? -1 : 0 }] : []), + ...arbitraryAudioIn, + ...clipDetachedAudio, + ]; + + await validateArbitraryAudio(arbitraryAudio); + + return { + clips: clipsOut, + arbitraryAudio, + }; } -module.exports = parseConfig; +module.exports = { + parseConfig, +};