From 65ef5420e7109bc256c559dd677d5b2879121f34 Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 14 May 2026 02:09:39 -0700 Subject: [PATCH] feat: implement scoring and ranking for video frames, enhance candidate selection process --- .dockerignore | 5 ++ Dockerfile | 12 ++++ captain-definition | 4 ++ src/analysis/candidateRanking.ts | 79 +++++++++++++++++++---- src/analysis/frameDifference.ts | 32 ++++++++++ src/analysis/scanVideo.ts | 104 +++++++++++++++++-------------- 6 files changed, 179 insertions(+), 57 deletions(-) create mode 100644 .dockerignore create mode 100644 Dockerfile create mode 100644 captain-definition diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..8e41aa5 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,5 @@ +node_modules +dist +.git +.DS_Store +npm-debug.log* diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..2c3c741 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,12 @@ +FROM node:22-alpine AS build +WORKDIR /app + +COPY package*.json ./ +RUN npm ci + +COPY . . +RUN npm run build + +FROM nginx:1.27-alpine +COPY --from=build /app/dist /usr/share/nginx/html +EXPOSE 80 diff --git a/captain-definition b/captain-definition new file mode 100644 index 0000000..0e14f82 --- /dev/null +++ b/captain-definition @@ -0,0 +1,4 @@ +{ + "schemaVersion": 2, + "dockerfilePath": "./Dockerfile" +} diff --git a/src/analysis/candidateRanking.ts b/src/analysis/candidateRanking.ts index 4a06fd1..779a936 100644 --- a/src/analysis/candidateRanking.ts +++ b/src/analysis/candidateRanking.ts @@ -1,34 +1,91 @@ -import type { CandidateFrame } from "../types/scan"; +export type ScoredFrame = { + score: number; + time: number; +}; export function rankAndTrimCandidates( - candidates: CandidateFrame[], + candidates: ScoredFrame[], targetCount: number, duration: number, -): CandidateFrame[] { - if (candidates.length <= targetCount) { - return [...candidates].sort((a, b) => a.time - b.time); + minGapSeconds: number, +): ScoredFrame[] { + if (targetCount <= 0 || candidates.length === 0) { + return []; } + const uniqueCandidates = dedupeByRoundedTime(candidates); + if (uniqueCandidates.length <= targetCount) { + return [...uniqueCandidates].sort((a, b) => a.time - b.time); + } + + const scoreFloor = Math.max(percentile(uniqueCandidates, 0.6) * 0.6, 0.02); + const localPeaks = uniqueCandidates.filter((candidate, index, items) => { + const previous = items[index - 1]; + const next = items[index + 1]; + const higherThanPrevious = !previous || candidate.score >= previous.score; + const higherThanNext = !next || candidate.score > next.score; + return candidate.score >= scoreFloor && higherThanPrevious && higherThanNext; + }); + + const pool = + localPeaks.length >= targetCount / 2 + ? localPeaks + : [...localPeaks, ...uniqueCandidates.filter((candidate) => !localPeaks.includes(candidate))]; + const bucketCount = Math.max(1, targetCount); const bucketSize = Math.max(duration / bucketCount, 1); - const selected = new Map(); + const selected = new Map(); for (let bucket = 0; bucket < bucketCount; bucket++) { const start = bucket * bucketSize; const end = bucket === bucketCount - 1 ? duration + 0.001 : start + bucketSize; - const best = candidates + const best = pool .filter((candidate) => candidate.time >= start && candidate.time < end) .sort((a, b) => b.score - a.score)[0]; - if (best) { - selected.set(best.id, best); + if (best && respectsMinGap([...selected.values()], best, minGapSeconds)) { + selected.set(frameKey(best.time), best); } } - for (const candidate of [...candidates].sort((a, b) => b.score - a.score)) { + for (const candidate of [...pool].sort((a, b) => b.score - a.score)) { if (selected.size >= targetCount) break; - selected.set(candidate.id, candidate); + if (respectsMinGap([...selected.values()], candidate, minGapSeconds)) { + selected.set(frameKey(candidate.time), candidate); + } + } + + for (const candidate of [...uniqueCandidates].sort((a, b) => b.score - a.score)) { + if (selected.size >= targetCount) break; + if (respectsMinGap([...selected.values()], candidate, minGapSeconds * 0.5)) { + selected.set(frameKey(candidate.time), candidate); + } } return [...selected.values()].sort((a, b) => a.time - b.time); } + +function percentile(candidates: ScoredFrame[], quantile: number) { + const scores = candidates.map((candidate) => candidate.score).sort((a, b) => a - b); + const index = Math.min(scores.length - 1, Math.max(0, Math.floor((scores.length - 1) * quantile))); + return scores[index]; +} + +function dedupeByRoundedTime(candidates: ScoredFrame[]) { + const selected = new Map(); + for (const candidate of [...candidates].sort((a, b) => b.score - a.score)) { + const key = frameKey(candidate.time); + if (!selected.has(key)) { + selected.set(key, candidate); + } + } + return [...selected.values()].sort((a, b) => a.time - b.time); +} + +function frameKey(time: number) { + return time.toFixed(2); +} + +function respectsMinGap(selected: ScoredFrame[], candidate: ScoredFrame, minGapSeconds: number) { + return selected.every((item) => Math.abs(item.time - candidate.time) >= minGapSeconds); +} diff --git a/src/analysis/frameDifference.ts b/src/analysis/frameDifference.ts index 88eb062..4595ee6 100644 --- a/src/analysis/frameDifference.ts +++ b/src/analysis/frameDifference.ts @@ -19,3 +19,35 @@ export function frameDifferenceRatio( return changed / pixels; } + +export function histogramDifferenceRatio(a: ImageData, b: ImageData, bins = 16): number { + const histogramA = new Array(bins).fill(0); + const histogramB = new Array(bins).fill(0); + const dataA = a.data; + const dataB = b.data; + const pixels = dataA.length / 4; + + for (let i = 0; i < dataA.length; i += 4) { + const lumA = 0.299 * dataA[i] + 0.587 * dataA[i + 1] + 0.114 * dataA[i + 2]; + const lumB = 0.299 * dataB[i] + 0.587 * dataB[i + 1] + 0.114 * dataB[i + 2]; + histogramA[Math.min(bins - 1, Math.floor((lumA / 256) * bins))]++; + histogramB[Math.min(bins - 1, Math.floor((lumB / 256) * bins))]++; + } + + let difference = 0; + for (let index = 0; index < bins; index++) { + difference += Math.abs(histogramA[index] - histogramB[index]); + } + + return difference / (pixels * 2); +} + +export function hybridFrameScore( + a: ImageData, + b: ImageData, + pixelDeltaThreshold: number, +): number { + const pixelDifference = frameDifferenceRatio(a, b, pixelDeltaThreshold); + const histogramDifference = histogramDifferenceRatio(a, b); + return histogramDifference * 0.7 + pixelDifference * 0.3; +} diff --git a/src/analysis/scanVideo.ts b/src/analysis/scanVideo.ts index 4b569fa..8598442 100644 --- a/src/analysis/scanVideo.ts +++ b/src/analysis/scanVideo.ts @@ -1,5 +1,5 @@ import { rankAndTrimCandidates } from "./candidateRanking"; -import { frameDifferenceRatio } from "./frameDifference"; +import { hybridFrameScore } from "./frameDifference"; import { captureAnalysisFrame, createThumbnailUrl } from "../media/canvas"; import { seekVideo } from "../media/video"; import type { CandidateFrame, ScanSettings } from "../types/scan"; @@ -15,9 +15,11 @@ export async function scanVideoForCandidates( callbacks: ScanCallbacks, ): Promise { const analysisCanvas = document.createElement("canvas"); - const candidates: CandidateFrame[] = []; - let lastAcceptedFrame: ImageData | null = null; - let lastAcceptedTime: number | null = null; + const scoredFrames: Array<{ score: number; time: number }> = []; + const selectedCandidates: CandidateFrame[] = []; + let previousFrame: ImageData | null = null; + let windowFrame: ImageData | null = null; + let windowTime = 0; try { for ( @@ -38,64 +40,74 @@ export async function scanVideoForCandidates( settings.analysisHeight, ); - if (!lastAcceptedFrame) { - lastAcceptedFrame = imageData; - - if (settings.includeFirstFrame) { - const thumbnailUrl = await createThumbnailUrl(video); - candidates.push({ - id: crypto.randomUUID(), - time, - score: 1, - reason: "initial-frame", - thumbnailUrl, - }); - lastAcceptedTime = time; - } + if (!previousFrame) { + previousFrame = imageData; + windowFrame = imageData; + windowTime = time; } else { - const diffRatio = frameDifferenceRatio( - lastAcceptedFrame, + const scoreFromPrevious = hybridFrameScore( + previousFrame, imageData, settings.pixelDeltaThreshold, ); + const scoreFromWindow = + windowFrame && time - windowTime >= settings.minSecondsBetweenCaptures * 0.5 + ? hybridFrameScore(windowFrame, imageData, settings.pixelDeltaThreshold) + : scoreFromPrevious; + const score = Math.max(scoreFromPrevious, scoreFromWindow); - const hasEnoughChange = diffRatio >= settings.changedPixelRatioThreshold; - const hasEnoughTimeGap = - lastAcceptedTime === null || - time - lastAcceptedTime >= settings.minSecondsBetweenCaptures; + scoredFrames.push({ + time, + score, + }); - if (hasEnoughChange && hasEnoughTimeGap) { - const thumbnailUrl = await createThumbnailUrl(video); - candidates.push({ - id: crypto.randomUUID(), - time, - score: diffRatio, - reason: "visual-change", - thumbnailUrl, - }); - lastAcceptedFrame = imageData; - lastAcceptedTime = time; + previousFrame = imageData; + if (time - windowTime >= settings.minSecondsBetweenCaptures) { + windowFrame = imageData; + windowTime = time; } } callbacks.onProgress(Math.min(time / video.duration, 1)); - - if (candidates.length >= settings.maxCandidates) { - break; - } } callbacks.onProgress(1); - const ranked = rankAndTrimCandidates(candidates, settings.finalTargetCount, video.duration); - const rankedIds = new Set(ranked.map((candidate) => candidate.id)); - candidates - .filter((candidate) => !rankedIds.has(candidate.id)) - .forEach((candidate) => URL.revokeObjectURL(candidate.thumbnailUrl)); + const targetVisualCount = settings.includeFirstFrame + ? Math.max(0, settings.finalTargetCount - 1) + : settings.finalTargetCount; + const selectedMoments = rankAndTrimCandidates( + scoredFrames, + targetVisualCount, + video.duration, + settings.minSecondsBetweenCaptures, + ); - return ranked; + if (settings.includeFirstFrame) { + await seekVideo(video, 0); + selectedCandidates.push({ + id: crypto.randomUUID(), + time: 0, + score: 1, + reason: "initial-frame", + thumbnailUrl: await createThumbnailUrl(video), + }); + } + + for (const moment of selectedMoments) { + await seekVideo(video, moment.time); + selectedCandidates.push({ + id: crypto.randomUUID(), + time: moment.time, + score: moment.score, + reason: "visual-change", + thumbnailUrl: await createThumbnailUrl(video), + }); + } + + return selectedCandidates.sort((a, b) => a.time - b.time); } catch (error) { - candidates.forEach((candidate) => URL.revokeObjectURL(candidate.thumbnailUrl)); + selectedCandidates.forEach((candidate) => URL.revokeObjectURL(candidate.thumbnailUrl)); throw error; } }