feat: implement scoring and ranking for video frames, enhance candidate selection process

This commit is contained in:
Ben
2026-05-14 02:09:39 -07:00
parent e308adc642
commit 65ef5420e7
6 changed files with 179 additions and 57 deletions
+5
View File
@@ -0,0 +1,5 @@
node_modules
dist
.git
.DS_Store
npm-debug.log*
+12
View File
@@ -0,0 +1,12 @@
# Build stage: install dependencies and run the project's build script.
FROM node:22-alpine AS build
WORKDIR /app
# Copy the package manifests before the rest of the source so `npm ci` runs
# against them alone; Docker can then reuse that layer when only source files change.
COPY package*.json ./
RUN npm ci
COPY . .
# NOTE(review): presumably the build script writes its output to /app/dist,
# which is the directory the runtime stage copies below — confirm in package.json.
RUN npm run build
# Runtime stage: only the built files are carried over into the nginx image.
FROM nginx:1.27-alpine
COPY --from=build /app/dist /usr/share/nginx/html
EXPOSE 80
+4
View File
@@ -0,0 +1,4 @@
{
"schemaVersion": 2,
"dockerfilePath": "./Dockerfile"
}
+68 -11
View File
@@ -1,34 +1,91 @@
import type { CandidateFrame } from "../types/scan";
/**
 * A sampled position in the video paired with the score used to rank it.
 */
export type ScoredFrame = {
  /** Position on the video timeline; gaps between frames are measured on this value. */
  time: number;
  /** Ranking score; frames with a higher score are preferred during selection. */
  score: number;
};
/**
 * Chooses at most `targetCount` scored frames, favouring score peaks that are
 * spread across the video and kept at least `minGapSeconds` apart.
 *
 * NOTE(review): this span is rendered from a diff, so removed and added lines
 * are interleaved (for example both `candidates` parameter declarations and
 * both `const selected` lines). The description below follows the lines that
 * work with `ScoredFrame`.
 *
 * Steps:
 * 1. Return an empty list when `targetCount <= 0` or there are no candidates.
 * 2. Collapse candidates that share a `frameKey`, keeping the highest score.
 * 3. If no more than `targetCount` remain, return them all ordered by time.
 * 4. Keep "local peaks": score at or above a floor derived from the 0.6
 *    quantile, not lower than the previous neighbour and strictly higher than
 *    the next one.
 * 5. Split `duration` into `targetCount` buckets and take the best pool entry
 *    of each bucket when it respects the minimum gap.
 * 6. Fill remaining slots from the pool by descending score, then from all
 *    unique candidates with the gap requirement halved.
 *
 * @returns The selected frames ordered by ascending time.
 */
export function rankAndTrimCandidates(
candidates: CandidateFrame[],
candidates: ScoredFrame[],
targetCount: number,
duration: number,
): CandidateFrame[] {
if (candidates.length <= targetCount) {
return [...candidates].sort((a, b) => a.time - b.time);
minGapSeconds: number,
): ScoredFrame[] {
if (targetCount <= 0 || candidates.length === 0) {
return [];
}
const uniqueCandidates = dedupeByRoundedTime(candidates);
if (uniqueCandidates.length <= targetCount) {
return [...uniqueCandidates].sort((a, b) => a.time - b.time);
}
// Floor is 60% of the 0.6-quantile score, but never below 0.02.
const scoreFloor = Math.max(percentile(uniqueCandidates, 0.6) * 0.6, 0.02);
// Neighbours are adjacent in time because dedupeByRoundedTime returns a time-sorted list.
const localPeaks = uniqueCandidates.filter((candidate, index, items) => {
const previous = items[index - 1];
const next = items[index + 1];
const higherThanPrevious = !previous || candidate.score >= previous.score;
const higherThanNext = !next || candidate.score > next.score;
return candidate.score >= scoreFloor && higherThanPrevious && higherThanNext;
});
// With fewer peaks than half the target, the non-peak candidates are appended to the pool.
const pool =
localPeaks.length >= targetCount / 2
? localPeaks
: [...localPeaks, ...uniqueCandidates.filter((candidate) => !localPeaks.includes(candidate))];
const bucketCount = Math.max(1, targetCount);
const bucketSize = Math.max(duration / bucketCount, 1);
const selected = new Map<string, CandidateFrame>();
const selected = new Map<string, ScoredFrame>();
for (let bucket = 0; bucket < bucketCount; bucket++) {
const start = bucket * bucketSize;
// The last bucket's end is nudged past `duration` so a frame exactly at the end is included.
const end = bucket === bucketCount - 1 ? duration + 0.001 : start + bucketSize;
const best = candidates
const best = pool
.filter((candidate) => candidate.time >= start && candidate.time < end)
.sort((a, b) => b.score - a.score)[0];
if (best) {
selected.set(best.id, best);
if (best && respectsMinGap([...selected.values()], best, minGapSeconds)) {
selected.set(frameKey(best.time), best);
}
}
for (const candidate of [...candidates].sort((a, b) => b.score - a.score)) {
// Second pass: fill remaining slots from the pool, best score first.
for (const candidate of [...pool].sort((a, b) => b.score - a.score)) {
if (selected.size >= targetCount) break;
selected.set(candidate.id, candidate);
if (respectsMinGap([...selected.values()], candidate, minGapSeconds)) {
selected.set(frameKey(candidate.time), candidate);
}
}
// Last pass: consider every unique candidate and accept half the usual gap.
for (const candidate of [...uniqueCandidates].sort((a, b) => b.score - a.score)) {
if (selected.size >= targetCount) break;
if (respectsMinGap([...selected.values()], candidate, minGapSeconds * 0.5)) {
selected.set(frameKey(candidate.time), candidate);
}
}
return [...selected.values()].sort((a, b) => a.time - b.time);
}
/**
 * Returns the score found at the given quantile of the candidates' scores.
 *
 * Scores are sorted ascending and the value at index `floor((n - 1) * quantile)`
 * is returned without interpolation. The index is clamped to the valid range,
 * so quantiles below 0 or above 1 yield the lowest or highest score.
 *
 * @param candidates - Items to rank; only their `score` is read.
 * @param quantile - Fraction of the sorted scores to skip; NaN is treated as 0.
 * @returns The selected score, or 0 when `candidates` is empty.
 */
function percentile(candidates: readonly { score: number }[], quantile: number): number {
  if (candidates.length === 0) {
    // Without this guard the lookup below would use index -1 and yield undefined.
    return 0;
  }

  const scores = candidates.map((candidate) => candidate.score).sort((a, b) => a - b);
  const lastIndex = scores.length - 1;
  const rawIndex = Math.floor(lastIndex * quantile);
  // Math.min/Math.max propagate NaN, so an unusable quantile is mapped to the first score.
  const index = Number.isNaN(rawIndex) ? 0 : Math.min(lastIndex, Math.max(0, rawIndex));
  return scores[index] ?? 0;
}
/**
 * Collapses candidates whose times map to the same `frameKey`, keeping the
 * highest-scoring one for each key.
 *
 * @param candidates - Frames to deduplicate; the array is not modified.
 * @returns The surviving frames ordered by ascending time.
 */
function dedupeByRoundedTime(candidates: ScoredFrame[]) {
  // Visiting frames best-first means the first frame seen for a key is the one to keep.
  const byScoreDescending = candidates.slice().sort((left, right) => right.score - left.score);
  const bestPerKey = byScoreDescending.reduce((kept, frame) => {
    const key = frameKey(frame.time);
    return kept.has(key) ? kept : kept.set(key, frame);
  }, new Map<string, ScoredFrame>());
  return Array.from(bestPerKey.values()).sort((left, right) => left.time - right.time);
}
/**
 * Builds the string key used to identify a frame by its time.
 *
 * The time is formatted with two decimals, so times that round to the same
 * hundredth produce the same key.
 *
 * @param time - Frame time to format.
 * @returns The time formatted with exactly two fraction digits.
 */
function frameKey(time: number) {
  const fractionDigits = 2;
  return time.toFixed(fractionDigits);
}
/**
 * Checks whether a candidate is far enough in time from every frame already selected.
 *
 * @param selected - Frames chosen so far.
 * @param candidate - Frame being considered.
 * @param minGapSeconds - Minimum allowed absolute time difference.
 * @returns `true` when every selected frame is at least `minGapSeconds` away
 *   (always `true` for an empty selection), otherwise `false`.
 */
function respectsMinGap(selected: ScoredFrame[], candidate: ScoredFrame, minGapSeconds: number) {
  for (const existing of selected) {
    const distance = Math.abs(existing.time - candidate.time);
    // Written as a negated `>=` so a NaN distance or gap is rejected rather than accepted.
    if (!(distance >= minGapSeconds)) {
      return false;
    }
  }
  return true;
}
+32
View File
@@ -19,3 +19,35 @@ export function frameDifferenceRatio(
return changed / pixels;
}
/**
 * Counts the pixels of an RGBA buffer per luminance bin.
 *
 * @returns The per-bin counts and the number of whole pixels that were read.
 */
function luminanceHistogram(
  data: ArrayLike<number>,
  bins: number,
): { counts: number[]; pixels: number } {
  const counts = new Array<number>(bins).fill(0);
  const pixels = Math.floor(data.length / 4);
  for (let pixel = 0; pixel < pixels; pixel++) {
    const offset = pixel * 4;
    // Weighted sum of the red, green and blue channels; the fourth channel is ignored.
    const luminance = 0.299 * data[offset] + 0.587 * data[offset + 1] + 0.114 * data[offset + 2];
    // Clamp so a luminance at the very top of the range still lands in the last bin.
    counts[Math.min(bins - 1, Math.floor((luminance / 256) * bins))]++;
  }
  return { counts, pixels };
}

/**
 * Measures how different the luminance distributions of two images are.
 *
 * Each image is reduced to a luminance histogram with `bins` buckets, and the
 * result is half the summed absolute difference between the two histograms
 * expressed as fractions of each image's pixel count. Identical distributions
 * give 0 and completely disjoint ones give 1.
 *
 * Images of different sizes are supported because each histogram is weighted
 * by its own pixel count; for equally sized images the result is the same as
 * dividing the raw count difference by twice the pixel count.
 *
 * @param a - First image.
 * @param b - Second image.
 * @param bins - Number of luminance buckets; must be a positive integer.
 * @returns A ratio between 0 and 1, or 0 when either image has no pixels.
 * @throws RangeError when `bins` is not a positive integer.
 */
export function histogramDifferenceRatio(a: ImageData, b: ImageData, bins = 16): number {
  if (!Number.isInteger(bins) || bins < 1) {
    throw new RangeError(`bins must be a positive integer, received ${bins}`);
  }

  const first = luminanceHistogram(a.data, bins);
  const second = luminanceHistogram(b.data, bins);

  if (first.pixels === 0 || second.pixels === 0) {
    // Nothing to compare; avoids the 0 / 0 = NaN the division below would produce.
    return 0;
  }

  // Cross-multiplying by the other image's pixel count compares the histograms
  // as fractions while keeping the running sum in exact integer arithmetic.
  let difference = 0;
  for (let index = 0; index < bins; index++) {
    difference += Math.abs(
      first.counts[index] * second.pixels - second.counts[index] * first.pixels,
    );
  }

  return difference / (2 * first.pixels * second.pixels);
}
/**
 * Scores the visual change between two frames by blending two measures.
 *
 * The histogram difference contributes 70% of the score and the pixel
 * difference ratio contributes 30%.
 *
 * @param a - First frame.
 * @param b - Second frame.
 * @param pixelDeltaThreshold - Passed through to `frameDifferenceRatio`.
 * @returns The weighted combination of the two difference ratios.
 */
export function hybridFrameScore(
  a: ImageData,
  b: ImageData,
  pixelDeltaThreshold: number,
): number {
  const histogramWeight = 0.7;
  const pixelWeight = 0.3;

  const pixelPart = frameDifferenceRatio(a, b, pixelDeltaThreshold);
  const histogramPart = histogramDifferenceRatio(a, b);

  return histogramPart * histogramWeight + pixelPart * pixelWeight;
}
+58 -46
View File
@@ -1,5 +1,5 @@
import { rankAndTrimCandidates } from "./candidateRanking";
import { frameDifferenceRatio } from "./frameDifference";
import { hybridFrameScore } from "./frameDifference";
import { captureAnalysisFrame, createThumbnailUrl } from "../media/canvas";
import { seekVideo } from "../media/video";
import type { CandidateFrame, ScanSettings } from "../types/scan";
@@ -15,9 +15,11 @@ export async function scanVideoForCandidates(
callbacks: ScanCallbacks,
): Promise<CandidateFrame[]> {
const analysisCanvas = document.createElement("canvas");
const candidates: CandidateFrame[] = [];
let lastAcceptedFrame: ImageData | null = null;
let lastAcceptedTime: number | null = null;
const scoredFrames: Array<{ score: number; time: number }> = [];
const selectedCandidates: CandidateFrame[] = [];
let previousFrame: ImageData | null = null;
let windowFrame: ImageData | null = null;
let windowTime = 0;
try {
for (
@@ -38,64 +40,74 @@ export async function scanVideoForCandidates(
settings.analysisHeight,
);
if (!lastAcceptedFrame) {
lastAcceptedFrame = imageData;
if (settings.includeFirstFrame) {
const thumbnailUrl = await createThumbnailUrl(video);
candidates.push({
id: crypto.randomUUID(),
time,
score: 1,
reason: "initial-frame",
thumbnailUrl,
});
lastAcceptedTime = time;
}
if (!previousFrame) {
previousFrame = imageData;
windowFrame = imageData;
windowTime = time;
} else {
const diffRatio = frameDifferenceRatio(
lastAcceptedFrame,
const scoreFromPrevious = hybridFrameScore(
previousFrame,
imageData,
settings.pixelDeltaThreshold,
);
const scoreFromWindow =
windowFrame && time - windowTime >= settings.minSecondsBetweenCaptures * 0.5
? hybridFrameScore(windowFrame, imageData, settings.pixelDeltaThreshold)
: scoreFromPrevious;
const score = Math.max(scoreFromPrevious, scoreFromWindow);
const hasEnoughChange = diffRatio >= settings.changedPixelRatioThreshold;
const hasEnoughTimeGap =
lastAcceptedTime === null ||
time - lastAcceptedTime >= settings.minSecondsBetweenCaptures;
scoredFrames.push({
time,
score,
});
if (hasEnoughChange && hasEnoughTimeGap) {
const thumbnailUrl = await createThumbnailUrl(video);
candidates.push({
id: crypto.randomUUID(),
time,
score: diffRatio,
reason: "visual-change",
thumbnailUrl,
});
lastAcceptedFrame = imageData;
lastAcceptedTime = time;
previousFrame = imageData;
if (time - windowTime >= settings.minSecondsBetweenCaptures) {
windowFrame = imageData;
windowTime = time;
}
}
callbacks.onProgress(Math.min(time / video.duration, 1));
if (candidates.length >= settings.maxCandidates) {
break;
}
}
callbacks.onProgress(1);
const ranked = rankAndTrimCandidates(candidates, settings.finalTargetCount, video.duration);
const rankedIds = new Set(ranked.map((candidate) => candidate.id));
candidates
.filter((candidate) => !rankedIds.has(candidate.id))
.forEach((candidate) => URL.revokeObjectURL(candidate.thumbnailUrl));
const targetVisualCount = settings.includeFirstFrame
? Math.max(0, settings.finalTargetCount - 1)
: settings.finalTargetCount;
const selectedMoments = rankAndTrimCandidates(
scoredFrames,
targetVisualCount,
video.duration,
settings.minSecondsBetweenCaptures,
);
return ranked;
if (settings.includeFirstFrame) {
await seekVideo(video, 0);
selectedCandidates.push({
id: crypto.randomUUID(),
time: 0,
score: 1,
reason: "initial-frame",
thumbnailUrl: await createThumbnailUrl(video),
});
}
for (const moment of selectedMoments) {
await seekVideo(video, moment.time);
selectedCandidates.push({
id: crypto.randomUUID(),
time: moment.time,
score: moment.score,
reason: "visual-change",
thumbnailUrl: await createThumbnailUrl(video),
});
}
return selectedCandidates.sort((a, b) => a.time - b.time);
} catch (error) {
candidates.forEach((candidate) => URL.revokeObjectURL(candidate.thumbnailUrl));
selectedCandidates.forEach((candidate) => URL.revokeObjectURL(candidate.thumbnailUrl));
throw error;
}
}