feat: Add image upload and background removal

Enables users to upload custom avatar assets, and automatically removes the background from the avatar image.

New features:
- Avatar creation now supports uploading base, blink, and talk textures.
- Added ability to define the main body bounding box during rigging.
- Vision service now includes image segmentation for background removal.
- Studio component dynamically processes the avatar image for background removal if chroma key is enabled.
This commit is contained in:
James Twose 2025-11-20 21:24:22 +01:00
parent 3eff403fb4
commit ddb2455416
7 changed files with 528 additions and 161 deletions

View File

@ -33,7 +33,8 @@ const App: React.FC = () => {
const handleRiggingComplete = (data: {
leftEye: Rect, rightEye: Rect, mouth: Rect, skinColor: string,
textureClosedEye: Rect, textureOpenMouth: Rect
textureClosedEye: Rect, textureOpenMouth: Rect,
mainBody: Rect, chromaKeyColor: string
}) => {
if (generatedData) {
setAvatar({
@ -46,6 +47,8 @@ const App: React.FC = () => {
skinColor: data.skinColor,
textureClosedEye: data.textureClosedEye,
textureOpenMouth: data.textureOpenMouth,
mainBody: data.mainBody,
chromaKeyColor: data.chromaKeyColor
});
setAppState(AppState.STUDIO);
}

View File

@ -1,20 +1,31 @@
import React, { useState } from 'react';
import { generateAvatarImage } from '../services/geminiService';
import { analyzeAvatarImage } from '../services/visionService';
import { stitchAssets } from '../services/imageService';
import LoadingSpinner from './LoadingSpinner';
import { Rect } from '../types';
interface AvatarCreatorProps {
onAvatarGenerated: (url: string, name: string, initialData?: { leftEye: Rect, rightEye: Rect, mouth: Rect, skinColor: string }) => void;
onAvatarGenerated: (url: string, name: string, initialData?: {
leftEye?: Rect, rightEye?: Rect, mouth?: Rect, skinColor?: string,
mainBody?: Rect, textureClosedEye?: Rect, textureOpenMouth?: Rect
}) => void;
}
const AvatarCreator: React.FC<AvatarCreatorProps> = ({ onAvatarGenerated }) => {
const [mode, setMode] = useState<'generate' | 'upload'>('generate');
// Generation State
const [prompt, setPrompt] = useState('');
const [name, setName] = useState('');
const [status, setStatus] = useState<'idle' | 'generating' | 'analyzing'>('idle');
const [status, setStatus] = useState<'idle' | 'generating' | 'analyzing' | 'stitching'>('idle');
const [error, setError] = useState<string | null>(null);
// Upload State
const [baseFile, setBaseFile] = useState<File | null>(null);
const [blinkFile, setBlinkFile] = useState<File | null>(null);
const [talkFile, setTalkFile] = useState<File | null>(null);
const handleGenerate = async () => {
if (!prompt || !name) return;
@ -25,12 +36,11 @@ const AvatarCreator: React.FC<AvatarCreatorProps> = ({ onAvatarGenerated }) => {
// 1. Generate Image (Now creates a character sheet)
const imageUrl = await generateAvatarImage(prompt);
// 2. Analyze Image for Landmarks (Initial guess)
// Note: Vision service will likely find the main face on the left, which is what we want for targets.
// 2. Analyze Image for Landmarks
setStatus('analyzing');
const analysisData = await analyzeAvatarImage(imageUrl);
// 3. Pass to parent (to go to Rigging)
// 3. Pass to parent
if (analysisData) {
onAvatarGenerated(imageUrl, name, analysisData);
} else {
@ -44,68 +54,161 @@ const AvatarCreator: React.FC<AvatarCreatorProps> = ({ onAvatarGenerated }) => {
}
};
/**
 * Upload flow: stitches the selected files into a single sheet, runs the
 * vision analysis on that sheet, and hands the combined result to the parent.
 *
 * Does nothing unless both a base file and a model name are present.
 * Any failure is logged and surfaced through the `error` state; `status`
 * always returns to 'idle' when the flow ends.
 */
const handleUpload = async () => {
  if (!baseFile || !name) {
    return;
  }

  setStatus('stitching');
  setError(null);

  try {
    // Step 1: merge base / blink / talk files into one sheet image.
    // Unselected optional files are passed as undefined.
    const stitched = await stitchAssets(
      baseFile,
      blinkFile ? blinkFile : undefined,
      talkFile ? talkFile : undefined
    );

    // Step 2: analyze the whole stitched sheet. The base image sits on the
    // left (or is the full image), which the original author expected to be
    // enough for the face to be found.
    setStatus('analyzing');
    const detected = await analyzeAvatarImage(stitched.imageUrl);

    // Step 3: rects known from stitching are layered on top of whatever the
    // vision analysis returned (nothing, if it returned null).
    onAvatarGenerated(stitched.imageUrl, name, {
      ...(detected ? detected : {}),
      mainBody: stitched.mainBody,
      textureClosedEye: stitched.textureClosedEye,
      textureOpenMouth: stitched.textureOpenMouth
    });
  } catch (err) {
    console.error(err);
    setError("Failed to process uploaded images. Please ensure they are valid image files.");
  } finally {
    setStatus('idle');
  }
};
/**
 * Shared change handler for the asset file inputs.
 *
 * Forwards the first selected file to `setter`. When the input reports no
 * files (for example after the selection has been cleared), `null` is
 * forwarded so the stored file cannot go stale and be stitched in even
 * though the input shows nothing selected.
 *
 * @param e      Change event from an `<input type="file">`.
 * @param setter State setter receiving the chosen file, or `null` if none.
 */
const handleFileChange = (e: React.ChangeEvent<HTMLInputElement>, setter: (f: File | null) => void) => {
  setter(e.target.files?.[0] ?? null);
};
return (
<div className="max-w-2xl mx-auto bg-slate-800/50 backdrop-blur-lg border border-slate-700 p-8 rounded-2xl shadow-2xl">
<div className="text-center mb-8">
<h2 className="text-3xl font-bold text-transparent bg-clip-text bg-gradient-to-r from-cyan-400 to-purple-500 mb-2">
Design Your Avatar
</h2>
<p className="text-slate-400">
Describe your dream VTuber model. Gemini will generate a character sheet with expression assets.
</p>
</div>
<div className="space-y-6">
<div>
<label className="block text-sm font-medium text-slate-300 mb-2">Model Name</label>
<input
type="text"
value={name}
onChange={(e) => setName(e.target.value)}
placeholder="e.g., Neon Kitsune"
className="w-full bg-slate-900/50 border border-slate-600 rounded-xl px-4 py-3 text-white placeholder-slate-500 focus:ring-2 focus:ring-cyan-500 focus:border-transparent transition-all outline-none"
/>
</div>
<div>
<label className="block text-sm font-medium text-slate-300 mb-2">Description</label>
<textarea
value={prompt}
onChange={(e) => setPrompt(e.target.value)}
placeholder="e.g., A cyberpunk anime girl with neon blue hair, glowing headphones, wearing a futuristic jacket..."
className="w-full h-32 bg-slate-900/50 border border-slate-600 rounded-xl px-4 py-3 text-white placeholder-slate-500 focus:ring-2 focus:ring-cyan-500 focus:border-transparent transition-all outline-none resize-none"
/>
</div>
{error && (
<div className="p-3 bg-red-500/20 border border-red-500/50 rounded-lg text-red-200 text-sm">
{error}
</div>
)}
<div className="max-w-2xl mx-auto bg-slate-800/50 backdrop-blur-lg border border-slate-700 rounded-2xl shadow-2xl overflow-hidden">
{/* Tabs */}
<div className="flex border-b border-slate-700">
<button
onClick={handleGenerate}
disabled={status !== 'idle' || !prompt || !name}
className={`w-full py-4 rounded-xl font-bold text-lg transition-all duration-200 ${
status !== 'idle' || !prompt || !name
? 'bg-slate-700 text-slate-500 cursor-not-allowed'
: 'bg-gradient-to-r from-cyan-500 to-blue-600 hover:from-cyan-400 hover:to-blue-500 text-white shadow-lg shadow-cyan-500/25 transform hover:scale-[1.02]'
onClick={() => setMode('generate')}
className={`flex-1 py-4 text-sm font-bold uppercase tracking-wider transition-colors ${
mode === 'generate'
? 'bg-slate-700/50 text-cyan-400 border-b-2 border-cyan-400'
: 'text-slate-500 hover:text-slate-300'
}`}
>
{status !== 'idle' ? (
<div className="flex items-center justify-center gap-3">
<LoadingSpinner />
<span>{status === 'generating' ? 'Dreaming up Sheet...' : 'Analyzing Features...'}</span>
AI Generator
</button>
<button
onClick={() => setMode('upload')}
className={`flex-1 py-4 text-sm font-bold uppercase tracking-wider transition-colors ${
mode === 'upload'
? 'bg-slate-700/50 text-purple-400 border-b-2 border-purple-400'
: 'text-slate-500 hover:text-slate-300'
}`}
>
Upload Assets
</button>
</div>
<div className="p-8">
<div className="text-center mb-8">
<h2 className="text-3xl font-bold text-transparent bg-clip-text bg-gradient-to-r from-cyan-400 to-purple-500 mb-2">
{mode === 'generate' ? 'Design Your Avatar' : 'Import Your Model'}
</h2>
<p className="text-slate-400">
{mode === 'generate'
? 'Describe your dream VTuber model. Gemini will generate a character sheet with expression assets.'
: 'Upload your existing character art. We support separate files for blink and talk variants.'
}
</p>
</div>
<div className="space-y-6">
<div>
<label className="block text-sm font-medium text-slate-300 mb-2">Model Name</label>
<input
type="text"
value={name}
onChange={(e) => setName(e.target.value)}
placeholder="e.g., Neon Kitsune"
className="w-full bg-slate-900/50 border border-slate-600 rounded-xl px-4 py-3 text-white placeholder-slate-500 focus:ring-2 focus:ring-cyan-500 focus:border-transparent transition-all outline-none"
/>
</div>
{mode === 'generate' ? (
<div>
<label className="block text-sm font-medium text-slate-300 mb-2">Description</label>
<textarea
value={prompt}
onChange={(e) => setPrompt(e.target.value)}
placeholder="e.g., A cyberpunk anime girl with neon blue hair, glowing headphones, wearing a futuristic jacket..."
className="w-full h-32 bg-slate-900/50 border border-slate-600 rounded-xl px-4 py-3 text-white placeholder-slate-500 focus:ring-2 focus:ring-cyan-500 focus:border-transparent transition-all outline-none resize-none"
/>
</div>
) : (
<div className="flex items-center justify-center gap-2">
<span>Generate Model</span>
<svg xmlns="http://www.w3.org/2000/svg" className="h-5 w-5" viewBox="0 0 20 20" fill="currentColor">
<path fillRule="evenodd" d="M10 18a8 8 0 100-16 8 8 0 000 16zm3.707-8.707l-3-3a1 1 0 00-1.414 1.414L10.586 9H7a1 1 0 100 2h3.586l-1.293 1.293a1 1 0 101.414 1.414l3-3a1 1 0 000-1.414z" clipRule="evenodd" />
</svg>
<div className="space-y-4">
<div className="p-4 bg-slate-900/50 rounded-xl border border-slate-600 border-dashed">
<label className="block text-sm font-bold text-slate-300 mb-2">Base Model (Required)</label>
<input type="file" accept="image/*" onChange={(e) => handleFileChange(e, setBaseFile)} className="text-sm text-slate-400 file:mr-4 file:py-2 file:px-4 file:rounded-full file:border-0 file:text-sm file:font-semibold file:bg-cyan-500/10 file:text-cyan-400 hover:file:bg-cyan-500/20"/>
<p className="text-xs text-slate-500 mt-1">The main look of your character (Eyes Open, Mouth Closed).</p>
</div>
<div className="grid grid-cols-1 md:grid-cols-2 gap-4">
<div className="p-4 bg-slate-900/50 rounded-xl border border-slate-600 border-dashed">
<label className="block text-sm font-bold text-slate-300 mb-2">Closed Eyes (Optional)</label>
<input type="file" accept="image/*" onChange={(e) => handleFileChange(e, setBlinkFile)} className="text-sm text-slate-400 file:mr-4 file:py-2 file:px-4 file:rounded-full file:border-0 file:text-sm file:font-semibold file:bg-purple-500/10 file:text-purple-400 hover:file:bg-purple-500/20"/>
</div>
<div className="p-4 bg-slate-900/50 rounded-xl border border-slate-600 border-dashed">
<label className="block text-sm font-bold text-slate-300 mb-2">Open Mouth (Optional)</label>
<input type="file" accept="image/*" onChange={(e) => handleFileChange(e, setTalkFile)} className="text-sm text-slate-400 file:mr-4 file:py-2 file:px-4 file:rounded-full file:border-0 file:text-sm file:font-semibold file:bg-pink-500/10 file:text-pink-400 hover:file:bg-pink-500/20"/>
</div>
</div>
</div>
)}
</button>
{error && (
<div className="p-3 bg-red-500/20 border border-red-500/50 rounded-lg text-red-200 text-sm">
{error}
</div>
)}
<button
onClick={mode === 'generate' ? handleGenerate : handleUpload}
disabled={status !== 'idle' || !name || (mode === 'generate' && !prompt) || (mode === 'upload' && !baseFile)}
className={`w-full py-4 rounded-xl font-bold text-lg transition-all duration-200 ${
status !== 'idle' || !name || (mode === 'generate' && !prompt) || (mode === 'upload' && !baseFile)
? 'bg-slate-700 text-slate-500 cursor-not-allowed'
: 'bg-gradient-to-r from-cyan-500 to-blue-600 hover:from-cyan-400 hover:to-blue-500 text-white shadow-lg shadow-cyan-500/25 transform hover:scale-[1.02]'
}`}
>
{status !== 'idle' ? (
<div className="flex items-center justify-center gap-3">
<LoadingSpinner />
<span>
{status === 'generating' ? 'Dreaming up Sheet...' :
status === 'stitching' ? 'Processing Assets...' :
'Analyzing Features...'}
</span>
</div>
) : (
<div className="flex items-center justify-center gap-2">
<span>{mode === 'generate' ? 'Generate Model' : 'Create Model'}</span>
<svg xmlns="http://www.w3.org/2000/svg" className="h-5 w-5" viewBox="0 0 20 20" fill="currentColor">
<path fillRule="evenodd" d="M10 18a8 8 0 100-16 8 8 0 000 16zm3.707-8.707l-3-3a1 1 0 00-1.414 1.414L10.586 9H7a1 1 0 100 2h3.586l-1.293 1.293a1 1 0 101.414 1.414l3-3a1 1 0 000-1.414z" clipRule="evenodd" />
</svg>
</div>
)}
</button>
</div>
</div>
</div>
);

View File

@ -8,10 +8,11 @@ interface RiggingEditorProps {
onComplete: (data: {
leftEye: Rect; rightEye: Rect; mouth: Rect; skinColor: string;
textureClosedEye: Rect; textureOpenMouth: Rect;
mainBody: Rect; chromaKeyColor: string;
}) => void;
}
type ActiveFeature = 'leftEye' | 'rightEye' | 'mouth' | 'textureClosedEye' | 'textureOpenMouth' | null;
type ActiveFeature = 'leftEye' | 'rightEye' | 'mouth' | 'textureClosedEye' | 'textureOpenMouth' | 'mainBody' | null;
const ResizableBox: React.FC<{
rect: Rect;
@ -125,11 +126,17 @@ const RiggingEditor: React.FC<RiggingEditorProps> = ({ imageUrl, initialData, on
const [rightEye, setRightEye] = useState<Rect>(initialData?.rightEye || { x: 0.45, y: 0.4, w: 0.1, h: 0.1 });
const [mouth, setMouth] = useState<Rect>(initialData?.mouth || { x: 0.35, y: 0.55, w: 0.1, h: 0.05 });
// Main Body (Default to left 70%)
const [mainBody, setMainBody] = useState<Rect>({ x: 0.05, y: 0.05, w: 0.65, h: 0.9 });
// Sources (Right side of image usually)
const [textureClosedEye, setTextureClosedEye] = useState<Rect>({ x: 0.7, y: 0.1, w: 0.2, h: 0.2 });
const [textureOpenMouth, setTextureOpenMouth] = useState<Rect>({ x: 0.7, y: 0.5, w: 0.2, h: 0.2 });
const [skinColor, setSkinColor] = useState<string>(initialData?.skinColor || '#fcd3bf');
// Use this simply as a boolean flag now, passing 'AI_AUTO' if enabled
const [useAiBackground, setUseAiBackground] = useState<boolean>(true);
const [activeFeature, setActiveFeature] = useState<ActiveFeature>(null);
return (
@ -137,8 +144,9 @@ const RiggingEditor: React.FC<RiggingEditorProps> = ({ imageUrl, initialData, on
<div className="text-center mb-6">
<h2 className="text-2xl font-bold text-white mb-2">Rig Your Character</h2>
<p className="text-slate-400 text-sm">
1. Match the <b>Target</b> boxes (Red/Blue/Green) to the main character.<br/>
2. Match the <b>Source</b> boxes (Purple/Orange) to the extra assets on the right.
1. Adjust the <b>Main Body</b> (Yellow) to frame your character.<br/>
2. Match the <b>Targets</b> (Red/Blue/Green) to the face features.<br/>
3. Match the <b>Sources</b> (Purple/Orange) to the assets on the right.
</p>
</div>
@ -155,6 +163,13 @@ const RiggingEditor: React.FC<RiggingEditorProps> = ({ imageUrl, initialData, on
{/* Aspect ratio container to map percentage boxes correctly */}
<div className="absolute inset-0 w-full h-full">
{/* Main Body */}
<ResizableBox
rect={mainBody} color="#facc15" label="Main Body"
isActive={activeFeature === 'mainBody'}
onUpdate={setMainBody} onActivate={() => setActiveFeature('mainBody')}
/>
{/* Targets */}
<ResizableBox
rect={leftEye} color="#ef4444" label="Left Eye Target"
@ -190,21 +205,43 @@ const RiggingEditor: React.FC<RiggingEditorProps> = ({ imageUrl, initialData, on
{/* Sidebar Controls */}
<div className="w-72 flex flex-col gap-4 bg-slate-800/50 p-6 rounded-xl border border-slate-700 h-full overflow-y-auto">
<div className="bg-slate-900/50 p-4 rounded-lg">
<label className="block text-xs font-bold text-slate-400 mb-2 uppercase">Skin Color Fallback</label>
<div className="flex items-center gap-3">
<input
type="color"
value={skinColor}
onChange={(e) => setSkinColor(e.target.value)}
className="w-8 h-8 rounded cursor-pointer border-0 p-0"
/>
<span className="text-xs text-slate-400 font-mono">{skinColor}</span>
</div>
<div className="bg-slate-900/50 p-4 rounded-lg space-y-3">
<div>
<label className="block text-xs font-bold text-slate-400 mb-2 uppercase">Background Removal</label>
<div className="flex items-center justify-between p-2 bg-slate-800 rounded-lg border border-slate-700">
<span className="text-xs text-slate-300">AI Magic Removal</span>
<label className="relative inline-flex items-center cursor-pointer">
<input
type="checkbox"
className="sr-only peer"
checked={useAiBackground}
onChange={(e) => setUseAiBackground(e.target.checked)}
/>
<div className="w-9 h-5 bg-slate-600 peer-focus:outline-none rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-4 after:w-4 after:transition-all peer-checked:bg-cyan-500"></div>
</label>
</div>
</div>
<div>
<label className="block text-xs font-bold text-slate-400 mb-1 uppercase">Eyelid Skin Color</label>
<div className="flex items-center gap-3">
<input
type="color"
value={skinColor}
onChange={(e) => setSkinColor(e.target.value)}
className="w-8 h-8 rounded cursor-pointer border-0 p-0"
/>
<span className="text-xs text-slate-400 font-mono">Fallback</span>
</div>
</div>
</div>
<div className="space-y-3 flex-1">
<div className="text-xs font-bold text-slate-400 uppercase border-b border-slate-700 pb-1">Targets (Main Face)</div>
<div className="text-xs font-bold text-slate-400 uppercase border-b border-slate-700 pb-1">Composition</div>
<div className="flex items-center gap-2 text-sm text-slate-300 cursor-pointer hover:text-white" onClick={() => setActiveFeature('mainBody')}>
<div className="w-3 h-3 bg-yellow-400 rounded-full shadow"></div> Main Body Crop
</div>
<div className="text-xs font-bold text-slate-400 uppercase border-b border-slate-700 pb-1 mt-4">Targets (Main Face)</div>
<div className="flex items-center gap-2 text-sm text-slate-300 cursor-pointer hover:text-white" onClick={() => setActiveFeature('leftEye')}>
<div className="w-3 h-3 bg-red-500 rounded-full shadow"></div> Left Eye
</div>
@ -226,7 +263,11 @@ const RiggingEditor: React.FC<RiggingEditorProps> = ({ imageUrl, initialData, on
<div className="mt-4">
<button
onClick={() => onComplete({ leftEye, rightEye, mouth, skinColor, textureClosedEye, textureOpenMouth })}
onClick={() => onComplete({
leftEye, rightEye, mouth, skinColor,
textureClosedEye, textureOpenMouth, mainBody,
chromaKeyColor: useAiBackground ? 'AI_AUTO' : ''
})}
className="w-full py-4 bg-gradient-to-r from-cyan-500 to-blue-600 hover:from-cyan-400 hover:to-blue-500 text-white rounded-xl font-bold shadow-lg shadow-cyan-500/25 transform hover:scale-[1.02] transition-all"
>
Finish Rigging

View File

@ -1,7 +1,9 @@
import React, { useEffect, useRef, useState } from 'react';
import { useFaceTracking } from '../hooks/useFaceTracking';
import { removeBackground } from '../services/visionService';
import { AvatarConfig, Rect } from '../types';
import LoadingSpinner from './LoadingSpinner';
interface StudioProps {
avatar: AvatarConfig;
@ -50,6 +52,7 @@ const Sprite: React.FC<{
const Studio: React.FC<StudioProps> = ({ avatar, onBack }) => {
const videoRef = useRef<HTMLVideoElement>(null);
const [cameraReady, setCameraReady] = useState(false);
const [processedImageUrl, setProcessedImageUrl] = useState<string | null>(null);
// We use the custom hook to get tracking data
const { trackingData, isLoading: isModelLoading, startTracking } = useFaceTracking(videoRef.current);
@ -85,6 +88,22 @@ const Studio: React.FC<StudioProps> = ({ avatar, onBack }) => {
};
}, []);
// Process Image for Background Removal (AI Segmentation)
useEffect(() => {
  // chromaKeyColor is treated purely as an on/off flag: when it is empty the
  // image is shown untouched.
  if (!avatar.chromaKeyColor) {
    setProcessedImageUrl(avatar.imageUrl);
    return;
  }

  // Drop any previously processed image so a result belonging to an older
  // avatar is not displayed while the new one is still being processed.
  setProcessedImageUrl(null);

  // Set by the cleanup below; guards against applying a result after the
  // avatar changed or the component unmounted.
  let cancelled = false;

  removeBackground(avatar.imageUrl)
    .then((result) => {
      if (!cancelled) {
        setProcessedImageUrl(result);
      }
    })
    .catch((err) => {
      // Fall back to the unprocessed image rather than leaving the loading
      // state on screen indefinitely.
      console.error(err);
      if (!cancelled) {
        setProcessedImageUrl(avatar.imageUrl);
      }
    });

  return () => {
    cancelled = true;
  };
}, [avatar.imageUrl, avatar.chromaKeyColor]);
// Start tracking when both camera and model are ready
useEffect(() => {
if (cameraReady && !isModelLoading) {
@ -164,90 +183,102 @@ const Studio: React.FC<StudioProps> = ({ avatar, onBack }) => {
{/* Avatar Container */}
<div className="relative w-[600px] h-[600px] flex items-center justify-center z-10">
<div
className="relative w-full h-full flex items-center justify-center"
style={getAvatarStyle()}
>
{/* Main Character Body */}
<img
src={avatar.imageUrl}
alt="Avatar"
className="w-full h-full object-contain drop-shadow-[0_0_15px_rgba(168,85,247,0.5)]"
style={{
// Use clip-path to hide the right-side assets from the main view, keeping only the main character
clipPath: 'inset(0 25% 0 0)' // Hides the right 25% (where assets are)
}}
/>
{/* Dynamic Eyelids (High Fidelity Sprites) */}
{avatar.leftEye && avatar.textureClosedEye && (
<Sprite
imageSrc={avatar.imageUrl}
sourceRect={avatar.textureClosedEye}
className="absolute pointer-events-none z-20"
style={{
left: `${avatar.leftEye.x * 100}%`,
top: `${avatar.leftEye.y * 100}%`,
width: `${avatar.leftEye.w * 100}%`,
height: `${avatar.leftEye.h * 100}%`,
opacity: trackingData.isBlinkingLeft ? 1 : 0,
transition: 'opacity 0.05s linear',
}}
/>
)}
{avatar.rightEye && avatar.textureClosedEye && (
<Sprite
imageSrc={avatar.imageUrl}
sourceRect={avatar.textureClosedEye}
className="absolute pointer-events-none z-20"
style={{
left: `${avatar.rightEye.x * 100}%`,
top: `${avatar.rightEye.y * 100}%`,
width: `${avatar.rightEye.w * 100}%`,
height: `${avatar.rightEye.h * 100}%`,
opacity: trackingData.isBlinkingRight ? 1 : 0,
transition: 'opacity 0.05s linear',
}}
/>
)}
{/* Dynamic Mouth Animation */}
{avatar.mouth && avatar.textureOpenMouth && (
<div
className="absolute pointer-events-none flex items-center justify-center z-10"
style={{
left: `${avatar.mouth.x * 100}%`,
top: `${avatar.mouth.y * 100}%`,
width: `${avatar.mouth.w * 100}%`,
height: `${avatar.mouth.h * 100}%`,
}}
>
{/* Skin Patch - Hides the static closed mouth when speaking */}
<div
className="absolute w-[120%] h-[120%] transition-opacity duration-75"
style={{
backgroundColor: avatar.skinColor || '#fcd3bf',
opacity: trackingData.mouthOpen > 0.1 ? 1 : 0,
filter: 'blur(4px)', // Blends edges
borderRadius: '50%'
}}
{!processedImageUrl ? (
<div className="flex flex-col items-center justify-center gap-4">
<LoadingSpinner />
<span className="text-cyan-400 font-mono text-sm">REMOVING BACKGROUND...</span>
</div>
) : (
<div
className="relative w-full h-full flex items-center justify-center"
style={getAvatarStyle()}
>
{/* Main Character Body (Cropped using Sprite) */}
{avatar.mainBody ? (
<Sprite
imageSrc={processedImageUrl}
sourceRect={avatar.mainBody}
className="w-full h-full object-contain drop-shadow-[0_0_15px_rgba(168,85,247,0.5)]"
/>
) : (
/* Fallback to full image if mainBody is missing */
<img
src={processedImageUrl}
alt="Avatar"
className="w-full h-full object-contain drop-shadow-[0_0_15px_rgba(168,85,247,0.5)]"
/>
)}
{/* Mouth Sprite - Scales based on mouth openness */}
<Sprite
imageSrc={avatar.imageUrl}
sourceRect={avatar.textureOpenMouth}
className="w-full h-full"
style={{
opacity: trackingData.mouthOpen > 0.05 ? 1 : 0,
// Scale open mouth based on volume
transform: `scaleY(${0.8 + trackingData.mouthOpen * 0.5})`,
}}
/>
</div>
)}
</div>
{/* Dynamic Eyelids (High Fidelity Sprites) */}
{avatar.leftEye && avatar.textureClosedEye && (
<Sprite
imageSrc={processedImageUrl}
sourceRect={avatar.textureClosedEye}
className="absolute pointer-events-none z-20"
style={{
left: `${avatar.leftEye.x * 100}%`,
top: `${avatar.leftEye.y * 100}%`,
width: `${avatar.leftEye.w * 100}%`,
height: `${avatar.leftEye.h * 100}%`,
opacity: trackingData.isBlinkingLeft ? 1 : 0,
transition: 'opacity 0.05s linear',
}}
/>
)}
{avatar.rightEye && avatar.textureClosedEye && (
<Sprite
imageSrc={processedImageUrl}
sourceRect={avatar.textureClosedEye}
className="absolute pointer-events-none z-20"
style={{
left: `${avatar.rightEye.x * 100}%`,
top: `${avatar.rightEye.y * 100}%`,
width: `${avatar.rightEye.w * 100}%`,
height: `${avatar.rightEye.h * 100}%`,
opacity: trackingData.isBlinkingRight ? 1 : 0,
transition: 'opacity 0.05s linear',
}}
/>
)}
{/* Dynamic Mouth Animation */}
{avatar.mouth && avatar.textureOpenMouth && (
<div
className="absolute pointer-events-none flex items-center justify-center z-10"
style={{
left: `${avatar.mouth.x * 100}%`,
top: `${avatar.mouth.y * 100}%`,
width: `${avatar.mouth.w * 100}%`,
height: `${avatar.mouth.h * 100}%`,
}}
>
{/* Skin Patch - Hides the static closed mouth when speaking */}
<div
className="absolute w-[120%] h-[120%] transition-opacity duration-75"
style={{
backgroundColor: avatar.skinColor || '#fcd3bf',
opacity: trackingData.mouthOpen > 0.1 ? 1 : 0,
filter: 'blur(4px)', // Blends edges
borderRadius: '50%'
}}
/>
{/* Mouth Sprite - Scales based on mouth openness */}
<Sprite
imageSrc={processedImageUrl}
sourceRect={avatar.textureOpenMouth}
className="w-full h-full"
style={{
opacity: trackingData.mouthOpen > 0.05 ? 1 : 0,
// Scale open mouth based on volume
transform: `scaleY(${0.8 + trackingData.mouthOpen * 0.5})`,
}}
/>
</div>
)}
</div>
)}
{/* Status Indicator overlay if tracking is lost */}
{(!cameraReady) && (

95
services/imageService.ts Normal file
View File

@ -0,0 +1,95 @@
import { Rect } from '../types';
/**
 * Reads a `File` and resolves with its contents encoded as a data URL.
 *
 * @param file The file to read.
 * @returns Promise resolving with the reader's result (cast to string);
 *          rejects with whatever the reader passes to `onerror`.
 */
export const fileToDataUrl = (file: File): Promise<string> =>
  new Promise<string>((resolve, reject) => {
    const fileReader = new FileReader();
    fileReader.onerror = reject;
    fileReader.onload = (loadEvent) => {
      const dataUrl = loadEvent.target?.result as string;
      resolve(dataUrl);
    };
    fileReader.readAsDataURL(file);
  });
/**
 * Loads `src` into a new image element and resolves once it has loaded.
 *
 * The element's `crossOrigin` is set to "anonymous" before loading starts.
 *
 * @param src URL (or data URL) of the image.
 * @returns Promise resolving with the loaded element; rejects with whatever
 *          the element passes to `onerror`.
 */
export const loadImage = (src: string): Promise<HTMLImageElement> =>
  new Promise<HTMLImageElement>((resolve, reject) => {
    const image = new Image();
    image.crossOrigin = "anonymous";
    // Both handlers are attached before `src` is assigned.
    image.onerror = reject;
    image.onload = () => {
      resolve(image);
    };
    image.src = src;
  });
/**
 * Combines the uploaded avatar images into a single sheet.
 *
 * Layout: the base image is drawn at the top-left. The optional blink and
 * talk variants are stacked in a column immediately to its right (blink on
 * top, talk directly below it). The canvas is as wide as base + widest
 * variant and as tall as the taller of the base and the variant column.
 *
 * All returned rects are normalized (0..1) against the stitched canvas size.
 * When neither variant is supplied, no canvas is created: the base image's
 * data URL is returned unchanged and `mainBody` covers the whole image.
 *
 * @param base  Main character image (required).
 * @param blink Optional closed-eyes variant.
 * @param talk  Optional open-mouth variant.
 * @returns The sheet as a data URL plus the rect of each region drawn.
 * @throws Error if a 2D canvas context cannot be obtained. Rejections from
 *         reading or decoding any of the files propagate to the caller.
 */
export const stitchAssets = async (
  base: File,
  blink?: File,
  talk?: File
): Promise<{ imageUrl: string; mainBody: Rect; textureClosedEye?: Rect; textureOpenMouth?: Rect }> => {
  // Reads one file and decodes it, keeping the data URL alongside the image.
  const decode = async (file: File) => {
    const dataUrl = await fileToDataUrl(file);
    return { dataUrl, img: await loadImage(dataUrl) };
  };

  // The files are independent of each other, so decode them concurrently
  // instead of waiting for each one in turn.
  const [baseAsset, blinkAsset, talkAsset] = await Promise.all([
    decode(base),
    blink ? decode(blink) : null,
    talk ? decode(talk) : null
  ]);
  const baseImg = baseAsset.img;
  const blinkImg = blinkAsset ? blinkAsset.img : null;
  const talkImg = talkAsset ? talkAsset.img : null;

  // Sidebar (right column) is as wide as the widest variant.
  const sidebarWidth = Math.max(blinkImg?.width || 0, talkImg?.width || 0);

  // If there are no variants, just return the base image as is.
  if (sidebarWidth === 0) {
    return {
      imageUrl: baseAsset.dataUrl,
      mainBody: { x: 0, y: 0, w: 1, h: 1 }
    };
  }

  const totalWidth = baseImg.width + sidebarWidth;
  const totalHeight = Math.max(baseImg.height, (blinkImg?.height || 0) + (talkImg?.height || 0));

  const canvas = document.createElement('canvas');
  canvas.width = totalWidth;
  canvas.height = totalHeight;
  const ctx = canvas.getContext('2d');
  if (!ctx) throw new Error("Could not get canvas context");

  // Base goes at the origin.
  ctx.drawImage(baseImg, 0, 0);
  const mainBody: Rect = {
    x: 0,
    y: 0,
    w: baseImg.width / totalWidth,
    h: baseImg.height / totalHeight
  };

  // Blink variant: top of the sidebar.
  let textureClosedEye: Rect | undefined;
  if (blinkImg) {
    ctx.drawImage(blinkImg, baseImg.width, 0);
    textureClosedEye = {
      x: baseImg.width / totalWidth,
      y: 0,
      w: blinkImg.width / totalWidth,
      h: blinkImg.height / totalHeight
    };
  }

  // Talk variant: directly under the blink variant (or at the top if there
  // is no blink variant).
  let textureOpenMouth: Rect | undefined;
  if (talkImg) {
    const yPos = blinkImg ? blinkImg.height : 0;
    ctx.drawImage(talkImg, baseImg.width, yPos);
    textureOpenMouth = {
      x: baseImg.width / totalWidth,
      y: yPos / totalHeight,
      w: talkImg.width / totalWidth,
      h: talkImg.height / totalHeight
    };
  }

  return {
    imageUrl: canvas.toDataURL('image/png'),
    mainBody,
    textureClosedEye,
    textureOpenMouth
  };
};

View File

@ -1,8 +1,9 @@
import { FaceLandmarker, FilesetResolver } from '@mediapipe/tasks-vision';
import { FaceLandmarker, FilesetResolver, ImageSegmenter } from '@mediapipe/tasks-vision';
import { Rect } from '../types';
let faceLandmarker: FaceLandmarker | null = null;
let imageSegmenter: ImageSegmenter | null = null;
// Initialize the vision model for static image analysis
const initVision = async () => {
@ -26,6 +27,29 @@ const initVision = async () => {
}
};
/**
 * Lazily creates the module-level `imageSegmenter` used for background
 * removal. Does nothing when a segmenter already exists.
 *
 * Initialization failures are logged and swallowed, leaving
 * `imageSegmenter` unset; callers must check it before use.
 */
const initSegmenter = async () => {
  if (!imageSegmenter) {
    try {
      const visionFileset = await FilesetResolver.forVisionTasks(
        "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.18/wasm"
      );

      // Single-image mode; only per-pixel confidence masks are requested.
      imageSegmenter = await ImageSegmenter.createFromOptions(visionFileset, {
        baseOptions: {
          modelAssetPath: "https://storage.googleapis.com/mediapipe-models/image_segmenter/selfie_segmenter/float16/latest/selfie_segmenter.tflite",
          delegate: "GPU"
        },
        runningMode: "IMAGE",
        outputCategoryMask: false,
        outputConfidenceMasks: true
      });
    } catch (e) {
      console.error("Failed to initialize segmenter:", e);
    }
  }
};
export const analyzeAvatarImage = async (imageUrl: string): Promise<{ leftEye: Rect, rightEye: Rect, mouth: Rect, skinColor: string } | null> => {
try {
await initVision();
@ -126,3 +150,71 @@ export const analyzeAvatarImage = async (imageUrl: string): Promise<{ leftEye: R
return null;
}
};
/**
 * Returns a copy of the image at `imageUrl` in which every pixel the
 * segmenter considers background has been made fully transparent.
 *
 * This is best-effort: if the segmenter is unavailable, the image fails to
 * load, no confidence mask is produced, no canvas context can be obtained,
 * or segmentation throws, the promise resolves with the original `imageUrl`
 * instead of rejecting.
 *
 * NOTE(review): the whole image is segmented. If callers pass a stitched
 * sheet, regions that are not detected as a person (e.g. separate texture
 * assets) would presumably be cleared as well; confirm against callers.
 *
 * @param imageUrl URL or data URL of the image to process.
 * @returns A PNG data URL with the background removed, or `imageUrl`
 *          unchanged on any failure.
 */
export const removeBackground = async (imageUrl: string): Promise<string> => {
  try {
    await initSegmenter();

    // Capture in a local const so the non-null narrowing holds inside the
    // onload callback without a `!` assertion.
    const segmenter = imageSegmenter;
    if (!segmenter) return imageUrl;

    return await new Promise<string>((resolve) => {
      const img = new Image();
      img.crossOrigin = "anonymous";

      img.onload = () => {
        try {
          // 1. Segment the image
          const segmentResult = segmenter.segment(img);
          try {
            const confidenceMasks = segmentResult.confidenceMasks;
            if (!confidenceMasks || confidenceMasks.length === 0) {
              resolve(imageUrl);
              return;
            }

            // 2. Create canvas and context
            const canvas = document.createElement('canvas');
            canvas.width = img.width;
            canvas.height = img.height;
            const ctx = canvas.getContext('2d');
            if (!ctx) {
              resolve(imageUrl);
              return;
            }

            // 3. Draw original image and grab its RGBA bytes
            ctx.drawImage(img, 0, 0);
            const imageData = ctx.getImageData(0, 0, img.width, img.height);
            const pixels = imageData.data;

            // 4. Apply mask
            // Per the original author's note, the selfie_segmenter mask holds
            // one confidence value per pixel (0.0 to 1.0) of being a person.
            const mask = confidenceMasks[0].getAsFloat32Array();
            // 0.3 was chosen by the original author as a balance that keeps
            // hair details. This is a hard cutoff; kept pixels retain their
            // original alpha (no soft edge is applied).
            const personThreshold = 0.3;
            for (let i = 0; i < mask.length; i++) {
              if (mask[i] < personThreshold) {
                pixels[i * 4 + 3] = 0; // Set Alpha to 0
              }
            }

            ctx.putImageData(imageData, 0, 0);
            resolve(canvas.toDataURL('image/png'));
          } finally {
            // Masks returned by segment() are owned by the caller; release
            // them so repeated calls do not leak their backing resources.
            segmentResult.close();
          }
        } catch (e) {
          console.error("Segmentation error", e);
          resolve(imageUrl);
        }
      };

      img.onerror = () => resolve(imageUrl);
      img.src = imageUrl;
    });
  } catch (e) {
    console.error("Background removal failed", e);
    return imageUrl;
  }
};

View File

@ -23,6 +23,8 @@ export interface AvatarConfig {
skinColor?: string;
textureClosedEye?: Rect;
textureOpenMouth?: Rect;
mainBody?: Rect;
chromaKeyColor?: string;
}
export interface TrackingData {