feat: Add image upload and background removal

Enables users to upload custom avatar assets and automatically remove the background from the generated image.

New features:
- Avatar creation now supports uploading base, blink, and talk textures.
- Added the ability to define the main body bounding box during rigging.
- The vision service now includes image segmentation for background removal.
- The Studio component dynamically processes the avatar image for background removal if chroma key is enabled.
2025-11-20 21:24:22 +01:00 · 2025-11-20 21:24:22 +01:00 · ddb2455416
commit ddb2455416
parent 3eff403fb4
7 changed files with 528 additions and 161 deletions
--- a/App.tsx
+++ b/App.tsx
@ -33,7 +33,8 @@ const App: React.FC = () => {

  const handleRiggingComplete = (data: { 
    leftEye: Rect, rightEye: Rect, mouth: Rect, skinColor: string,
-    textureClosedEye: Rect, textureOpenMouth: Rect
+    textureClosedEye: Rect, textureOpenMouth: Rect,
+    mainBody: Rect, chromaKeyColor: string
  }) => {
    if (generatedData) {
      setAvatar({
@ -46,6 +47,8 @@ const App: React.FC = () => {
        skinColor: data.skinColor,
        textureClosedEye: data.textureClosedEye,
        textureOpenMouth: data.textureOpenMouth,
+        mainBody: data.mainBody,
+        chromaKeyColor: data.chromaKeyColor
      });
      setAppState(AppState.STUDIO);
    }
--- a/components/AvatarCreator.tsx
+++ b/components/AvatarCreator.tsx
@ -1,20 +1,31 @@
-
 import React, { useState } from 'react';
 import { generateAvatarImage } from '../services/geminiService';
 import { analyzeAvatarImage } from '../services/visionService';
+import { stitchAssets } from '../services/imageService';
 import LoadingSpinner from './LoadingSpinner';
 import { Rect } from '../types';

 interface AvatarCreatorProps {
-  onAvatarGenerated: (url: string, name: string, initialData?: { leftEye: Rect, rightEye: Rect, mouth: Rect, skinColor: string }) => void;
+  onAvatarGenerated: (url: string, name: string, initialData?: { 
+    leftEye?: Rect, rightEye?: Rect, mouth?: Rect, skinColor?: string,
+    mainBody?: Rect, textureClosedEye?: Rect, textureOpenMouth?: Rect 
+  }) => void;
 }

 const AvatarCreator: React.FC<AvatarCreatorProps> = ({ onAvatarGenerated }) => {
+  const [mode, setMode] = useState<'generate' | 'upload'>('generate');
+  
+  // Generation State
  const [prompt, setPrompt] = useState('');
  const [name, setName] = useState('');
-  const [status, setStatus] = useState<'idle' | 'generating' | 'analyzing'>('idle');
+  const [status, setStatus] = useState<'idle' | 'generating' | 'analyzing' | 'stitching'>('idle');
  const [error, setError] = useState<string | null>(null);

+  // Upload State
+  const [baseFile, setBaseFile] = useState<File | null>(null);
+  const [blinkFile, setBlinkFile] = useState<File | null>(null);
+  const [talkFile, setTalkFile] = useState<File | null>(null);
+
  const handleGenerate = async () => {
    if (!prompt || !name) return;
    
@ -25,12 +36,11 @@ const AvatarCreator: React.FC<AvatarCreatorProps> = ({ onAvatarGenerated }) => {
      // 1. Generate Image (Now creates a character sheet)
      const imageUrl = await generateAvatarImage(prompt);
      
-      // 2. Analyze Image for Landmarks (Initial guess)
-      // Note: Vision service will likely find the main face on the left, which is what we want for targets.
+      // 2. Analyze Image for Landmarks
      setStatus('analyzing');
      const analysisData = await analyzeAvatarImage(imageUrl);
      
-      // 3. Pass to parent (to go to Rigging)
+      // 3. Pass to parent
      if (analysisData) {
        onAvatarGenerated(imageUrl, name, analysisData);
      } else {
@ -44,68 +54,161 @@ const AvatarCreator: React.FC<AvatarCreatorProps> = ({ onAvatarGenerated }) => {
    }
  };

+  /**
+   * Upload flow: merges the chosen files into one sheet, runs landmark
+   * analysis on that sheet, then hands the combined result to the parent.
+   * Does nothing unless both a base file and a model name are present.
+   */
+  const handleUpload = async () => {
+    if (!(baseFile && name)) return;
+
+    setStatus('stitching');
+    setError(null);
+
+    try {
+      // Step 1: compose the base art and any optional variants into a single sheet.
+      const stitched = await stitchAssets(baseFile, blinkFile ?? undefined, talkFile ?? undefined);
+
+      // Step 2: landmark analysis runs over the whole stitched sheet. The base
+      // art sits on the left (or is the entire image when no variants were
+      // given), so the face is expected to be picked up from there.
+      setStatus('analyzing');
+      const landmarks = await analyzeAvatarImage(stitched.imageUrl);
+
+      // Step 3: vision output first, with the stitch geometry layered on top.
+      const riggingSeed = {
+        ...(landmarks || {}),
+        mainBody: stitched.mainBody,
+        textureClosedEye: stitched.textureClosedEye,
+        textureOpenMouth: stitched.textureOpenMouth
+      };
+
+      onAvatarGenerated(stitched.imageUrl, name, riggingSeed);
+    } catch (err) {
+      console.error(err);
+      setError("Failed to process uploaded images. Please ensure they are valid image files.");
+    } finally {
+      // Always drop back to idle so the submit button becomes usable again.
+      setStatus('idle');
+    }
+  };
+
+  /**
+   * Mirrors a file input's current selection into the given state setter.
+   * An empty selection stores null, so a cleared input can never leave a
+   * previously chosen file behind in state.
+   *
+   * @param e - change event fired by an `<input type="file">`
+   * @param setter - state setter that receives the first selected file, or null
+   */
+  const handleFileChange = (e: React.ChangeEvent<HTMLInputElement>, setter: (f: File | null) => void) => {
+    setter(e.target.files?.[0] ?? null);
+  };
+
  return (
-    <div className="max-w-2xl mx-auto bg-slate-800/50 backdrop-blur-lg border border-slate-700 p-8 rounded-2xl shadow-2xl">
-      <div className="text-center mb-8">
-        <h2 className="text-3xl font-bold text-transparent bg-clip-text bg-gradient-to-r from-cyan-400 to-purple-500 mb-2">
-          Design Your Avatar
-        </h2>
-        <p className="text-slate-400">
-          Describe your dream VTuber model. Gemini will generate a character sheet with expression assets.
-        </p>
-      </div>
-
-      <div className="space-y-6">
-        <div>
-          <label className="block text-sm font-medium text-slate-300 mb-2">Model Name</label>
-          <input
-            type="text"
-            value={name}
-            onChange={(e) => setName(e.target.value)}
-            placeholder="e.g., Neon Kitsune"
-            className="w-full bg-slate-900/50 border border-slate-600 rounded-xl px-4 py-3 text-white placeholder-slate-500 focus:ring-2 focus:ring-cyan-500 focus:border-transparent transition-all outline-none"
-          />
-        </div>
-
-        <div>
-          <label className="block text-sm font-medium text-slate-300 mb-2">Description</label>
-          <textarea
-            value={prompt}
-            onChange={(e) => setPrompt(e.target.value)}
-            placeholder="e.g., A cyberpunk anime girl with neon blue hair, glowing headphones, wearing a futuristic jacket..."
-            className="w-full h-32 bg-slate-900/50 border border-slate-600 rounded-xl px-4 py-3 text-white placeholder-slate-500 focus:ring-2 focus:ring-cyan-500 focus:border-transparent transition-all outline-none resize-none"
-          />
-        </div>
-
-        {error && (
-          <div className="p-3 bg-red-500/20 border border-red-500/50 rounded-lg text-red-200 text-sm">
-            {error}
-          </div>
-        )}
-
+    <div className="max-w-2xl mx-auto bg-slate-800/50 backdrop-blur-lg border border-slate-700 rounded-2xl shadow-2xl overflow-hidden">
+      {/* Tabs */}
+      <div className="flex border-b border-slate-700">
        <button
-          onClick={handleGenerate}
-          disabled={status !== 'idle' || !prompt || !name}
-          className={`w-full py-4 rounded-xl font-bold text-lg transition-all duration-200 ${
-            status !== 'idle' || !prompt || !name
-              ? 'bg-slate-700 text-slate-500 cursor-not-allowed'
-              : 'bg-gradient-to-r from-cyan-500 to-blue-600 hover:from-cyan-400 hover:to-blue-500 text-white shadow-lg shadow-cyan-500/25 transform hover:scale-[1.02]'
+          onClick={() => setMode('generate')}
+          className={`flex-1 py-4 text-sm font-bold uppercase tracking-wider transition-colors ${
+            mode === 'generate' 
+              ? 'bg-slate-700/50 text-cyan-400 border-b-2 border-cyan-400' 
+              : 'text-slate-500 hover:text-slate-300'
          }`}
        >
-          {status !== 'idle' ? (
-            <div className="flex items-center justify-center gap-3">
-               <LoadingSpinner />
-               <span>{status === 'generating' ? 'Dreaming up Sheet...' : 'Analyzing Features...'}</span>
+          AI Generator
+        </button>
+        <button
+          onClick={() => setMode('upload')}
+          className={`flex-1 py-4 text-sm font-bold uppercase tracking-wider transition-colors ${
+            mode === 'upload' 
+              ? 'bg-slate-700/50 text-purple-400 border-b-2 border-purple-400' 
+              : 'text-slate-500 hover:text-slate-300'
+          }`}
+        >
+          Upload Assets
+        </button>
+      </div>
+
+      <div className="p-8">
+        <div className="text-center mb-8">
+          <h2 className="text-3xl font-bold text-transparent bg-clip-text bg-gradient-to-r from-cyan-400 to-purple-500 mb-2">
+            {mode === 'generate' ? 'Design Your Avatar' : 'Import Your Model'}
+          </h2>
+          <p className="text-slate-400">
+            {mode === 'generate' 
+              ? 'Describe your dream VTuber model. Gemini will generate a character sheet with expression assets.'
+              : 'Upload your existing character art. We support separate files for blink and talk variants.'
+            }
+          </p>
+        </div>
+
+        <div className="space-y-6">
+          <div>
+            <label className="block text-sm font-medium text-slate-300 mb-2">Model Name</label>
+            <input
+              type="text"
+              value={name}
+              onChange={(e) => setName(e.target.value)}
+              placeholder="e.g., Neon Kitsune"
+              className="w-full bg-slate-900/50 border border-slate-600 rounded-xl px-4 py-3 text-white placeholder-slate-500 focus:ring-2 focus:ring-cyan-500 focus:border-transparent transition-all outline-none"
+            />
+          </div>
+
+          {mode === 'generate' ? (
+            <div>
+              <label className="block text-sm font-medium text-slate-300 mb-2">Description</label>
+              <textarea
+                value={prompt}
+                onChange={(e) => setPrompt(e.target.value)}
+                placeholder="e.g., A cyberpunk anime girl with neon blue hair, glowing headphones, wearing a futuristic jacket..."
+                className="w-full h-32 bg-slate-900/50 border border-slate-600 rounded-xl px-4 py-3 text-white placeholder-slate-500 focus:ring-2 focus:ring-cyan-500 focus:border-transparent transition-all outline-none resize-none"
+              />
            </div>
          ) : (
-            <div className="flex items-center justify-center gap-2">
-              <span>Generate Model</span>
-              <svg xmlns="http://www.w3.org/2000/svg" className="h-5 w-5" viewBox="0 0 20 20" fill="currentColor">
-                <path fillRule="evenodd" d="M10 18a8 8 0 100-16 8 8 0 000 16zm3.707-8.707l-3-3a1 1 0 00-1.414 1.414L10.586 9H7a1 1 0 100 2h3.586l-1.293 1.293a1 1 0 101.414 1.414l3-3a1 1 0 000-1.414z" clipRule="evenodd" />
-              </svg>
+            <div className="space-y-4">
+              <div className="p-4 bg-slate-900/50 rounded-xl border border-slate-600 border-dashed">
+                 <label className="block text-sm font-bold text-slate-300 mb-2">Base Model (Required)</label>
+                 <input type="file" accept="image/*" onChange={(e) => handleFileChange(e, setBaseFile)} className="text-sm text-slate-400 file:mr-4 file:py-2 file:px-4 file:rounded-full file:border-0 file:text-sm file:font-semibold file:bg-cyan-500/10 file:text-cyan-400 hover:file:bg-cyan-500/20"/>
+                 <p className="text-xs text-slate-500 mt-1">The main look of your character (Eyes Open, Mouth Closed).</p>
+              </div>
+              
+              <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
+                <div className="p-4 bg-slate-900/50 rounded-xl border border-slate-600 border-dashed">
+                  <label className="block text-sm font-bold text-slate-300 mb-2">Closed Eyes (Optional)</label>
+                  <input type="file" accept="image/*" onChange={(e) => handleFileChange(e, setBlinkFile)} className="text-sm text-slate-400 file:mr-4 file:py-2 file:px-4 file:rounded-full file:border-0 file:text-sm file:font-semibold file:bg-purple-500/10 file:text-purple-400 hover:file:bg-purple-500/20"/>
+                </div>
+                <div className="p-4 bg-slate-900/50 rounded-xl border border-slate-600 border-dashed">
+                  <label className="block text-sm font-bold text-slate-300 mb-2">Open Mouth (Optional)</label>
+                  <input type="file" accept="image/*" onChange={(e) => handleFileChange(e, setTalkFile)} className="text-sm text-slate-400 file:mr-4 file:py-2 file:px-4 file:rounded-full file:border-0 file:text-sm file:font-semibold file:bg-pink-500/10 file:text-pink-400 hover:file:bg-pink-500/20"/>
+                </div>
+              </div>
            </div>
          )}
-        </button>
+
+          {error && (
+            <div className="p-3 bg-red-500/20 border border-red-500/50 rounded-lg text-red-200 text-sm">
+              {error}
+            </div>
+          )}
+
+          <button
+            onClick={mode === 'generate' ? handleGenerate : handleUpload}
+            disabled={status !== 'idle' || !name || (mode === 'generate' && !prompt) || (mode === 'upload' && !baseFile)}
+            className={`w-full py-4 rounded-xl font-bold text-lg transition-all duration-200 ${
+              status !== 'idle' || !name || (mode === 'generate' && !prompt) || (mode === 'upload' && !baseFile)
+                ? 'bg-slate-700 text-slate-500 cursor-not-allowed'
+                : 'bg-gradient-to-r from-cyan-500 to-blue-600 hover:from-cyan-400 hover:to-blue-500 text-white shadow-lg shadow-cyan-500/25 transform hover:scale-[1.02]'
+            }`}
+          >
+            {status !== 'idle' ? (
+              <div className="flex items-center justify-center gap-3">
+                 <LoadingSpinner />
+                 <span>
+                   {status === 'generating' ? 'Dreaming up Sheet...' : 
+                    status === 'stitching' ? 'Processing Assets...' :
+                    'Analyzing Features...'}
+                 </span>
+              </div>
+            ) : (
+              <div className="flex items-center justify-center gap-2">
+                <span>{mode === 'generate' ? 'Generate Model' : 'Create Model'}</span>
+                <svg xmlns="http://www.w3.org/2000/svg" className="h-5 w-5" viewBox="0 0 20 20" fill="currentColor">
+                  <path fillRule="evenodd" d="M10 18a8 8 0 100-16 8 8 0 000 16zm3.707-8.707l-3-3a1 1 0 00-1.414 1.414L10.586 9H7a1 1 0 100 2h3.586l-1.293 1.293a1 1 0 101.414 1.414l3-3a1 1 0 000-1.414z" clipRule="evenodd" />
+                </svg>
+              </div>
+            )}
+          </button>
+        </div>
      </div>
    </div>
  );
--- a/components/RiggingEditor.tsx
+++ b/components/RiggingEditor.tsx
@ -8,10 +8,11 @@ interface RiggingEditorProps {
  onComplete: (data: { 
    leftEye: Rect; rightEye: Rect; mouth: Rect; skinColor: string;
    textureClosedEye: Rect; textureOpenMouth: Rect;
+    mainBody: Rect; chromaKeyColor: string;
  }) => void;
 }

-type ActiveFeature = 'leftEye' | 'rightEye' | 'mouth' | 'textureClosedEye' | 'textureOpenMouth' | null;
+type ActiveFeature = 'leftEye' | 'rightEye' | 'mouth' | 'textureClosedEye' | 'textureOpenMouth' | 'mainBody' | null;

 const ResizableBox: React.FC<{
  rect: Rect;
@ -125,11 +126,17 @@ const RiggingEditor: React.FC<RiggingEditorProps> = ({ imageUrl, initialData, on
  const [rightEye, setRightEye] = useState<Rect>(initialData?.rightEye || { x: 0.45, y: 0.4, w: 0.1, h: 0.1 });
  const [mouth, setMouth] = useState<Rect>(initialData?.mouth || { x: 0.35, y: 0.55, w: 0.1, h: 0.05 });
  
+  // Main Body (Default to left 70%)
+  const [mainBody, setMainBody] = useState<Rect>({ x: 0.05, y: 0.05, w: 0.65, h: 0.9 });
+
  // Sources (Right side of image usually)
  const [textureClosedEye, setTextureClosedEye] = useState<Rect>({ x: 0.7, y: 0.1, w: 0.2, h: 0.2 });
  const [textureOpenMouth, setTextureOpenMouth] = useState<Rect>({ x: 0.7, y: 0.5, w: 0.2, h: 0.2 });

  const [skinColor, setSkinColor] = useState<string>(initialData?.skinColor || '#fcd3bf');
+  // Use this simply as a boolean flag now, passing 'AI_AUTO' if enabled
+  const [useAiBackground, setUseAiBackground] = useState<boolean>(true);
+  
  const [activeFeature, setActiveFeature] = useState<ActiveFeature>(null);

  return (
@ -137,8 +144,9 @@ const RiggingEditor: React.FC<RiggingEditorProps> = ({ imageUrl, initialData, on
      <div className="text-center mb-6">
        <h2 className="text-2xl font-bold text-white mb-2">Rig Your Character</h2>
        <p className="text-slate-400 text-sm">
-          1. Match the <b>Target</b> boxes (Red/Blue/Green) to the main character.<br/>
-          2. Match the <b>Source</b> boxes (Purple/Orange) to the extra assets on the right.
+          1. Adjust the <b>Main Body</b> (Yellow) to frame your character.<br/>
+          2. Match the <b>Targets</b> (Red/Blue/Green) to the face features.<br/>
+          3. Match the <b>Sources</b> (Purple/Orange) to the assets on the right.
        </p>
      </div>

@ -155,6 +163,13 @@ const RiggingEditor: React.FC<RiggingEditorProps> = ({ imageUrl, initialData, on
            
            {/* Aspect ratio container to map percentage boxes correctly */}
            <div className="absolute inset-0 w-full h-full">
+              {/* Main Body */}
+              <ResizableBox 
+                rect={mainBody} color="#facc15" label="Main Body" 
+                isActive={activeFeature === 'mainBody'}
+                onUpdate={setMainBody} onActivate={() => setActiveFeature('mainBody')}
+              />
+
              {/* Targets */}
              <ResizableBox 
                rect={leftEye} color="#ef4444" label="Left Eye Target" 
@ -190,21 +205,43 @@ const RiggingEditor: React.FC<RiggingEditorProps> = ({ imageUrl, initialData, on
        {/* Sidebar Controls */}
        <div className="w-72 flex flex-col gap-4 bg-slate-800/50 p-6 rounded-xl border border-slate-700 h-full overflow-y-auto">
          
-          <div className="bg-slate-900/50 p-4 rounded-lg">
-            <label className="block text-xs font-bold text-slate-400 mb-2 uppercase">Skin Color Fallback</label>
-            <div className="flex items-center gap-3">
-              <input 
-                type="color" 
-                value={skinColor}
-                onChange={(e) => setSkinColor(e.target.value)}
-                className="w-8 h-8 rounded cursor-pointer border-0 p-0"
-              />
-              <span className="text-xs text-slate-400 font-mono">{skinColor}</span>
-            </div>
+          <div className="bg-slate-900/50 p-4 rounded-lg space-y-3">
+             <div>
+                <label className="block text-xs font-bold text-slate-400 mb-2 uppercase">Background Removal</label>
+                <div className="flex items-center justify-between p-2 bg-slate-800 rounded-lg border border-slate-700">
+                  <span className="text-xs text-slate-300">AI Magic Removal</span>
+                  <label className="relative inline-flex items-center cursor-pointer">
+                    <input 
+                      type="checkbox" 
+                      className="sr-only peer" 
+                      checked={useAiBackground}
+                      onChange={(e) => setUseAiBackground(e.target.checked)}
+                    />
+                    <div className="w-9 h-5 bg-slate-600 peer-focus:outline-none rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-4 after:w-4 after:transition-all peer-checked:bg-cyan-500"></div>
+                  </label>
+                </div>
+             </div>
+             <div>
+                <label className="block text-xs font-bold text-slate-400 mb-1 uppercase">Eyelid Skin Color</label>
+                <div className="flex items-center gap-3">
+                  <input 
+                    type="color" 
+                    value={skinColor}
+                    onChange={(e) => setSkinColor(e.target.value)}
+                    className="w-8 h-8 rounded cursor-pointer border-0 p-0"
+                  />
+                  <span className="text-xs text-slate-400 font-mono">Fallback</span>
+                </div>
+             </div>
          </div>

          <div className="space-y-3 flex-1">
-            <div className="text-xs font-bold text-slate-400 uppercase border-b border-slate-700 pb-1">Targets (Main Face)</div>
+            <div className="text-xs font-bold text-slate-400 uppercase border-b border-slate-700 pb-1">Composition</div>
+            <div className="flex items-center gap-2 text-sm text-slate-300 cursor-pointer hover:text-white" onClick={() => setActiveFeature('mainBody')}>
+              <div className="w-3 h-3 bg-yellow-400 rounded-full shadow"></div> Main Body Crop
+            </div>
+
+            <div className="text-xs font-bold text-slate-400 uppercase border-b border-slate-700 pb-1 mt-4">Targets (Main Face)</div>
            <div className="flex items-center gap-2 text-sm text-slate-300 cursor-pointer hover:text-white" onClick={() => setActiveFeature('leftEye')}>
              <div className="w-3 h-3 bg-red-500 rounded-full shadow"></div> Left Eye
            </div>
@ -226,7 +263,11 @@ const RiggingEditor: React.FC<RiggingEditorProps> = ({ imageUrl, initialData, on

          <div className="mt-4">
            <button
-              onClick={() => onComplete({ leftEye, rightEye, mouth, skinColor, textureClosedEye, textureOpenMouth })}
+              onClick={() => onComplete({ 
+                leftEye, rightEye, mouth, skinColor, 
+                textureClosedEye, textureOpenMouth, mainBody, 
+                chromaKeyColor: useAiBackground ? 'AI_AUTO' : '' 
+              })}
              className="w-full py-4 bg-gradient-to-r from-cyan-500 to-blue-600 hover:from-cyan-400 hover:to-blue-500 text-white rounded-xl font-bold shadow-lg shadow-cyan-500/25 transform hover:scale-[1.02] transition-all"
            >
              Finish Rigging
--- a/components/Studio.tsx
+++ b/components/Studio.tsx
@ -1,7 +1,9 @@

 import React, { useEffect, useRef, useState } from 'react';
 import { useFaceTracking } from '../hooks/useFaceTracking';
+import { removeBackground } from '../services/visionService';
 import { AvatarConfig, Rect } from '../types';
+import LoadingSpinner from './LoadingSpinner';

 interface StudioProps {
  avatar: AvatarConfig;
@ -50,6 +52,7 @@ const Sprite: React.FC<{
 const Studio: React.FC<StudioProps> = ({ avatar, onBack }) => {
  const videoRef = useRef<HTMLVideoElement>(null);
  const [cameraReady, setCameraReady] = useState(false);
+  const [processedImageUrl, setProcessedImageUrl] = useState<string | null>(null);
  
  // We use the custom hook to get tracking data
  const { trackingData, isLoading: isModelLoading, startTracking } = useFaceTracking(videoRef.current);
@ -85,6 +88,22 @@ const Studio: React.FC<StudioProps> = ({ avatar, onBack }) => {
    };
  }, []);

+  // Process Image for Background Removal (AI Segmentation).
+  // Any non-empty chromaKeyColor is treated purely as an "enabled" flag; when
+  // it is empty the avatar image is shown untouched.
+  useEffect(() => {
+    if (!avatar.chromaKeyColor) {
+      setProcessedImageUrl(avatar.imageUrl);
+      return;
+    }
+
+    // Flipped by the cleanup below so that a result arriving after the avatar
+    // changed (or the component unmounted) is discarded instead of applied.
+    let isStale = false;
+
+    const process = async () => {
+      try {
+        const result = await removeBackground(avatar.imageUrl);
+        if (!isStale) setProcessedImageUrl(result);
+      } catch (err) {
+        // Fall back to the unprocessed image; otherwise processedImageUrl stays
+        // null and the "REMOVING BACKGROUND..." spinner would never clear.
+        console.error(err);
+        if (!isStale) setProcessedImageUrl(avatar.imageUrl);
+      }
+    };
+
+    // `void` marks the call as intentionally not awaited; failures are handled inside.
+    void process();
+
+    return () => {
+      isStale = true;
+    };
+  }, [avatar.imageUrl, avatar.chromaKeyColor]);
+
  // Start tracking when both camera and model are ready
  useEffect(() => {
    if (cameraReady && !isModelLoading) {
@ -164,90 +183,102 @@ const Studio: React.FC<StudioProps> = ({ avatar, onBack }) => {

        {/* Avatar Container */}
        <div className="relative w-[600px] h-[600px] flex items-center justify-center z-10">
-          <div 
-            className="relative w-full h-full flex items-center justify-center"
-            style={getAvatarStyle()}
-          >
-             {/* Main Character Body */}
-             <img 
-                src={avatar.imageUrl} 
-                alt="Avatar"
-                className="w-full h-full object-contain drop-shadow-[0_0_15px_rgba(168,85,247,0.5)]"
-                style={{
-                  // Use clip-path to hide the right-side assets from the main view, keeping only the main character
-                  clipPath: 'inset(0 25% 0 0)' // Hides the right 25% (where assets are)
-                }}
-             />
-
-             {/* Dynamic Eyelids (High Fidelity Sprites) */}
-             {avatar.leftEye && avatar.textureClosedEye && (
-               <Sprite 
-                 imageSrc={avatar.imageUrl}
-                 sourceRect={avatar.textureClosedEye}
-                 className="absolute pointer-events-none z-20"
-                 style={{
-                   left: `${avatar.leftEye.x * 100}%`,
-                   top: `${avatar.leftEye.y * 100}%`,
-                   width: `${avatar.leftEye.w * 100}%`,
-                   height: `${avatar.leftEye.h * 100}%`,
-                   opacity: trackingData.isBlinkingLeft ? 1 : 0,
-                   transition: 'opacity 0.05s linear',
-                 }}
-               />
-             )}
-             
-             {avatar.rightEye && avatar.textureClosedEye && (
-               <Sprite 
-                 imageSrc={avatar.imageUrl}
-                 sourceRect={avatar.textureClosedEye}
-                 className="absolute pointer-events-none z-20"
-                 style={{
-                   left: `${avatar.rightEye.x * 100}%`,
-                   top: `${avatar.rightEye.y * 100}%`,
-                   width: `${avatar.rightEye.w * 100}%`,
-                   height: `${avatar.rightEye.h * 100}%`,
-                   opacity: trackingData.isBlinkingRight ? 1 : 0,
-                   transition: 'opacity 0.05s linear',
-                 }}
-               />
-             )}
-
-             {/* Dynamic Mouth Animation */}
-             {avatar.mouth && avatar.textureOpenMouth && (
-               <div 
-                 className="absolute pointer-events-none flex items-center justify-center z-10"
-                 style={{
-                   left: `${avatar.mouth.x * 100}%`,
-                   top: `${avatar.mouth.y * 100}%`,
-                   width: `${avatar.mouth.w * 100}%`,
-                   height: `${avatar.mouth.h * 100}%`,
-                 }}
-               >
-                  {/* Skin Patch - Hides the static closed mouth when speaking */}
-                  <div 
-                    className="absolute w-[120%] h-[120%] transition-opacity duration-75"
-                    style={{
-                      backgroundColor: avatar.skinColor || '#fcd3bf',
-                      opacity: trackingData.mouthOpen > 0.1 ? 1 : 0,
-                      filter: 'blur(4px)', // Blends edges
-                      borderRadius: '50%'
-                    }}
+          {!processedImageUrl ? (
+            <div className="flex flex-col items-center justify-center gap-4">
+               <LoadingSpinner />
+               <span className="text-cyan-400 font-mono text-sm">REMOVING BACKGROUND...</span>
+            </div>
+          ) : (
+            <div 
+              className="relative w-full h-full flex items-center justify-center"
+              style={getAvatarStyle()}
+            >
+              {/* Main Character Body (Cropped using Sprite) */}
+              {avatar.mainBody ? (
+                  <Sprite 
+                    imageSrc={processedImageUrl}
+                    sourceRect={avatar.mainBody}
+                    className="w-full h-full object-contain drop-shadow-[0_0_15px_rgba(168,85,247,0.5)]"
                  />
+              ) : (
+                  /* Fallback to full image if mainBody is missing */
+                  <img 
+                    src={processedImageUrl} 
+                    alt="Avatar"
+                    className="w-full h-full object-contain drop-shadow-[0_0_15px_rgba(168,85,247,0.5)]"
+                  />
+              )}

-                  {/* Mouth Sprite - Scales based on mouth openness */}
-                   <Sprite 
-                     imageSrc={avatar.imageUrl}
-                     sourceRect={avatar.textureOpenMouth}
-                     className="w-full h-full"
-                     style={{
-                       opacity: trackingData.mouthOpen > 0.05 ? 1 : 0,
-                       // Scale open mouth based on volume
-                       transform: `scaleY(${0.8 + trackingData.mouthOpen * 0.5})`,
-                     }}
-                   />
-               </div>
-             )}
-          </div>
+              {/* Dynamic Eyelids (High Fidelity Sprites) */}
+              {avatar.leftEye && avatar.textureClosedEye && (
+                <Sprite 
+                  imageSrc={processedImageUrl}
+                  sourceRect={avatar.textureClosedEye}
+                  className="absolute pointer-events-none z-20"
+                  style={{
+                    left: `${avatar.leftEye.x * 100}%`,
+                    top: `${avatar.leftEye.y * 100}%`,
+                    width: `${avatar.leftEye.w * 100}%`,
+                    height: `${avatar.leftEye.h * 100}%`,
+                    opacity: trackingData.isBlinkingLeft ? 1 : 0,
+                    transition: 'opacity 0.05s linear',
+                  }}
+                />
+              )}
+              
+              {avatar.rightEye && avatar.textureClosedEye && (
+                <Sprite 
+                  imageSrc={processedImageUrl}
+                  sourceRect={avatar.textureClosedEye}
+                  className="absolute pointer-events-none z-20"
+                  style={{
+                    left: `${avatar.rightEye.x * 100}%`,
+                    top: `${avatar.rightEye.y * 100}%`,
+                    width: `${avatar.rightEye.w * 100}%`,
+                    height: `${avatar.rightEye.h * 100}%`,
+                    opacity: trackingData.isBlinkingRight ? 1 : 0,
+                    transition: 'opacity 0.05s linear',
+                  }}
+                />
+              )}
+
+              {/* Dynamic Mouth Animation */}
+              {avatar.mouth && avatar.textureOpenMouth && (
+                <div 
+                  className="absolute pointer-events-none flex items-center justify-center z-10"
+                  style={{
+                    left: `${avatar.mouth.x * 100}%`,
+                    top: `${avatar.mouth.y * 100}%`,
+                    width: `${avatar.mouth.w * 100}%`,
+                    height: `${avatar.mouth.h * 100}%`,
+                  }}
+                >
+                    {/* Skin Patch - Hides the static closed mouth when speaking */}
+                    <div 
+                      className="absolute w-[120%] h-[120%] transition-opacity duration-75"
+                      style={{
+                        backgroundColor: avatar.skinColor || '#fcd3bf',
+                        opacity: trackingData.mouthOpen > 0.1 ? 1 : 0,
+                        filter: 'blur(4px)', // Blends edges
+                        borderRadius: '50%'
+                      }}
+                    />
+
+                    {/* Mouth Sprite - Scales based on mouth openness */}
+                    <Sprite 
+                      imageSrc={processedImageUrl}
+                      sourceRect={avatar.textureOpenMouth}
+                      className="w-full h-full"
+                      style={{
+                        opacity: trackingData.mouthOpen > 0.05 ? 1 : 0,
+                        // Scale open mouth based on volume
+                        transform: `scaleY(${0.8 + trackingData.mouthOpen * 0.5})`,
+                      }}
+                    />
+                </div>
+              )}
+            </div>
+          )}
             
             {/* Status Indicator overlay if tracking is lost */}
             {(!cameraReady) && (
--- a/services/imageService.ts
+++ b/services/imageService.ts
@ -0,0 +1,95 @@
+import { Rect } from '../types';
+
+/**
+ * Reads a File through a FileReader and resolves with its data URL.
+ *
+ * @param file - The file to read.
+ * @returns A promise that resolves with the reader's result once loading
+ *          completes, and rejects with whatever the reader's error handler
+ *          receives.
+ */
+export const fileToDataUrl = (file: File): Promise<string> =>
+  new Promise((resolve, reject) => {
+    const fileReader = new FileReader();
+    fileReader.onerror = reject;
+    fileReader.onload = (loadEvent) => resolve(loadEvent.target?.result as string);
+    fileReader.readAsDataURL(file);
+  });
+
+/**
+ * Loads the given source into a fresh HTMLImageElement.
+ *
+ * `crossOrigin` is set to "anonymous" before `src` is assigned.
+ *
+ * @param src - URL (or data URL) of the image to load.
+ * @returns A promise that resolves with the element once it has loaded and
+ *          rejects with whatever the element's error handler receives.
+ */
+export const loadImage = (src: string): Promise<HTMLImageElement> =>
+  new Promise((resolve, reject) => {
+    const image = new Image();
+    image.crossOrigin = "anonymous";
+    image.onerror = reject;
+    image.onload = () => resolve(image);
+    image.src = src;
+  });
+
+/**
+ * Combines the base avatar image and its optional blink/talk variants into a
+ * single texture atlas.
+ *
+ * Layout: the base image is drawn at the top-left. A sidebar immediately to its
+ * right holds the blink texture at the top and the talk texture directly below
+ * it. Every returned rect is normalized against the atlas width/height.
+ *
+ * @param base - Base avatar image (required).
+ * @param blink - Optional closed-eye texture.
+ * @param talk - Optional open-mouth texture.
+ * @returns The atlas as a PNG data URL together with the normalized rects of
+ *          the base image and of each variant that was supplied. When no
+ *          variant contributes any width, the base file's own data URL is
+ *          returned unchanged and `mainBody` covers the whole image.
+ * @throws Error if a 2D canvas context cannot be obtained. The promise also
+ *         rejects if any file cannot be read or decoded.
+ */
+export const stitchAssets = async (
+  base: File,
+  blink?: File,
+  talk?: File
+): Promise<{ imageUrl: string; mainBody: Rect; textureClosedEye?: Rect; textureOpenMouth?: Rect }> => {
+    // Decodes an optional variant file; resolves to null when it was not supplied.
+    const loadVariant = async (file?: File): Promise<HTMLImageElement | null> =>
+        file ? loadImage(await fileToDataUrl(file)) : null;
+
+    // The base data URL is kept because it is returned as-is when there are no variants.
+    const loadBase = async (): Promise<{ dataUrl: string; img: HTMLImageElement }> => {
+        const dataUrl = await fileToDataUrl(base);
+        return { dataUrl, img: await loadImage(dataUrl) };
+    };
+
+    // The three assets are independent, so read and decode them concurrently.
+    const [{ dataUrl: baseData, img: baseImg }, blinkImg, talkImg] = await Promise.all([
+        loadBase(),
+        loadVariant(blink),
+        loadVariant(talk)
+    ]);
+
+    // Sidebar width = widest variant (0 when neither variant is present).
+    const sidebarWidth = Math.max(blinkImg?.width ?? 0, talkImg?.width ?? 0);
+
+    // If there are no variants, just return the base image as is
+    if (sidebarWidth === 0) {
+        return {
+            imageUrl: baseData,
+            mainBody: { x: 0, y: 0, w: 1, h: 1 }
+        };
+    }
+
+    // The atlas must fit the base image and the stacked variants, whichever is taller.
+    const totalWidth = baseImg.width + sidebarWidth;
+    const totalHeight = Math.max(baseImg.height, (blinkImg?.height ?? 0) + (talkImg?.height ?? 0));
+
+    const canvas = document.createElement('canvas');
+    canvas.width = totalWidth;
+    canvas.height = totalHeight;
+    const ctx = canvas.getContext('2d');
+    if (!ctx) throw new Error("Could not get canvas context");
+
+    // Draw Base
+    ctx.drawImage(baseImg, 0, 0);
+
+    // Calculate normalized rects
+    const mainBody: Rect = {
+        x: 0,
+        y: 0,
+        w: baseImg.width / totalWidth,
+        h: baseImg.height / totalHeight
+    };
+
+    let textureClosedEye: Rect | undefined;
+    if (blinkImg) {
+        // Blink texture goes at the top of the sidebar.
+        ctx.drawImage(blinkImg, baseImg.width, 0);
+        textureClosedEye = {
+            x: baseImg.width / totalWidth,
+            y: 0,
+            w: blinkImg.width / totalWidth,
+            h: blinkImg.height / totalHeight
+        };
+    }
+
+    let textureOpenMouth: Rect | undefined;
+    if (talkImg) {
+        // Talk texture sits directly below the blink texture (or at the top if there is none).
+        const yPos = blinkImg ? blinkImg.height : 0;
+        ctx.drawImage(talkImg, baseImg.width, yPos);
+        textureOpenMouth = {
+            x: baseImg.width / totalWidth,
+            y: yPos / totalHeight,
+            w: talkImg.width / totalWidth,
+            h: talkImg.height / totalHeight
+        };
+    }
+
+    return {
+        imageUrl: canvas.toDataURL('image/png'),
+        mainBody,
+        textureClosedEye,
+        textureOpenMouth
+    };
+};
--- a/services/visionService.ts
+++ b/services/visionService.ts
@ -1,8 +1,9 @@

-import { FaceLandmarker, FilesetResolver } from '@mediapipe/tasks-vision';
+import { FaceLandmarker, FilesetResolver, ImageSegmenter } from '@mediapipe/tasks-vision';
 import { Rect } from '../types';

 let faceLandmarker: FaceLandmarker | null = null;
+let imageSegmenter: ImageSegmenter | null = null;

 // Initialize the vision model for static image analysis
 const initVision = async () => {
@ -26,6 +27,29 @@ const initVision = async () => {
  }
 };

+/**
+ * Lazily creates the module-level `imageSegmenter` used for background removal.
+ *
+ * Returns immediately when a segmenter already exists. Any failure while
+ * resolving the WASM fileset or creating the segmenter is logged and swallowed,
+ * so `imageSegmenter` is left unset and callers must check it afterwards.
+ */
+const initSegmenter = async () => {
+  if (imageSegmenter !== null) return;
+
+  const wasmRoot = "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.18/wasm";
+  const modelUrl = "https://storage.googleapis.com/mediapipe-models/image_segmenter/selfie_segmenter/float16/latest/selfie_segmenter.tflite";
+
+  try {
+    const vision = await FilesetResolver.forVisionTasks(wasmRoot);
+
+    // Single-image mode, requesting confidence masks only (no category mask).
+    imageSegmenter = await ImageSegmenter.createFromOptions(vision, {
+      baseOptions: {
+        modelAssetPath: modelUrl,
+        delegate: "GPU"
+      },
+      runningMode: "IMAGE",
+      outputCategoryMask: false,
+      outputConfidenceMasks: true
+    });
+  } catch (e) {
+    console.error("Failed to initialize segmenter:", e);
+  }
+};
+
 export const analyzeAvatarImage = async (imageUrl: string): Promise<{ leftEye: Rect, rightEye: Rect, mouth: Rect, skinColor: string } | null> => {
  try {
    await initVision();
@ -126,3 +150,71 @@ export const analyzeAvatarImage = async (imageUrl: string): Promise<{ leftEye: R
    return null;
  }
 };
+
+/**
+ * Removes the background from an image using the segmenter's first confidence
+ * mask, making every pixel whose confidence is below the threshold fully
+ * transparent.
+ *
+ * This is best-effort: on any failure (segmenter unavailable, image fails to
+ * load, no masks returned, no 2D context, mask/image size mismatch, or an
+ * exception during segmentation) the original `imageUrl` is returned unchanged.
+ *
+ * @param imageUrl - URL or data URL of the image to process.
+ * @returns A PNG data URL with the background made transparent, or the
+ *          original `imageUrl` when processing is not possible. Never rejects.
+ */
+export const removeBackground = async (imageUrl: string): Promise<string> => {
+  // Threshold for person confidence (0.3 is usually a good balance for hair details)
+  const personConfidenceThreshold = 0.3;
+
+  try {
+    await initSegmenter();
+    // Capture the segmenter locally so the load callback needs no non-null assertion.
+    const segmenter = imageSegmenter;
+    if (!segmenter) return imageUrl;
+
+    return new Promise<string>((resolve) => {
+      const img = new Image();
+      img.crossOrigin = "anonymous";
+      img.onload = () => {
+        try {
+          // 1. Segment the image
+          const segmentResult = segmenter.segment(img);
+          try {
+            const confidenceMasks = segmentResult.confidenceMasks;
+
+            if (!confidenceMasks || confidenceMasks.length === 0) {
+              resolve(imageUrl);
+              return;
+            }
+
+            // 2. Create canvas and context
+            const canvas = document.createElement('canvas');
+            canvas.width = img.width;
+            canvas.height = img.height;
+            const ctx = canvas.getContext('2d');
+
+            if (!ctx) {
+              resolve(imageUrl);
+              return;
+            }
+
+            // 3. Draw original image
+            ctx.drawImage(img, 0, 0);
+            const imageData = ctx.getImageData(0, 0, img.width, img.height);
+            const pixels = imageData.data;
+
+            // 4. Apply mask
+            // The selfie_segmenter output mask is a Float32Array where values
+            // indicate confidence of being a person (0.0 to 1.0).
+            const mask = confidenceMasks[0].getAsFloat32Array();
+
+            // The mask is indexed per pixel; if its size does not match the
+            // canvas, applying it would erase the wrong pixels.
+            if (mask.length * 4 !== pixels.length) {
+              console.warn("Segmentation mask size does not match image size");
+              resolve(imageUrl);
+              return;
+            }
+
+            for (let i = 0; i < mask.length; i++) {
+              if (mask[i] < personConfidenceThreshold) {
+                pixels[i * 4 + 3] = 0; // Set Alpha to 0
+              }
+            }
+
+            ctx.putImageData(imageData, 0, 0);
+            resolve(canvas.toDataURL('image/png'));
+          } finally {
+            // The synchronous segment() call hands back masks that the caller
+            // owns; release them on every path.
+            segmentResult.close();
+          }
+        } catch (e) {
+          console.error("Segmentation error", e);
+          resolve(imageUrl);
+        }
+      };
+      img.onerror = () => resolve(imageUrl);
+      img.src = imageUrl;
+    });
+  } catch (e) {
+    console.error("Background removal failed", e);
+    return imageUrl;
+  }
+};
--- a/types.ts
+++ b/types.ts
@ -23,6 +23,8 @@ export interface AvatarConfig {
  skinColor?: string;
  textureClosedEye?: Rect;
  textureOpenMouth?: Rect;
+  mainBody?: Rect;
+  chromaKeyColor?: string;
 }

 export interface TrackingData {