From ddb2455416c7f222612343f53e0d10218744ca8b Mon Sep 17 00:00:00 2001 From: James Twose <39407392+jameshtwose@users.noreply.github.com> Date: Thu, 20 Nov 2025 21:24:22 +0100 Subject: [PATCH] feat: Add image upload and background removal Enables users to upload custom avatar assets and automatically remove the background from the generated image. New features: - Avatar creation now supports uploading base, blink, and talk textures. - Added ability to define the main body bounding box during rigging. - Vision service now includes image segmentation for background removal. - Studio component dynamically processes the avatar image for background removal if chroma key is enabled. --- App.tsx | 5 +- components/AvatarCreator.tsx | 225 +++++++++++++++++++++++++---------- components/RiggingEditor.tsx | 73 +++++++++--- components/Studio.tsx | 195 +++++++++++++++++------------- services/imageService.ts | 95 +++++++++++++++ services/visionService.ts | 94 ++++++++++++++- types.ts | 2 + 7 files changed, 528 insertions(+), 161 deletions(-) create mode 100644 services/imageService.ts diff --git a/App.tsx b/App.tsx index f320ef6..d051beb 100644 --- a/App.tsx +++ b/App.tsx @@ -33,7 +33,8 @@ const App: React.FC = () => { const handleRiggingComplete = (data: { leftEye: Rect, rightEye: Rect, mouth: Rect, skinColor: string, - textureClosedEye: Rect, textureOpenMouth: Rect + textureClosedEye: Rect, textureOpenMouth: Rect, + mainBody: Rect, chromaKeyColor: string }) => { if (generatedData) { setAvatar({ @@ -46,6 +47,8 @@ const App: React.FC = () => { skinColor: data.skinColor, textureClosedEye: data.textureClosedEye, textureOpenMouth: data.textureOpenMouth, + mainBody: data.mainBody, + chromaKeyColor: data.chromaKeyColor }); setAppState(AppState.STUDIO); } diff --git a/components/AvatarCreator.tsx b/components/AvatarCreator.tsx index 1852bd1..35d944e 100644 --- a/components/AvatarCreator.tsx +++ b/components/AvatarCreator.tsx @@ -1,20 +1,31 @@ - import React, { useState } from 'react'; import { generateAvatarImage } from '../services/geminiService'; import { analyzeAvatarImage } from '../services/visionService'; +import { stitchAssets } from '../services/imageService'; import LoadingSpinner from './LoadingSpinner'; import { Rect } from '../types'; interface AvatarCreatorProps { - onAvatarGenerated: (url: string, name: string, initialData?: { leftEye: Rect, rightEye: Rect, mouth: Rect, skinColor: string }) => void; + onAvatarGenerated: (url: string, name: string, initialData?: { + leftEye?: Rect, rightEye?: Rect, mouth?: Rect, skinColor?: string, + mainBody?: Rect, textureClosedEye?: Rect, textureOpenMouth?: Rect + }) => void; } const AvatarCreator: React.FC = ({ onAvatarGenerated }) => { + const [mode, setMode] = useState<'generate' | 'upload'>('generate'); + + // Generation State const [prompt, setPrompt] = useState(''); const [name, setName] = useState(''); - const [status, setStatus] = useState<'idle' | 'generating' | 'analyzing'>('idle'); + const [status, setStatus] = useState<'idle' | 'generating' | 'analyzing' | 'stitching'>('idle'); const [error, setError] = useState(null); + // Upload State + const [baseFile, setBaseFile] = useState(null); + const [blinkFile, setBlinkFile] = useState(null); + const [talkFile, setTalkFile] = useState(null); + const handleGenerate = async () => { if (!prompt || !name) return; @@ -25,12 +36,11 @@ const AvatarCreator: React.FC = ({ onAvatarGenerated }) => { // 1. Generate Image (Now creates a character sheet) const imageUrl = await generateAvatarImage(prompt); - // 2. Analyze Image for Landmarks (Initial guess) - // Note: Vision service will likely find the main face on the left, which is what we want for targets. + // 2. Analyze Image for Landmarks setStatus('analyzing'); const analysisData = await analyzeAvatarImage(imageUrl); - // 3. Pass to parent (to go to Rigging) + // 3. Pass to parent if (analysisData) { onAvatarGenerated(imageUrl, name, analysisData); } else { @@ -44,71 +54,164 @@ const AvatarCreator: React.FC = ({ onAvatarGenerated }) => { } }; + const handleUpload = async () => { + if (!baseFile || !name) return; + + setStatus('stitching'); + setError(null); + + try { + // 1. Stitch Assets into Sheet + const { imageUrl, mainBody, textureClosedEye, textureOpenMouth } = await stitchAssets(baseFile, blinkFile || undefined, talkFile || undefined); + + // 2. Analyze the Main Body part of the image + // Note: analyzeAvatarImage analyzes the whole image, but since we put the face on the left (or full image), + // it should find the face correctly. + setStatus('analyzing'); + const analysisData = await analyzeAvatarImage(imageUrl); + + // 3. Combine manual stitch data with automatic vision data + const initialData = { + ...(analysisData || {}), + mainBody, + textureClosedEye, + textureOpenMouth + }; + + onAvatarGenerated(imageUrl, name, initialData); + } catch (err) { + console.error(err); + setError("Failed to process uploaded images. Please ensure they are valid image files."); + } finally { + setStatus('idle'); + } + }; + + const handleFileChange = (e: React.ChangeEvent, setter: (f: File | null) => void) => { + if (e.target.files && e.target.files[0]) { + setter(e.target.files[0]); + } + }; + return ( -
-
-

- Design Your Avatar -

-

- Describe your dream VTuber model. Gemini will generate a character sheet with expression assets. -

-
- -
-
- - setName(e.target.value)} - placeholder="e.g., Neon Kitsune" - className="w-full bg-slate-900/50 border border-slate-600 rounded-xl px-4 py-3 text-white placeholder-slate-500 focus:ring-2 focus:ring-cyan-500 focus:border-transparent transition-all outline-none" - /> -
- -
- -