vtube-studio/components/AvatarCreator.tsx
James Twose 5078d67d4f refactor: Improve avatar asset processing
Separates asset analysis into distinct steps to accurately capture face landmarks.
Introduces `fileToDataUrl` utility and modifies `stitchAssets` to accept image source strings, reducing redundant file processing and improving clarity.
2025-11-20 22:03:53 +01:00

266 lines
12 KiB
TypeScript

import React, { useState } from 'react';
import { generateAvatarImage } from '../services/geminiService';
import { analyzeAvatarImage } from '../services/visionService';
import { stitchAssets, fileToDataUrl } from '../services/imageService';
import LoadingSpinner from './LoadingSpinner';
import { Rect } from '../types';
/**
 * Props accepted by the avatar creation panel.
 */
interface AvatarCreatorProps {
  /**
   * Invoked once an avatar image is ready to be handed to the parent.
   *
   * @param url - Source of the finished image (the generated sheet, or the
   *   sheet stitched together from uploaded files).
   * @param name - Model name typed by the user.
   * @param initialData - Optional landmark and texture rectangles plus a skin
   *   colour. Omitted when no analysis result is available. The upload path
   *   expresses every rect as a 0-1 fraction of the stitched image;
   *   NOTE(review): presumably the generate path uses the same convention —
   *   confirm against `analyzeAvatarImage`.
   */
  onAvatarGenerated: (
    url: string,
    name: string,
    initialData?: {
      leftEye?: Rect;
      rightEye?: Rect;
      mouth?: Rect;
      skinColor?: string;
      mainBody?: Rect;
      textureClosedEye?: Rect;
      textureOpenMouth?: Rect;
    }
  ) => void;
}
/**
 * Optional payload accepted by `onAvatarGenerated`, derived from the props
 * type so the two can never drift apart.
 */
type AvatarInitialData = NonNullable<Parameters<AvatarCreatorProps['onAvatarGenerated']>[2]>;

/** Phases of the creator; anything other than `idle` means work is in flight. */
type CreatorStatus = 'idle' | 'generating' | 'analyzing' | 'stitching';

/** Caption shown on the submit button while each non-idle phase is running. */
const BUSY_LABELS: Record<Exclude<CreatorStatus, 'idle'>, string> = {
  generating: 'Dreaming up Sheet...',
  stitching: 'Processing Assets...',
  analyzing: 'Analyzing Features...',
};

/**
 * Re-expresses `rect` (fractions of `container`) in the coordinate space that
 * `container` itself is expressed in.
 *
 * @returns The mapped rect, or `undefined` when `rect` is missing so the
 *   caller can keep whatever fallback it already has.
 */
const mapRectInto = (rect: Rect | null | undefined, container: Rect): Rect | undefined =>
  rect
    ? {
        x: container.x + rect.x * container.w,
        y: container.y + rect.y * container.h,
        w: rect.w * container.w,
        h: rect.h * container.h,
      }
    : undefined;

/** Smallest rect that encloses both `a` and `b`. */
const boundingRect = (a: Rect, b: Rect): Rect => {
  const x = Math.min(a.x, b.x);
  const y = Math.min(a.y, b.y);
  const right = Math.max(a.x + a.w, b.x + b.w);
  const bottom = Math.max(a.y + a.h, b.y + b.h);
  return { x, y, w: right - x, h: bottom - y };
};

/** Converts an uploaded file to a data URL and runs landmark analysis on it. */
const loadAndAnalyze = async (file: File) => {
  const dataUrl = await fileToDataUrl(file);
  const analysis = await analyzeAvatarImage(dataUrl);
  return { dataUrl, analysis };
};

/**
 * Panel that produces a new avatar either by generating an image from a text
 * prompt or by stitching user-uploaded images into a single sheet, then hands
 * the result to the parent through `onAvatarGenerated`.
 */
const AvatarCreator: React.FC<AvatarCreatorProps> = ({ onAvatarGenerated }) => {
  const [mode, setMode] = useState<'generate' | 'upload'>('generate');

  // State shared by both modes
  const [name, setName] = useState('');
  const [status, setStatus] = useState<CreatorStatus>('idle');
  const [error, setError] = useState<string | null>(null);

  // Generation state
  const [prompt, setPrompt] = useState('');

  // Upload state
  const [baseFile, setBaseFile] = useState<File | null>(null);
  const [blinkFile, setBlinkFile] = useState<File | null>(null);
  const [talkFile, setTalkFile] = useState<File | null>(null);

  /** Generates an image from the prompt, analyzes it, and notifies the parent. */
  const handleGenerate = async () => {
    if (!prompt || !name) return;
    setStatus('generating');
    setError(null);
    try {
      // 1. Generate the image (a character sheet)
      const imageUrl = await generateAvatarImage(prompt);

      // 2. Analyze it for landmarks
      setStatus('analyzing');
      const analysisData = await analyzeAvatarImage(imageUrl);

      // 3. Hand over to the parent; landmarks are optional
      if (analysisData) {
        onAvatarGenerated(imageUrl, name, analysisData);
      } else {
        onAvatarGenerated(imageUrl, name);
      }
    } catch (err) {
      console.error(err);
      setError("Failed to generate avatar. Please try again.");
    } finally {
      setStatus('idle');
    }
  };

  /**
   * Analyzes each uploaded image on its own, stitches them into one sheet, and
   * maps the per-image landmarks into the stitched sheet's coordinate space.
   */
  const handleUpload = async () => {
    if (!baseFile || !name) return;
    setStatus('stitching');
    setError(null);
    try {
      // 1. Each image is analyzed separately so that landmarks of the main
      //    face are not confused by the other faces on the stitched sheet.
      const base = await loadAndAnalyze(baseFile);
      const blink = blinkFile ? await loadAndAnalyze(blinkFile) : undefined;
      const talk = talkFile ? await loadAndAnalyze(talkFile) : undefined;

      // 2. Stitch the assets into a single sheet
      const {
        imageUrl,
        mainBody,
        textureClosedEye: blinkSlot,
        textureOpenMouth: talkSlot,
      } = await stitchAssets(base.dataUrl, blink?.dataUrl, talk?.dataUrl);

      // 3. Start from the whole-image slots; they remain the fallback whenever
      //    a tighter crop cannot be derived below.
      const initialData: AvatarInitialData = {
        mainBody,
        textureClosedEye: blinkSlot ?? undefined,
        textureOpenMouth: talkSlot ?? undefined,
      };

      // Targets on the main body (eyes, mouth, skin colour)
      if (base.analysis) {
        initialData.leftEye = mapRectInto(base.analysis.leftEye, mainBody);
        initialData.rightEye = mapRectInto(base.analysis.rightEye, mainBody);
        initialData.mouth = mapRectInto(base.analysis.mouth, mainBody);
        initialData.skinColor = base.analysis.skinColor;
      }

      // Source textures: tighten the crop only when the features were found.
      // Closed eyes in particular may go undetected, in which case the
      // whole-image slot set above is kept.
      const blinkFace = blink?.analysis;
      if (blinkSlot && blinkFace?.leftEye && blinkFace.rightEye) {
        initialData.textureClosedEye = mapRectInto(
          boundingRect(blinkFace.leftEye, blinkFace.rightEye),
          blinkSlot
        );
      }
      const talkMouth = talk?.analysis?.mouth;
      if (talkSlot && talkMouth) {
        initialData.textureOpenMouth = mapRectInto(talkMouth, talkSlot);
      }

      onAvatarGenerated(imageUrl, name, initialData);
    } catch (err) {
      console.error(err);
      setError("Failed to process uploaded images. Please ensure they are valid image files.");
    } finally {
      setStatus('idle');
    }
  };

  /**
   * Mirrors a file input into state. A change event carrying no file (the
   * selection was cleared) stores `null`, so state never keeps a file that the
   * input no longer shows.
   */
  const handleFileChange = (e: React.ChangeEvent<HTMLInputElement>, setter: (f: File | null) => void) => {
    setter(e.target.files?.[0] ?? null);
  };

  // Single source of truth for both the `disabled` attribute and the styling.
  const isSubmitDisabled =
    status !== 'idle' ||
    !name ||
    (mode === 'generate' && !prompt) ||
    (mode === 'upload' && !baseFile);

  return (
    <div className="max-w-2xl mx-auto bg-slate-800/50 backdrop-blur-lg border border-slate-700 rounded-2xl shadow-2xl overflow-hidden">
      {/* Tabs */}
      <div className="flex border-b border-slate-700">
        <button
          type="button"
          onClick={() => setMode('generate')}
          className={`flex-1 py-4 text-sm font-bold uppercase tracking-wider transition-colors ${
            mode === 'generate'
              ? 'bg-slate-700/50 text-cyan-400 border-b-2 border-cyan-400'
              : 'text-slate-500 hover:text-slate-300'
          }`}
        >
          AI Generator
        </button>
        <button
          type="button"
          onClick={() => setMode('upload')}
          className={`flex-1 py-4 text-sm font-bold uppercase tracking-wider transition-colors ${
            mode === 'upload'
              ? 'bg-slate-700/50 text-purple-400 border-b-2 border-purple-400'
              : 'text-slate-500 hover:text-slate-300'
          }`}
        >
          Upload Assets
        </button>
      </div>
      <div className="p-8">
        <div className="text-center mb-8">
          <h2 className="text-3xl font-bold text-transparent bg-clip-text bg-gradient-to-r from-cyan-400 to-purple-500 mb-2">
            {mode === 'generate' ? 'Design Your Avatar' : 'Import Your Model'}
          </h2>
          <p className="text-slate-400">
            {mode === 'generate'
              ? 'Describe your dream VTuber model. Gemini will generate a character sheet with expression assets.'
              : 'Upload your existing character art. We support separate files for blink and talk variants.'}
          </p>
        </div>
        <div className="space-y-6">
          <div>
            <label className="block text-sm font-medium text-slate-300 mb-2">Model Name</label>
            <input
              type="text"
              value={name}
              onChange={(e) => setName(e.target.value)}
              placeholder="e.g., Neon Kitsune"
              className="w-full bg-slate-900/50 border border-slate-600 rounded-xl px-4 py-3 text-white placeholder-slate-500 focus:ring-2 focus:ring-cyan-500 focus:border-transparent transition-all outline-none"
            />
          </div>
          {mode === 'generate' ? (
            <div>
              <label className="block text-sm font-medium text-slate-300 mb-2">Description</label>
              <textarea
                value={prompt}
                onChange={(e) => setPrompt(e.target.value)}
                placeholder="e.g., A cyberpunk anime girl with neon blue hair, glowing headphones, wearing a futuristic jacket..."
                className="w-full h-32 bg-slate-900/50 border border-slate-600 rounded-xl px-4 py-3 text-white placeholder-slate-500 focus:ring-2 focus:ring-cyan-500 focus:border-transparent transition-all outline-none resize-none"
              />
            </div>
          ) : (
            <div className="space-y-4">
              <div className="p-4 bg-slate-900/50 rounded-xl border border-slate-600 border-dashed">
                <label className="block text-sm font-bold text-slate-300 mb-2">Base Model (Required)</label>
                <input
                  type="file"
                  accept="image/*"
                  onChange={(e) => handleFileChange(e, setBaseFile)}
                  className="text-sm text-slate-400 file:mr-4 file:py-2 file:px-4 file:rounded-full file:border-0 file:text-sm file:font-semibold file:bg-cyan-500/10 file:text-cyan-400 hover:file:bg-cyan-500/20"
                />
                <p className="text-xs text-slate-500 mt-1">The main look of your character (Eyes Open, Mouth Closed).</p>
              </div>
              <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
                <div className="p-4 bg-slate-900/50 rounded-xl border border-slate-600 border-dashed">
                  <label className="block text-sm font-bold text-slate-300 mb-2">Closed Eyes (Optional)</label>
                  <input
                    type="file"
                    accept="image/*"
                    onChange={(e) => handleFileChange(e, setBlinkFile)}
                    className="text-sm text-slate-400 file:mr-4 file:py-2 file:px-4 file:rounded-full file:border-0 file:text-sm file:font-semibold file:bg-purple-500/10 file:text-purple-400 hover:file:bg-purple-500/20"
                  />
                </div>
                <div className="p-4 bg-slate-900/50 rounded-xl border border-slate-600 border-dashed">
                  <label className="block text-sm font-bold text-slate-300 mb-2">Open Mouth (Optional)</label>
                  <input
                    type="file"
                    accept="image/*"
                    onChange={(e) => handleFileChange(e, setTalkFile)}
                    className="text-sm text-slate-400 file:mr-4 file:py-2 file:px-4 file:rounded-full file:border-0 file:text-sm file:font-semibold file:bg-pink-500/10 file:text-pink-400 hover:file:bg-pink-500/20"
                  />
                </div>
              </div>
            </div>
          )}
          {error && (
            <div className="p-3 bg-red-500/20 border border-red-500/50 rounded-lg text-red-200 text-sm">
              {error}
            </div>
          )}
          <button
            type="button"
            onClick={mode === 'generate' ? handleGenerate : handleUpload}
            disabled={isSubmitDisabled}
            className={`w-full py-4 rounded-xl font-bold text-lg transition-all duration-200 ${
              isSubmitDisabled
                ? 'bg-slate-700 text-slate-500 cursor-not-allowed'
                : 'bg-gradient-to-r from-cyan-500 to-blue-600 hover:from-cyan-400 hover:to-blue-500 text-white shadow-lg shadow-cyan-500/25 transform hover:scale-[1.02]'
            }`}
          >
            {status !== 'idle' ? (
              <div className="flex items-center justify-center gap-3">
                <LoadingSpinner />
                <span>{BUSY_LABELS[status]}</span>
              </div>
            ) : (
              <div className="flex items-center justify-center gap-2">
                <span>{mode === 'generate' ? 'Generate Model' : 'Create Model'}</span>
                <svg xmlns="http://www.w3.org/2000/svg" className="h-5 w-5" viewBox="0 0 20 20" fill="currentColor">
                  <path fillRule="evenodd" d="M10 18a8 8 0 100-16 8 8 0 000 16zm3.707-8.707l-3-3a1 1 0 00-1.414 1.414L10.586 9H7a1 1 0 100 2h3.586l-1.293 1.293a1 1 0 101.414 1.414l3-3a1 1 0 000-1.414z" clipRule="evenodd" />
                </svg>
              </div>
            )}
          </button>
        </div>
      </div>
    </div>
  );
};
export default AvatarCreator;