feat: Generate VTuber character sheet with expression assets

The Gemini service has been updated to generate a character sheet rather than a single avatar image. This sheet includes the main character and separate assets for closed eyes and an open mouth.

The `AvatarConfig` type and `RiggingEditor` component have been extended to handle these new expression assets (`textureClosedEye`, `textureOpenMouth`). A new `Sprite` component has been added to `Studio.tsx` to correctly render these specific regions from the generated character sheet. The UI has been updated to reflect the new generation process.
This commit is contained in:
James Twose 2025-11-20 20:55:47 +01:00
parent b6017794a5
commit 3eff403fb4
6 changed files with 183 additions and 114 deletions

View File

@ -31,7 +31,10 @@ const App: React.FC = () => {
setAppState(AppState.RIGGING);
};
const handleRiggingComplete = (data: { leftEye: Rect, rightEye: Rect, mouth: Rect, skinColor: string }) => {
const handleRiggingComplete = (data: {
leftEye: Rect, rightEye: Rect, mouth: Rect, skinColor: string,
textureClosedEye: Rect, textureOpenMouth: Rect
}) => {
if (generatedData) {
setAvatar({
imageUrl: generatedData.url,
@ -40,7 +43,9 @@ const App: React.FC = () => {
leftEye: data.leftEye,
rightEye: data.rightEye,
mouth: data.mouth,
skinColor: data.skinColor
skinColor: data.skinColor,
textureClosedEye: data.textureClosedEye,
textureOpenMouth: data.textureOpenMouth,
});
setAppState(AppState.STUDIO);
}

View File

@ -22,10 +22,11 @@ const AvatarCreator: React.FC<AvatarCreatorProps> = ({ onAvatarGenerated }) => {
setError(null);
try {
// 1. Generate Image
// 1. Generate Image (Now creates a character sheet)
const imageUrl = await generateAvatarImage(prompt);
// 2. Analyze Image for Landmarks (Initial guess)
// Note: Vision service will likely find the main face on the left, which is what we want for targets.
setStatus('analyzing');
const analysisData = await analyzeAvatarImage(imageUrl);
@ -50,7 +51,7 @@ const AvatarCreator: React.FC<AvatarCreatorProps> = ({ onAvatarGenerated }) => {
Design Your Avatar
</h2>
<p className="text-slate-400">
Describe your dream VTuber model and let Gemini bring it to life.
Describe your dream VTuber model. Gemini will generate a character sheet with expression assets.
</p>
</div>
@ -94,7 +95,7 @@ const AvatarCreator: React.FC<AvatarCreatorProps> = ({ onAvatarGenerated }) => {
{status !== 'idle' ? (
<div className="flex items-center justify-center gap-3">
<LoadingSpinner />
<span>{status === 'generating' ? 'Dreaming up Avatar...' : 'Analyzing Features...'}</span>
<span>{status === 'generating' ? 'Dreaming up Sheet...' : 'Analyzing Features...'}</span>
</div>
) : (
<div className="flex items-center justify-center gap-2">

View File

@ -5,10 +5,13 @@ import { Rect } from '../types';
interface RiggingEditorProps {
imageUrl: string;
initialData?: { leftEye: Rect; rightEye: Rect; mouth: Rect; skinColor: string };
onComplete: (data: { leftEye: Rect; rightEye: Rect; mouth: Rect; skinColor: string }) => void;
onComplete: (data: {
leftEye: Rect; rightEye: Rect; mouth: Rect; skinColor: string;
textureClosedEye: Rect; textureOpenMouth: Rect;
}) => void;
}
type ActiveFeature = 'leftEye' | 'rightEye' | 'mouth' | null;
type ActiveFeature = 'leftEye' | 'rightEye' | 'mouth' | 'textureClosedEye' | 'textureOpenMouth' | null;
const ResizableBox: React.FC<{
rect: Rect;
@ -88,7 +91,7 @@ const ResizableBox: React.FC<{
<div
ref={boxRef}
onMouseDown={handleMouseDown}
className={`absolute border-2 cursor-move group transition-colors ${isActive ? 'z-20' : 'z-10'}`}
className={`absolute border-2 cursor-move group transition-colors ${isActive ? 'z-30' : 'z-20'}`}
style={{
left: `${rect.x * 100}%`,
top: `${rect.y * 100}%`,
@ -100,7 +103,7 @@ const ResizableBox: React.FC<{
>
{/* Label */}
<div
className="absolute -top-6 left-0 text-xs font-bold px-1 rounded text-white whitespace-nowrap"
className="absolute -top-6 left-0 text-xs font-bold px-1 rounded text-white whitespace-nowrap shadow-sm"
style={{ backgroundColor: color }}
>
{label}
@ -117,100 +120,114 @@ const ResizableBox: React.FC<{
};
const RiggingEditor: React.FC<RiggingEditorProps> = ({ imageUrl, initialData, onComplete }) => {
const [leftEye, setLeftEye] = useState<Rect>(initialData?.leftEye || { x: 0.35, y: 0.4, w: 0.12, h: 0.08 });
const [rightEye, setRightEye] = useState<Rect>(initialData?.rightEye || { x: 0.53, y: 0.4, w: 0.12, h: 0.08 });
const [mouth, setMouth] = useState<Rect>(initialData?.mouth || { x: 0.45, y: 0.6, w: 0.1, h: 0.05 });
// Targets (Left side of image usually)
const [leftEye, setLeftEye] = useState<Rect>(initialData?.leftEye || { x: 0.25, y: 0.4, w: 0.1, h: 0.1 });
const [rightEye, setRightEye] = useState<Rect>(initialData?.rightEye || { x: 0.45, y: 0.4, w: 0.1, h: 0.1 });
const [mouth, setMouth] = useState<Rect>(initialData?.mouth || { x: 0.35, y: 0.55, w: 0.1, h: 0.05 });
// Sources (Right side of image usually)
const [textureClosedEye, setTextureClosedEye] = useState<Rect>({ x: 0.7, y: 0.1, w: 0.2, h: 0.2 });
const [textureOpenMouth, setTextureOpenMouth] = useState<Rect>({ x: 0.7, y: 0.5, w: 0.2, h: 0.2 });
const [skinColor, setSkinColor] = useState<string>(initialData?.skinColor || '#fcd3bf');
const [activeFeature, setActiveFeature] = useState<ActiveFeature>(null);
return (
<div className="flex flex-col items-center h-full max-w-4xl mx-auto p-4">
<div className="flex flex-col items-center h-full max-w-6xl mx-auto p-4">
<div className="text-center mb-6">
<h2 className="text-2xl font-bold text-white mb-2">Rig Your Avatar</h2>
<p className="text-slate-400">
Drag and resize the boxes to match your avatar's features.
This ensures the eyes blink correctly.
<h2 className="text-2xl font-bold text-white mb-2">Rig Your Character</h2>
<p className="text-slate-400 text-sm">
1. Match the <b>Target</b> boxes (Red/Blue/Green) to the main character.<br/>
2. Match the <b>Source</b> boxes (Purple/Orange) to the extra assets on the right.
</p>
</div>
<div className="flex gap-8 w-full items-start">
<div className="flex gap-6 w-full items-start h-[70vh]">
{/* Editor Area */}
<div className="flex-1 bg-slate-800 p-4 rounded-xl border border-slate-700 flex justify-center">
<div className="relative inline-block select-none" style={{ width: '500px', maxWidth: '100%' }}>
<div className="flex-1 bg-slate-800 p-4 rounded-xl border border-slate-700 flex justify-center h-full overflow-hidden relative">
<div className="relative inline-block h-full">
<img
src={imageUrl}
alt="Rigging Target"
className="w-full h-auto rounded-lg pointer-events-none select-none block"
className="h-full w-auto object-contain rounded-lg pointer-events-none select-none block"
draggable={false}
/>
<ResizableBox
rect={leftEye}
color="#ef4444" // Red
label="Left Eye"
isActive={activeFeature === 'leftEye'}
onUpdate={setLeftEye}
onActivate={() => setActiveFeature('leftEye')}
/>
{/* Aspect ratio container to map percentage boxes correctly */}
<div className="absolute inset-0 w-full h-full">
{/* Targets */}
<ResizableBox
rect={leftEye} color="#ef4444" label="Left Eye Target"
isActive={activeFeature === 'leftEye'}
onUpdate={setLeftEye} onActivate={() => setActiveFeature('leftEye')}
/>
<ResizableBox
rect={rightEye} color="#3b82f6" label="Right Eye Target"
isActive={activeFeature === 'rightEye'}
onUpdate={setRightEye} onActivate={() => setActiveFeature('rightEye')}
/>
<ResizableBox
rect={mouth} color="#22c55e" label="Mouth Target"
isActive={activeFeature === 'mouth'}
onUpdate={setMouth} onActivate={() => setActiveFeature('mouth')}
/>
<ResizableBox
rect={rightEye}
color="#3b82f6" // Blue
label="Right Eye"
isActive={activeFeature === 'rightEye'}
onUpdate={setRightEye}
onActivate={() => setActiveFeature('rightEye')}
/>
<ResizableBox
rect={mouth}
color="#22c55e" // Green
label="Mouth"
isActive={activeFeature === 'mouth'}
onUpdate={setMouth}
onActivate={() => setActiveFeature('mouth')}
/>
{/* Sources */}
<ResizableBox
rect={textureClosedEye} color="#a855f7" label="Source: Closed Eyes"
isActive={activeFeature === 'textureClosedEye'}
onUpdate={setTextureClosedEye} onActivate={() => setActiveFeature('textureClosedEye')}
/>
<ResizableBox
rect={textureOpenMouth} color="#f97316" label="Source: Open Mouth"
isActive={activeFeature === 'textureOpenMouth'}
onUpdate={setTextureOpenMouth} onActivate={() => setActiveFeature('textureOpenMouth')}
/>
</div>
</div>
</div>
{/* Sidebar Controls */}
<div className="w-64 flex flex-col gap-6 bg-slate-800/50 p-6 rounded-xl border border-slate-700 h-full">
<div className="w-72 flex flex-col gap-4 bg-slate-800/50 p-6 rounded-xl border border-slate-700 h-full overflow-y-auto">
<div>
<label className="block text-sm font-medium text-slate-300 mb-2">Eyelid Color</label>
<div className="bg-slate-900/50 p-4 rounded-lg">
<label className="block text-xs font-bold text-slate-400 mb-2 uppercase">Skin Color Fallback</label>
<div className="flex items-center gap-3">
<input
type="color"
value={skinColor}
onChange={(e) => setSkinColor(e.target.value)}
className="w-10 h-10 rounded cursor-pointer border-0 p-0"
className="w-8 h-8 rounded cursor-pointer border-0 p-0"
/>
<span className="text-xs text-slate-400 font-mono">{skinColor}</span>
</div>
<p className="text-xs text-slate-500 mt-2">
Pick the color of the skin above the eyes for realistic blinking.
</p>
</div>
<div className="space-y-2">
<div className="flex items-center gap-2 text-sm text-slate-300">
<div className="w-3 h-3 bg-red-500 rounded-full"></div>
<span>Left Eye Box</span>
<div className="space-y-3 flex-1">
<div className="text-xs font-bold text-slate-400 uppercase border-b border-slate-700 pb-1">Targets (Main Face)</div>
<div className="flex items-center gap-2 text-sm text-slate-300 cursor-pointer hover:text-white" onClick={() => setActiveFeature('leftEye')}>
<div className="w-3 h-3 bg-red-500 rounded-full shadow"></div> Left Eye
</div>
<div className="flex items-center gap-2 text-sm text-slate-300">
<div className="w-3 h-3 bg-blue-500 rounded-full"></div>
<span>Right Eye Box</span>
<div className="flex items-center gap-2 text-sm text-slate-300 cursor-pointer hover:text-white" onClick={() => setActiveFeature('rightEye')}>
<div className="w-3 h-3 bg-blue-500 rounded-full shadow"></div> Right Eye
</div>
<div className="flex items-center gap-2 text-sm text-slate-300">
<div className="w-3 h-3 bg-green-500 rounded-full"></div>
<span>Mouth Box</span>
<div className="flex items-center gap-2 text-sm text-slate-300 cursor-pointer hover:text-white" onClick={() => setActiveFeature('mouth')}>
<div className="w-3 h-3 bg-green-500 rounded-full shadow"></div> Mouth
</div>
<div className="text-xs font-bold text-slate-400 uppercase border-b border-slate-700 pb-1 mt-4">Sources (Right Side)</div>
<div className="flex items-center gap-2 text-sm text-slate-300 cursor-pointer hover:text-white" onClick={() => setActiveFeature('textureClosedEye')}>
<div className="w-3 h-3 bg-purple-500 rounded-full shadow"></div> Closed Eye Texture
</div>
<div className="flex items-center gap-2 text-sm text-slate-300 cursor-pointer hover:text-white" onClick={() => setActiveFeature('textureOpenMouth')}>
<div className="w-3 h-3 bg-orange-500 rounded-full shadow"></div> Open Mouth Texture
</div>
</div>
<div className="mt-auto pt-6">
<div className="mt-4">
<button
onClick={() => onComplete({ leftEye, rightEye, mouth, skinColor })}
className="w-full py-3 bg-gradient-to-r from-cyan-500 to-blue-600 hover:from-cyan-400 hover:to-blue-500 text-white rounded-xl font-bold shadow-lg shadow-cyan-500/25 transform hover:scale-[1.02] transition-all"
onClick={() => onComplete({ leftEye, rightEye, mouth, skinColor, textureClosedEye, textureOpenMouth })}
className="w-full py-4 bg-gradient-to-r from-cyan-500 to-blue-600 hover:from-cyan-400 hover:to-blue-500 text-white rounded-xl font-bold shadow-lg shadow-cyan-500/25 transform hover:scale-[1.02] transition-all"
>
Finish Rigging
</button>

View File

@ -1,12 +1,52 @@
import React, { useEffect, useRef, useState } from 'react';
import { useFaceTracking } from '../hooks/useFaceTracking';
import { AvatarConfig } from '../types';
import { AvatarConfig, Rect } from '../types';
interface StudioProps {
avatar: AvatarConfig;
onBack: () => void;
}
/**
 * Position utilities that already make the wrapper a containing block for the
 * absolutely positioned inner <img>. When the caller supplies one of these we
 * must not add our own `relative`: two conflicting position utilities on one
 * element are resolved by stylesheet order, not by the order in `className`,
 * so a hard-coded `relative` can silently override a caller's `absolute`.
 */
const SPRITE_POSITIONED_CLASSES = ['absolute', 'relative', 'fixed', 'sticky'];

/**
 * Sprite Component
 * Renders a specific crop of the source image into a target container.
 *
 * The wrapper clips its content (`overflow-hidden`); the inner <img> is scaled
 * up and shifted by negative offsets so that only `sourceRect` shows through.
 *
 * @param imageSrc   URL of the full image to crop from.
 * @param sourceRect Region to show; `x`, `y`, `w`, `h` are used as fractions of
 *                   the full image (a `w` of 0.1 scales the image to 1000%).
 * @param style      Inline style applied to the wrapper (size, position, opacity, ...).
 * @param className  Extra classes for the wrapper. May include a position
 *                   utility such as `absolute`; `relative` is only added when
 *                   none is present.
 */
const Sprite: React.FC<{
  imageSrc: string;
  sourceRect: Rect;
  style?: React.CSSProperties;
  className?: string;
}> = ({ imageSrc, sourceRect, style, className = '' }) => {
  // Defaulting className to '' avoids emitting a literal "undefined" class.
  const callerPositionsWrapper = className
    .split(/\s+/)
    .some((token) => SPRITE_POSITIONED_CLASSES.includes(token));

  const wrapperClassName = [
    'overflow-hidden',
    callerPositionsWrapper ? '' : 'relative',
    className,
  ]
    .filter(Boolean)
    .join(' ');

  // A zero, negative or non-finite crop size would produce Infinity%/NaN% CSS
  // values; in that case keep the wrapper (so layout is unchanged) but draw nothing.
  const hasDrawableCrop =
    Number.isFinite(sourceRect.w) &&
    Number.isFinite(sourceRect.h) &&
    sourceRect.w > 0 &&
    sourceRect.h > 0;

  if (!hasDrawableCrop) {
    return <div className={wrapperClassName} style={style} />;
  }

  // Example: if sourceRect.w is 0.1 (10%), the image must be scaled to 10x (1000%) size.
  const widthScale = 1 / sourceRect.w;
  const heightScale = 1 / sourceRect.h;

  return (
    <div className={wrapperClassName} style={style}>
      <img
        src={imageSrc}
        alt=""
        style={{
          position: 'absolute',
          // Percent offsets resolve against the wrapper, so the crop origin is
          // multiplied by the same scale as the image. The sign is applied
          // numerically so a negative origin cannot yield an invalid "--N%".
          top: `${-sourceRect.y * heightScale * 100}%`,
          left: `${-sourceRect.x * widthScale * 100}%`,
          width: `${widthScale * 100}%`,
          height: `${heightScale * 100}%`,
          maxWidth: 'none',
          maxHeight: 'none',
          pointerEvents: 'none'
        }}
      />
    </div>
  );
};
const Studio: React.FC<StudioProps> = ({ avatar, onBack }) => {
const videoRef = useRef<HTMLVideoElement>(null);
const [cameraReady, setCameraReady] = useState(false);
@ -128,49 +168,52 @@ const Studio: React.FC<StudioProps> = ({ avatar, onBack }) => {
className="relative w-full h-full flex items-center justify-center"
style={getAvatarStyle()}
>
{/* Main Character Body */}
<img
src={avatar.imageUrl}
alt="Avatar"
className="w-full h-full object-contain drop-shadow-[0_0_15px_rgba(168,85,247,0.5)]"
style={{
// Use clip-path to hide the right-side assets from the main view, keeping only the main character
clipPath: 'inset(0 25% 0 0)' // Hides the right 25% (where assets are)
}}
/>
{/* Dynamic Eyelids */}
{avatar.leftEye && avatar.skinColor && (
<div
className="absolute pointer-events-none"
{/* Dynamic Eyelids (High Fidelity Sprites) */}
{avatar.leftEye && avatar.textureClosedEye && (
<Sprite
imageSrc={avatar.imageUrl}
sourceRect={avatar.textureClosedEye}
className="absolute pointer-events-none z-20"
style={{
left: `${avatar.leftEye.x * 100}%`,
top: `${avatar.leftEye.y * 100}%`,
width: `${avatar.leftEye.w * 100}%`,
height: `${avatar.leftEye.h * 100}%`,
backgroundColor: avatar.skinColor,
transform: `scaleY(${trackingData.isBlinkingLeft ? 1 : 0})`,
transformOrigin: 'top',
transition: 'transform 0.1s cubic-bezier(0.4, 0, 0.2, 1)', // Snappy blink
borderRadius: '0 0 40% 40%'
opacity: trackingData.isBlinkingLeft ? 1 : 0,
transition: 'opacity 0.05s linear',
}}
/>
)}
{avatar.rightEye && avatar.skinColor && (
<div
className="absolute pointer-events-none"
{avatar.rightEye && avatar.textureClosedEye && (
<Sprite
imageSrc={avatar.imageUrl}
sourceRect={avatar.textureClosedEye}
className="absolute pointer-events-none z-20"
style={{
left: `${avatar.rightEye.x * 100}%`,
top: `${avatar.rightEye.y * 100}%`,
width: `${avatar.rightEye.w * 100}%`,
height: `${avatar.rightEye.h * 100}%`,
backgroundColor: avatar.skinColor,
transform: `scaleY(${trackingData.isBlinkingRight ? 1 : 0})`,
transformOrigin: 'top',
transition: 'transform 0.1s cubic-bezier(0.4, 0, 0.2, 1)', // Snappy blink
borderRadius: '0 0 40% 40%'
opacity: trackingData.isBlinkingRight ? 1 : 0,
transition: 'opacity 0.05s linear',
}}
/>
)}
{/* Dynamic Mouth Animation */}
{avatar.mouth && (
{avatar.mouth && avatar.textureOpenMouth && (
<div
className="absolute pointer-events-none flex items-center justify-center z-10"
style={{
@ -186,31 +229,27 @@ const Studio: React.FC<StudioProps> = ({ avatar, onBack }) => {
style={{
backgroundColor: avatar.skinColor || '#fcd3bf',
opacity: trackingData.mouthOpen > 0.1 ? 1 : 0,
filter: 'blur(3px)', // Blends edges
borderRadius: '40%'
filter: 'blur(4px)', // Blends edges
borderRadius: '50%'
}}
/>
{/* Mouth Interior - Scales based on mouth openness */}
<div
className="relative w-full h-full bg-[#4a1212] border-2 border-[#2d0a0a] overflow-hidden origin-center transition-transform duration-75"
style={{
borderRadius: '50% 50% 50% 50% / 50% 50% 30% 30%', // Slightly more jaw-like shape
// trackingData.mouthOpen is 0-1. We amplify it for better visuals.
transform: `scaleY(${Math.min(1.2, trackingData.mouthOpen * 4)}) scaleX(${0.9 + trackingData.mouthOpen * 0.1})`,
opacity: trackingData.mouthOpen > 0.05 ? 1 : 0,
}}
>
{/* Tongue */}
<div
className="absolute bottom-[-20%] left-1/2 -translate-x-1/2 w-[80%] h-[60%] bg-[#d45d5d] rounded-t-full"
/>
</div>
{/* Mouth Sprite - Scales based on mouth openness */}
<Sprite
imageSrc={avatar.imageUrl}
sourceRect={avatar.textureOpenMouth}
className="w-full h-full"
style={{
opacity: trackingData.mouthOpen > 0.05 ? 1 : 0,
// Scale open mouth based on volume
transform: `scaleY(${0.8 + trackingData.mouthOpen * 0.5})`,
}}
/>
</div>
)}
</div>
{/* Optional: Status Indicator overlay if tracking is lost (all 0s usually) or just visual flair */}
{/* Status overlay shown until the camera is ready */}
{(!cameraReady) && (
<div className="absolute inset-0 flex items-center justify-center bg-slate-900/80 z-20 rounded-xl backdrop-blur-sm">
<div className="text-cyan-400 animate-pulse font-mono">INITIALIZING CAMERA LINK...</div>

View File

@ -1,25 +1,30 @@
import { GoogleGenAI } from "@google/genai";
/**
* Generates a VTuber avatar image based on user description.
* Generates a VTuber avatar character sheet.
* Uses gemini-3-pro-image-preview for high quality.
*/
export const generateAvatarImage = async (description: string): Promise<string> => {
try {
// Initialize client inside the function to ensure we use the most up-to-date API key
// after the user has completed the selection flow.
const ai = new GoogleGenAI({ apiKey: process.env.API_KEY });
// We construct a prompt that encourages a good format for a 2D avatar (front facing, clean background)
const prompt = `
Create a high-quality, flat 2D anime or stylized character illustration suitable for a VTuber avatar.
The character should be facing forward (front view).
The background should be a solid, single color (white or bright green) to allow for easy removal or masking.
Create a VTuber character sheet with a flat 2D anime style.
LAYOUT:
1. MAIN CHARACTER (Left side, takes up 70% of width):
- Front-facing view, head and shoulders.
- Neutral expression, eyes open, mouth closed.
2. EXPRESSION ASSETS (Right side, vertical column):
- Top: The same character's face with EYES CLOSED (for blinking).
- Bottom: The same character's face with MOUTH OPEN (for talking).
Character Description: ${description}
Style: Vibrant, clean lines, detailed eyes.
Focus: Head and shoulders only.
Style: Vibrant, clean lines, solid white or green background for easy keying.
`;
const response = await ai.models.generateContent({
@ -31,7 +36,7 @@ export const generateAvatarImage = async (description: string): Promise<string>
},
config: {
imageConfig: {
aspectRatio: "1:1",
aspectRatio: "16:9", // Wide to fit character sheet
imageSize: "1K"
}
}

View File

@ -21,6 +21,8 @@ export interface AvatarConfig {
rightEye?: Rect;
mouth?: Rect;
skinColor?: string;
textureClosedEye?: Rect;
textureOpenMouth?: Rect;
}
export interface TrackingData {