feat: Initialize Gemini V-Studio project setup
Sets up the foundational project structure, including: - Vite for build tooling. - React for the UI. - Tailwind CSS for styling. - MediaPipe for face tracking capabilities. - Gemini API integration for avatar generation. - Basic configuration files (package.json, vite.config.ts, tsconfig.json). - Initial README with local run instructions. - Core types and a basic Gemini service for image generation.
This commit is contained in:
parent
bbd1a19152
commit
b6017794a5
24
.gitignore
vendored
Normal file
24
.gitignore
vendored
Normal file
@ -0,0 +1,24 @@
|
||||
# Logs
|
||||
logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
pnpm-debug.log*
|
||||
lerna-debug.log*
|
||||
|
||||
node_modules
|
||||
dist
|
||||
dist-ssr
|
||||
*.local
|
||||
|
||||
# Editor directories and files
|
||||
.vscode/*
|
||||
!.vscode/extensions.json
|
||||
.idea
|
||||
.DS_Store
|
||||
*.suo
|
||||
*.ntvs*
|
||||
*.njsproj
|
||||
*.sln
|
||||
*.sw?
|
||||
128
App.tsx
Normal file
128
App.tsx
Normal file
@ -0,0 +1,128 @@
|
||||
|
||||
import React, { useState } from 'react';
|
||||
import { AppState, AvatarConfig, Rect } from './types';
|
||||
import AvatarCreator from './components/AvatarCreator';
|
||||
import RiggingEditor from './components/RiggingEditor';
|
||||
import Studio from './components/Studio';
|
||||
|
||||
/** Feature rectangles and eyelid colour passed between generation, rigging and the studio. */
type RigData = { leftEye: Rect; rightEye: Rect; mouth: Rect; skinColor: string };

/** A generated image waiting to be rigged, plus the analyser's optional first guess. */
type GeneratedAvatar = { url: string; name: string; initialData?: RigData };

/**
 * Root component. Renders exactly one screen depending on `appState`:
 * SETUP (landing page) -> CREATION (prompt form) -> RIGGING (feature boxes)
 * -> STUDIO (live animation).
 */
const App: React.FC = () => {
  const [appState, setAppState] = useState<AppState>(AppState.SETUP);
  // Temp storage for the generated image before rigging
  const [generatedData, setGeneratedData] = useState<GeneratedAvatar | null>(null);
  const [avatar, setAvatar] = useState<AvatarConfig | null>(null);

  /**
   * Prompts for an API key when the `window.aistudio` bridge is present and no
   * key is selected yet, then opens the creation screen. Key-selection errors
   * are logged only; the creation screen is opened either way.
   */
  const handleStartCreation = async () => {
    try {
      if (window.aistudio && !(await window.aistudio.hasSelectedApiKey())) {
        await window.aistudio.openSelectKey();
      }
    } catch (error: unknown) {
      console.error("Error during API key selection:", error);
    } finally {
      setAppState(AppState.CREATION);
    }
  };

  /** Stores the freshly generated image and moves on to the rigging screen. */
  const handleAvatarGenerated = (url: string, name: string, initialData?: RigData) => {
    setGeneratedData({ url, name, initialData });
    setAppState(AppState.RIGGING);
  };

  /** Combines the generated image with the confirmed rig and opens the studio. */
  const handleRiggingComplete = (data: RigData) => {
    // Without a generated image there is nothing to rig; stay on the current screen.
    if (!generatedData) return;

    setAvatar({
      imageUrl: generatedData.url,
      name: generatedData.name,
      description: '',
      leftEye: data.leftEye,
      rightEye: data.rightEye,
      mouth: data.mouth,
      skinColor: data.skinColor
    });
    setAppState(AppState.STUDIO);
  };

  return (
    <div className="min-h-screen bg-slate-900 text-white">
      {appState === AppState.SETUP && (
        <div className="container mx-auto px-4 py-12 flex flex-col items-center justify-center min-h-screen">
          <div className="text-center mb-12 space-y-4">
            <h1 className="text-6xl font-bold text-transparent bg-clip-text bg-gradient-to-r from-cyan-400 via-blue-500 to-purple-600 brand-font tracking-tighter">
              GEMINI V-STUDIO
            </h1>
            <p className="text-xl text-slate-400 max-w-2xl mx-auto">
              The next-generation browser-based VTuber studio. Generate your persona with AI and animate it with your face.
            </p>
            <button
              onClick={handleStartCreation}
              className="mt-8 px-8 py-4 bg-white text-slate-900 rounded-full font-bold hover:bg-cyan-50 transition-colors shadow-[0_0_20px_rgba(255,255,255,0.3)]"
            >
              Start Creation
            </button>
          </div>

          <div className="grid grid-cols-1 md:grid-cols-3 gap-8 w-full max-w-5xl">
            <div className="p-6 bg-slate-800/50 rounded-xl border border-slate-700 backdrop-blur-sm">
              <div className="h-12 w-12 bg-cyan-500/10 rounded-lg flex items-center justify-center mb-4 text-2xl">✨</div>
              <h3 className="text-xl font-bold mb-2">AI Generation</h3>
              <p className="text-slate-400">Describe your dream character. Gemini 3 Pro creates high-fidelity sprites in seconds.</p>
            </div>
            <div className="p-6 bg-slate-800/50 rounded-xl border border-slate-700 backdrop-blur-sm">
              <div className="h-12 w-12 bg-purple-500/10 rounded-lg flex items-center justify-center mb-4 text-2xl">📸</div>
              <h3 className="text-xl font-bold mb-2">Face Tracking</h3>
              <p className="text-slate-400">Powered by MediaPipe. No expensive equipment needed—just your webcam.</p>
            </div>
            <div className="p-6 bg-slate-800/50 rounded-xl border border-slate-700 backdrop-blur-sm">
              <div className="h-12 w-12 bg-pink-500/10 rounded-lg flex items-center justify-center mb-4 text-2xl">🎥</div>
              <h3 className="text-xl font-bold mb-2">Live Animation</h3>
              <p className="text-slate-400">Your avatar mimics your head movements and speech in real-time.</p>
            </div>
          </div>
        </div>
      )}

      {appState === AppState.CREATION && (
        <div className="container mx-auto px-4 py-12 min-h-screen flex flex-col">
          <button
            onClick={() => setAppState(AppState.SETUP)}
            className="self-start mb-8 px-4 py-2 text-slate-400 hover:text-white transition-colors"
          >
            ← Back to Home
          </button>
          <div className="flex-1 flex items-center justify-center">
            <AvatarCreator onAvatarGenerated={handleAvatarGenerated} />
          </div>
        </div>
      )}

      {appState === AppState.RIGGING && generatedData && (
        <div className="container mx-auto px-4 py-8 min-h-screen flex flex-col">
          <button
            onClick={() => setAppState(AppState.CREATION)}
            className="self-start mb-4 px-4 py-2 text-slate-400 hover:text-white transition-colors"
          >
            ← Back to Generator
          </button>
          <RiggingEditor
            imageUrl={generatedData.url}
            initialData={generatedData.initialData}
            onComplete={handleRiggingComplete}
          />
        </div>
      )}

      {appState === AppState.STUDIO && avatar && (
        <Studio
          avatar={avatar}
          onBack={() => setAppState(AppState.SETUP)}
        />
      )}
    </div>
  );
};
|
||||
|
||||
export default App;
|
||||
25
README.md
25
README.md
@ -1,11 +1,20 @@
|
||||
<div align="center">
|
||||
|
||||
<img width="1200" height="475" alt="GHBanner" src="https://github.com/user-attachments/assets/0aa67016-6eaf-458a-adb2-6e31a0763ed6" />
|
||||
|
||||
<h1>Built with AI Studio</h1>
|
||||
|
||||
<p>The fastest path from prompt to production with Gemini.</p>
|
||||
|
||||
<a href="https://aistudio.google.com/apps">Start building</a>
|
||||
|
||||
</div>
|
||||
|
||||
# Run and deploy your AI Studio app
|
||||
|
||||
This contains everything you need to run your app locally.
|
||||
|
||||
View your app in AI Studio: https://ai.studio/apps/drive/1Di9b15uKTFXVof4InO8oefefCDaW9Q26
|
||||
|
||||
## Run Locally
|
||||
|
||||
**Prerequisites:** Node.js
|
||||
|
||||
|
||||
1. Install dependencies:
|
||||
`npm install`
|
||||
2. Set the `GEMINI_API_KEY` in [.env.local](.env.local) to your Gemini API key
|
||||
3. Run the app:
|
||||
`npm run dev`
|
||||
|
||||
113
components/AvatarCreator.tsx
Normal file
113
components/AvatarCreator.tsx
Normal file
@ -0,0 +1,113 @@
|
||||
|
||||
import React, { useState } from 'react';
|
||||
import { generateAvatarImage } from '../services/geminiService';
|
||||
import { analyzeAvatarImage } from '../services/visionService';
|
||||
import LoadingSpinner from './LoadingSpinner';
|
||||
import { Rect } from '../types';
|
||||
|
||||
/** Props accepted by the avatar creation form. */
interface AvatarCreatorProps {
  /**
   * Receives the generated image URL, the model name and, optionally, a first
   * guess of the feature rectangles and eyelid colour.
   */
  onAvatarGenerated: (
    url: string,
    name: string,
    initialData?: {
      leftEye: Rect;
      rightEye: Rect;
      mouth: Rect;
      skinColor: string;
    },
  ) => void;
}
|
||||
|
||||
/**
 * Form that collects a model name and a text description, generates an avatar
 * image from the description, runs a feature analysis on the result and hands
 * everything to the parent through `onAvatarGenerated`.
 */
const AvatarCreator: React.FC<AvatarCreatorProps> = ({ onAvatarGenerated }) => {
  const [prompt, setPrompt] = useState('');
  const [name, setName] = useState('');
  const [status, setStatus] = useState<'idle' | 'generating' | 'analyzing'>('idle');
  const [error, setError] = useState<string | null>(null);

  // Whitespace-only input is treated as empty so no request is sent for it.
  const trimmedPrompt = prompt.trim();
  const trimmedName = name.trim();
  const isBusy = status !== 'idle';
  const canGenerate = !isBusy && trimmedPrompt !== '' && trimmedName !== '';

  /**
   * Runs generation followed by analysis. Only a generation failure is shown
   * to the user; an analysis failure falls back to reporting the image
   * without initial data, because that data is optional for the parent.
   */
  const handleGenerate = async () => {
    if (!canGenerate) return;

    setStatus('generating');
    setError(null);

    try {
      // 1. Generate Image
      const imageUrl = await generateAvatarImage(trimmedPrompt);

      // 2. Analyze Image for Landmarks (Initial guess)
      setStatus('analyzing');
      let analysisData: Awaited<ReturnType<typeof analyzeAvatarImage>> | undefined;
      try {
        analysisData = await analyzeAvatarImage(imageUrl);
      } catch (analysisError: unknown) {
        // The image itself is fine; keep it and let the user place the boxes manually.
        console.warn("Avatar analysis failed; continuing without initial data:", analysisError);
      }

      // 3. Pass to parent (to go to Rigging)
      if (analysisData) {
        onAvatarGenerated(imageUrl, trimmedName, analysisData);
      } else {
        onAvatarGenerated(imageUrl, trimmedName);
      }
    } catch (err: unknown) {
      console.error(err);
      setError("Failed to generate avatar. Please try again.");
    } finally {
      setStatus('idle');
    }
  };

  return (
    <div className="max-w-2xl mx-auto bg-slate-800/50 backdrop-blur-lg border border-slate-700 p-8 rounded-2xl shadow-2xl">
      <div className="text-center mb-8">
        <h2 className="text-3xl font-bold text-transparent bg-clip-text bg-gradient-to-r from-cyan-400 to-purple-500 mb-2">
          Design Your Avatar
        </h2>
        <p className="text-slate-400">
          Describe your dream VTuber model and let Gemini bring it to life.
        </p>
      </div>

      <div className="space-y-6">
        <div>
          <label className="block text-sm font-medium text-slate-300 mb-2">Model Name</label>
          <input
            type="text"
            value={name}
            onChange={(e) => setName(e.target.value)}
            placeholder="e.g., Neon Kitsune"
            className="w-full bg-slate-900/50 border border-slate-600 rounded-xl px-4 py-3 text-white placeholder-slate-500 focus:ring-2 focus:ring-cyan-500 focus:border-transparent transition-all outline-none"
          />
        </div>

        <div>
          <label className="block text-sm font-medium text-slate-300 mb-2">Description</label>
          <textarea
            value={prompt}
            onChange={(e) => setPrompt(e.target.value)}
            placeholder="e.g., A cyberpunk anime girl with neon blue hair, glowing headphones, wearing a futuristic jacket..."
            className="w-full h-32 bg-slate-900/50 border border-slate-600 rounded-xl px-4 py-3 text-white placeholder-slate-500 focus:ring-2 focus:ring-cyan-500 focus:border-transparent transition-all outline-none resize-none"
          />
        </div>

        {error && (
          <div className="p-3 bg-red-500/20 border border-red-500/50 rounded-lg text-red-200 text-sm">
            {error}
          </div>
        )}

        <button
          onClick={handleGenerate}
          disabled={!canGenerate}
          className={`w-full py-4 rounded-xl font-bold text-lg transition-all duration-200 ${
            !canGenerate
              ? 'bg-slate-700 text-slate-500 cursor-not-allowed'
              : 'bg-gradient-to-r from-cyan-500 to-blue-600 hover:from-cyan-400 hover:to-blue-500 text-white shadow-lg shadow-cyan-500/25 transform hover:scale-[1.02]'
          }`}
        >
          {isBusy ? (
            <div className="flex items-center justify-center gap-3">
              <LoadingSpinner />
              <span>{status === 'generating' ? 'Dreaming up Avatar...' : 'Analyzing Features...'}</span>
            </div>
          ) : (
            <div className="flex items-center justify-center gap-2">
              <span>Generate Model</span>
              <svg xmlns="http://www.w3.org/2000/svg" className="h-5 w-5" viewBox="0 0 20 20" fill="currentColor">
                <path fillRule="evenodd" d="M10 18a8 8 0 100-16 8 8 0 000 16zm3.707-8.707l-3-3a1 1 0 00-1.414 1.414L10.586 9H7a1 1 0 100 2h3.586l-1.293 1.293a1 1 0 101.414 1.414l3-3a1 1 0 000-1.414z" clipRule="evenodd" />
              </svg>
            </div>
          )}
        </button>
      </div>
    </div>
  );
};
|
||||
|
||||
export default AvatarCreator;
|
||||
11
components/LoadingSpinner.tsx
Normal file
11
components/LoadingSpinner.tsx
Normal file
@ -0,0 +1,11 @@
|
||||
import React from 'react';
|
||||
|
||||
/** Colour class and bounce delay of each spinner dot, in display order. */
const SPINNER_DOTS = [
  { tone: 'bg-cyan-500', delay: '0s' },
  { tone: 'bg-purple-500', delay: '0.1s' },
  { tone: 'bg-pink-500', delay: '0.2s' },
];

/** Three bouncing dots whose animations start slightly staggered. */
const LoadingSpinner: React.FC = () => (
  <div className="flex justify-center items-center space-x-2">
    {SPINNER_DOTS.map(({ tone, delay }) => (
      <div
        key={tone}
        className={`w-4 h-4 ${tone} rounded-full animate-bounce`}
        style={{ animationDelay: delay }}
      ></div>
    ))}
  </div>
);
|
||||
|
||||
export default LoadingSpinner;
|
||||
224
components/RiggingEditor.tsx
Normal file
224
components/RiggingEditor.tsx
Normal file
@ -0,0 +1,224 @@
|
||||
|
||||
import React, { useState, useRef, useEffect } from 'react';
|
||||
import { Rect } from '../types';
|
||||
|
||||
/** Props accepted by the rigging editor. */
interface RiggingEditorProps {
  /** Source of the image the feature boxes are drawn over. */
  imageUrl: string;
  /** Optional starting rectangles and eyelid colour; defaults are used when omitted. */
  initialData?: {
    leftEye: Rect;
    rightEye: Rect;
    mouth: Rect;
    skinColor: string;
  };
  /** Receives the current rectangles and eyelid colour when the user finishes. */
  onComplete: (data: {
    leftEye: Rect;
    rightEye: Rect;
    mouth: Rect;
    skinColor: string;
  }) => void;
}
|
||||
|
||||
/** Feature box currently selected in the editor, or `null` when none is selected. */
type ActiveFeature =
  | 'leftEye'
  | 'rightEye'
  | 'mouth'
  | null;
|
||||
|
||||
/** Smallest width/height, as a fraction of the parent, a box can be resized to. */
const MIN_BOX_SIZE = 0.01;

/** Restricts `value` to the inclusive range [`min`, `max`]. */
const clamp = (value: number, min: number, max: number): number =>
  Math.min(Math.max(value, min), max);

/**
 * Absolutely positioned box that can be dragged and resized with the mouse.
 *
 * `rect` is expressed as fractions of the parent element (it is rendered as
 * percentages), and every change is reported through `onUpdate` in the same
 * units. Dragging and resizing keep the box inside the parent's 0..1 area.
 */
const ResizableBox: React.FC<{
  rect: Rect;
  color: string;
  label: string;
  isActive: boolean;
  onUpdate: (rect: Rect) => void;
  onActivate: () => void;
}> = ({ rect, color, label, isActive, onUpdate, onActivate }) => {
  const boxRef = useRef<HTMLDivElement>(null);
  const [isDragging, setIsDragging] = useState(false);
  const [isResizing, setIsResizing] = useState(false);
  // Pointer position and rectangle captured when the gesture started.
  const startPos = useRef({ x: 0, y: 0 });
  const startRect = useRef<Rect>({ x: 0, y: 0, w: 0, h: 0 });

  /** Records the gesture origin and marks this box as the active one. */
  const beginGesture = (e: React.MouseEvent) => {
    e.preventDefault();
    // The resize handle sits inside the box; stop the event so only one gesture starts.
    e.stopPropagation();
    onActivate();
    startPos.current = { x: e.clientX, y: e.clientY };
    startRect.current = { ...rect };
  };

  const handleMouseDown = (e: React.MouseEvent) => {
    beginGesture(e);
    setIsDragging(true);
  };

  const handleResizeDown = (e: React.MouseEvent) => {
    beginGesture(e);
    setIsResizing(true);
  };

  useEffect(() => {
    if (!isDragging && !isResizing) return;

    const handleMouseMove = (e: MouseEvent) => {
      const parent = boxRef.current?.parentElement;
      if (!parent) return;

      const parentRect = parent.getBoundingClientRect();
      // A zero-sized parent would turn the deltas below into Infinity/NaN.
      if (parentRect.width === 0 || parentRect.height === 0) return;

      const deltaX = (e.clientX - startPos.current.x) / parentRect.width;
      const deltaY = (e.clientY - startPos.current.y) / parentRect.height;
      const origin = startRect.current;

      if (isDragging) {
        onUpdate({
          ...origin,
          x: clamp(origin.x + deltaX, 0, Math.max(0, 1 - origin.w)),
          y: clamp(origin.y + deltaY, 0, Math.max(0, 1 - origin.h)),
        });
      } else {
        onUpdate({
          ...origin,
          w: clamp(origin.w + deltaX, MIN_BOX_SIZE, Math.max(MIN_BOX_SIZE, 1 - origin.x)),
          h: clamp(origin.h + deltaY, MIN_BOX_SIZE, Math.max(MIN_BOX_SIZE, 1 - origin.y)),
        });
      }
    };

    const handleMouseUp = () => {
      setIsDragging(false);
      setIsResizing(false);
    };

    // Listen on window so the gesture continues when the pointer leaves the box.
    window.addEventListener('mousemove', handleMouseMove);
    window.addEventListener('mouseup', handleMouseUp);

    return () => {
      window.removeEventListener('mousemove', handleMouseMove);
      window.removeEventListener('mouseup', handleMouseUp);
    };
  }, [isDragging, isResizing, onUpdate]);

  return (
    <div
      ref={boxRef}
      onMouseDown={handleMouseDown}
      className={`absolute border-2 cursor-move group transition-colors ${isActive ? 'z-20' : 'z-10'}`}
      style={{
        left: `${rect.x * 100}%`,
        top: `${rect.y * 100}%`,
        width: `${rect.w * 100}%`,
        height: `${rect.h * 100}%`,
        borderColor: color,
        backgroundColor: isActive ? `${color}20` : 'transparent',
      }}
    >
      {/* Label */}
      <div
        className="absolute -top-6 left-0 text-xs font-bold px-1 rounded text-white whitespace-nowrap"
        style={{ backgroundColor: color }}
      >
        {label}
      </div>

      {/* Resize Handle */}
      <div
        onMouseDown={handleResizeDown}
        className="absolute bottom-0 right-0 w-4 h-4 bg-white border-2 cursor-nwse-resize opacity-0 group-hover:opacity-100 transition-opacity"
        style={{ borderColor: color }}
      />
    </div>
  );
};
|
||||
|
||||
/**
 * Screen where the user aligns three boxes (left eye, right eye, mouth) over
 * the avatar image and picks an eyelid colour. Starting values come from
 * `initialData` when provided, otherwise from built-in defaults. Pressing
 * "Finish Rigging" reports the current values through `onComplete`.
 */
const RiggingEditor: React.FC<RiggingEditorProps> = ({ imageUrl, initialData, onComplete }) => {
  const [leftEye, setLeftEye] = useState<Rect>(initialData?.leftEye || { x: 0.35, y: 0.4, w: 0.12, h: 0.08 });
  const [rightEye, setRightEye] = useState<Rect>(initialData?.rightEye || { x: 0.53, y: 0.4, w: 0.12, h: 0.08 });
  const [mouth, setMouth] = useState<Rect>(initialData?.mouth || { x: 0.45, y: 0.6, w: 0.1, h: 0.05 });
  const [skinColor, setSkinColor] = useState<string>(initialData?.skinColor || '#fcd3bf');
  const [activeFeature, setActiveFeature] = useState<ActiveFeature>(null);

  // One descriptor per feature drives both the overlay boxes and the legend.
  const features = [
    { id: 'leftEye', label: 'Left Eye', legend: 'Left Eye Box', color: '#ef4444', swatch: 'bg-red-500', rect: leftEye, onUpdate: setLeftEye },
    { id: 'rightEye', label: 'Right Eye', legend: 'Right Eye Box', color: '#3b82f6', swatch: 'bg-blue-500', rect: rightEye, onUpdate: setRightEye },
    { id: 'mouth', label: 'Mouth', legend: 'Mouth Box', color: '#22c55e', swatch: 'bg-green-500', rect: mouth, onUpdate: setMouth },
  ] as const;

  const handleFinish = () => {
    onComplete({ leftEye, rightEye, mouth, skinColor });
  };

  return (
    <div className="flex flex-col items-center h-full max-w-4xl mx-auto p-4">
      <div className="text-center mb-6">
        <h2 className="text-2xl font-bold text-white mb-2">Rig Your Avatar</h2>
        <p className="text-slate-400">
          Drag and resize the boxes to match your avatar's features.
          This ensures the eyes blink correctly.
        </p>
      </div>

      <div className="flex gap-8 w-full items-start">
        {/* Editor Area */}
        <div className="flex-1 bg-slate-800 p-4 rounded-xl border border-slate-700 flex justify-center">
          <div className="relative inline-block select-none" style={{ width: '500px', maxWidth: '100%' }}>
            <img
              src={imageUrl}
              alt="Rigging Target"
              className="w-full h-auto rounded-lg pointer-events-none select-none block"
              draggable={false}
            />

            {features.map((feature) => (
              <ResizableBox
                key={feature.id}
                rect={feature.rect}
                color={feature.color}
                label={feature.label}
                isActive={activeFeature === feature.id}
                onUpdate={feature.onUpdate}
                onActivate={() => setActiveFeature(feature.id)}
              />
            ))}
          </div>
        </div>

        {/* Sidebar Controls */}
        <div className="w-64 flex flex-col gap-6 bg-slate-800/50 p-6 rounded-xl border border-slate-700 h-full">

          <div>
            <label className="block text-sm font-medium text-slate-300 mb-2">Eyelid Color</label>
            <div className="flex items-center gap-3">
              <input
                type="color"
                value={skinColor}
                onChange={(e) => setSkinColor(e.target.value)}
                className="w-10 h-10 rounded cursor-pointer border-0 p-0"
              />
              <span className="text-xs text-slate-400 font-mono">{skinColor}</span>
            </div>
            <p className="text-xs text-slate-500 mt-2">
              Pick the color of the skin above the eyes for realistic blinking.
            </p>
          </div>

          <div className="space-y-2">
            {features.map((feature) => (
              <div key={feature.id} className="flex items-center gap-2 text-sm text-slate-300">
                <div className={`w-3 h-3 ${feature.swatch} rounded-full`}></div>
                <span>{feature.legend}</span>
              </div>
            ))}
          </div>

          <div className="mt-auto pt-6">
            <button
              onClick={handleFinish}
              className="w-full py-3 bg-gradient-to-r from-cyan-500 to-blue-600 hover:from-cyan-400 hover:to-blue-500 text-white rounded-xl font-bold shadow-lg shadow-cyan-500/25 transform hover:scale-[1.02] transition-all"
            >
              Finish Rigging
            </button>
          </div>
        </div>
      </div>
    </div>
  );
};
|
||||
|
||||
export default RiggingEditor;
|
||||
259
components/Studio.tsx
Normal file
259
components/Studio.tsx
Normal file
@ -0,0 +1,259 @@
|
||||
import React, { useEffect, useRef, useState } from 'react';
|
||||
import { useFaceTracking } from '../hooks/useFaceTracking';
|
||||
import { AvatarConfig } from '../types';
|
||||
|
||||
/** Props accepted by the live studio screen. */
interface StudioProps {
  /** Rigged avatar to animate. */
  avatar: AvatarConfig;
  /** Invoked when the user presses the exit button. */
  onBack: () => void;
}
|
||||
|
||||
/**
 * Live studio screen. Opens the webcam into a hidden <video>, feeds that
 * element to `useFaceTracking`, and renders the avatar image with transforms,
 * eyelid overlays and a mouth overlay derived from the tracking data.
 */
const Studio: React.FC<StudioProps> = ({ avatar, onBack }) => {
  const videoRef = useRef<HTMLVideoElement>(null);
  const [cameraReady, setCameraReady] = useState(false);

  // We use the custom hook to get tracking data.
  // NOTE(review): the ref is still null on the first render; the element only
  // reaches the hook on a later render (e.g. the one caused by setCameraReady).
  const { trackingData, isLoading: isModelLoading, startTracking } = useFaceTracking(videoRef.current);

  // Initialize Camera
  useEffect(() => {
    // The element and stream are kept in effect-local variables: on unmount
    // React clears `videoRef.current` before this cleanup runs, so reading the
    // ref there would find nothing and leave the webcam tracks running.
    const videoEl = videoRef.current;
    let activeStream: MediaStream | null = null;
    let cancelled = false;

    const startCamera = async () => {
      try {
        const stream = await navigator.mediaDevices.getUserMedia({
          video: { width: 640, height: 480 }, // Lower res is fine for tracking
          audio: false
        });

        if (cancelled) {
          // Permission was granted after the component went away; release immediately.
          stream.getTracks().forEach(track => track.stop());
          return;
        }

        activeStream = stream;
        if (videoEl) {
          videoEl.srcObject = stream;
          videoEl.onloadeddata = () => {
            setCameraReady(true);
          };
        }
      } catch (err: unknown) {
        if (cancelled) return;
        console.error("Error accessing camera:", err);
        alert("Could not access camera. Please ensure permissions are granted.");
      }
    };

    void startCamera();

    return () => {
      // Cleanup stream
      cancelled = true;
      activeStream?.getTracks().forEach(track => track.stop());
      if (videoEl) {
        videoEl.onloadeddata = null;
        videoEl.srcObject = null;
      }
    };
  }, []);

  // Start tracking when both camera and model are ready
  useEffect(() => {
    if (cameraReady && !isModelLoading) {
      startTracking();
    }
  }, [cameraReady, isModelLoading, startTracking]);

  // Calculate styles based on tracking data
  const getAvatarStyle = () => {
    // Deadzone for jitter reduction
    const smooth = (val: number) => Math.abs(val) < 0.02 ? 0 : val;

    const rX = smooth(trackingData.rotationX); // Pitch
    const rY = smooth(trackingData.rotationY); // Yaw
    const rZ = smooth(trackingData.rotationZ); // Roll
    const tX = smooth(trackingData.translationX);
    const tY = smooth(trackingData.translationY);

    // Bounce effect on mouth open (Speaking emulation)
    const bounce = trackingData.mouthOpen > 0.1 ? -5 * trackingData.mouthOpen : 0;

    return {
      transform: `
        translate(${tX * 150}px, ${tY * 100 + bounce}px)
        rotate(${rZ * 1}rad)
        perspective(500px)
        rotateX(${rX * 15}deg)
        rotateY(${rY * -25}deg)
        scale(${1 + trackingData.mouthOpen * 0.02})
      `,
      filter: `brightness(${1 + trackingData.mouthOpen * 0.05})`, // Slight flash when speaking
      transition: 'transform 0.1s ease-out, filter 0.1s ease'
    };
  };

  /**
   * Renders one eyelid: a skin-coloured patch over the eye rectangle that is
   * scaled from the top to full height while `closed` is true and to zero
   * otherwise. Renders nothing when the eye rectangle or skin colour is missing.
   */
  const renderEyelid = (
    eye: AvatarConfig['leftEye'] | AvatarConfig['rightEye'],
    closed: boolean,
  ) =>
    eye && avatar.skinColor ? (
      <div
        className="absolute pointer-events-none"
        style={{
          left: `${eye.x * 100}%`,
          top: `${eye.y * 100}%`,
          width: `${eye.w * 100}%`,
          height: `${eye.h * 100}%`,
          backgroundColor: avatar.skinColor,
          transform: `scaleY(${closed ? 1 : 0})`,
          transformOrigin: 'top',
          transition: 'transform 0.1s cubic-bezier(0.4, 0, 0.2, 1)', // Snappy blink
          borderRadius: '0 0 40% 40%'
        }}
      />
    ) : null;

  return (
    <div className="h-screen w-full flex flex-col bg-slate-900 overflow-hidden relative">
      {/* Hidden Video Element for Tracking */}
      <video
        ref={videoRef}
        autoPlay
        playsInline
        muted
        className="absolute opacity-0 pointer-events-none w-1 h-1"
      />

      {/* Top Bar */}
      <div className="absolute top-0 left-0 right-0 z-20 p-4 flex justify-between items-center bg-gradient-to-b from-slate-900 to-transparent">
        <button
          onClick={onBack}
          className="px-4 py-2 bg-slate-800/80 hover:bg-slate-700 backdrop-blur rounded-lg text-white font-medium transition-colors border border-slate-600"
        >
          ← Exit Studio
        </button>
        <div className="flex gap-2">
          <div className={`px-3 py-1 rounded-full text-xs font-bold flex items-center gap-2 ${isModelLoading ? 'bg-yellow-500/20 text-yellow-400' : 'bg-green-500/20 text-green-400'}`}>
            <span className={`w-2 h-2 rounded-full ${isModelLoading ? 'bg-yellow-400 animate-pulse' : 'bg-green-400'}`}></span>
            {isModelLoading ? 'Loading Vision Model...' : 'Tracking Active'}
          </div>
          <div className="px-3 py-1 rounded-full text-xs font-bold bg-purple-500/20 text-purple-400 border border-purple-500/30">
            {avatar.name}
          </div>
        </div>
      </div>

      {/* Main Stage */}
      <div className="flex-1 relative flex items-center justify-center overflow-hidden">
        {/* Background Grid/Effect */}
        <div className="absolute inset-0 opacity-20"
          style={{
            backgroundImage: 'radial-gradient(#4f46e5 1px, transparent 1px)',
            backgroundSize: '30px 30px'
          }}>
        </div>

        <div className="absolute inset-0 bg-gradient-to-t from-slate-900 via-transparent to-slate-900 pointer-events-none"></div>

        {/* Avatar Container */}
        <div className="relative w-[600px] h-[600px] flex items-center justify-center z-10">
          <div
            className="relative w-full h-full flex items-center justify-center"
            style={getAvatarStyle()}
          >
            <img
              src={avatar.imageUrl}
              alt="Avatar"
              className="w-full h-full object-contain drop-shadow-[0_0_15px_rgba(168,85,247,0.5)]"
            />

            {/* Dynamic Eyelids */}
            {renderEyelid(avatar.leftEye, trackingData.isBlinkingLeft)}
            {renderEyelid(avatar.rightEye, trackingData.isBlinkingRight)}

            {/* Dynamic Mouth Animation */}
            {avatar.mouth && (
              <div
                className="absolute pointer-events-none flex items-center justify-center z-10"
                style={{
                  left: `${avatar.mouth.x * 100}%`,
                  top: `${avatar.mouth.y * 100}%`,
                  width: `${avatar.mouth.w * 100}%`,
                  height: `${avatar.mouth.h * 100}%`,
                }}
              >
                {/* Skin Patch - Hides the static closed mouth when speaking */}
                <div
                  className="absolute w-[120%] h-[120%] transition-opacity duration-75"
                  style={{
                    backgroundColor: avatar.skinColor || '#fcd3bf',
                    opacity: trackingData.mouthOpen > 0.1 ? 1 : 0,
                    filter: 'blur(3px)', // Blends edges
                    borderRadius: '40%'
                  }}
                />

                {/* Mouth Interior - Scales based on mouth openness */}
                <div
                  className="relative w-full h-full bg-[#4a1212] border-2 border-[#2d0a0a] overflow-hidden origin-center transition-transform duration-75"
                  style={{
                    borderRadius: '50% 50% 50% 50% / 50% 50% 30% 30%', // Slightly more jaw-like shape
                    // trackingData.mouthOpen is 0-1. We amplify it for better visuals.
                    transform: `scaleY(${Math.min(1.2, trackingData.mouthOpen * 4)}) scaleX(${0.9 + trackingData.mouthOpen * 0.1})`,
                    opacity: trackingData.mouthOpen > 0.05 ? 1 : 0,
                  }}
                >
                  {/* Tongue */}
                  <div
                    className="absolute bottom-[-20%] left-1/2 -translate-x-1/2 w-[80%] h-[60%] bg-[#d45d5d] rounded-t-full"
                  />
                </div>
              </div>
            )}
          </div>

          {/* Overlay shown until the camera has delivered its first frame */}
          {(!cameraReady) && (
            <div className="absolute inset-0 flex items-center justify-center bg-slate-900/80 z-20 rounded-xl backdrop-blur-sm">
              <div className="text-cyan-400 animate-pulse font-mono">INITIALIZING CAMERA LINK...</div>
            </div>
          )}
        </div>
      </div>

      {/* Control Deck */}
      <div className="h-24 bg-slate-800 border-t border-slate-700 p-4 flex justify-center items-center gap-6 z-20">
        <div className="flex flex-col items-center">
          <span className="text-xs text-slate-400 mb-1 font-mono">MOUTH</span>
          <div className="w-24 h-2 bg-slate-700 rounded-full overflow-hidden">
            <div className="h-full bg-cyan-400 transition-all duration-75" style={{ width: `${Math.min(trackingData.mouthOpen * 100, 100)}%` }}></div>
          </div>
        </div>

        <div className="flex flex-col items-center">
          <span className="text-xs text-slate-400 mb-1 font-mono">HEAD ROLL</span>
          <div className="w-24 h-2 bg-slate-700 rounded-full overflow-hidden flex justify-center relative">
            {/* Center marker */}
            <div className="absolute w-[1px] h-full bg-slate-500 left-1/2"></div>
            {/* Bar grows left of centre for negative roll, right of centre otherwise */}
            <div
              className="h-full bg-purple-500 transition-all duration-75 absolute"
              style={{
                width: `${Math.abs(trackingData.rotationZ * 50)}%`,
                left: trackingData.rotationZ < 0 ? 'auto' : '50%',
                right: trackingData.rotationZ < 0 ? '50%' : 'auto'
              }}
            ></div>
          </div>
        </div>

        <div className="flex flex-col items-center">
          <span className="text-xs text-slate-400 mb-1 font-mono">BLINK</span>
          <div className="flex gap-2">
            <div className={`w-8 h-2 rounded-full ${trackingData.isBlinkingLeft ? 'bg-pink-500' : 'bg-slate-700'}`}></div>
            <div className={`w-8 h-2 rounded-full ${trackingData.isBlinkingRight ? 'bg-pink-500' : 'bg-slate-700'}`}></div>
          </div>
        </div>
      </div>
    </div>
  );
};
|
||||
|
||||
export default Studio;
|
||||
140
hooks/useFaceTracking.ts
Normal file
140
hooks/useFaceTracking.ts
Normal file
@ -0,0 +1,140 @@
|
||||
import { useEffect, useRef, useState, useCallback } from 'react';
|
||||
import { FaceLandmarker, FilesetResolver, DrawingUtils } from '@mediapipe/tasks-vision';
|
||||
import { TrackingData } from '../types';
|
||||
|
||||
/**
 * React hook that runs MediaPipe's FaceLandmarker against a video element and
 * exposes the latest head pose / expression estimate as React state.
 *
 * The landmarker is created once on mount and closed on unmount. Tracking is a
 * requestAnimationFrame loop that the caller starts and stops explicitly.
 *
 * @param videoElement Video to analyse, or `null` while none is available.
 * @returns `isLoading` (model initialisation still in flight), `isTracking`,
 *   the latest `trackingData`, and the `startTracking` / `stopTracking` controls.
 */
export const useFaceTracking = (videoElement: HTMLVideoElement | null) => {
  const [isTracking, setIsTracking] = useState(false);
  const [isLoading, setIsLoading] = useState(true);
  const faceLandmarkerRef = useRef<FaceLandmarker | null>(null);
  const requestRef = useRef<number | null>(null);
  const lastVideoTimeRef = useRef<number>(-1);
  const [trackingData, setTrackingData] = useState<TrackingData>({
    rotationX: 0,
    rotationY: 0,
    rotationZ: 0,
    translationX: 0,
    translationY: 0,
    mouthOpen: 0,
    isBlinkingLeft: false,
    isBlinkingRight: false,
  });

  // Initialize FaceLandmarker
  useEffect(() => {
    // Set by the cleanup so an initialisation that finishes after unmount
    // (e.g. React StrictMode's mount/unmount/mount) releases its own instance
    // instead of leaking it or touching state of an unmounted component.
    let disposed = false;

    const initMediaPipe = async () => {
      try {
        // Use specific version to match index.html import and prevent version mismatch
        const filesetResolver = await FilesetResolver.forVisionTasks(
          "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.18/wasm"
        );

        const landmarker = await FaceLandmarker.createFromOptions(filesetResolver, {
          baseOptions: {
            modelAssetPath: `https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task`,
            delegate: "GPU"
          },
          outputFaceBlendshapes: true,
          outputFacialTransformationMatrixes: true,
          runningMode: "VIDEO",
          numFaces: 1
        });

        if (disposed) {
          landmarker.close();
          return;
        }
        faceLandmarkerRef.current = landmarker;
      } catch (error) {
        console.error("Failed to load MediaPipe:", error);
      } finally {
        // Loading ends on success and on failure alike.
        if (!disposed) {
          setIsLoading(false);
        }
      }
    };

    void initMediaPipe();

    return () => {
      disposed = true;
      // Stop a loop that is still scheduled so it cannot run after unmount.
      if (requestRef.current !== null) {
        cancelAnimationFrame(requestRef.current);
        requestRef.current = null;
      }
      faceLandmarkerRef.current?.close();
      faceLandmarkerRef.current = null;
    };
  }, []);

  const predict = useCallback(() => {
    // This closure can never observe a different element: a new `predict` is
    // created when `videoElement` changes, so there is nothing to wait for.
    if (!videoElement) {
      requestRef.current = null;
      return;
    }

    const landmarker = faceLandmarkerRef.current;
    // The model may still be loading and the video may not have a decoded frame
    // yet (readyState < 2). Both conditions resolve on their own, so skip the
    // detection for this frame but keep the loop alive.
    const canDetect = landmarker !== null && videoElement.readyState >= 2;

    // Only run detection once per distinct video frame.
    if (canDetect && lastVideoTimeRef.current !== videoElement.currentTime) {
      lastVideoTimeRef.current = videoElement.currentTime;

      const results = landmarker.detectForVideo(videoElement, Date.now());

      if (results.faceLandmarks && results.faceLandmarks.length > 0) {
        // 1. Extract Blendshapes for Expression
        const blendshapes = results.faceBlendshapes?.[0]?.categories;
        const scoreOf = (name: string): number =>
          blendshapes?.find(c => c.categoryName === name)?.score ?? 0;

        const mouthOpen = scoreOf('jawOpen');
        const eyeBlinkLeft = scoreOf('eyeBlinkLeft');
        const eyeBlinkRight = scoreOf('eyeBlinkRight');

        // 2. Estimate Pose (simplified)
        // MediaPipe gives a matrix, but often for 2D avatars, simple landmark delta is cleaner.
        // We use specific landmarks to calculate roll, yaw, pitch approximation.
        const landmarks = results.faceLandmarks[0];

        // Roll: angle (radians) of the line between the outer eye corners
        const leftEye = landmarks[33]; // Outer left eye
        const rightEye = landmarks[263]; // Outer right eye
        const roll = Math.atan2(rightEye.y - leftEye.y, rightEye.x - leftEye.x);

        // Yaw / pitch: nose offset from the midpoint between the eyes,
        // doubled as a sensitivity factor (unitless, not an angle).
        const nose = landmarks[1];
        const yaw = (nose.x - (leftEye.x + rightEye.x) / 2) * 2;
        const pitch = (nose.y - (leftEye.y + rightEye.y) / 2) * 2;

        setTrackingData({
          rotationZ: roll,
          rotationY: yaw,
          rotationX: pitch,
          // Translation: nose position re-centred so the frame centre maps to 0
          translationX: (nose.x - 0.5) * 2,
          translationY: (nose.y - 0.5) * 2,
          mouthOpen,
          isBlinkingLeft: eyeBlinkLeft > 0.5,
          isBlinkingRight: eyeBlinkRight > 0.5
        });
      }
    }

    requestRef.current = requestAnimationFrame(predict);
  }, [videoElement]);

  const startTracking = useCallback(() => {
    // Replace any loop that is already scheduled (for example one created for a
    // previous video element) so two loops never run at the same time.
    if (requestRef.current !== null) {
      cancelAnimationFrame(requestRef.current);
    }
    setIsTracking(true);
    requestRef.current = requestAnimationFrame(predict);
  }, [predict]);

  const stopTracking = useCallback(() => {
    setIsTracking(false);
    if (requestRef.current !== null) {
      cancelAnimationFrame(requestRef.current);
      requestRef.current = null;
    }
  }, []);

  return {
    isLoading,
    isTracking,
    trackingData,
    startTracking,
    stopTracking
  };
};
|
||||
45
index.html
Normal file
45
index.html
Normal file
@ -0,0 +1,45 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Gemini V-Studio</title>
|
||||
<script src="https://cdn.tailwindcss.com"></script>
|
||||
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=Space+Grotesk:wght@400;700&display=swap" rel="stylesheet">
|
||||
<style>
|
||||
body {
|
||||
font-family: 'Inter', sans-serif;
|
||||
background-color: #0f172a;
|
||||
color: #f8fafc;
|
||||
}
|
||||
h1, h2, h3, .brand-font {
|
||||
font-family: 'Space Grotesk', sans-serif;
|
||||
}
|
||||
/* Slim, dark-themed scrollbar for a cleaner UI (the scrollbar stays visible) */
|
||||
::-webkit-scrollbar {
|
||||
width: 8px;
|
||||
}
|
||||
::-webkit-scrollbar-track {
|
||||
background: #0f172a;
|
||||
}
|
||||
::-webkit-scrollbar-thumb {
|
||||
background: #334155;
|
||||
border-radius: 4px;
|
||||
}
|
||||
</style>
|
||||
<script type="importmap">
|
||||
{
|
||||
"imports": {
|
||||
"react/": "https://aistudiocdn.com/react@^19.2.0/",
|
||||
"react": "https://aistudiocdn.com/react@^19.2.0",
|
||||
"react-dom/": "https://aistudiocdn.com/react-dom@^19.2.0/",
|
||||
"@google/genai": "https://aistudiocdn.com/@google/genai@^1.30.0",
|
||||
"@mediapipe/tasks-vision": "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.18/+esm"
|
||||
}
|
||||
}
|
||||
</script>
|
||||
</head>
|
||||
<body>
|
||||
<div id="root"></div>
|
||||
</body>
|
||||
</html>
|
||||
15
index.tsx
Normal file
15
index.tsx
Normal file
@ -0,0 +1,15 @@
|
||||
import React from 'react';
|
||||
import ReactDOM from 'react-dom/client';
|
||||
import App from './App';
|
||||
|
||||
// Application entry point: find the mount node and render <App /> into it.
const mountNode = document.getElementById('root');

// Fail fast with a readable message when the host page has no #root element.
if (mountNode === null) {
  throw new Error("Could not find root element to mount to");
}

ReactDOM.createRoot(mountNode).render(
  <React.StrictMode>
    <App />
  </React.StrictMode>
);
|
||||
7
metadata.json
Normal file
7
metadata.json
Normal file
@ -0,0 +1,7 @@
|
||||
{
|
||||
"name": "Gemini V-Studio",
|
||||
"description": "Create your own VTuber avatar using Gemini 3 Pro and animate it in real-time using MediaPipe face tracking.",
|
||||
"requestFramePermissions": [
|
||||
"camera"
|
||||
]
|
||||
}
|
||||
23
package.json
Normal file
23
package.json
Normal file
@ -0,0 +1,23 @@
|
||||
{
|
||||
"name": "gemini-v-studio",
|
||||
"private": true,
|
||||
"version": "0.0.0",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
"build": "vite build",
|
||||
"preview": "vite preview"
|
||||
},
|
||||
"dependencies": {
|
||||
"react": "^19.2.0",
|
||||
"react-dom": "^19.2.0",
|
||||
"@google/genai": "^1.30.0",
|
||||
"@mediapipe/tasks-vision": "0.10.18"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^22.14.0",
|
||||
"@vitejs/plugin-react": "^5.0.0",
|
||||
"typescript": "~5.8.2",
|
||||
"vite": "^6.2.0"
|
||||
}
|
||||
}
|
||||
53
services/geminiService.ts
Normal file
53
services/geminiService.ts
Normal file
@ -0,0 +1,53 @@
|
||||
import { GoogleGenAI } from "@google/genai";
|
||||
|
||||
/**
 * Generates a VTuber avatar image based on user description.
 * Uses gemini-3-pro-image-preview for high quality.
 *
 * @param description Free-text description of the character; it is embedded
 *   verbatim in the prompt sent to the model.
 * @returns A `data:` URL holding the first inline image of the response. The
 *   MIME type reported by the API is used, falling back to `image/png` when the
 *   response does not state one.
 * @throws Error("No image data found in response") when the response carries no
 *   inline image (including responses without candidates or content). Errors
 *   raised by the client are logged and rethrown unchanged.
 */
export const generateAvatarImage = async (description: string): Promise<string> => {
  try {
    // Initialize client inside the function to ensure we use the most up-to-date API key
    // after the user has completed the selection flow.
    const ai = new GoogleGenAI({ apiKey: process.env.API_KEY });

    // We construct a prompt that encourages a good format for a 2D avatar (front facing, clean background)
    const prompt = `
Create a high-quality, flat 2D anime or stylized character illustration suitable for a VTuber avatar.
The character should be facing forward (front view).
The background should be a solid, single color (white or bright green) to allow for easy removal or masking.

Character Description: ${description}

Style: Vibrant, clean lines, detailed eyes.
Focus: Head and shoulders only.
`;

    const response = await ai.models.generateContent({
      model: 'gemini-3-pro-image-preview',
      contents: {
        parts: [
          { text: prompt }
        ]
      },
      config: {
        imageConfig: {
          aspectRatio: "1:1",
          imageSize: "1K"
        }
      }
    });

    // Candidates, content and parts can all be absent (for example when the
    // request is blocked). Treat that as "no image" rather than crashing with
    // an opaque TypeError.
    const parts = response.candidates?.[0]?.content?.parts ?? [];

    for (const part of parts) {
      const inline = part.inlineData;
      // Skip text parts and inline entries that carry no payload.
      if (inline?.data) {
        const mimeType = inline.mimeType || 'image/png';
        return `data:${mimeType};base64,${inline.data}`;
      }
    }

    throw new Error("No image data found in response");
  } catch (error) {
    console.error("Error generating avatar:", error);
    throw error;
  }
};
|
||||
128
services/visionService.ts
Normal file
128
services/visionService.ts
Normal file
@ -0,0 +1,128 @@
|
||||
|
||||
import { FaceLandmarker, FilesetResolver } from '@mediapipe/tasks-vision';
|
||||
import { Rect } from '../types';
|
||||
|
||||
let faceLandmarker: FaceLandmarker | null = null;
|
||||
|
||||
// Lazily creates the module-level FaceLandmarker used for static image analysis.
// Repeat calls are no-ops once an instance exists. A failure is logged and leaves
// `faceLandmarker` as null, so callers must check it after awaiting.
const initVision = async () => {
  if (faceLandmarker !== null) {
    return;
  }

  try {
    const wasmFileset = await FilesetResolver.forVisionTasks(
      "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.18/wasm"
    );

    const landmarker = await FaceLandmarker.createFromOptions(wasmFileset, {
      baseOptions: {
        modelAssetPath: `https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task`,
        delegate: "GPU"
      },
      runningMode: "IMAGE",
      numFaces: 1
    });

    faceLandmarker = landmarker;
  } catch (e) {
    console.error("Failed to initialize vision service:", e);
  }
};
|
||||
|
||||
/**
 * Runs face-landmark detection on an avatar image and derives rigging hints.
 *
 * @param imageUrl URL (or data URL) of the image to analyse. It is loaded with
 *   `crossOrigin = "anonymous"` so its pixels can be read back from a canvas.
 * @returns Padded bounding boxes for both eyes and the mouth, expressed in the
 *   landmark coordinate space returned by the detector (not scaled by the image
 *   size here), plus a `#rrggbb` skin colour sampled at landmark 123. Resolves
 *   to `null` when the model is unavailable, no face is found, or the image
 *   fails to load or analyse. Failures are logged, never thrown.
 */
export const analyzeAvatarImage = async (imageUrl: string): Promise<{ leftEye: Rect, rightEye: Rect, mouth: Rect, skinColor: string } | null> => {
  type Analysis = { leftEye: Rect, rightEye: Rect, mouth: Rect, skinColor: string };

  try {
    await initVision();
    // Capture in a const so the null check still holds inside the callbacks.
    const landmarker = faceLandmarker;
    if (!landmarker) return null;

    // `await` is required here: without it a rejection would bypass the catch
    // below and surface to the caller instead of becoming `null`.
    return await new Promise<Analysis | null>((resolve, reject) => {
      const img = new Image();
      img.crossOrigin = "anonymous";
      img.onload = () => {
        try {
          const landmarks = landmarker.detect(img).faceLandmarks?.[0];

          if (!landmarks) {
            console.warn("No face detected in generated image");
            resolve(null);
            return;
          }

          // Helper to calculate bounding box from landmark indices
          const getRect = (indices: number[]): Rect => {
            // Seed with +/-Infinity so the bounds stay correct even when a
            // landmark falls outside the [0, 1] range.
            let minX = Infinity, minY = Infinity, maxX = -Infinity, maxY = -Infinity;

            for (const i of indices) {
              const l = landmarks[i];
              minX = Math.min(minX, l.x);
              maxX = Math.max(maxX, l.x);
              minY = Math.min(minY, l.y);
              maxY = Math.max(maxY, l.y);
            }

            const w = maxX - minX;
            const h = maxY - minY;

            // Expand slightly to cover the area comfortably
            const paddingX = w * 0.1;
            const paddingY = h * 0.1;

            return {
              x: minX - paddingX,
              y: minY - paddingY,
              w: w + (paddingX * 2),
              h: h + (paddingY * 2),
            };
          };

          // MediaPipe Mesh Indices
          const leftEyeIndices = [33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246];
          const rightEyeIndices = [362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398];
          const mouthIndices = [61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 375, 321, 405, 314, 17, 84, 181, 91, 146];

          // Sample Skin Color
          const canvas = document.createElement('canvas');
          canvas.width = img.width;
          canvas.height = img.height;
          const ctx = canvas.getContext('2d');

          let color = '#fcd3bf'; // Default fallback

          if (ctx) {
            ctx.drawImage(img, 0, 0);

            // Landmark 123 is on the left cheek bone area
            const sample = landmarks[123];
            const lx = Math.floor(sample.x * img.width);
            const ly = Math.floor(sample.y * img.height);

            // Only sample when the landmark maps to a pixel inside the image.
            if (lx >= 0 && lx < img.width && ly >= 0 && ly < img.height) {
              const pixel = ctx.getImageData(lx, ly, 1, 1).data;
              // Convert rgb to hex for input type="color"
              const toHex = (c: number) => c.toString(16).padStart(2, '0');
              color = `#${toHex(pixel[0])}${toHex(pixel[1])}${toHex(pixel[2])}`;
            }
          }

          resolve({
            leftEye: getRect(leftEyeIndices),
            rightEye: getRect(rightEyeIndices),
            mouth: getRect(mouthIndices),
            skinColor: color
          });
        } catch (e) {
          reject(e);
        }
      };
      img.onerror = () => reject(new Error("Failed to load image for analysis"));
      img.src = imageUrl;
    });
  } catch (error) {
    console.error("Analysis failed", error);
    return null;
  }
};
|
||||
29
tsconfig.json
Normal file
29
tsconfig.json
Normal file
@ -0,0 +1,29 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"experimentalDecorators": true,
|
||||
"useDefineForClassFields": false,
|
||||
"module": "ESNext",
|
||||
"lib": [
|
||||
"ES2022",
|
||||
"DOM",
|
||||
"DOM.Iterable"
|
||||
],
|
||||
"skipLibCheck": true,
|
||||
"types": [
|
||||
"node"
|
||||
],
|
||||
"moduleResolution": "bundler",
|
||||
"isolatedModules": true,
|
||||
"moduleDetection": "force",
|
||||
"allowJs": true,
|
||||
"jsx": "react-jsx",
|
||||
"paths": {
|
||||
"@/*": [
|
||||
"./*"
|
||||
]
|
||||
},
|
||||
"allowImportingTsExtensions": true,
|
||||
"noEmit": true
|
||||
}
|
||||
}
|
||||
40
types.ts
Normal file
40
types.ts
Normal file
@ -0,0 +1,40 @@
|
||||
|
||||
/**
 * Identifies which stage of the application is active.
 * String-valued so each member equals its own name at runtime.
 * App.tsx initialises its state to SETUP.
 */
export enum AppState {
|
||||
SETUP = 'SETUP',
|
||||
CREATION = 'CREATION',
|
||||
RIGGING = 'RIGGING',
|
||||
STUDIO = 'STUDIO',
|
||||
}
|
||||
|
||||
/**
 * Axis-aligned rectangle: origin (x, y) plus width (w) and height (h).
 * NOTE(review): services/visionService.ts fills these from MediaPipe landmark
 * coordinates without scaling by image size, so values are presumably
 * fractions of the image rather than pixels — confirm against the consumers.
 */
export interface Rect {
|
||||
x: number;
|
||||
y: number;
|
||||
w: number;
|
||||
h: number;
|
||||
}
|
||||
|
||||
/**
 * Description of an avatar: its image, identity text and optional rigging data.
 * The optional fields mirror what analyzeAvatarImage returns (eye and mouth
 * rectangles and a `#rrggbb` skin colour); they are absent when not set.
 */
export interface AvatarConfig {
|
||||
imageUrl: string;
|
||||
name: string;
|
||||
description: string;
|
||||
leftEye?: Rect;
|
||||
rightEye?: Rect;
|
||||
mouth?: Rect;
|
||||
skinColor?: string;
|
||||
}
|
||||
|
||||
/**
 * One face-tracking sample, as produced by hooks/useFaceTracking.ts:
 * - rotationZ is an angle in radians (Math.atan2 of the eye-to-eye line);
 * - rotationX / rotationY are doubled nose offsets from the eye midpoint in
 *   landmark coordinates (unitless, not angles);
 * - translationX / translationY are the nose position re-centred as
 *   (value - 0.5) * 2;
 * - mouthOpen is the 'jawOpen' blendshape score;
 * - the blink flags are true when the matching eyeBlink score exceeds 0.5.
 */
export interface TrackingData {
|
||||
rotationX: number; // Pitch
|
||||
rotationY: number; // Yaw
|
||||
rotationZ: number; // Roll
|
||||
translationX: number;
|
||||
translationY: number;
|
||||
mouthOpen: number;
|
||||
isBlinkingLeft: boolean;
|
||||
isBlinkingRight: boolean;
|
||||
}
|
||||
|
||||
/**
 * Shape of the optional `window.aistudio` host object that App.tsx uses to
 * manage the API key: one method reports whether a key has been selected, the
 * other opens the key-selection flow. Both are asynchronous.
 */
export interface AIStudio {
|
||||
hasSelectedApiKey(): Promise<boolean>;
|
||||
openSelectKey(): Promise<void>;
|
||||
}
|
||||
23
vite.config.ts
Normal file
23
vite.config.ts
Normal file
@ -0,0 +1,23 @@
|
||||
import path from 'path';
|
||||
import { defineConfig, loadEnv } from 'vite';
|
||||
import react from '@vitejs/plugin-react';
|
||||
|
||||
// Vite configuration: dev server on port 3000 bound to all interfaces, the React
// plugin, the '@' alias for the project root, and the Gemini API key inlined
// into the bundle under both `process.env.API_KEY` and `process.env.GEMINI_API_KEY`.
export default defineConfig(({ mode }) => {
  // Read GEMINI_API_KEY from the env files for the current mode.
  const { GEMINI_API_KEY } = loadEnv(mode, '.', '');
  // `define` substitutes raw source text, so the value must be a JSON literal.
  const apiKeyLiteral = JSON.stringify(GEMINI_API_KEY);

  return {
    server: { port: 3000, host: '0.0.0.0' },
    plugins: [react()],
    define: {
      'process.env.API_KEY': apiKeyLiteral,
      'process.env.GEMINI_API_KEY': apiKeyLiteral,
    },
    resolve: {
      alias: { '@': path.resolve(__dirname, '.') },
    },
  };
});
|
||||
Loading…
Reference in New Issue
Block a user