refactor: Improve avatar asset processing

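Separates asset analysis into distinct steps so that face landmarks are detected on each source image (base, blink, talk) individually rather than on the stitched sheet, then mapped into the stitched sheet's coordinate space. Reuses the `fileToDataUrl` utility from `imageService` in `AvatarCreator` and modifies `stitchAssets` to accept image source strings instead of `File` objects, so each file is converted to a data URL only once, reducing redundant file processing and improving clarity.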
2025-11-20 22:03:53 +01:00 · 2025-11-20 22:03:53 +01:00 · 5078d67d4f
commit 5078d67d4f
parent ddb2455416
2 changed files with 69 additions and 22 deletions
--- a/components/AvatarCreator.tsx
+++ b/components/AvatarCreator.tsx
@ -1,7 +1,7 @@
 import React, { useState } from 'react';
 import { generateAvatarImage } from '../services/geminiService';
 import { analyzeAvatarImage } from '../services/visionService';
-import { stitchAssets } from '../services/imageService';
+import { stitchAssets, fileToDataUrl } from '../services/imageService';
 import LoadingSpinner from './LoadingSpinner';
 import { Rect } from '../types';

@ -61,23 +61,72 @@ const AvatarCreator: React.FC<AvatarCreatorProps> = ({ onAvatarGenerated }) => {
    setError(null);

    try {
-      // 1. Stitch Assets into Sheet
-      const { imageUrl, mainBody, textureClosedEye, textureOpenMouth } = await stitchAssets(baseFile, blinkFile || undefined, talkFile || undefined);
+      // 1. Prepare Base Image and Analyze it separately
+      // Analyzing separately ensures we get landmarks for the main face correctly 
+      // without interference from other faces in a stitched sheet.
+      const baseDataUrl = await fileToDataUrl(baseFile);
+      const baseAnalysis = await analyzeAvatarImage(baseDataUrl);

-      // 2. Analyze the Main Body part of the image
-      // Note: analyzeAvatarImage analyzes the whole image, but since we put the face on the left (or full image), 
-      // it should find the face correctly.
-      setStatus('analyzing');
-      const analysisData = await analyzeAvatarImage(imageUrl);
+      // 2. Prepare and Analyze Variant Images
+      let blinkDataUrl, blinkAnalysis;
+      if (blinkFile) {
+        blinkDataUrl = await fileToDataUrl(blinkFile);
+        // Try to find eyes in the blink image to use as tight texture crop
+        blinkAnalysis = await analyzeAvatarImage(blinkDataUrl);
+      }

-      // 3. Combine manual stitch data with automatic vision data
-      const initialData = {
-        ...(analysisData || {}),
+      let talkDataUrl, talkAnalysis;
+      if (talkFile) {
+        talkDataUrl = await fileToDataUrl(talkFile);
+        // Try to find mouth in the talk image
+        talkAnalysis = await analyzeAvatarImage(talkDataUrl);
+      }
+
+      // 3. Stitch Assets into Sheet
+      const { imageUrl, mainBody, textureClosedEye: stitchBlinkRect, textureOpenMouth: stitchTalkRect } = await stitchAssets(baseDataUrl, blinkDataUrl, talkDataUrl);
+
+      // 4. Map Analysis Data to Stitched Coordinate Space
+      
+      // Helper to map a rect from (0-1 in sub-image) to (0-1 in stitched-image)
+      const mapRect = (r: Rect, container: Rect) => ({
+        x: container.x + r.x * container.w,
+        y: container.y + r.y * container.h,
+        w: r.w * container.w,
+        h: r.h * container.h
+      });
+
+      let initialData: any = {
        mainBody,
-        textureClosedEye,
-        textureOpenMouth
+        textureClosedEye: stitchBlinkRect,
+        textureOpenMouth: stitchTalkRect
      };

+      // Map Base Targets (Eyes, Mouth on main body)
+      if (baseAnalysis) {
+        initialData.leftEye = mapRect(baseAnalysis.leftEye, mainBody);
+        initialData.rightEye = mapRect(baseAnalysis.rightEye, mainBody);
+        initialData.mouth = mapRect(baseAnalysis.mouth, mainBody);
+        initialData.skinColor = baseAnalysis.skinColor;
+      }
+
+      // Map Source Textures (Tight crop around features if detected)
+      // If detections fail (e.g. eyes closed might not be detected), we fall back to the whole image (stitchBlinkRect)
+      if (blinkAnalysis && stitchBlinkRect) {
+         // Calculate a bounding box around both eyes in the blink image
+         const be = blinkAnalysis;
+         const minX = Math.min(be.leftEye.x, be.rightEye.x);
+         const minY = Math.min(be.leftEye.y, be.rightEye.y);
+         const maxX = Math.max(be.leftEye.x + be.leftEye.w, be.rightEye.x + be.rightEye.w);
+         const maxY = Math.max(be.leftEye.y + be.leftEye.h, be.rightEye.y + be.rightEye.h);
+         
+         const eyesRect = { x: minX, y: minY, w: maxX - minX, h: maxY - minY };
+         initialData.textureClosedEye = mapRect(eyesRect, stitchBlinkRect);
+      }
+
+      if (talkAnalysis && stitchTalkRect) {
+         initialData.textureOpenMouth = mapRect(talkAnalysis.mouth, stitchTalkRect);
+      }
+
      onAvatarGenerated(imageUrl, name, initialData);
    } catch (err) {
      console.error(err);
--- a/services/imageService.ts
+++ b/services/imageService.ts
@ -20,16 +20,14 @@ export const loadImage = (src: string): Promise<HTMLImageElement> => {
 };

 export const stitchAssets = async (
-  base: File,
-  blink?: File,
-  talk?: File
+  baseSrc: string,
+  blinkSrc?: string,
+  talkSrc?: string
 ): Promise<{ imageUrl: string; mainBody: Rect; textureClosedEye?: Rect; textureOpenMouth?: Rect }> => {
    // Load images
-    const baseData = await fileToDataUrl(base);
-    const baseImg = await loadImage(baseData);
-
-    const blinkImg = blink ? await loadImage(await fileToDataUrl(blink)) : null;
-    const talkImg = talk ? await loadImage(await fileToDataUrl(talk)) : null;
+    const baseImg = await loadImage(baseSrc);
+    const blinkImg = blinkSrc ? await loadImage(blinkSrc) : null;
+    const talkImg = talkSrc ? await loadImage(talkSrc) : null;

    // Layout: Base on Left. Sidebar on Right containing Blink (top) and Talk (bottom).
    // Sidebar width = max(blink.width, talk.width)
@ -38,7 +36,7 @@ export const stitchAssets = async (
    // If there are no variants, just return the base image as is
    if (sidebarWidth === 0) {
        return {
-            imageUrl: baseData,
+            imageUrl: baseSrc,
            mainBody: { x: 0, y: 0, w: 1, h: 1 }
        };
    }