// vtube-studio/services/visionService.ts


import { FaceLandmarker, FilesetResolver } from '@mediapipe/tasks-vision';
import { Rect } from '../types';

// Shared landmarker instance; stays null until initialization succeeds.
let faceLandmarker: FaceLandmarker | null = null;

// Initialization currently in progress, if any. Sharing it lets concurrent
// callers wait on one attempt instead of each creating their own landmarker.
let pendingVisionInit: Promise<void> | null = null;

/**
 * Loads the WASM fileset and creates the face landmarker in IMAGE mode.
 *
 * Failures are logged and swallowed; `faceLandmarker` is then left as null so
 * callers can detect that the service is unavailable.
 */
const createFaceLandmarker = async (): Promise<void> => {
  try {
    const filesetResolver = await FilesetResolver.forVisionTasks(
      "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.18/wasm"
    );

    faceLandmarker = await FaceLandmarker.createFromOptions(filesetResolver, {
      baseOptions: {
        modelAssetPath: `https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task`,
        delegate: "GPU"
      },
      runningMode: "IMAGE",
      numFaces: 1
    });
  } catch (e) {
    console.error("Failed to initialize vision service:", e);
  }
};

/**
 * Initialize the vision model for static image analysis.
 *
 * Resolves once `faceLandmarker` is ready or the current attempt has failed;
 * it never rejects. Calls made while an attempt is running share that attempt.
 * A failed attempt is not cached, so the next call tries again.
 */
const initVision = async (): Promise<void> => {
  if (faceLandmarker) return;

  if (!pendingVisionInit) {
    // The `finally` callback always runs asynchronously, i.e. after the
    // assignment below, so it cannot be overwritten by a stale promise.
    pendingVisionInit = createFaceLandmarker().finally(() => {
      pendingVisionInit = null;
    });
  }

  await pendingVisionInit;
};

// MediaPipe Mesh Indices outlining each facial feature.
const LEFT_EYE_INDICES = [33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246];
const RIGHT_EYE_INDICES = [362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398];
const MOUTH_INDICES = [61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 375, 321, 405, 314, 17, 84, 181, 91, 146];

// Landmark 123 is on the left cheek bone area; used as the skin sample point.
const SKIN_SAMPLE_INDEX = 123;

// Colour reported when the skin pixel cannot be sampled.
const DEFAULT_SKIN_COLOR = '#fcd3bf';

// Fraction of a feature's width/height added as padding on each side.
const RECT_PADDING_RATIO = 0.1;

// Minimal structural view of a landmark: only x and y are read here.
type NormalizedPoint = { x: number; y: number };

/** Loads `url` into a detached image element with anonymous CORS enabled. */
const loadImageForAnalysis = (url: string): Promise<HTMLImageElement> =>
  new Promise((resolve, reject) => {
    const img = new Image();
    img.crossOrigin = "anonymous";
    img.onload = () => resolve(img);
    img.onerror = () => reject(new Error("Failed to load image for analysis"));
    img.src = url;
  });

/**
 * Computes the padded bounding box of the landmarks selected by `indices`,
 * in the same coordinate space as the landmarks themselves.
 *
 * @throws Error if one of the requested landmarks is absent.
 */
const landmarkBounds = (
  landmarks: ReadonlyArray<NormalizedPoint>,
  indices: readonly number[]
): Rect => {
  // Seed with +/-Infinity rather than 1/0 so that points lying outside the
  // [0, 1] range still produce their true extent.
  let minX = Infinity;
  let minY = Infinity;
  let maxX = -Infinity;
  let maxY = -Infinity;

  for (const index of indices) {
    const point = landmarks[index];
    if (!point) {
      throw new Error(`Face landmark ${index} is missing from the detection result`);
    }
    minX = Math.min(minX, point.x);
    maxX = Math.max(maxX, point.x);
    minY = Math.min(minY, point.y);
    maxY = Math.max(maxY, point.y);
  }

  const w = maxX - minX;
  const h = maxY - minY;

  // Expand slightly to cover the area comfortably
  const paddingX = w * RECT_PADDING_RATIO;
  const paddingY = h * RECT_PADDING_RATIO;

  return {
    x: minX - paddingX,
    y: minY - paddingY,
    w: w + (paddingX * 2),
    h: h + (paddingY * 2),
  };
};

/**
 * Reads the pixel of `img` under `point` (coordinates scaled by the image
 * width/height) and returns it as `#rrggbb`, suitable for input type="color".
 * Falls back to the default colour when no 2D context is available or the
 * point maps outside the image.
 */
const sampleSkinColor = (img: HTMLImageElement, point: NormalizedPoint): string => {
  const canvas = document.createElement('canvas');
  canvas.width = img.width;
  canvas.height = img.height;

  const ctx = canvas.getContext('2d');
  if (!ctx) return DEFAULT_SKIN_COLOR;

  ctx.drawImage(img, 0, 0);

  const px = Math.floor(point.x * img.width);
  const py = Math.floor(point.y * img.height);

  // Written as a positive check so NaN coordinates also take the fallback.
  const insideImage = px >= 0 && px < img.width && py >= 0 && py < img.height;
  if (!insideImage) return DEFAULT_SKIN_COLOR;

  // First three bytes are R, G, B; alpha is ignored.
  const rgb = ctx.getImageData(px, py, 1, 1).data.slice(0, 3);
  const hex = Array.from(rgb, (channel) => channel.toString(16).padStart(2, '0')).join('');
  return `#${hex}`;
};

/**
 * Detects a face in the image at `imageUrl` and derives avatar layout hints.
 *
 * @param imageUrl URL of the image to analyse; it is loaded with anonymous CORS.
 * @returns Padded bounding boxes for both eyes and the mouth (in landmark
 *   coordinates) plus a skin colour sampled at the cheek landmark, or `null`
 *   when the vision service is unavailable, no face is found, or any step
 *   fails. Failures are logged; the returned promise does not reject.
 */
export const analyzeAvatarImage = async (imageUrl: string): Promise<{ leftEye: Rect, rightEye: Rect, mouth: Rect, skinColor: string } | null> => {
  try {
    await initVision();

    // Capture into a local so the null check narrows the type for later use.
    const landmarker = faceLandmarker;
    if (!landmarker) return null;

    // Awaiting inside the try block is what routes load/detection failures
    // to the catch below instead of rejecting the caller's promise.
    const img = await loadImageForAnalysis(imageUrl);

    const landmarks = landmarker.detect(img).faceLandmarks?.[0];
    if (!landmarks) {
      console.warn("No face detected in generated image");
      return null;
    }

    const cheek = landmarks[SKIN_SAMPLE_INDEX];
    if (!cheek) {
      throw new Error(`Face landmark ${SKIN_SAMPLE_INDEX} is missing from the detection result`);
    }

    return {
      leftEye: landmarkBounds(landmarks, LEFT_EYE_INDICES),
      rightEye: landmarkBounds(landmarks, RIGHT_EYE_INDICES),
      mouth: landmarkBounds(landmarks, MOUTH_INDICES),
      skinColor: sampleSkinColor(img, cheek)
    };
  } catch (error) {
    console.error("Analysis failed", error);
    return null;
  }
};