import { useEffect, useRef, useState, useCallback } from 'react';
import { FaceLandmarker, FilesetResolver, DrawingUtils } from '@mediapipe/tasks-vision';
import { TrackingData } from '../types';

// Pinned to a specific version to match the index.html import and prevent a version mismatch.
const WASM_BASE_URL = "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.18/wasm";
const MODEL_ASSET_PATH = "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task";

// Landmark indices used for the simplified pose estimate.
const LEFT_EYE_OUTER_INDEX = 33;
const RIGHT_EYE_OUTER_INDEX = 263;
const NOSE_INDEX = 1;

// Multiplier applied to the normalized yaw/pitch/translation offsets.
const POSE_SENSITIVITY = 2;
// Blendshape score above which an eye is reported as blinking.
const BLINK_THRESHOLD = 0.5;
// HTMLMediaElement.HAVE_CURRENT_DATA: the video has at least the current frame available.
const HAVE_CURRENT_DATA = 2;

const INITIAL_TRACKING_DATA: TrackingData = {
  rotationX: 0,
  rotationY: 0,
  rotationZ: 0,
  translationX: 0,
  translationY: 0,
  mouthOpen: 0,
  isBlinkingLeft: false,
  isBlinkingRight: false,
};

type DetectionResult = ReturnType<FaceLandmarker['detectForVideo']>;

/**
 * Converts one detection result into tracking data for the first detected face.
 *
 * Pose is approximated from landmark deltas rather than the transformation
 * matrix: roll is the angle of the line between the two outer eye landmarks,
 * yaw/pitch are the nose offset from the eye midpoint, and translation is the
 * nose offset from the frame center (0.5, 0.5).
 *
 * @returns The tracking data, or `null` when no face (or an incomplete
 *          landmark set) was detected.
 */
function toTrackingData(results: DetectionResult): TrackingData | null {
  const landmarks = results.faceLandmarks?.[0];
  if (!landmarks) return null;

  const leftEye = landmarks[LEFT_EYE_OUTER_INDEX];
  const rightEye = landmarks[RIGHT_EYE_OUTER_INDEX];
  const nose = landmarks[NOSE_INDEX];
  if (!leftEye || !rightEye || !nose) return null;

  // 1. Expression: blendshape scores default to 0 when blendshapes are absent.
  const categories = results.faceBlendshapes?.[0]?.categories ?? [];
  const scoreOf = (name: string): number =>
    categories.find((c) => c.categoryName === name)?.score ?? 0;

  // 2. Pose (simplified).
  const eyeMidX = (leftEye.x + rightEye.x) / 2;
  const eyeMidY = (leftEye.y + rightEye.y) / 2;

  return {
    rotationZ: Math.atan2(rightEye.y - leftEye.y, rightEye.x - leftEye.x),
    rotationY: (nose.x - eyeMidX) * POSE_SENSITIVITY,
    rotationX: (nose.y - eyeMidY) * POSE_SENSITIVITY,
    translationX: (nose.x - 0.5) * POSE_SENSITIVITY,
    translationY: (nose.y - 0.5) * POSE_SENSITIVITY,
    mouthOpen: scoreOf('jawOpen'),
    isBlinkingLeft: scoreOf('eyeBlinkLeft') > BLINK_THRESHOLD,
    isBlinkingRight: scoreOf('eyeBlinkRight') > BLINK_THRESHOLD,
  };
}

/**
 * React hook that runs MediaPipe face landmark detection on a video element
 * and exposes the latest head pose / expression estimate.
 *
 * The model is loaded once on mount and closed on unmount. Detection only
 * runs between `startTracking()` and `stopTracking()`; while tracking, frames
 * are skipped (but the loop keeps running) until the model has loaded and the
 * video has a current frame.
 *
 * @param videoElement Video to analyse, or `null` while it is not mounted yet.
 * @returns `isLoading` (model still loading), `isTracking` (loop requested),
 *          `trackingData` (latest estimate) and the `startTracking` /
 *          `stopTracking` controls.
 */
export const useFaceTracking = (videoElement: HTMLVideoElement | null) => {
  const [isTracking, setIsTracking] = useState(false);
  const [isLoading, setIsLoading] = useState(true);
  const [trackingData, setTrackingData] = useState<TrackingData>(INITIAL_TRACKING_DATA);

  const faceLandmarkerRef = useRef<FaceLandmarker | null>(null);
  const requestRef = useRef<number | null>(null);
  const lastVideoTimeRef = useRef(-1);
  // The frame loop reads the element through a ref so a running loop always
  // sees the current element instead of the one captured when it was started.
  const videoElementRef = useRef<HTMLVideoElement | null>(videoElement);

  useEffect(() => {
    videoElementRef.current = videoElement;
    // A different element has an unrelated timeline; force the next frame to be processed.
    lastVideoTimeRef.current = -1;
  }, [videoElement]);

  // Initialize FaceLandmarker
  useEffect(() => {
    let cancelled = false;

    const initMediaPipe = async (): Promise<void> => {
      try {
        const filesetResolver = await FilesetResolver.forVisionTasks(WASM_BASE_URL);
        const landmarker = await FaceLandmarker.createFromOptions(filesetResolver, {
          baseOptions: {
            modelAssetPath: MODEL_ASSET_PATH,
            delegate: "GPU",
          },
          outputFaceBlendshapes: true,
          outputFacialTransformationMatrixes: true,
          runningMode: "VIDEO",
          numFaces: 1,
        });

        if (cancelled) {
          // Unmounted while loading: nobody else will ever close this instance.
          landmarker.close();
          return;
        }
        faceLandmarkerRef.current = landmarker;
      } catch (error) {
        console.error("Failed to load MediaPipe:", error);
      } finally {
        if (!cancelled) setIsLoading(false);
      }
    };

    void initMediaPipe();

    return () => {
      cancelled = true;
      // Stop the frame loop first so it cannot call into a closed landmarker.
      if (requestRef.current !== null) {
        cancelAnimationFrame(requestRef.current);
        requestRef.current = null;
      }
      faceLandmarkerRef.current?.close();
      faceLandmarkerRef.current = null;
    };
  }, []);

  const predict = useCallback(() => {
    const landmarker = faceLandmarkerRef.current;
    const video = videoElementRef.current;

    // Only predict when the model is loaded, the video has a frame, and that
    // frame has not been processed yet.
    if (
      landmarker &&
      video &&
      video.readyState >= HAVE_CURRENT_DATA &&
      video.currentTime !== lastVideoTimeRef.current
    ) {
      lastVideoTimeRef.current = video.currentTime;
      // performance.now() is monotonic, unlike the wall clock, so timestamps
      // handed to the landmarker never go backwards.
      const next = toTrackingData(landmarker.detectForVideo(video, performance.now()));
      if (next) setTrackingData(next);
    }

    // Always re-schedule: skipping a frame must not end the loop.
    requestRef.current = requestAnimationFrame(predict);
  }, []);

  const startTracking = useCallback(() => {
    // Replace any loop that is already running instead of stacking a second one.
    if (requestRef.current !== null) {
      cancelAnimationFrame(requestRef.current);
    }
    setIsTracking(true);
    requestRef.current = requestAnimationFrame(predict);
  }, [predict]);

  const stopTracking = useCallback(() => {
    setIsTracking(false);
    if (requestRef.current !== null) {
      cancelAnimationFrame(requestRef.current);
      requestRef.current = null;
    }
  }, []);

  return { isLoading, isTracking, trackingData, startTracking, stopTracking };
};