stream_hacks to main #2

Merged
skeh merged 8 commits from stream_hacks into main 2024-12-11 03:22:13 +00:00
Showing only changes of commit 4ed2ac1384 - Show all commits

View file

@ -1,6 +1,7 @@
import math
import numpy as np
from scipy.spatial.distance import cdist
from .. import TransformProcess
from ovtk_track.types import Quaternion, Point3d
@ -24,9 +25,96 @@ class Process(TransformProcess):
self.normal = np.array(normal, dtype=float)
self.up = np.array(vec_perp(normal), dtype=float)
# REVIEW: See calc_eye. These probably need to change based on normal / up.
# Or maybe they don't, and we just rotate the output quaternion?
# Ugh. The code works for now, but I don't understand it....
self.SIN_LEFT_THETA = 2 * np.sin(np.pi / 2)
self.SIN_UP_THETA = np.sin(np.pi / 6)
def setup(self):
pass
def calc_head(self, landmarks):
    """Derive a head rotation and head position from face landmarks.

    Args:
        landmarks: Landmark collection indexable by LANDMARK_TYPES flag
            combinations; every selection is converted with
            Landmarks.to_numpy and averaged over axis 0.

    Returns:
        A tuple ``(rotation, head_center)``: ``rotation`` is the normalized
        sum of a "look" and a "roll" Quaternion, and ``head_center`` is the
        mean of the face-outline landmarks.
    """
    # REVIEW: This doesn't really work quite right!! Look + roll aren't mixing as expected.
    # NOTE(review): the two quaternions are added and renormalized at the end
    # (not multiplied), and roll_angle is the dot product of two vectors that
    # is then fed to cos/sin as if it were an angle - confirm the intent.

    def centroid(selector):
        # Mean position of all landmarks matching the selector
        return Landmarks.to_numpy(landmarks[selector]).mean(axis=0)

    def unit(vec):
        # Scale a vector to length one
        return vec / np.linalg.norm(vec)

    # Direction from the head center towards the nose tip
    nose_tip = centroid(LANDMARK_TYPES.NOSE | LANDMARK_TYPES.TIP)
    head_center = centroid(LANDMARK_TYPES.FACE | LANDMARK_TYPES.OUTLINE)
    forward = unit(nose_tip - head_center)

    # Direction across the face, from the right eye center to the left one
    left_eye = centroid(LANDMARK_TYPES.EYE | LANDMARK_TYPES.LEFT)
    right_eye = centroid(LANDMARK_TYPES.EYE | LANDMARK_TYPES.RIGHT)
    across = unit(left_eye - right_eye)

    # Quaternion relating self.normal and the forward vector:
    # w = dot product, xyz = cross product, then w += |q| and normalize
    look = Quaternion(np.dot(forward, self.normal), *np.cross(forward, self.normal))
    look.w += look.magnitude()
    look = look.normalize()

    # Quaternion about self.normal, driven by how far `across` leans onto self.up
    roll_angle = (across * self.up).sum()
    roll_axis = self.normal * math.sin(roll_angle)
    roll = Quaternion(math.cos(roll_angle), *roll_axis).normalize()

    return (look + roll).normalize(), head_center
def calc_eye(self, landmarks):
    """Estimate a gaze rotation and an aspect ratio for each eye.

    For each side (index 0 = LANDMARK_TYPES.LEFT, 1 = LANDMARK_TYPES.RIGHT)
    the eye corners, outline center, pupil position and eye height are read
    from the landmarks, turned into two angles (theta, pha) and from there
    into a quaternion.

    Args:
        landmarks: Landmark collection indexable by LANDMARK_TYPES flag
            combinations; each selection is converted with
            Landmarks.to_numpy.
            NOTE(review): assumed to yield (n_points, 3) arrays - confirm.

    Returns:
        A tuple ``(quats, aspect_ratio)``: ``quats`` is a list with one
        Quaternion per eye (left, right) and ``aspect_ratio`` is a length-2
        array of eye length divided by eye height.
    """
    # Points of interest. Axis 0 is always the eye; for `corners` axis 1 is
    # the corner and the last axis is the coordinate.
    corners = np.empty((2, 2, 3), dtype=np.float32)
    centers = np.empty((2, 3), dtype=np.float32)
    pupils = np.empty((2, 3), dtype=np.float32)
    cross_heights = np.empty((2), dtype=np.float32)

    for i, side in enumerate([LANDMARK_TYPES.LEFT, LANDMARK_TYPES.RIGHT]):
        # Find corners by searching for the two outline points furthest apart
        # REVIEW: These *should* always be the same points in the map - make a landmark type selector?
        eye_outline = Landmarks.to_numpy(landmarks[LANDMARK_TYPES.EYE | LANDMARK_TYPES.OUTLINE | side])
        hdist = cdist(eye_outline, eye_outline, metric='euclidean')
        best_pair = np.unravel_index(hdist.argmax(), hdist.shape)
        corners[i] = eye_outline[best_pair[0]], eye_outline[best_pair[1]]

        # Height of the eye: largest distance of any outline point from the
        # line passing through both corners
        cross_heights[i] = np.array([
            np.linalg.norm(np.cross(corners[i][1] - corners[i][0],
                                    corners[i][0] - point))
            / np.linalg.norm(corners[i][0] - corners[i][1])
            for point in eye_outline
        ]).max()

        centers[i] = eye_outline.mean(axis=0)
        pupils[i] = Landmarks.to_numpy(landmarks[LANDMARK_TYPES.IRIS | side]).mean(axis=0)

    # Calculate important distances based on POI (one value per eye)
    eye_length = np.linalg.norm(np.diff(corners, axis=1), axis=(2, 1))
    ic_distance = np.linalg.norm(pupils - centers, axis=1)
    zc_distance = np.linalg.norm(pupils - corners[:, 1], axis=1)
    aspect_ratio = 1 / (cross_heights / eye_length)

    # Takes above and spits out spherical coordinates of pupil (relative to camera)
    # Copied in large part from https://github.com/1996scarlet/OpenVtuber/blob/970229d3a5ebe14a7519352da039d00a0b87e2d9/service/TFLiteIrisLocalization.py#L101
    #
    # delta_y is the signed distance (x/y components only) of each pupil from
    # the line through that eye's two corners, then halved. The corner axis
    # (axis 1) must be selected for every eye; indexing axis 0 instead would
    # subtract the left eye's corners from the right eye's.
    starts = corners[:, 1]
    ends = corners[:, 0]
    s0 = (starts[:, 1] - ends[:, 1]) * pupils[:, 0]
    s1 = (starts[:, 0] - ends[:, 0]) * pupils[:, 1]
    s2 = starts[:, 0] * ends[:, 1]
    s3 = starts[:, 1] * ends[:, 0]

    delta_y = (s0 - s1 + s2 - s3) / eye_length / 2
    delta_x = np.sqrt(abs(ic_distance**2 - delta_y**2))

    # Row 0 holds the horizontal term, row 1 the vertical term, one column per eye
    delta = np.array((delta_x * self.SIN_LEFT_THETA,
                      delta_y * self.SIN_UP_THETA))
    delta /= eye_length

    # arcsin is only defined on [-1, 1]; noisy landmarks can push delta past
    # that range, which would otherwise yield NaN angles and NaN quaternions.
    theta, pha = np.arcsin(np.clip(delta, -1.0, 1.0))

    # Flip the horizontal angle for eyes where this comparison holds
    inv_judge = zc_distance**2 - delta_y**2 < eye_length**2 / 4
    theta[inv_judge] *= -1

    # Convert spherical coordinates to quaternions
    # Based on https://github.com/moble/quaternion/blob/8f6fc306306c45f0bf79331a22ef3998e4d187bc/src/quaternion/__init__.py#L599
    quats = np.array([np.cos(pha/2) * np.cos(theta/2),
                      np.sin(pha/2) * np.cos(theta/2),
                      np.cos(pha/2) * np.sin(theta/2),
                      -np.sin(pha/2) * np.sin(theta/2)]).T

    return [Quaternion(*quat) for quat in quats], aspect_ratio
def process(self):
landmarks = self._inputs['landmarks'].get()
skeleton = None
@ -34,43 +122,10 @@ class Process(TransformProcess):
joints = {}
if landmarks.has(LANDMARK_TYPES.FACE):
# Get head look / pos
nose = Landmarks.to_numpy(landmarks[LANDMARK_TYPES.NOSE | LANDMARK_TYPES.TIP]).mean(0)
head_center = Landmarks.to_numpy(landmarks[LANDMARK_TYPES.FACE | LANDMARK_TYPES.OUTLINE]).mean(0)
look_vec = (nose - head_center)
look_quat, head_pos = self.calc_head(landmarks)
eye_quats, eye_aspect = self.calc_eye(landmarks)
eye_center_l = Landmarks.to_numpy(landmarks[LANDMARK_TYPES.EYE | LANDMARK_TYPES.LEFT]).mean(0)
eye_center_r = Landmarks.to_numpy(landmarks[LANDMARK_TYPES.EYE | LANDMARK_TYPES.RIGHT]).mean(0)
roll_vec = (eye_center_l - eye_center_r)
look_vec /= np.linalg.norm(look_vec)
roll_vec /= np.linalg.norm(roll_vec)
roll_angle = np.sum(roll_vec * self.up)
roll = Quaternion(math.cos(roll_angle), * self.normal * math.sin(roll_angle))
roll = roll.normalize()
look = Quaternion(np.dot(look_vec, self.normal), *np.cross(look_vec, self.normal))
look.w += look.magnitude()
look = look.normalize()
combo = look + roll
combo = combo.normalize()
# Get eye data
marks_left = Landmarks.to_numpy(landmarks[LANDMARK_TYPES.EYE | LANDMARK_TYPES.LEFT])
marks_right = Landmarks.to_numpy(landmarks[LANDMARK_TYPES.EYE | LANDMARK_TYPES.RIGHT])
range = np.array([marks_left.max(axis=0) - marks_left.min(axis=0),
marks_right.max(axis=0) - marks_right.min(axis=0)])
delta = np.array([eye_center_l - Landmarks.to_numpy(landmarks[LANDMARK_TYPES.IRIS | LANDMARK_TYPES.CENTER | LANDMARK_TYPES.LEFT]).mean(0),
eye_center_r - Landmarks.to_numpy(landmarks[LANDMARK_TYPES.IRIS | LANDMARK_TYPES.CENTER | LANDMARK_TYPES.RIGHT]).mean(0)])
delta /= range
try:
eye_aspect_ratio = range[::, 0] / range[::, 1]
except ZeroDivisionError:
eye_aspect_ratio = None
head_joint = Joint(Point3d(*head_center), combo, dict(look_delta=delta, eye_aspect_ratio=eye_aspect_ratio))
head_joint = Joint(Point3d(*head_pos), look_quat, attr=dict(eye_rot=eye_quats, eye_aspect=eye_aspect))
joints[JOINT_TYPES.HEAD] = head_joint