Source code for uwlab.devices.rokoko_glove

# Copyright (c) 2024-2025, The UW Lab Project Developers.
# All Rights Reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

from __future__ import annotations

import json
import socket
import torch
from collections.abc import Callable
from typing import TYPE_CHECKING

from isaaclab.devices import DeviceBase
from isaaclab.utils.math import quat_apply, quat_from_euler_xyz

if TYPE_CHECKING:
    from uwlab.devices import RokokoGlovesCfg


[docs]class RokokoGlove(DeviceBase): """A Rokoko_Glove controller for sending SE(3) commands as absolute poses of hands individual part This class is designed to track hands and fingers's pose from rokoko gloves. It uses the udp network protocol to listen to Rokoko Live Studio data gathered from Rokoko smart gloves, and process the data in form of torch Tensor. Addressing the efficiency and ease to understand, the tracking will only be performed with user's parts input, and all Literal of available parts is exhaustively listed in the comment under method __init__. available tracking literals: LEFT_HAND: leftHand, leftThumbProximal, leftThumbMedial, leftThumbDistal, leftThumbTip, leftIndexProximal, leftIndexMedial, leftIndexDistal, leftIndexTip, leftMiddleProximal, leftMiddleMedial, leftMiddleDistal, leftMiddleTip, leftRingProximal, leftRingMedial, leftRingDistal, leftRingTip, leftLittleProximal, leftLittleMedial, leftLittleDistal, leftLittleTip RIGHT_HAND: rightHand, rightThumbProximal, rightThumbMedial, rightThumbDistal, rightThumbTip rightIndexProximal, rightIndexMedial, rightIndexDistal, rightIndexTip, rightMiddleProximal, rightMiddleMedial, rightMiddleDistal, rightMiddleTip rightRingProximal, rightRingMedial, rightRingDistal, rightRingTip, rightLittleProximal, rightLittleMedial, rightLittleDistal, rightLittleTip """
[docs] def __init__( self, cfg: RokokoGlovesCfg, # Make sure this matches the port used in Rokoko Studio Live device="cuda:0", ): """Initialize the Rokoko_Glove Controller. Be aware that current implementation outputs pose of each hand part in the same order as input list, but parts come from left hand always come before parts from right hand. Args: UDP_IP: The IP Address of network to listen to, 0.0.0.0 refers to all available networks UDP_PORT: The port Rokoko Studio Live sends to left_hand_track: the tracking point of left hand this class will be tracking. right_hand_track: the tracking point of right hand this class will be tracking. scale: the overall scale for the hand. proximal_offset: the inter proximal offset that shorten or widen the spread of hand. """ import lz4.frame self.lz4frame = lz4.frame self.device = device self._additional_callbacks = dict() # Define the IP address and port to listen on self.UDP_IP = cfg.UDP_IP self.UDP_PORT = cfg.UDP_PORT self.scale = cfg.scale self.proximal_offset = cfg.proximal_offset self.thumb_scale = cfg.thumb_scale self.left_fingertip_names = cfg.left_hand_track self.right_fingertip_names = cfg.right_hand_track self.command_type = cfg.command_type # Create a UDP socket self.sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 8192) self.sock.bind((self.UDP_IP, self.UDP_PORT)) self.normalize_position = True self.normalize_rotation = True print(f"Listening for UDP packets on {self.UDP_IP}:{self.UDP_PORT}") # fmt: off self.left_hand_joint_names = [ 'leftHand', 'leftThumbProximal', 'leftThumbMedial', 'leftThumbDistal', 'leftThumbTip', 'leftIndexProximal', 'leftIndexMedial', 'leftIndexDistal', 'leftIndexTip', 'leftMiddleProximal', 'leftMiddleMedial', 'leftMiddleDistal', 'leftMiddleTip', 'leftRingProximal', 'leftRingMedial', 'leftRingDistal', 'leftRingTip', 'leftLittleProximal', 'leftLittleMedial', 'leftLittleDistal', 'leftLittleTip'] self.right_hand_joint_names = [ 'rightHand', 'rightThumbProximal', 'rightThumbMedial', 'rightThumbDistal', 'rightThumbTip', 'rightIndexProximal', 'rightIndexMedial', 'rightIndexDistal', 'rightIndexTip', 'rightMiddleProximal', 'rightMiddleMedial', 'rightMiddleDistal', 'rightMiddleTip', 'rightRingProximal', 'rightRingMedial', 'rightRingDistal', 'rightRingTip', 'rightLittleProximal', 'rightLittleMedial', 'rightLittleDistal', 'rightLittleTip'] # fmt: on self.left_joint_dict = {self.left_hand_joint_names[i]: i for i in range(len(self.left_hand_joint_names))} self.right_joint_dict = {self.right_hand_joint_names[i]: i for i in range(len(self.right_hand_joint_names))} self.left_finger_dict = {i: self.left_joint_dict[i] for i in self.left_fingertip_names} self.right_finger_dict = {i: self.right_joint_dict[i] for i in self.right_fingertip_names} self.left_fingertip_poses = torch.zeros((len(self.left_hand_joint_names), 7), device=self.device) self.right_fingertip_poses = torch.zeros((len(self.right_hand_joint_names), 7), device=self.device) self.fingertip_poses = torch.zeros( (len(self.left_hand_joint_names) + len(self.right_hand_joint_names), 7), device=self.device ) output_indices_list = [ *[self.right_joint_dict[i] for i in self.left_fingertip_names], *[self.right_joint_dict[i] + len(self.left_hand_joint_names) for i in self.right_fingertip_names], ] self.output_indices = torch.tensor(output_indices_list, device=self.device) # necessary joints for compute normalization self.necessary_joints = [] if len(self.right_fingertip_names): self.necessary_joints.extend(["rightIndexProximal", "rightMiddleProximal", "rightHand"]) if len(self.left_fingertip_names): self.necessary_joints.extend(["leftIndexProximal", "leftMiddleProximal", "leftHand"])
[docs] def reset(self): "Reset Internal Buffer" self.left_fingertip_poses = torch.zeros((len(self.left_hand_joint_names), 7), device=self.device) self.right_fingertip_poses = torch.zeros((len(self.right_hand_joint_names), 7), device=self.device)
[docs] def advance(self): """Provides the properly scaled, ordered, selected tracking results received from Rokoko Studio. Returns: A tuple containing the 2D (n,7) pose array ordered by user inputted joint track list, and a dummy truth value. """ self.left_fingertip_poses = torch.zeros((len(self.left_hand_joint_names), 7), device=self.device) self.right_fingertip_poses = torch.zeros((len(self.right_hand_joint_names), 7), device=self.device) body_data = self._get_gloves_data() for joint_name in self.left_fingertip_names: joint_data = body_data[joint_name] joint_position = torch.tensor(list(joint_data["position"].values())) joint_rotation = torch.tensor(list(joint_data["rotation"].values())) self.left_fingertip_poses[self.right_joint_dict[joint_name]][:3] = joint_position self.left_fingertip_poses[self.left_joint_dict[joint_name]][3:] = joint_rotation for joint_name in self.right_fingertip_names: joint_data = body_data[joint_name] joint_position = torch.tensor(list(joint_data["position"].values())) joint_rotation = torch.tensor(list(joint_data["rotation"].values())) self.right_fingertip_poses[self.right_joint_dict[joint_name]][:3] = joint_position self.right_fingertip_poses[self.right_joint_dict[joint_name]][3:] = joint_rotation # for normalization purpose for joint_name in self.necessary_joints: joint_data = body_data[joint_name] joint_position = torch.tensor(list(joint_data["position"].values())) joint_rotation = torch.tensor(list(joint_data["rotation"].values())) self.right_fingertip_poses[self.right_joint_dict[joint_name]][:3] = joint_position self.right_fingertip_poses[self.right_joint_dict[joint_name]][3:] = joint_rotation left_wrist_position = self.left_fingertip_poses[0][:3] if len(self.left_fingertip_names) > 0: # scale self.left_fingertip_poses[:, :3] = ( self.left_fingertip_poses[:, :3] - left_wrist_position ) * self.scale + left_wrist_position # reposition leftIndexProximalIdx = self.left_joint_dict["leftIndexProximal"] leftMiddleProximalIdx = self.left_joint_dict["leftMiddleProximal"] leftRingProximalIdx = self.left_joint_dict["leftRingProximal"] leftLittleProximalIdx = self.left_joint_dict["leftLittleProximal"] reposition_vector = ( self.left_fingertip_poses[leftMiddleProximalIdx][:3] - self.left_fingertip_poses[leftIndexProximalIdx][:3] ) self.left_fingertip_poses[leftIndexProximalIdx:, :3] += self.proximal_offset * reposition_vector self.left_fingertip_poses[leftMiddleProximalIdx:, :3] += self.proximal_offset * reposition_vector self.left_fingertip_poses[leftRingProximalIdx:, :3] += self.proximal_offset * reposition_vector self.left_fingertip_poses[leftLittleProximalIdx:, :3] += self.proximal_offset * reposition_vector self.fingertip_poses[: len(self.left_fingertip_poses)] = self.left_fingertip_poses right_wrist_position = self.right_fingertip_poses[0][:3] if len(self.right_fingertip_names) > 0: rightThumbProximalIdx = self.right_joint_dict["rightThumbProximal"] rightIndexProximalIdx = self.right_joint_dict["rightIndexProximal"] rightMiddleProximalIdx = self.right_joint_dict["rightMiddleProximal"] rightRingProximalIdx = self.right_joint_dict["rightRingProximal"] rightLittleProximalIdx = self.right_joint_dict["rightLittleProximal"] # normalize rot_matrix = self.normalize_hand_positions(self.right_joint_dict) position_normalized_poses = self.right_fingertip_poses[:, :3] - right_wrist_position if self.normalize_rotation: self.right_fingertip_poses[:, :3] = position_normalized_poses @ rot_matrix # scale self.right_fingertip_poses[:, :3] = self.right_fingertip_poses[:, :3] * self.scale t_idx = rightThumbProximalIdx self.right_fingertip_poses[t_idx : t_idx + 4, :3] = ( self.right_fingertip_poses[t_idx : t_idx + 4, :3] * self.thumb_scale ) if not self.normalize_position: self.right_fingertip_poses[:, :3] += right_wrist_position # reposition reposition_vector = ( self.right_fingertip_poses[rightMiddleProximalIdx][:3] - self.right_fingertip_poses[rightIndexProximalIdx][:3] ) self.right_fingertip_poses[rightIndexProximalIdx:, :3] += self.proximal_offset * reposition_vector self.right_fingertip_poses[rightMiddleProximalIdx:, :3] += self.proximal_offset * reposition_vector self.right_fingertip_poses[rightRingProximalIdx:, :3] += self.proximal_offset * reposition_vector self.right_fingertip_poses[rightLittleProximalIdx:, :3] += self.proximal_offset * reposition_vector self.fingertip_poses[len(self.left_fingertip_poses) :] = self.right_fingertip_poses self.fingertip_poses[self.output_indices] = self.to_world_convention(self.fingertip_poses[self.output_indices]) if self.command_type == "pos": return self.fingertip_poses[self.output_indices][:, :3] return self.fingertip_poses[self.output_indices]
def to_world_convention(self, poses: torch.Tensor): # # to world convention poses[:, 1] = -poses[:, 1] x = torch.tensor([0], device=self.device) y = torch.tensor([0], device=self.device) z = torch.tensor([0.4], device=self.device) quat = quat_from_euler_xyz(x, y, z) poses[:, :3] = quat_apply(quat, poses[:, :3]) return poses def normalize_hand_positions(self, hand_keypoints): wrist = self.right_fingertip_poses[hand_keypoints["rightHand"]][:3] index_proximal = self.right_fingertip_poses[hand_keypoints["rightIndexProximal"]][:3] middle_proximal = self.right_fingertip_poses[hand_keypoints["rightMiddleProximal"]][:3] # Define the plane with two vectors vec1 = index_proximal - wrist vec2 = middle_proximal - index_proximal # Compute orthonormal basis for the plane vec1_normalized = vec1 / vec1.norm() vec2_proj = vec2 - torch.dot(vec2, vec1_normalized) * vec1_normalized vec2_normalized = vec2_proj / vec2_proj.norm() vec3_normalized = torch.linalg.cross(vec1_normalized, vec2_normalized) # Construct rotation matrix rotation_matrix = torch.stack([vec1_normalized, vec2_normalized, vec3_normalized], dim=-1) return rotation_matrix def _get_gloves_data(self): while True: try: data, addr = self.sock.recvfrom(8192) # Buffer size is 1024 bytes break except OSError as e: print(f"Error: {e}") continue decompressed_data = self.lz4frame.decompress(data) received_json = json.loads(decompressed_data) # received_json = json.loads(data) body_data = received_json["scene"]["actors"][0]["body"] return body_data def add_callback(self, key: str, func: Callable): # check keys supported by callback if key not in ["L", "R"]: raise ValueError(f"Only left (L) and right (R) buttons supported. Provided: {key}.") # TODO: Improve this to allow multiple buttons on same key. self._additional_callbacks[key] = func
[docs] def debug_advance_all_joint_data(self): """Provides the properly scaled, all tracking results received from Rokoko Studio. It is intended to use a debug and visualization function inspecting all data from Rokoko Glove. Returns: A tuple containing the 2D (42,7) pose array(left:0-21, right:21-42), and a dummy truth value. """ body_data = self._get_gloves_data() # for joint_name in self.left_hand_joint_names: # joint_data = body_data[joint_name] # joint_position = torch.tensor(list(joint_data["position"].values())) # joint_rotation = torch.tensor(list(joint_data["rotation"].values())) # self.left_fingertip_poses[self.left_joint_dict[joint_name]][:3] = joint_position # self.left_fingertip_poses[self.left_joint_dict[joint_name]][3:] = joint_rotation for joint_name in self.right_hand_joint_names: joint_data = body_data[joint_name] joint_position = torch.tensor(list(joint_data["position"].values())) joint_rotation = torch.tensor(list(joint_data["rotation"].values())) self.right_fingertip_poses[self.right_joint_dict[joint_name]][:3] = joint_position self.right_fingertip_poses[self.right_joint_dict[joint_name]][3:] = joint_rotation # left_wrist_position = self.left_fingertip_poses[0][:3] # self.left_fingertip_poses[:, :3] = (self.left_fingertip_poses[:, :3] - left_wrist_position) * self.scale + left_wrist_position # self.fingertip_poses[:len(self.left_fingertip_poses)] = self.left_fingertip_poses right_wrist_position = self.right_fingertip_poses[0][:3] # scale rightThumbProximalIdx = self.right_joint_dict["rightThumbProximal"] rightIndexProximalIdx = self.right_joint_dict["rightIndexProximal"] rightMiddleProximalIdx = self.right_joint_dict["rightMiddleProximal"] rightRingProximalIdx = self.right_joint_dict["rightRingProximal"] rightLittleProximalIdx = self.right_joint_dict["rightLittleProximal"] # scale self.right_fingertip_poses[:, :3] = ( self.right_fingertip_poses[:, :3] - right_wrist_position ) * self.scale + right_wrist_position t_idx = rightThumbProximalIdx self.right_fingertip_poses[t_idx : t_idx + 4, :3] = ( self.right_fingertip_poses[t_idx : t_idx + 4, :3] - right_wrist_position ) * self.thumb_scale + right_wrist_position # reposition reposition_vector = ( self.right_fingertip_poses[rightMiddleProximalIdx][:3] - self.right_fingertip_poses[rightIndexProximalIdx][:3] ) self.right_fingertip_poses[rightIndexProximalIdx:, :3] += self.proximal_offset * reposition_vector self.right_fingertip_poses[rightMiddleProximalIdx:, :3] += self.proximal_offset * reposition_vector self.right_fingertip_poses[rightRingProximalIdx:, :3] += self.proximal_offset * reposition_vector self.right_fingertip_poses[rightLittleProximalIdx:, :3] += self.proximal_offset * reposition_vector self.fingertip_poses[len(self.left_fingertip_poses) :] = self.right_fingertip_poses return self.fingertip_poses, True