MediaPipe 为 BVH 设置地标

Question

我试图将 MediaPipe（MP）姿势地标（33 个关键点）映射/转换到 BVH 层次结构，但不知为何，我的模型不断翻转和扭曲。我遇到的难点如下：

对于运动数据，如何从 MP 地标坐标计算出 Z、X、Y 旋转角？
对于髋关节（根节点），如何更新位置坐标（X、Y、Z）？对于其余关节，如何根据每一帧新传入的数据更新旋转值（Z、X、Y）？

如果有人可以帮助回答这些问题中的任何一个，甚至可以指出正确的资源，那就太棒了。提前致谢！

BVH文件的层级为：

HIERARCHY
ROOT hip
{
  OFFSET 0 0 0
  CHANNELS 6 Xposition Yposition Zposition Zrotation Yrotation Xrotation
  JOINT abdomen
  {
    OFFSET 0 20.6881 -0.73152
    CHANNELS 3 Zrotation Xrotation Yrotation
    JOINT chest
    {
      OFFSET 0 11.7043 -0.48768
      CHANNELS 3 Zrotation Xrotation Yrotation
      JOINT neck
      {
        OFFSET 0 22.1894 -2.19456
        CHANNELS 3 Zrotation Xrotation Yrotation
        JOINT head
        {
          OFFSET -0.24384 7.07133 1.2192
          CHANNELS 3 Zrotation Xrotation Yrotation
          JOINT leftEye
          {
            OFFSET 4.14528 8.04674 8.04672
            CHANNELS 3 Zrotation Xrotation Yrotation
            End Site
            {
              OFFSET 1 0 0
            }
          }
          JOINT rightEye
          {
            OFFSET -3.6576 8.04674 8.04672
            CHANNELS 3 Zrotation Xrotation Yrotation
            End Site
            {
              OFFSET 1 0 0
            }
          }
        }
      }
      JOINT rCollar
      {
        OFFSET -2.68224 19.2634 -4.8768
        CHANNELS 3 Zrotation Xrotation Yrotation
        JOINT rShldr
        {
          OFFSET -8.77824 -1.95073 1.46304
          CHANNELS 3 Zrotation Xrotation Yrotation
          JOINT rForeArm
          {
            OFFSET -28.1742 -1.7115 0.48768
            CHANNELS 3 Zrotation Xrotation Yrotation
            JOINT rHand
            {
              OFFSET -22.5879 0.773209 7.07136
              CHANNELS 3 Zrotation Xrotation Yrotation
              JOINT rThumb1
              {
                OFFSET -1.2192 -0.487915 3.41376
                CHANNELS 3 Zrotation Xrotation Yrotation
                JOINT rThumb2
                {
                  OFFSET -3.37035 -0.52449 3.41376
                  CHANNELS 3 Zrotation Xrotation Yrotation
                  End Site
                  {
                    OFFSET -1.78271 -1.18214 1.43049
                  }
                }
              }
              JOINT rIndex1
              {
                OFFSET -7.75947 0.938293 5.60832
                CHANNELS 3 Zrotation Xrotation Yrotation
                JOINT rIndex2
                {
                  OFFSET -2.54057 -0.884171 1.56538
                  CHANNELS 3 Zrotation Xrotation Yrotation
                  End Site
                  {
                    OFFSET -1.62519 -0.234802 1.16502
                  }
                }
              }
              JOINT rMid1
              {
                OFFSET -8.24714 1.18213 3.41376
                CHANNELS 3 Zrotation Xrotation Yrotation
                JOINT rMid2
                {
                  OFFSET -3.10165 -0.590103 1.0647
                  CHANNELS 3 Zrotation Xrotation Yrotation
                  End Site
                  {
                    OFFSET -2.48547 -0.328903 0.83742
                  }
                }
              }
              JOINT rRing1
              {
                OFFSET -8.82822 0.546677 1.51678
                CHANNELS 3 Zrotation Xrotation Yrotation
                JOINT rRing2
                {
                  OFFSET -2.60934 -0.819778 -0.0198488
                  CHANNELS 3 Zrotation Xrotation Yrotation
                  End Site
                  {
                    OFFSET -2.33842 -0.294052 0.168128
                  }
                }
              }
              JOINT rPinky1
              {
                OFFSET -8.27202 -0.0477905 -0.4584
                CHANNELS 3 Zrotation Xrotation Yrotation
                JOINT rPinky2
                {
                  OFFSET -1.82734 -0.647385 -0.700984
                  CHANNELS 3 Zrotation Xrotation Yrotation
                  End Site
                  {
                    OFFSET -1.69225 -0.51767 -0.607171
                  }
                }
              }
            }
          }
        }
      }
      JOINT lCollar
      {
        OFFSET 2.68224 19.2634 -4.8768
        CHANNELS 3 Zrotation Xrotation Yrotation
        JOINT lShldr
        {
          OFFSET 8.77824 -1.95073 1.46304
          CHANNELS 3 Zrotation Xrotation Yrotation
          JOINT lForeArm
          {
            OFFSET 28.1742 -1.7115 0.48768
            CHANNELS 3 Zrotation Xrotation Yrotation
            JOINT lHand
            {
              OFFSET 22.5879 0.773209 7.07136
              CHANNELS 3 Zrotation Xrotation Yrotation
              JOINT lThumb1
              {
                OFFSET 1.2192 -0.487915 3.41376
                CHANNELS 3 Zrotation Xrotation Yrotation
                JOINT lThumb2
                {
                  OFFSET 3.37035 -0.52449 3.41376
                  CHANNELS 3 Zrotation Xrotation Yrotation
                  End Site
                  {
                    OFFSET 1.78271 -1.18214 1.43049
                  }
                }
              }
              JOINT lIndex1
              {
                OFFSET 7.75947 0.938293 5.60832
                CHANNELS 3 Zrotation Xrotation Yrotation
                JOINT lIndex2
                {
                  OFFSET 2.54057 -0.884171 1.56538
                  CHANNELS 3 Zrotation Xrotation Yrotation
                  End Site
                  {
                    OFFSET 1.62519 -0.234802 1.16502
                  }
                }
              }
              JOINT lMid1
              {
                OFFSET 8.24714 1.18213 3.41376
                CHANNELS 3 Zrotation Xrotation Yrotation
                JOINT lMid2
                {
                  OFFSET 3.10165 -0.590103 1.0647
                  CHANNELS 3 Zrotation Xrotation Yrotation
                  End Site
                  {
                    OFFSET 2.48547 -0.328903 0.83742
                  }
                }
              }
              JOINT lRing1
              {
                OFFSET 8.82822 0.546677 1.51678
                CHANNELS 3 Zrotation Xrotation Yrotation
                JOINT lRing2
                {
                  OFFSET 2.60934 -0.819778 -0.0198488
                  CHANNELS 3 Zrotation Xrotation Yrotation
                  End Site
                  {
                    OFFSET 2.33842 -0.294052 0.168128
                  }
                }
              }
              JOINT lPinky1
              {
                OFFSET 8.27202 -0.0477905 -0.4584
                CHANNELS 3 Zrotation Xrotation Yrotation
                JOINT lPinky2
                {
                  OFFSET 1.82734 -0.647385 -0.700984
                  CHANNELS 3 Zrotation Xrotation Yrotation
                  End Site
                  {
                    OFFSET 1.69225 -0.51767 -0.607171
                  }
                }
              }
            }
          }
        }
      }
    }
  }
  JOINT rButtock
  {
    OFFSET -8.77824 4.35084 1.2192
    CHANNELS 3 Zrotation Xrotation Yrotation
    JOINT rThigh
    {
      OFFSET 0 -1.70687 -2.19456
      CHANNELS 3 Zrotation Xrotation Yrotation
      JOINT rShin
      {
        OFFSET 0 -36.8199 0.73152
        CHANNELS 3 Zrotation Xrotation Yrotation
        JOINT rFoot
        {
          OFFSET 0.73152 -45.1104 -5.12064
          CHANNELS 3 Zrotation Xrotation Yrotation
          End Site
          {
            OFFSET -1.1221 -3.69964 12.103
          }
        }
      }
    }
  }
  JOINT lButtock
  {
    OFFSET 8.77824 4.35084 1.2192
    CHANNELS 3 Zrotation Xrotation Yrotation
    JOINT lThigh
    {
      OFFSET 0 -1.70687 -2.19456
      CHANNELS 3 Zrotation Xrotation Yrotation
      JOINT lShin
      {
        OFFSET 0 -36.8199 0.73152
        CHANNELS 3 Zrotation Xrotation Yrotation
        JOINT lFoot
        {
          OFFSET -0.73152 -45.1104 -5.12064
          CHANNELS 3 Zrotation Xrotation Yrotation
          End Site
          {
            OFFSET 1.1221 -3.69964 12.103
          }
        }
      }
    }
  }
}

我用过的旋转函数：

def rot(a, b):
    """Return three angles, in degrees, describing the direction from a to b.

    The difference ``b - a`` is normalised to unit length and three
    ``arctan2`` angles are derived from its components. Each angle is
    formatted with three decimals and the last digit is then dropped, so the
    returned values carry two decimals.

    Args:
        a: Start point, a sequence or array of three numbers.
        b: End point, a sequence or array of three numbers.

    Returns:
        ``[thetaZ, thetaX, thetaY]`` as Python floats, in degrees. When the
        two points coincide (or the distance is not finite) no direction
        exists and ``[0.0, 0.0, 0.0]`` is returned.
    """
    # Work in floating point on fresh arrays so integer inputs and plain
    # sequences are accepted and the callers' data is never modified.
    c = np.asarray(b, dtype=float) - np.asarray(a, dtype=float)
    length = np.linalg.norm(c)
    if length == 0 or not np.isfinite(length):
        # Dividing by a zero/NaN norm would yield NaN components, and the
        # string-based truncation below cannot parse 'nan'.
        return [0.0, 0.0, 0.0]
    c = c / length

    planar_sq = c[0]**2 + c[1]**2
    thetaZ = np.arctan2(c[1], c[0])
    # NOTE(review): thetaX uses the squared planar length while thetaY uses
    # its square root; one of the two is probably unintended. Both formulas
    # are kept exactly as they were.
    thetaX = np.arctan2(c[2], planar_sq)
    thetaY = np.arctan2(c[2], np.sqrt(planar_sq))

    def _to_degrees_2dp(angle):
        # Format with three decimals, then cut the final digit.
        return float('{:.3f}'.format(np.rad2deg(angle))[:-1])

    return [_to_degrees_2dp(thetaZ), _to_degrees_2dp(thetaX), _to_degrees_2dp(thetaY)]

我做的从地标到骨架的映射：

def dataLoad2(i, j, res1, res2):
    """Format the midpoint of two points as a ``"z y x "`` string.

    Args:
        i: Index of the first point inside ``res1``.
        j: Index of the second point inside ``res2``.
        res1: Indexable collection of 3-component points.
        res2: Indexable collection of 3-component points.

    Returns:
        The midpoint's components in reverse order (index 2, 1, 0), each
        followed by a single space.
    """
    # Parenthesise the sum: without it only the second point is halved
    # (a + b / 2), which is not the midpoint.
    midpoint = (np.asarray(res1[i]) + np.asarray(res2[j])) / 2
    return str(midpoint[2]) + " " + str(midpoint[1]) + " " + str(midpoint[0]) + " "

def dataLoad(i, res):
    """Format one 3-component point as a ``"z y x "`` string.

    Args:
        i: Index of the point inside ``res``. Ignored when ``res`` is itself
            a single flat point.
        res: Either an indexable collection of 3-component points, or one
            flat 3-component point.

    Returns:
        The point's components in reverse order (index 2, 1, 0), each
        followed by a single space.
    """
    try:
        point = res[i]
        return str(point[2]) + " " + str(point[1]) + " " + str(point[0]) + " "
    except (TypeError, IndexError):
        # ``res`` is a flat point: indexing one of its scalar components
        # fails with TypeError (Python numbers) or IndexError (NumPy
        # scalars). Only those are handled, so unrelated errors and
        # KeyboardInterrupt are no longer silently swallowed.
        return str(res[2]) + " " + str(res[1]) + " " + str(res[0]) + " "

def writeBVH(res):
    """Build one line of per-joint coordinate triples from pose landmarks.

    Each skeleton joint is taken either from a single landmark (``dataLoad``)
    or from a pair of landmarks (``dataLoad2``); abdomen and neck are derived
    from already formatted joints. Every joint contributes one ``"z y x "``
    triple. The hip triple is emitted twice, which gives 44 triples in total.

    Args:
        res: Indexable collection of 3-component landmark coordinates. The
            indices used below range from 0 to 28 and presumably follow
            MediaPipe Pose landmark numbering — confirm against the caller.

    Returns:
        A single string of 132 space-separated numbers, with a trailing
        space before the terminating newline.
    """
    def _parse(triple):
        # float() reads the numeric text produced by the loaders; eval()
        # would execute arbitrary expressions for no benefit.
        return np.array([float(token) for token in triple.split(' ')[:-1]])

    def _between(first, second):
        # Midpoint of two formatted triples, written back in the same
        # component order as the inputs. The sum is parenthesised so both
        # operands are halved, and the order is not reversed a second time,
        # so the result stays "z y x " like every other joint.
        middle = (_parse(first) + _parse(second)) / 2
        return " ".join(str(value) for value in middle) + " "

    # Torso and head.
    hip = dataLoad2(24, 23, res, res)
    chest = dataLoad2(11, 12, res, res)
    abdomen = _between(hip, chest)
    mouth = dataLoad2(10, 9, res, res)
    neck = _between(mouth, chest)
    head = dataLoad(0, res)

    lefteye = dataLoad(2, res)
    righteye = dataLoad(5, res)

    # Right arm and hand.
    rcollar = dataLoad2(12, 10, res, res)
    rshldr = dataLoad(12, res)
    rforearm = dataLoad(14, res)
    rhand = dataLoad(16, res)

    rthumb1 = dataLoad2(16, 22, res, res)
    rthumb2 = dataLoad(22, res)

    rindex1 = dataLoad2(16, 20, res, res)
    rindex2 = dataLoad(20, res)

    # Edit these after using hand landmarks: middle and ring fingers
    # currently reuse the index-finger landmarks.
    rmid1 = dataLoad2(16, 20, res, res)
    rmid2 = dataLoad(20, res)

    rring1 = dataLoad2(16, 20, res, res)
    rring2 = dataLoad(20, res)

    rpinky1 = dataLoad2(16, 18, res, res)
    rpinky2 = dataLoad(18, res)

    # Left arm and hand.
    lcollar = dataLoad2(11, 9, res, res)
    lshldr = dataLoad(11, res)
    lforearm = dataLoad(13, res)
    lhand = dataLoad(15, res)

    lthumb1 = dataLoad2(15, 21, res, res)
    lthumb2 = dataLoad(21, res)

    lindex1 = dataLoad2(15, 19, res, res)
    lindex2 = dataLoad(19, res)

    # Edit these after using hand landmarks: middle and ring fingers
    # currently reuse the index-finger landmarks.
    lmid1 = dataLoad2(15, 19, res, res)
    lmid2 = dataLoad(19, res)

    lring1 = dataLoad2(15, 19, res, res)
    lring2 = dataLoad(19, res)

    lpinky1 = dataLoad2(15, 17, res, res)
    lpinky2 = dataLoad(17, res)

    # Legs.
    rbuttock = dataLoad(24, res)
    rthigh = dataLoad2(24, 26, res, res)
    rshin = dataLoad(26, res)
    rfoot = dataLoad(28, res)

    lbuttock = dataLoad(23, res)
    lthigh = dataLoad2(23, 25, res, res)
    lshin = dataLoad(25, res)
    lfoot = dataLoad(27, res)

    # The hip triple appears twice on purpose: the caller uses the first
    # copy as the root position and the remaining 43 triples for rotations.
    ordered = [
        hip, hip, abdomen, chest, neck, head, lefteye, righteye,
        rcollar, rshldr, rforearm, rhand,
        rthumb1, rthumb2, rindex1, rindex2, rmid1, rmid2,
        rring1, rring2, rpinky1, rpinky2,
        lcollar, lshldr, lforearm, lhand,
        lthumb1, lthumb2, lindex1, lindex2, lmid1, lmid2,
        lring1, lring2, lpinky1, lpinky2,
        rbuttock, rthigh, rshin, rfoot,
        lbuttock, lthigh, lshin, lfoot,
    ]
    return "".join(ordered) + "\n"

实时捕获的主循环：

# Short aliases for the MediaPipe sub-modules used by the rest of the script.
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_pose = mp.solutions.pose

# For static images:
# IMAGE_FILES is empty here, so the loop below does nothing until paths are
# added to it.
IMAGE_FILES = []
BG_COLOR = (192, 192, 192) # gray
with mp_pose.Pose(
    static_image_mode=True,
    model_complexity=2,
    enable_segmentation=True,
    min_detection_confidence=0.5) as pose:
  for idx, file in enumerate(IMAGE_FILES):
    # NOTE(review): cv2.imread presumably returns None for an unreadable
    # path, in which case the .shape access below would fail — confirm.
    image = cv2.imread(file)
    image_height, image_width, _ = image.shape
    # Convert the BGR image to RGB before processing.
    results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    # Skip images in which no pose was detected.
    if not results.pose_landmarks:
      continue
    # The landmark x/y values are multiplied by the image size to print the
    # nose position in pixels.
    print(
        f'Nose coordinates: ('
        f'{results.pose_landmarks.landmark[mp_pose.PoseLandmark.NOSE].x * image_width}, '
        f'{results.pose_landmarks.landmark[mp_pose.PoseLandmark.NOSE].y * image_height})'
    )

    annotated_image = image.copy()
    # Draw segmentation on the image.
    # To improve segmentation around boundaries, consider applying a joint
    # bilateral filter to "results.segmentation_mask" with "image".
    # The mask is stacked three times along the last axis so it can select
    # between the image and the flat background in np.where.
    condition = np.stack((results.segmentation_mask,) * 3, axis=-1) > 0.1
    bg_image = np.zeros(image.shape, dtype=np.uint8)
    bg_image[:] = BG_COLOR
    annotated_image = np.where(condition, annotated_image, bg_image)
    # Draw pose landmarks on the image.
    mp_drawing.draw_landmarks(
        annotated_image,
        results.pose_landmarks,
        mp_pose.POSE_CONNECTIONS,
        landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
    # One output file per input image, numbered by its position in IMAGE_FILES.
    cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
    # Plot pose world landmarks.
    mp_drawing.plot_landmarks(
        results.pose_world_landmarks, mp_pose.POSE_CONNECTIONS)

# Live capture: for every webcam frame with a detected pose, append one line
# of numbers to test.bvh (3 root position values followed by 43 angle
# triples). The file is opened in append mode and no BVH header is written
# here.
temp = [0.00] * 132  # 44 reference points x 3 components, all zero
count = 0  # number of frames that produced an output line
with open("test.bvh", "a") as bvh:
    # For webcam input:
    cap = cv2.VideoCapture(0)
    try:
        with mp_pose.Pose(
                min_detection_confidence=0.5,
                min_tracking_confidence=0.5) as pose:
            while cap.isOpened():
                success, image = cap.read()
                if not success:
                    print("Ignoring empty camera frame.")
                    # If loading a video, use 'break' instead of 'continue'.
                    continue

                # To improve performance, optionally mark the image as not
                # writeable to pass by reference.
                image.flags.writeable = False
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                results = pose.process(image)
                if results.pose_landmarks:
                    count += 1
                    res = np.array([[lm.x, lm.y, lm.z] for lm in results.pose_landmarks.landmark])
                    # writeBVH returns text; split it back into 132 number
                    # tokens (the final element after the last space is the
                    # newline and is dropped).
                    t = writeBVH(res)
                    t = t.split(' ')[:-1]
                    # Root position: the first triple, formatted with three
                    # decimals and then cut to two.
                    # NOTE(review): writeBVH emits triples in z, y, x order,
                    # while the ROOT channels in the hierarchy start with
                    # Xposition Yposition Zposition — confirm the order.
                    res = [[float('{:.3f}'.format(float(t[0]))[:-1]), float('{:.3f}'.format(float(t[1]))[:-1]), float('{:.3f}'.format(float(t[2]))[:-1])]]
                    t = np.array(list(map(float, t))).reshape(44,3)
                    temp = np.array(list(map(float, temp))).reshape(44,3)
                    # NOTE(review): temp is never refreshed from the current
                    # frame, so it stays all zeros and every angle is
                    # measured against the origin — confirm whether it was
                    # meant to hold the previous frame.
                    for i in range(1, 44):
                        res.append(rot(t[i], temp[i]))
                    res = list(np.array(res).flatten())
                    # rot() returns [Z, X, Y]; the ROOT joint's rotation
                    # channels are ordered Z, Y, X, so swap the last two
                    # values of the first rotation triple.
                    (res[4], res[5]) = (res[5], res[4])
                    # Every value is followed by one space, then a newline.
                    s = ''.join(str(value) + ' ' for value in res) + '\n'
                    bvh.write(s)
                    temp = list(temp.flatten())
                # Draw the pose annotation on the image.
                image.flags.writeable = True
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                mp_drawing.draw_landmarks(
                    image,
                    results.pose_landmarks,
                    mp_pose.POSE_CONNECTIONS,
                    landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
                # Flip the image horizontally for a selfie-view display.
                cv2.imshow('MediaPipe Pose', cv2.flip(image, 1))
                # Esc (key code 27) ends the capture.
                if cv2.waitKey(5) & 0xFF == 27:
                    break
    finally:
        # Release the camera and close the preview window even when a frame
        # fails to process or the user interrupts the script.
        cap.release()
        cv2.destroyAllWindows()
    print(count)

MediaPipe 为 BVH 设置地标

问题描述投票：0回答：0

BVH文件的层级为：

我用过的旋转函数：

我做的从地标到骨架的映射：

实时抓拍的主循环：

最新问题

MediaPipe 为 BVH 设置地标

问题描述 投票：0回答：0

BVH文件的层级为：

我用过的旋转函数：

我做的从地标到骨架的映射：

实时抓拍的主循环：

最新问题

问题描述投票：0回答：0