视频输入的TFLite推断

问题描述 投票:0回答:1

我有一个SSD tflite检测模型,该模型在台式计算机上与Python一起运行。就目前而言,下面的脚本将单个图像用作推理输入,并且可以正常工作:

    # Load TFLite model and allocate tensors.
    interpreter = tf.lite.Interpreter(model_path="model.tflite")
    interpreter.allocate_tensors()

    img_resized = Image.open(file_name)
    input_data = np.expand_dims(img_resized, axis=0)
    input_data = (np.float32(input_data) - input_mean) / input_std

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])

如何作为输入对.mp4视频进行推理?

是否还可以从该视频上检测到的对象绘制边界框?

python tensorflow-lite inference
1个回答
0
投票

要回答关于在视频上进行推理的第一个问题。这是您可以使用的代码。我为分类模型的推断编写了此代码,因此,在您的情况下,output_data变量的输出将以边界框的形式出现,您必须使用OpenCV将它们映射到框架上,这也将回答您的第二个问题(绘制边界视频上的方框)。

import cv2
from PIL import Image
import numpy as np
import tensorflow as tf

def read_tensor_from_readed_frame(frame, input_height=224, input_width=224,
        input_mean=0, input_std=255):
  output_name = "normalized"
  float_caster = tf.cast(frame, tf.float32)
  dims_expander = tf.expand_dims(float_caster, 0);
  resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width])
  normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])
  sess = tf.Session()
  result = sess.run(normalized)
  return result

def load_labels(label_file):
  label = []
  proto_as_ascii_lines = tf.gfile.GFile(label_file).readlines()
  for l in proto_as_ascii_lines:
    label.append(l.rstrip())
  return label

def VideoSrcInit(paath):
    cap = cv2.VideoCapture(paath)
    flag, image = cap.read()
    if flag:
        print("Valid Video Path. Lets move to detection!")
    else:
        raise ValueError("Video Initialization Failed. Please make sure video path is valid.")
    return cap

def main():
  Labels_Path = "labels.txt"
  Model_Path = "model.tflite"
  input_path = "video.mp4"

  ##Loading labels
  labels = load_labels(Labels_Path)

  ##Load tflite model and allocate tensors
  interpreter = tf.lite.Interpreter(model_path=Model_Path)
  interpreter.allocate_tensors()
  # Get input and output tensors.
  input_details = interpreter.get_input_details()
  output_details = interpreter.get_output_details()

  input_shape = input_details[0]['shape']

  ##Read video
  cap = VideoSrcInit(input_path)

  while True:
    ok, cv_image = cap.read()
    if not ok:
      break

    ##Converting the readed frame to RGB as opencv reads frame in BGR
    image = Image.fromarray(cv_image).convert('RGB')

    ##Converting image into tensor
    image_tensor = read_tensor_from_readed_frame(image ,224, 224)

    ##Test model
    interpreter.set_tensor(input_details[0]['index'], image_tensor)
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])

    ## You need to check the output of the output_data variable and 
    ## map it on the frame in order to draw the bounding boxes.


    cv2.namedWindow("cv_image", cv2.WINDOW_NORMAL)
    cv2.imshow("cv_image",cv_image)

    ##Use p to pause the video and use q to termiate the program
    key = cv2.waitKey(10) & 0xFF
    if key == ord("q"):
      break
    elif key == ord("p"):
      cv2.waitKey(0)
      continue 
  cap.release()

if __name__ == '__main__':
  main()
© www.soinside.com 2019 - 2024. All rights reserved.