我想创建一个实时的基础手语翻译器,用来识别字母和数字。我已经用 CNN 完成了训练,目前可以把新图像放入测试文件夹并运行预测来测试模型。我该如何让它实时运行?
我尝试了网上建议的一些步骤,但摄像头似乎检测不到我的手,而且准确率很低。
import os
import tensorflow as tf
import numpy as np
import pathlib
import json
# Rebuild the trained CNN from its saved architecture (JSON) and weights (HDF5).
with open("model_arch.json", "r") as json_file:
    model_json = json_file.read()
model = tf.keras.models.model_from_json(model_json)
model.load_weights("model_weights.h5")

data_dir = pathlib.Path('C:\\Users\\User\\Documents\\FYP\\FYP\\data')
img_height = 180
img_width = 180

# The training split is loaded here only so that ``class_names`` can be read
# from it; the predicted index below is mapped back to a label through it.
train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=32
)
class_names = train_ds.class_names

test_directory = "C:\\Users\\User\\Documents\\FYP\\FYP\\test"
# Number of JPEG files one level below ``data_dir`` (not used further in this
# part of the script).
image_count = len(list(data_dir.glob('*/*.jpeg')))
test_image_paths = [
    os.path.join(test_directory, f)
    for f in os.listdir(test_directory)
    if f.lower().endswith('.jpeg')
]

if not test_image_paths:
    print("No JPEG images found in the specified test directory.")
    # ``exit()`` is a helper installed by the ``site`` module for interactive
    # sessions and may be missing (e.g. ``python -S``); raising SystemExit
    # directly ends the script with the same exit status.
    raise SystemExit

# Classify every test image and report the most likely class.
for test_image_path in test_image_paths:
    try:
        img = tf.keras.utils.load_img(
            test_image_path, target_size=(img_height, img_width)
        )
    except Exception as e:
        # Best effort: report the unreadable file and move on to the next one.
        print(f"Error loading the image {test_image_path}: {e}")
        continue

    img_array = tf.keras.utils.img_to_array(img)
    # The model predicts on batches, so add a leading batch axis of size 1.
    img_array = tf.expand_dims(img_array, 0)

    predictions = model.predict(img_array)
    # NOTE(review): softmax is applied to the model output here, which assumes
    # the final layer emits raw logits - confirm against the architecture.
    score = tf.nn.softmax(predictions[0])
    predicted_class = class_names[np.argmax(score)]
    confidence = 100 * np.max(score)
    print(
        f"The image most likely belongs to class {predicted_class} with a {confidence:.2f}% confidence."
    )
其中一条建议是创建如下所示的预处理函数:
def preprocess_image(img_path, target_size=(180, 180)):
    """Load an image file and turn it into a batch containing one image.

    Steps, in order: load the file at ``target_size``, convert it to an
    array, scale it with ``normalize_image``, pass it through
    ``resize_image`` with the same ``target_size``, and add a leading batch
    axis.

    The optional helpers in this file (``augment_image``,
    ``convert_to_grayscale``, ``apply_gaussian_blur``, ``crop_image``) are
    not applied here; insert them before the batch axis is added if needed.

    NOTE(review): ``normalize_image`` divides by 255. If the saved model
    already rescales its input internally, the pixels would be scaled
    twice - confirm against the model architecture.

    Args:
        img_path: Path of the image file to load.
        target_size: Size passed both to the loader and to ``resize_image``.

    Returns:
        The preprocessed image with an extra axis of size 1 at position 0.
    """
    loaded = tf.keras.utils.load_img(img_path, target_size=target_size)
    pixels = tf.keras.utils.img_to_array(loaded)
    scaled = normalize_image(pixels)
    resized = resize_image(scaled, target_size)
    return tf.expand_dims(resized, 0)
def normalize_image(img, max_value=255.0):
    """Scale pixel intensities by dividing them by ``max_value``.

    Args:
        img: Pixel data (scalar, NumPy array or tensor) that supports true
            division by a float.
        max_value: Largest raw pixel value. Defaults to 255.0, which maps
            8-bit pixel values into the range [0, 1]; pass e.g. 65535.0 for
            16-bit data.

    Returns:
        ``img`` divided element-wise by ``max_value``.
    """
    return img / max_value
def resize_image(img, target_size):
    """Resize ``img`` to ``target_size`` via ``tf.image.resize``.

    Args:
        img: Image data accepted by ``tf.image.resize``.
        target_size: Requested output size, forwarded unchanged.

    Returns:
        The result of ``tf.image.resize(img, target_size)``.
    """
    resized = tf.image.resize(img, target_size)
    return resized
def augment_image(img):
    """Apply random flips to ``img`` for data augmentation.

    A random left/right flip is applied first, then a random up/down flip.
    Further augmentations can be appended to the sequence below.

    Args:
        img: Image data accepted by the ``tf.image.random_flip_*`` functions.

    Returns:
        The (possibly flipped) image.
    """
    flips = (
        tf.image.random_flip_left_right,
        tf.image.random_flip_up_down,
    )
    for flip in flips:
        img = flip(img)
    return img
def convert_to_grayscale(img):
    """Convert an RGB image to grayscale via ``tf.image.rgb_to_grayscale``.

    Args:
        img: RGB image data accepted by ``tf.image.rgb_to_grayscale``.

    Returns:
        The grayscale version of ``img``.
    """
    gray = tf.image.rgb_to_grayscale(img)
    return gray
def apply_gaussian_blur(img, sigma=1.0):
    """Blur an image with a Gaussian kernel using core TensorFlow ops only.

    Core ``tf.image`` has no ``gaussian_filter2d`` (that function lives in
    the separate TensorFlow Addons package), so the blur is implemented here
    as a depthwise convolution with a normalized Gaussian kernel.

    Args:
        img: Image of shape (height, width, channels) or a batch of shape
            (batch, height, width, channels). The channel count must be
            statically known.
        sigma: Standard deviation of the Gaussian in pixels; must be a
            positive Python number. The kernel covers about three standard
            deviations on each side, and that radius must be smaller than
            the image height and width because reflect padding is used.

    Returns:
        A float32 tensor with the same shape as ``img``.

    Raises:
        ValueError: If ``sigma`` is not positive.
    """
    if sigma <= 0:
        raise ValueError(f"sigma must be positive, got {sigma!r}")

    image = tf.cast(img, tf.float32)
    is_single = image.shape.rank == 3
    if is_single:
        # depthwise_conv2d expects a 4-D batch; add a temporary batch axis.
        image = image[tf.newaxis, ...]

    # Truncate the kernel at roughly three standard deviations.
    radius = max(1, int(round(3.0 * sigma)))
    offsets = tf.range(-radius, radius + 1, dtype=tf.float32)
    kernel_1d = tf.exp(-tf.square(offsets) / (2.0 * sigma * sigma))
    kernel_1d = kernel_1d / tf.reduce_sum(kernel_1d)

    # Outer product gives the separable 2-D kernel; replicate it per channel
    # so every channel is blurred independently.
    kernel_2d = tf.tensordot(kernel_1d, kernel_1d, axes=0)
    channels = image.shape[-1]
    kernel = tf.tile(
        kernel_2d[:, :, tf.newaxis, tf.newaxis], [1, 1, channels, 1]
    )

    # Reflect-pad so the borders are not darkened by implicit zero padding.
    padded = tf.pad(
        image,
        [[0, 0], [radius, radius], [radius, radius], [0, 0]],
        mode="REFLECT",
    )
    blurred = tf.nn.depthwise_conv2d(
        padded, kernel, strides=[1, 1, 1, 1], padding="VALID"
    )
    return blurred[0] if is_single else blurred
def crop_image(img, crop_size):
    """Keep the central region of ``img`` via ``tf.image.central_crop``.

    Args:
        img: Image data accepted by ``tf.image.central_crop``.
        crop_size: Forwarded as the second argument of
            ``tf.image.central_crop``; the usage sketch in this file passes
            0.8, i.e. a fraction rather than a pixel size.

    Returns:
        The centrally cropped image.
    """
    cropped = tf.image.central_crop(img, crop_size)
    return cropped
# Example usage:
#preprocessed_image = preprocess_image(test_image_path)
预处理之后,您可以检查模型结构并应用一些正则化技术,例如 Dropout、权重正则化等。您还可以尝试调整权重和偏置的初始化方法。