我正在创建 Bharathanatyam Mudras 检测模型。我有 3 个手印“Alapathmam”、“Aralam”和“Arthachandram”的数据集,大约有 400 张图像。
我试图通过观看 youtube 教程来做到这一点。但他们使用 Teachable Machine 来训练数据集。我做了同样的事情,它奏效了。但由于这是我的大学,我需要自己创建一个模型。我这样做了,通过观看一些教程。但是当我使用我的模型时,它只显示“Arthachandram”,无论我使用什么手印,它都会显示这些。我不知道为什么会收到此错误。这是我的代码。
数据采集代码:
import cv2
from cvzone.HandTrackingModule import HandDetector
import numpy as np
import math
import time
cap=cv2.VideoCapture(0)
detector = HandDetector(maxHands=1)
offset = 20
imgSize = 224
counter = 0
folder = 'Data/Arthachandram'
while True:
success, img = cap.read()
hands, img = detector.findHands(img)
if hands:
hand = hands[0]
x,y,w,h= hand['bbox']
imgWhite = np.ones((imgSize,imgSize,3),np.uint8)*255
imgCrop = img[y-offset:y+h+offset, x-offset:x+w+offset]
imgCropShape = imgCrop.shape
aspectRatio = h/w
if aspectRatio>1:
k = imgSize/h
wCal = math.ceil(k*w)
imgResize = cv2.resize(imgCrop, (wCal, imgSize))
imgResizeShape = imgCrop.shape
wGap = math.ceil((imgSize-wCal)/2)
imgWhite[:, wGap:wCal+wGap] = imgResize
else:
k = imgSize/w
hCal = math.ceil(k*h)
imgResize = cv2.resize(imgCrop, (imgSize, hCal))
imgResizeShape = imgCrop.shape
hGap = math.ceil((imgSize-hCal)/2)
imgWhite[hGap:hCal+hGap, :] = imgResize
cv2.imshow("ImageCrop", imgCrop)
cv2.imshow("ImageWhite", imgWhite)
cv2.imshow("Image", img)
key = cv2.waitKey(1)
if key == ord("s"):
counter +=1
cv2.imwrite(f'{folder}/Image_{time.time()}.jpg',imgWhite)
print(counter)
型号:
import matplotlib.pyplot as plt
import numpy as np
import PIL
import tensorflow as tf
import pathlib
from tensorflow import keras
from keras import layers
from keras.models import Sequential
data_dir = pathlib.Path(r'C:\Users\abhim\Desktop\MCA Project\SIGN LANG TENSORFLOW\Data')
image_count = len(list(data_dir.glob('*/*.jpg')))
print(image_count)
batch_size = 32
img_height = 244
img_width = 244
train_ds = tf.keras.utils.image_dataset_from_directory(
data_dir,
validation_split=0.2,
subset="training",
seed=123,
image_size=(img_height, img_width),
batch_size=batch_size)
val_ds = tf.keras.utils.image_dataset_from_directory(
data_dir,
validation_split=0.2,
subset="validation",
seed=123,
image_size=(img_height, img_width),
batch_size=batch_size)
class_names = train_ds.class_names
print(class_names)
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
normalization_layer = layers.Rescaling(1./255)
normalized_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
image_batch, labels_batch = next(iter(normalized_ds))
first_image = image_batch[0]
# Notice the pixel values are now in `[0,1]`.
print(np.min(first_image), np.max(first_image))
num_classes = len(class_names)
model = Sequential([
layers.Rescaling(1./255, input_shape=(img_height, img_width, 3)),
layers.Conv2D(16, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Conv2D(32, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Conv2D(64, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Flatten(),
layers.Dense(128, activation='relu'),
layers.Dense(num_classes)
])
model.compile(optimizer='adam',
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])
model.summary()
epochs=10
history = model.fit(
train_ds,
validation_data=val_ds,
epochs=epochs
)
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(epochs)
plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()
model.save("model/h5")
labels_file_path = "labels.txt"
with open(labels_file_path, 'w') as file:
for class_index, class_name in enumerate(class_names):
file.write(f"{class_index}: {class_name}\n")
测试:
import cv2
from cvzone.HandTrackingModule import HandDetector
from cvzone.ClassificationModule import Classifier
import numpy as np
import math
cap = cv2.VideoCapture(0)
detector = HandDetector(maxHands=1)
classifier = Classifier("Model/model.h5", "Model/labels.txt")
offset = 20
imgSize = 224
folder = "Data/C"
counter = 0
labels = ["Alapathmam", "Aralam", "Arthachandram"]
while True:
success, img = cap.read()
imgOutput = img.copy()
hands, img = detector.findHands(img)
if hands:
hand = hands[0]
x, y, w, h = hand['bbox']
imgWhite = np.ones((imgSize, imgSize, 3), np.uint8) * 255
imgCrop = img[y - offset:y + h + offset, x - offset:x + w + offset]
imgCropShape = imgCrop.shape
aspectRatio = h / w
if aspectRatio > 1:
k = imgSize / h
wCal = math.ceil(k * w)
imgResize = cv2.resize(imgCrop, (wCal, imgSize))
imgResizeShape = imgResize.shape
wGap = math.ceil((imgSize - wCal) / 2)
imgWhite[:, wGap:wCal + wGap] = imgResize
prediction, index = classifier.getPrediction(imgWhite, draw=False)
print(prediction, index)
else:
k = imgSize / w
hCal = math.ceil(k * h)
imgResize = cv2.resize(imgCrop, (imgSize, hCal))
imgResizeShape = imgResize.shape
hGap = math.ceil((imgSize - hCal) / 2)
imgWhite[hGap:hCal + hGap, :] = imgResize
prediction, index = classifier.getPrediction(imgWhite, draw=False)
#cv2.rectangle(imgOutput, (x - offset, y - offset-50), (x+w+offset, y - offset-50+50), (255, 0, 255), cv2.FILLED)
cv2.putText(imgOutput, labels[index], (x, y -26), cv2.FONT_HERSHEY_COMPLEX, 0.7, (255,255,255),2)
cv2.rectangle(imgOutput, (x-offset, y-offset),(x + w+offset, y + h+offset), (0, 255, 0), 4)
cv2.imshow("ImageCrop", imgCrop)
cv2.imshow("ImageWhite", imgWhite)
cv2.imshow("Image", imgOutput)
cv2.waitKey(1)
我尝试了很多改变,但似乎没有任何效果。