我正在尝试为一个图像数据集创建 CNN 网络。该数据集包含 20 个文件夹,共 2280 张图像(每个文件夹 114 张)。我已经完成了图像处理:从各文件夹读取图像并创建了字典。但是,当我想用整个数据集来划分训练集和测试集时,得到的标签数组形状不正确。图像数组的形状是正确的:(2280,56,56),但 all_labels 数组应该是 (2280,20),而我得到的却是 (2280,56)。
如果有人能帮助我,我将不胜感激。提前谢谢您!
所以我的代码如下
def read_image(image_path):
    """Load an image from disk as a normalized grayscale array.

    No cropping or resizing is applied; the image keeps its on-disk size.

    Args:
        image_path: Path of the image file to load.

    Returns:
        The grayscale image as a float32 array with pixel values divided
        by 255.

    Raises:
        ValueError: If OpenCV cannot read or decode the file.
    """
    # Read the image using OpenCV
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising, so
    # fail here with the offending path rather than later inside cvtColor.
    if image is None:
        raise ValueError(f"Could not read image: {image_path}")
    # Collapse the BGR colour channels into a single grayscale channel
    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Normalize the pixel values
    return image_gray.astype('float32') / 255.0
def process_folder(folder_path):
    """Load every regular file in a folder as an image and stack the results.

    Args:
        folder_path: Directory whose files are read with read_image.

    Returns:
        A numpy array built from the per-file images, in sorted file-name
        order. Sub-directories inside the folder are skipped.
    """
    # Keep only regular files, visited in sorted name order
    file_names = sorted(
        name
        for name in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, name))
    )
    # Read each file through read_image and pack the results into one array
    return np.array(
        [read_image(os.path.join(folder_path, name)) for name in file_names]
    )
def create_folder_dictionary(root_folder_path):
    """Build per-folder image arrays for every sub-folder of a root directory.

    Args:
        root_folder_path: Directory whose immediate sub-folders are processed.

    Returns:
        A tuple (folder_dictionary, labels_data). Both dictionaries are keyed
        by sub-folder name, in sorted order, and both map each name to the
        array returned by process_folder for that sub-folder.
    """
    images_by_folder = {}
    labels_by_folder = {}
    # Only immediate sub-directories count; plain files in the root are ignored
    subfolder_names = sorted(
        entry
        for entry in os.listdir(root_folder_path)
        if os.path.isdir(os.path.join(root_folder_path, entry))
    )
    for name in subfolder_names:
        loaded = process_folder(os.path.join(root_folder_path, name))
        if loaded is None:
            continue
        # NOTE: both dictionaries receive the same array object; the label
        # information lives in the keys (the folder names).
        images_by_folder[name] = loaded
        labels_by_folder[name] = loaded
    return images_by_folder, labels_by_folder
# Load every class folder, then flatten the per-folder arrays into one image
# array and one matching label array for the train/test split.
root_folder_path = r'C:\Users\sumit\Downloads\master thesis\ImageDataset'
result, labels_data = create_folder_dictionary(root_folder_path)

# Folder names (class labels) and their image arrays, in matching order
folders = list(result.keys())
folder_data_list = list(result.values())

# Print the total number of images
total_images = sum(len(images) for images in folder_data_list)
print(f"Total number of images: {total_images}")

# Folder names are parsed as integer class ids
folders_new = list(map(int, folders))
folders_arr = np.array(folders_new)
folders_data_list_arr = np.array(folder_data_list)

# Stack every folder's images along the first (sample) axis
all_images = np.concatenate(folder_data_list, axis=0)
print(all_images.shape)

# Create all_labels: one integer label per image.
# Each run of labels is sized by data.shape[0] (the number of images in the
# folder). Sizing it from data[:, 0] would copy that slice's 2-D shape and
# yield a whole row of labels per image instead of a single label.
all_labels = np.concatenate(
    [
        np.full(data.shape[0], label)
        for label, data in zip(folders_new, folder_data_list)
    ],
    axis=0,
)
print("Shape of all_labels:", all_labels.shape)
在这一行中:
all_labels = np.concatenate([np.full_like(data[:, 0], label) for label, data in zip(labels_data, folder_data_list)], axis=0)
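将 np.full_like(data[:, 0], label) 替换为 np.full(data.shape[0], label)。原因是:data 的形状为 (114,56,56),data[:, 0] 的形状是 (114,56),而 np.full_like 会沿用这个形状,所以每个文件夹得到 (114,56) 的标签,拼接后就是 (2280,56);data.shape[0] 则是该文件夹中的图像数量,这样每张图像只生成一个标签。如果仍然出现错误,请尝试用 int(label) 替换该行中的 label。因此,将该行替换为: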
all_labels = np.concatenate([np.full(data.shape[0], int(label)) for label, data in zip(labels_data, folder_data_list)], axis=0)
应该可以。这样得到的 all_labels 形状为 (2280,)(每张图像一个整数标签);如果模型需要 (2280,20) 的 one-hot 标签,还需要再对其进行 one-hot 编码。