在 tensorflow 中导入图像数据集时出错

问题描述 投票:0回答:0

尝试将数据集导入 TensorFlow 时出现错误。我之前用 TensorFlow 训练过其他模型,所以可以确定 TensorFlow 本身工作正常。有谁知道这个错误的原因吗?如果知道如何修复,请告诉我。我使用的数据集是:https://www.kaggle.com/datasets/atharvataras/dog-breeds-images

dataset=keras.utils.image_dataset_from_directory(".\dataset",seed=123,label_mode='categorical',image_size=(IMAGE_SIZE,IMAGE_SIZE),batch_size=BATCH_SIZE)

我得到这个错误

Output exceeds the size limit. Open the full output data in a text editor---------------------------------------------------------------------------
UnicodeDecodeError                        Traceback (most recent call last)
Cell In[7], line 1
----> 1 dataset=keras.utils.image_dataset_from_directory(".\dataset",seed=123,label_mode='categorical',image_size=(IMAGE_SIZE,IMAGE_SIZE),batch_size=BATCH_SIZE)

File c:\Users\harsh\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\utils\image_dataset.py:210, in image_dataset_from_directory(directory, labels, label_mode, class_names, color_mode, batch_size, image_size, shuffle, seed, validation_split, subset, interpolation, follow_links, crop_to_aspect_ratio, **kwargs)
    208 if seed is None:
    209     seed = np.random.randint(1e6)
--> 210 image_paths, labels, class_names = dataset_utils.index_directory(
    211     directory,
    212     labels,
    213     formats=ALLOWLIST_FORMATS,
    214     class_names=class_names,
    215     shuffle=shuffle,
    216     seed=seed,
    217     follow_links=follow_links,
    218 )
    220 if label_mode == "binary" and len(class_names) != 2:
    221     raise ValueError(
    222         'When passing `label_mode="binary"`, there must be exactly 2 '
    223         f"class_names. Received: class_names={class_names}"
    224     )

File c:\Users\harsh\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\utils\dataset_utils.py:543, in index_directory(directory, labels, formats, class_names, shuffle, seed, follow_links)
    541 subdirs = []
...
--> 703   return _pywrap_file_io.IsDirectory(compat.path_to_bytes(path))
    704 except errors.OpError:
    705   return False

UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8c in position 114: invalid start byte

整个错误输出

{
    "name": "UnicodeDecodeError",
    "message": "'utf-8' codec can't decode byte 0x8c in position 114: invalid start byte",
    "stack": "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mUnicodeDecodeError\u001b[0m                        Traceback (most recent call last)\nCell \u001b[1;32mIn[7], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m dataset\u001b[39m=\u001b[39mkeras\u001b[39m.\u001b[39;49mutils\u001b[39m.\u001b[39;49mimage_dataset_from_directory(\u001b[39m\"\u001b[39;49m\u001b[39m.\u001b[39;49m\u001b[39m\\\u001b[39;49m\u001b[39mdataset\u001b[39;49m\u001b[39m\"\u001b[39;49m,seed\u001b[39m=\u001b[39;49m\u001b[39m123\u001b[39;49m,label_mode\u001b[39m=\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39mcategorical\u001b[39;49m\u001b[39m'\u001b[39;49m,image_size\u001b[39m=\u001b[39;49m(IMAGE_SIZE,IMAGE_SIZE),batch_size\u001b[39m=\u001b[39;49mBATCH_SIZE)\n\nFile \u001b[1;32mc:\\Users\\harsh\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\utils\\image_dataset.py:210\u001b[0m, in \u001b[0;36mimage_dataset_from_directory\u001b[1;34m(directory, labels, label_mode, class_names, color_mode, batch_size, image_size, shuffle, seed, validation_split, subset, interpolation, follow_links, crop_to_aspect_ratio, **kwargs)\u001b[0m\n\u001b[0;32m    208\u001b[0m \u001b[39mif\u001b[39;00m seed \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m    209\u001b[0m     seed \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mrandom\u001b[39m.\u001b[39mrandint(\u001b[39m1e6\u001b[39m)\n\u001b[1;32m--> 210\u001b[0m image_paths, labels, class_names \u001b[39m=\u001b[39m dataset_utils\u001b[39m.\u001b[39;49mindex_directory(\n\u001b[0;32m    211\u001b[0m     directory,\n\u001b[0;32m    212\u001b[0m     labels,\n\u001b[0;32m    213\u001b[0m     formats\u001b[39m=\u001b[39;49mALLOWLIST_FORMATS,\n\u001b[0;32m    214\u001b[0m     class_names\u001b[39m=\u001b[39;49mclass_names,\n\u001b[0;32m    215\u001b[0m     shuffle\u001b[39m=\u001b[39;49mshuffle,\n\u001b[0;32m    216\u001b[0m     
seed\u001b[39m=\u001b[39;49mseed,\n\u001b[0;32m    217\u001b[0m     follow_links\u001b[39m=\u001b[39;49mfollow_links,\n\u001b[0;32m    218\u001b[0m )\n\u001b[0;32m    220\u001b[0m \u001b[39mif\u001b[39;00m label_mode \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mbinary\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mand\u001b[39;00m \u001b[39mlen\u001b[39m(class_names) \u001b[39m!=\u001b[39m \u001b[39m2\u001b[39m:\n\u001b[0;32m    221\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m    222\u001b[0m         \u001b[39m'\u001b[39m\u001b[39mWhen passing `label_mode=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mbinary\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m`, there must be exactly 2 \u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m    223\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mclass_names. Received: class_names=\u001b[39m\u001b[39m{\u001b[39;00mclass_names\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[0;32m    224\u001b[0m     )\n\nFile \u001b[1;32mc:\\Users\\harsh\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\utils\\dataset_utils.py:543\u001b[0m, in \u001b[0;36mindex_directory\u001b[1;34m(directory, labels, formats, class_names, shuffle, seed, follow_links)\u001b[0m\n\u001b[0;32m    541\u001b[0m subdirs \u001b[39m=\u001b[39m []\n\u001b[0;32m    542\u001b[0m \u001b[39mfor\u001b[39;00m subdir \u001b[39min\u001b[39;00m \u001b[39msorted\u001b[39m(tf\u001b[39m.\u001b[39mio\u001b[39m.\u001b[39mgfile\u001b[39m.\u001b[39mlistdir(directory)):\n\u001b[1;32m--> 543\u001b[0m     \u001b[39mif\u001b[39;00m tf\u001b[39m.\u001b[39;49mio\u001b[39m.\u001b[39;49mgfile\u001b[39m.\u001b[39;49misdir(tf\u001b[39m.\u001b[39;49mio\u001b[39m.\u001b[39;49mgfile\u001b[39m.\u001b[39;49mjoin(directory, subdir)):\n\u001b[0;32m    544\u001b[0m         \u001b[39mif\u001b[39;00m subdir\u001b[39m.\u001b[39mendswith(\u001b[39m\"\u001b[39m\u001b[39m/\u001b[39m\u001b[39m\"\u001b[39m):\n\u001b[0;32m    
545\u001b[0m             subdir \u001b[39m=\u001b[39m subdir[:\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m]\n\nFile \u001b[1;32mc:\\Users\\harsh\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\tensorflow\\python\\lib\\io\\file_io.py:703\u001b[0m, in \u001b[0;36mis_directory_v2\u001b[1;34m(path)\u001b[0m\n\u001b[0;32m    694\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Returns whether the path is a directory or not.\u001b[39;00m\n\u001b[0;32m    695\u001b[0m \n\u001b[0;32m    696\u001b[0m \u001b[39mArgs:\u001b[39;00m\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    700\u001b[0m \u001b[39m  True, if the path is a directory; False otherwise\u001b[39;00m\n\u001b[0;32m    701\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m    702\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 703\u001b[0m   \u001b[39mreturn\u001b[39;00m _pywrap_file_io\u001b[39m.\u001b[39;49mIsDirectory(compat\u001b[39m.\u001b[39;49mpath_to_bytes(path))\n\u001b[0;32m    704\u001b[0m \u001b[39mexcept\u001b[39;00m errors\u001b[39m.\u001b[39mOpError:\n\u001b[0;32m    705\u001b[0m   \u001b[39mreturn\u001b[39;00m \u001b[39mFalse\u001b[39;00m\n\n\u001b[1;31mUnicodeDecodeError\u001b[0m: 'utf-8' codec can't decode byte 0x8c in position 114: invalid start byte"
}

我是这方面的新手,如能得到任何帮助,我将不胜感激。

我试着查找其他遇到类似问题的人。我唯一能找到的是有人在导入 CSV 文件时遇到同样的错误,他们通常通过将编码从 UTF-8 改为其他编码来解决。但我认为在使用 keras.preprocessing 时无法这样做;如果可以,请告诉我具体做法,以及如何确定适用于该数据集的编码格式。

tensorflow machine-learning keras utf-8 tensorflow-datasets
© www.soinside.com 2019 - 2024. All rights reserved.