尝试将数据集导入 TensorFlow 时出现错误。我之前用 TensorFlow 训练过其他模型,所以我很确定 TensorFlow 本身工作正常。有谁知道这个错误的原因吗?如果可以修复,请告诉我具体方法。我正在使用这个数据集:https://www.kaggle.com/datasets/atharvataras/dog-breeds-images
dataset=keras.utils.image_dataset_from_directory(".\dataset",seed=123,label_mode='categorical',image_size=(IMAGE_SIZE,IMAGE_SIZE),batch_size=BATCH_SIZE)
我得到了如下错误:
Output exceeds the size limit. Open the full output data in a text editor---------------------------------------------------------------------------
UnicodeDecodeError Traceback (most recent call last)
Cell In[7], line 1
----> 1 dataset=keras.utils.image_dataset_from_directory(".\dataset",seed=123,label_mode='categorical',image_size=(IMAGE_SIZE,IMAGE_SIZE),batch_size=BATCH_SIZE)
File c:\Users\harsh\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\utils\image_dataset.py:210, in image_dataset_from_directory(directory, labels, label_mode, class_names, color_mode, batch_size, image_size, shuffle, seed, validation_split, subset, interpolation, follow_links, crop_to_aspect_ratio, **kwargs)
208 if seed is None:
209 seed = np.random.randint(1e6)
--> 210 image_paths, labels, class_names = dataset_utils.index_directory(
211 directory,
212 labels,
213 formats=ALLOWLIST_FORMATS,
214 class_names=class_names,
215 shuffle=shuffle,
216 seed=seed,
217 follow_links=follow_links,
218 )
220 if label_mode == "binary" and len(class_names) != 2:
221 raise ValueError(
222 'When passing `label_mode="binary"`, there must be exactly 2 '
223 f"class_names. Received: class_names={class_names}"
224 )
File c:\Users\harsh\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\utils\dataset_utils.py:543, in index_directory(directory, labels, formats, class_names, shuffle, seed, follow_links)
541 subdirs = []
...
--> 703 return _pywrap_file_io.IsDirectory(compat.path_to_bytes(path))
704 except errors.OpError:
705 return False
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8c in position 114: invalid start byte
完整的错误输出如下:
{
"name": "UnicodeDecodeError",
"message": "'utf-8' codec can't decode byte 0x8c in position 114: invalid start byte",
"stack": "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mUnicodeDecodeError\u001b[0m Traceback (most recent call last)\nCell \u001b[1;32mIn[7], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m dataset\u001b[39m=\u001b[39mkeras\u001b[39m.\u001b[39;49mutils\u001b[39m.\u001b[39;49mimage_dataset_from_directory(\u001b[39m\"\u001b[39;49m\u001b[39m.\u001b[39;49m\u001b[39m\\\u001b[39;49m\u001b[39mdataset\u001b[39;49m\u001b[39m\"\u001b[39;49m,seed\u001b[39m=\u001b[39;49m\u001b[39m123\u001b[39;49m,label_mode\u001b[39m=\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39mcategorical\u001b[39;49m\u001b[39m'\u001b[39;49m,image_size\u001b[39m=\u001b[39;49m(IMAGE_SIZE,IMAGE_SIZE),batch_size\u001b[39m=\u001b[39;49mBATCH_SIZE)\n\nFile \u001b[1;32mc:\\Users\\harsh\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\utils\\image_dataset.py:210\u001b[0m, in \u001b[0;36mimage_dataset_from_directory\u001b[1;34m(directory, labels, label_mode, class_names, color_mode, batch_size, image_size, shuffle, seed, validation_split, subset, interpolation, follow_links, crop_to_aspect_ratio, **kwargs)\u001b[0m\n\u001b[0;32m 208\u001b[0m \u001b[39mif\u001b[39;00m seed \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 209\u001b[0m seed \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mrandom\u001b[39m.\u001b[39mrandint(\u001b[39m1e6\u001b[39m)\n\u001b[1;32m--> 210\u001b[0m image_paths, labels, class_names \u001b[39m=\u001b[39m dataset_utils\u001b[39m.\u001b[39;49mindex_directory(\n\u001b[0;32m 211\u001b[0m directory,\n\u001b[0;32m 212\u001b[0m labels,\n\u001b[0;32m 213\u001b[0m formats\u001b[39m=\u001b[39;49mALLOWLIST_FORMATS,\n\u001b[0;32m 214\u001b[0m class_names\u001b[39m=\u001b[39;49mclass_names,\n\u001b[0;32m 215\u001b[0m shuffle\u001b[39m=\u001b[39;49mshuffle,\n\u001b[0;32m 216\u001b[0m seed\u001b[39m=\u001b[39;49mseed,\n\u001b[0;32m 217\u001b[0m 
follow_links\u001b[39m=\u001b[39;49mfollow_links,\n\u001b[0;32m 218\u001b[0m )\n\u001b[0;32m 220\u001b[0m \u001b[39mif\u001b[39;00m label_mode \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mbinary\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mand\u001b[39;00m \u001b[39mlen\u001b[39m(class_names) \u001b[39m!=\u001b[39m \u001b[39m2\u001b[39m:\n\u001b[0;32m 221\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 222\u001b[0m \u001b[39m'\u001b[39m\u001b[39mWhen passing `label_mode=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mbinary\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m`, there must be exactly 2 \u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m 223\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mclass_names. Received: class_names=\u001b[39m\u001b[39m{\u001b[39;00mclass_names\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[0;32m 224\u001b[0m )\n\nFile \u001b[1;32mc:\\Users\\harsh\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\utils\\dataset_utils.py:543\u001b[0m, in \u001b[0;36mindex_directory\u001b[1;34m(directory, labels, formats, class_names, shuffle, seed, follow_links)\u001b[0m\n\u001b[0;32m 541\u001b[0m subdirs \u001b[39m=\u001b[39m []\n\u001b[0;32m 542\u001b[0m \u001b[39mfor\u001b[39;00m subdir \u001b[39min\u001b[39;00m \u001b[39msorted\u001b[39m(tf\u001b[39m.\u001b[39mio\u001b[39m.\u001b[39mgfile\u001b[39m.\u001b[39mlistdir(directory)):\n\u001b[1;32m--> 543\u001b[0m \u001b[39mif\u001b[39;00m tf\u001b[39m.\u001b[39;49mio\u001b[39m.\u001b[39;49mgfile\u001b[39m.\u001b[39;49misdir(tf\u001b[39m.\u001b[39;49mio\u001b[39m.\u001b[39;49mgfile\u001b[39m.\u001b[39;49mjoin(directory, subdir)):\n\u001b[0;32m 544\u001b[0m \u001b[39mif\u001b[39;00m subdir\u001b[39m.\u001b[39mendswith(\u001b[39m\"\u001b[39m\u001b[39m/\u001b[39m\u001b[39m\"\u001b[39m):\n\u001b[0;32m 545\u001b[0m subdir \u001b[39m=\u001b[39m subdir[:\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m]\n\nFile 
\u001b[1;32mc:\\Users\\harsh\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\tensorflow\\python\\lib\\io\\file_io.py:703\u001b[0m, in \u001b[0;36mis_directory_v2\u001b[1;34m(path)\u001b[0m\n\u001b[0;32m 694\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Returns whether the path is a directory or not.\u001b[39;00m\n\u001b[0;32m 695\u001b[0m \n\u001b[0;32m 696\u001b[0m \u001b[39mArgs:\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 700\u001b[0m \u001b[39m True, if the path is a directory; False otherwise\u001b[39;00m\n\u001b[0;32m 701\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m 702\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 703\u001b[0m \u001b[39mreturn\u001b[39;00m _pywrap_file_io\u001b[39m.\u001b[39;49mIsDirectory(compat\u001b[39m.\u001b[39;49mpath_to_bytes(path))\n\u001b[0;32m 704\u001b[0m \u001b[39mexcept\u001b[39;00m errors\u001b[39m.\u001b[39mOpError:\n\u001b[0;32m 705\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mFalse\u001b[39;00m\n\n\u001b[1;31mUnicodeDecodeError\u001b[0m: 'utf-8' codec can't decode byte 0x8c in position 114: invalid start byte"
}
我对此还很陌生,如果能得到任何帮助,我将不胜感激。
我尝试搜索过是否有其他人遇到类似问题。我唯一能找到的是一些人在导入 CSV 文件时遇到同样的错误,他们通常通过把读取时的编码从 UTF-8 改为其他编码来解决。但我认为在使用 keras.utils.image_dataset_from_directory 时无法指定编码;如果可以,请告诉我具体做法,以及如何确定适用于该数据集的编码格式。(我怀疑问题出在数据集里某些文件或文件夹的名称不是合法的 UTF-8 编码。)