我正在尝试运行这段代码,但出现了错误,我不确定该错误是由文件路径还是其他原因造成的。
数据集文件包含子宫颈的图像。
图像按其标记类别进行组织:Type_1、Type_2 和 Type_3
def im_multi(path):
    """Read the dimensions of the image stored at ``path``.

    Args:
        path: Location of the image file to open.

    Returns:
        A two-item list ``[path, {'size': size}]`` where ``size`` is the
        ``.size`` attribute of the opened image. If the image cannot be
        opened, ``path`` is printed and ``[path, {'size': [0, 0]}]`` is
        returned so the caller can filter the entry out.
    """
    try:
        # Assumes ``Image`` is PIL.Image (its import is outside this
        # excerpt) -- TODO confirm. The context manager releases the file
        # handle as soon as the size has been read.
        with Image.open(path) as im_stats_im_:
            return [path, {'size': im_stats_im_.size}]
    except Exception:
        # Deliberate best-effort: any failure marks the image as bad.
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate so the run can still be interrupted.
        print(path)
        return [path, {'size': [0, 0]}]
def im_stats(im_stats_df):
    """Add a ``'size'`` column describing each image listed in the frame.

    Args:
        im_stats_df: DataFrame with a ``'path'`` column of image paths.
            It is modified in place.

    Returns:
        The same DataFrame, with ``'size'`` set to the space-joined
        components of the size reported by ``im_multi`` for each path
        (``'0 0'`` for images that could not be opened).
    """
    # The context manager shuts the worker processes down once the map has
    # finished; the original never closed the pool.
    with Pool(cpu_count()) as p:
        ret = p.map(im_multi, im_stats_df['path'])
    # Each result is a ``[path, stats]`` pair, so it converts directly.
    im_stats_d = dict(ret)
    im_stats_df['size'] = im_stats_df['path'].map(
        lambda x: ' '.join(str(s) for s in im_stats_d[x]['size'])
    )
    return im_stats_df
def get_im_cv2(path):
    """Load the image at ``path`` with OpenCV and resize it to 32x32.

    Args:
        path: Location of the image file to read.

    Returns:
        A two-item list ``[path, resized]`` where ``resized`` is the
        resized image array.

    Raises:
        ValueError: If ``cv2.imread`` could not read the file.
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cv2.resize.
        raise ValueError('cv2.imread could not read image: {}'.format(path))
    # The third positional parameter of cv2.resize is ``dst``, so the
    # interpolation flag has to be passed by keyword to take effect.
    # Use (64, 64) instead of (32, 32) for larger inputs.
    resized = cv2.resize(img, (32, 32), interpolation=cv2.INTER_LINEAR)
    return [path, resized]
def normalize_image_features(paths):
    """Load the images at ``paths`` and return them as a scaled float array.

    Args:
        paths: Iterable of image paths (for example a DataFrame column).

    Returns:
        A ``float32`` array built from the ``get_im_cv2`` results, in the
        same order as ``paths``, with the last axis moved to position 1
        and values divided by 255.

    Raises:
        ValueError: If the loaded images do not stack into a 4-D array,
            which happens when ``paths`` is empty.
    """
    # The context manager shuts the worker processes down once the map has
    # finished; the original never closed the pool.
    with Pool(cpu_count()) as p:
        ret = p.map(get_im_cv2, paths)
    # Each result is a ``[path, image]`` pair; index them by path.
    imf_d = dict(ret)
    fdata = np.array([imf_d[f] for f in paths], dtype=np.uint8)
    if fdata.ndim != 4:
        # Gives a readable message instead of the obscure
        # "axes don't match array" error raised by transpose.
        raise ValueError(
            'expected a 4-D image stack, got an array of shape {}; '
            'is the list of paths empty?'.format(fdata.shape)
        )
    # Presumably (N, H, W, C) -> (N, C, H, W); this depends on what
    # get_im_cv2 returns -- TODO confirm.
    fdata = fdata.transpose((0, 3, 1, 2))
    fdata = fdata.astype('float32')
    fdata = fdata / 255
    return fdata
# Build the training set: collect image paths per class folder, drop unreadable
# images, convert the rest to a normalized array and save it with its labels.
import os

# NOTE(review): the helpers above use multiprocessing.Pool. When this runs as
# a plain script on Windows, the code below presumably needs to sit under
# ``if __name__ == '__main__':`` -- confirm for your environment.

#train = glob.glob('../input/train/**/*.jpg') + glob.glob('../input/additional/**/*.jpg')
# The parentheses keep the three glob results in one expression; a line that
# starts with ``+`` is otherwise parsed as a separate statement.
train = (
    glob.glob('D:\\Test cods\\KerasCNNClean\\data\\train\\Type_1\\*.jpg')
    + glob.glob('D:\\Test cods\\KerasCNNClean\\data\\train\\Type_2\\*.jpg')
    + glob.glob('D:\\Test cods\\KerasCNNClean\\data\\train\\Type_3\\*.jpg')
)
# Take the class (parent folder name) and the file name from the path itself
# instead of p.split('/')[3] / [4]: these paths use backslashes, so splitting
# on '/' yields a single element and indexing it raises IndexError.
train = pd.DataFrame(
    [[os.path.basename(os.path.dirname(p)), os.path.basename(p), p] for p in train],
    columns=['type', 'image', 'path'],
)[::5]  # limit for Kaggle Demo
train = im_stats(train)
train = train[train['size'] != '0 0'].reset_index(drop=True)  # corrupt images removed
print("Bad images removed")
print("loading test data")
train_data = normalize_image_features(train['path'])
print("test data loaded")
#np.save('train.npy', train_data, allow_pickle=True, fix_imports=True)
np.save(r'D:/Test cods/KerasCNNClean/npyTrain.train.npy', train_data, allow_pickle=True,
        fix_imports=True)
# Encode the class folder names as integer labels.
le = LabelEncoder()
train_target = le.fit_transform(train['type'].values)
print(le.classes_)
#np.save('train_target.npy', train_target, allow_pickle=True, fix_imports=True)
np.save(r'D:/Test cods/KerasCNNClean/npyTrain.train_target.npy', train_target,
        allow_pickle=True, fix_imports=True)
我找不到问题所在:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-46-4f89beac4d83> in <module>
1 test = glob.glob('D:\\Test cods\\KerasCNNClean\\data\\test\\*.jpg')
----> 2 test = pd.DataFrame([[p.split('/')[3],p] for p in test], columns = ['image','path']) #[::20] #limit for Kaggle Demo
3 print("loading train data")
4 test_data = normalize_image_features(test['path'])
5 np.save(r'D:/Test cods/KerasCNNClean/npyTest.test.npy', test_data, allow_pickle=True, fix_imports=True)
<ipython-input-46-4f89beac4d83> in <listcomp>(.0)
1 test = glob.glob('D:\\Test cods\\KerasCNNClean\\data\\test\\*.jpg')
----> 2 test = pd.DataFrame([[p.split('/')[3],p] for p in test], columns = ['image','path']) #[::20] #limit for Kaggle Demo
3 print("loading train data")
4 test_data = normalize_image_features(test['path'])
5 np.save(r'D:/Test cods/KerasCNNClean/npyTest.test.npy', test_data, allow_pickle=True, fix_imports=True)
IndexError: list index out of range
您遇到的错误 IndexError: list index out of range 表示:在创建 test DataFrame 的列表推导式中,p.split('/') 拆分得到的列表长度不足,而代码却访问了不存在的索引(例如 [3])。
要解决此问题,您需要检查 test 列表中路径的实际结构,并相应地调整拆分方式。路径似乎使用的是反斜杠 (\) 而不是正斜杠 (/),因此按 '/' 拆分只会得到一个元素;可以改为按 '\\' 拆分,或者使用 os.path.basename(p) 来获取文件名。