TypeError: Caught TypeError in DataLoader worker process 0. TypeError: 'KeyError' object is not iterable

import torch
from torchvision_starter.engine import train_one_epoch, evaluate
from torchvision_starter import utils
import multiprocessing
import time

n_cpu = multiprocessing.cpu_count()

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


_ = model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(params, lr=0.00001)

lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.2,
                                               verbose=True
                                              )
# Train for one more epoch, continuing the epoch count from an earlier run
num_epochs = 1

start = time.time()

for epoch in range(10, 10 + num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loaders['train'], device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the validation dataset
    evaluate(model, data_loaders['valid'], device=device)

stop = time.time()

print(f"\n\n{num_epochs} epochs in {stop - start} s ({(stop-start) / 3600:.2f} hrs)")

Everything was fine until I reached this part. But after I run this section, the error is as follows:

TypeError                                 Traceback (most recent call last)
Cell In[5], line 39
     35 start = time.time()
     37 for epoch in range(10, 10 + num_epochs):
     38     # train for one epoch, printing every 10 iterations
---> 39     train_one_epoch(model, optimizer, data_loaders['train'], device, epoch, print_freq=10)
     40     # update the learning rate
     41     lr_scheduler.step()

File ~\Desktop\Object Detection and Segmentation\workspace-1655609281-1\home\torchvision_starter\engine.py:26, in train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq)
     22     warmup_iters = min(1000, len(data_loader) - 1)
     24     lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)
---> 26 for images, targets in metric_logger.log_every(data_loader, print_freq, header):
     27     images = list(image.to(device) for image in images)
     28     targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

File ~\Desktop\Object Detection and Segmentation\workspace-1655609281-1\home\torchvision_starter\utils.py:209, in MetricLogger.log_every(self, iterable, print_freq, header)
    200     log_msg = self.delimiter.join([
    201         header,
    202         '[{0' + space_fmt + '}/{1}]',
    (...)
    206         'data: {data}'
    207     ])
    208 MB = 1024.0 * 1024.0
--> 209 for obj in iterable:
    210     data_time.update(time.time() - end)
    211     yield obj

File ~\anaconda3\envs\dpl_U\lib\site-packages\torch\utils\data\dataloader.py:652, in _BaseDataLoaderIter.__next__(self)
    649 if self._sampler_iter is None:
    650     # TODO(https://github.com/pytorch/pytorch/issues/76750)
    651     self._reset()  # type: ignore[call-arg]
--> 652 data = self._next_data()
    653 self._num_yielded += 1
    654 if self._dataset_kind == _DatasetKind.Iterable and \
    655         self._IterableDataset_len_called is not None and \
    656         self._num_yielded > self._IterableDataset_len_called:

File ~\anaconda3\envs\dpl_U\lib\site-packages\torch\utils\data\dataloader.py:1347, in _MultiProcessingDataLoaderIter._next_data(self)
   1345 else:
   1346     del self._task_info[idx]
-> 1347     return self._process_data(data)

File ~\anaconda3\envs\dpl_U\lib\site-packages\torch\utils\data\dataloader.py:1373, in _MultiProcessingDataLoaderIter._process_data(self, data)
   1371 self._try_put_index()
   1372 if isinstance(data, ExceptionWrapper):
-> 1373     data.reraise()
   1374 return data

File ~\anaconda3\envs\dpl_U\lib\site-packages\torch\_utils.py:461, in ExceptionWrapper.reraise(self)
    457 except TypeError:
    458     # If the exception takes multiple arguments, don't try to
    459     # instantiate since we don't know how to
    460     raise RuntimeError(msg) from None
--> 461 raise exception

TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "C:\Users\cheng\anaconda3\envs\dpl_U\lib\site-packages\torch\utils\data\_utils\worker.py", line 302, in _worker_loop
    data = fetcher.fetch(index)
  File "C:\Users\cheng\anaconda3\envs\dpl_U\lib\site-packages\torch\utils\data\_utils\fetch.py", line 52, in fetch
    return self.collate_fn(data)
  File "C:\Users\cheng\Desktop\Object Detection and Segmentation\workspace-1655609281-1\home\torchvision_starter\utils.py", line 236, in collate_fn
    return tuple(zip(*batch))
TypeError: 'KeyError' object is not iterable
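
If I read the traceback correctly, the failure happens in utils.py's collate_fn, which is just tuple(zip(*batch)). That call has to iterate over every sample in the batch, and each sample should be an (image, target) pair. An exception instance is not iterable, so the message suggests a KeyError object itself ended up inside the batch, i.e. somewhere a KeyError is being returned (or stored as a sample) instead of raised. A minimal sketch of that failure mode (the sample values are made up, and the returned-exception pattern is only a hypothesis about what the dataset is doing):

def collate_fn(batch):
    # same one-liner as torchvision_starter.utils.collate_fn (utils.py line 236)
    return tuple(zip(*batch))

# A well-formed detection batch is a list of (image, target) pairs:
good = [("img0", {"boxes": [0, 1]}), ("img1", {"boxes": [2, 3]})]
print(collate_fn(good))   # (('img0', 'img1'), ({'boxes': [0, 1]}, {'boxes': [2, 3]}))

# But if __getitem__ ever returns a caught exception instead of raising it,
# the exception object reaches collate_fn as a "sample", and zip cannot
# iterate over it:
bad = [("img0", {"boxes": [0, 1]}), KeyError("label")]
collate_fn(bad)           # raises TypeError: the KeyError instance is not iterable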

I tried adding drop_last to the DataLoader calls in the helper.py function, for example:

data_loaders["train"] = torch.utils.data.DataLoader(
        train_data,
        batch_size=batch_size,
        sampler=train_sampler,
        num_workers=num_workers,
        collate_fn=utils.collate_fn,
        drop_last=True
    )

But it didn't work. By the way, my torch and torchvision versions are compatible, and CUDA works. I would like to know how to fix this.
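
Since the original exception is raised inside a worker process and re-raised through ExceptionWrapper, one way to surface the real KeyError (with the missing key and a clean traceback) might be to rebuild the train loader with num_workers=0, so __getitem__ runs in the main process. A debugging sketch, not a fix, assuming train_data and train_sampler are in scope as in get_data_loaders below:

# With num_workers=0 the dataset code runs in the main process, so the
# underlying KeyError should surface directly instead of being re-wrapped.
debug_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size=2,
    sampler=train_sampler,
    num_workers=0,                 # no worker processes
    collate_fn=utils.collate_fn,
)
images, targets = next(iter(debug_loader))   # should raise the original KeyError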

The get_data_loaders function:

def get_data_loaders(
    folder, batch_size: int = 2, valid_size: float = 0.2, num_workers: int = -1, limit: int = -1, thinning: int = None
):
    """
    Create and returns the train_one_epoch, validation and test data loaders.

    :param foder: folder containing the dataset
    :param batch_size: size of the mini-batches
    :param valid_size: fraction of the dataset to use for validation. For example 0.2
                       means that 20% of the dataset will be used for validation
    :param num_workers: number of workers to use in the data loaders. Use -1 to mean
                        "use all my cores"
    :param limit: maximum number of data points to consider
    :param thinning: take every n-th frame, instead of all frames
    :return a dictionary with 3 keys: 'train_one_epoch', 'valid' and 'test' containing respectively the
            train_one_epoch, validation and test data loaders
    """

    if num_workers == -1:
        # Use all cores
        num_workers = multiprocessing.cpu_count()

    # We will fill this up later
    data_loaders = {"train": None, "valid": None, "test": None}

    # create 3 sets of data transforms: one for the training dataset,
    # containing data augmentation, one for the validation dataset
    # (without data augmentation) and one for the test set (again
    # without augmentation)
    data_transforms = {
        "train": get_transform(UdacitySelfDrivingDataset.mean, UdacitySelfDrivingDataset.std, train=True),
        "valid": get_transform(UdacitySelfDrivingDataset.mean, UdacitySelfDrivingDataset.std, train=False),
        "test": get_transform(UdacitySelfDrivingDataset.mean, UdacitySelfDrivingDataset.std, train=False),
    }

    # Create train and validation datasets
    train_data = UdacitySelfDrivingDataset(
        folder, 
        transform=data_transforms["train"], 
        train=True,
        thinning=thinning
    )
    
    # The validation dataset is a split from the train dataset, so we read
    # from the same folder, but we apply the transforms for validation
    valid_data = UdacitySelfDrivingDataset(
        folder, 
        transform=data_transforms["valid"], 
        train=True,
        thinning=thinning
    )

    # obtain training indices that will be used for validation
    n_tot = len(train_data)
    indices = torch.randperm(n_tot)

    # If requested, limit the number of data points to consider
    if limit > 0:
        indices = indices[:limit]
        n_tot = limit

    split = int(math.ceil(valid_size * n_tot))
    train_idx, valid_idx = indices[split:], indices[:split]

    # define samplers for obtaining training and validation batches
    train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
    valid_sampler = torch.utils.data.SubsetRandomSampler(valid_idx)

    # prepare data loaders
    data_loaders["train"] = torch.utils.data.DataLoader(
        train_data,
        batch_size=batch_size,
        sampler=train_sampler,
        num_workers=num_workers,
        collate_fn=utils.collate_fn,
        drop_last=True
    )
    data_loaders["valid"] = torch.utils.data.DataLoader(
        valid_data,
        batch_size=batch_size,
        sampler=valid_sampler,
        num_workers=num_workers,
        collate_fn=utils.collate_fn,
        drop_last=True
    )

    # Now create the test data loader
    test_data = UdacitySelfDrivingDataset(
        folder, 
        transform=data_transforms["test"], 
        train=False,
        thinning=thinning
    )

    if limit > 0:
        indices = torch.arange(limit)
        test_sampler = torch.utils.data.SubsetRandomSampler(indices)
    else:
        test_sampler = None

    data_loaders["test"] = torch.utils.data.DataLoader(
        test_data, 
        batch_size=batch_size, 
        shuffle=False, 
        num_workers=num_workers, 
        sampler=test_sampler, 
        collate_fn=utils.collate_fn,
        drop_last=True
    )

    return data_loaders
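
For context, I call it roughly like this (the dataset folder below is a placeholder; n_cpu comes from the first snippet above):

# Placeholder folder path, shown only to illustrate how the loaders are built.
data_loaders = get_data_loaders(
    "path/to/udacity-self-driving-dataset",
    batch_size=2,
    valid_size=0.2,
    num_workers=n_cpu,
)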