I'm studying a Siamese LSTM model for classification and built this project to understand how it is implemented and how it works. The model runs fine on the CPU, but when I try to move it to the GPU I get an error.
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.model_selection import train_test_split

# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Define the Siamese LSTM model
class SiameseLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(SiameseLSTM, self).__init__()
        self.encoder = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)  # Output layer with num_classes units

    def forward(self, x1, x2):
        out1, _ = self.encoder(x1)
        out1 = out1[:, -1, :]  # Get the last output
        out2, _ = self.encoder(x2)  # Use the same LSTM instance for the second input
        out2 = out2[:, -1, :]
        out1 = F.softmax(self.fc(out1), dim=1)
        out2 = F.softmax(self.fc(out2), dim=1)
        return out1, out2
class CSVDataset(Dataset):
    def __init__(self, folder_path, transform=None):
        self.folder_path = folder_path
        self.transform = transform
        self.file_paths = [os.path.join(folder_path, file) for file in os.listdir(folder_path) if file.endswith('.csv')]
        self.labels = [self.extract_label(file) for file in self.file_paths]  # Extract labels from file names

    def __len__(self):
        return len(self.file_paths)

    def extract_label(self, file_name):
        if 'wheat' in file_name:
            return 1
        elif 'mustard' in file_name:
            return 2
        elif 'sugarcane' in file_name:
            return 3
        else:
            return 0  # If none of the keywords are present, assign label 0

    def __getitem__(self, idx):
        data = pd.read_csv(self.file_paths[idx])
        if 'feature_index' in data.columns:
            data.drop(columns=['feature_index'], inplace=True)
        if self.transform:
            data = self.transform(data)
        label = self.labels[idx]
        return data, label

    def collate_fn(self, batch):
        padded_batch = [seq.clone().detach() for seq, _ in batch]
        padded_batch = pad_sequence(padded_batch, batch_first=True, padding_value=0.0)
        labels = [label for _, label in batch]
        return padded_batch, labels
# Custom transform function to convert data to PyTorch tensors
def transform_fn(data):
    if 'date' in data.columns:
        data.drop(columns=['date'], inplace=True)
    data_tensor = torch.tensor(data.values, dtype=torch.float32)
    return data_tensor
folder_path = r'E:\project_data\final data'
dataset = CSVDataset(folder_path, transform=transform_fn)
# Split the dataset into training and testing sets
train_indices, test_indices = train_test_split(list(range(len(dataset))), test_size=0.2, random_state=42)
# Create Subset objects for train and test datasets
train_dataset = Subset(dataset, train_indices)
test_dataset = Subset(dataset, test_indices)
# Define data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, collate_fn=dataset.collate_fn)
test_loader = DataLoader(test_dataset, batch_size=64, collate_fn=dataset.collate_fn)
# Hyperparameters
input_size = len(train_dataset[0][0])
hidden_size = 128
num_layers = 20
num_classes = len(set(dataset.labels))
# Model, loss, optimizer
model = SiameseLSTM(input_size, hidden_size, num_layers, num_classes).to(device)
model.encoder.flatten_parameters()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    running_loss = 0.0
    for data, labels in train_loader:
        data, labels = data.to(device), torch.tensor(labels).to(device)  # Move data to the GPU
        optimizer.zero_grad()
        outputs1, outputs2 = model(data[:, 0, :, None], data[:, 1, :, None])

        # Convert output probabilities to class indices
        target1 = torch.argmax(outputs1, dim=1)
        target2 = torch.argmax(outputs2, dim=1)

        # Compute cross-entropy loss
        loss = criterion(outputs1, target1) + criterion(outputs2, target2)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}')
This is the error I get:
E:\Anaconda\envs\torch\lib\site-packages\torch\nn\modules\rnn.py:878: UserWarning: RNN module weights are not part of single contiguous chunk of memory. This means they need to be compacted at every call, possibly greatly increasing memory usage. To compact weights again call flatten_parameters(). (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\cudnn\RNN.cpp:982.)
result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Cell In[24], line 106
104 data, labels = data.to(device), torch.tensor(labels).to(device) # Move data to the GPU
105 optimizer.zero_grad()
--> 106 outputs1, outputs2 = model(data[:, 0, :, None], data[:, 1, :, None])
108 # Convert output probabilities to class indices
109 target1 = torch.argmax(outputs1, dim=1)
File E:\Anaconda\envs\torch\lib\site-packages\torch\nn\modules\module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)
File E:\Anaconda\envs\torch\lib\site-packages\torch\nn\modules\module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None
Cell In[24], line 12, in SiameseLSTM.forward(self, x1, x2)
11 def forward(self, x1, x2):
---> 12 out1, _ = self.encoder(x1)
13 out1 = out1[:, -1, :] # Get the last output
14 out2, _ = self.encoder(x2) # Use the same LSTM instance for the second input
File E:\Anaconda\envs\torch\lib\site-packages\torch\nn\modules\module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)
File E:\Anaconda\envs\torch\lib\site-packages\torch\nn\modules\module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None
File E:\Anaconda\envs\torch\lib\site-packages\torch\nn\modules\rnn.py:878, in LSTM.forward(self, input, hx)
875 hx = self.permute_hidden(hx, sorted_indices)
877 if batch_sizes is None:
--> 878 result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
879 self.dropout, self.training, self.bidirectional, self.batch_first)
880 else:
881 result = _VF.lstm(input, batch_sizes, hx, self._flat_weights, self.bias,
882 self.num_layers, self.dropout, self.training, self.bidirectional)
RuntimeError: shape '[512, 1]' is invalid for input of size 34304
I don't understand why, because it seems to work on the CPU; the problem only appears when I switch to the GPU.
I tried flatten_parameters() but it didn't help, and I also tried passing both inputs through the same LSTM instance (for the two Siamese LSTM branches).
That warning is not the error. The actual error is at the bottom of the stack trace:

RuntimeError: shape '[512, 1]' is invalid for input of size 34304

The size of the input you pass in does not match the size of the model's weights.
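Concretely, in the code above, input_size = len(train_dataset[0][0]) evaluates to the number of rows (timesteps) of the first sample's tensor, not the number of features per timestep, while the slices actually fed to the model, data[:, 0, :, None], end in a feature dimension of size 1. The numbers in the message fit this reading: 4 * hidden_size = 4 * 128 = 512, and 512 * 67 = 34304, which suggests the layer-0 input weights were built for input_size = 67 while the input supplies only 1 feature, so cuDNN fails trying to view them as [512, 1]. Below is a minimal diagnostic sketch reusing the variable names from your code; the rebuilt model at the end is a hypothetical fix, assuming one feature per timestep is what you intend:

# Sketch: compare what the LSTM actually receives with what its weights expect.
x1 = data[:, 0, :, None]           # shape: (batch, num_features, 1) after this slice
print(x1.shape[-1])                # feature dim fed to the LSTM (here: 1)
print(model.encoder.input_size)    # feature dim the LSTM weights were built for

# Hypothetical fix, assuming one feature per timestep is intended: take
# input_size from the tensor you actually feed the model, and rebuild the
# optimizer so it tracks the new parameters.
model = SiameseLSTM(input_size=x1.shape[-1], hidden_size=hidden_size,
                    num_layers=num_layers, num_classes=num_classes).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

The flatten_parameters() warning at the top is a separate, harmless message and is not what aborts training.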