import numpy as np
# Load the noisy train dataset
train_noisy = np.loadtxt('MNIST_train_noisy.txt')
# Load the original train dataset (without noise)
train = np.loadtxt('MNIST_train.csv', delimiter=',', skiprows=1)
# Extract the labels from the original train dataset
train_labels = train[:, 0]
# Normalize the noisy train dataset
train_noisy /= 255.0
# Extract the pixel values from the noisy train dataset
train_noisy_pixels = train_noisy[:, 1:]
# Compute the mean of the pixel values
mean = np.mean(train_noisy_pixels, axis=0)
# Subtract the mean from the pixel values to center the data
centered = train_noisy_pixels - mean
# Compute the covariance matrix of the centered data
covariance = np.dot(centered.T, centered) / centered.shape[0]
# Compute the eigenvectors and eigenvalues of the covariance matrix
eigenvalues, eigenvectors = np.linalg.eigh(covariance)
# Sort the eigenvectors in descending order of eigenvalues
sorted_indices = np.argsort(eigenvalues)[::-1]
sorted_eigenvectors = eigenvectors[:, sorted_indices]
# Compute the cumulative explained variance ratio
cumulative_variances = np.cumsum(eigenvalues[sorted_indices]) / np.sum(eigenvalues)
# Plot the cumulative explained variance ratio
import matplotlib.pyplot as plt
plt.plot(cumulative_variances)
plt.xlabel('Number of Principal Components')
plt.ylabel('Cumulative Explained Variance Ratio')
plt.show()
一旦我尝试使用以下代码可视化组件:
# Extract the first 10 principal components (eigenvectors)
pca_components = sorted_eigenvectors[:, :10]
# Reshape the first 10 principal components into 28x28 images
pca_images = pca_components.T.reshape((10,28, 28))
# Plot the first 10 principal components
for i in range(10):
plt.subplot(2, 5, i+1)
plt.imshow(pca_images[i], cmap='gray')
plt.axis('off')
plt.show()
然后它产生这个错误 无法将大小为 7830 的数组重塑为形状 (10,28,28)
请帮助我,告诉我我做错了什么,从早上开始就一直在努力解决这个问题