我的练习是训练 10 个感知器来识别数字 (0 - 9)。每个感知器应该学习一个数字。作为训练数据,我创建了 30 张图像 (5x7 bmp)。每个数字有 3 个变体。
我有一个感知器类:
import numpy as np
def unit_step_func(x):
return np.where(x > 0, 1, 0)
def sigmoid(x):
return 1 / (1 + np.exp(-x))
class Perceptron:
def __init__(self, learning_rate=0.01, n_iters=1000):
self.lr = learning_rate
self.n_iters = n_iters
self.activation_func = unit_step_func
self.weights = None
self.bias = None
#self.best_weights = None
#self.best_bias = None
#self.best_error = float('inf')
def fit(self, X, y):
n_samples, n_features = X.shape
self.weights = np.zeros(n_features)
self.bias = 0
#self.best_weights = self.weights.copy()
#self.best_bias = self.bias
for _ in range(self.n_iters):
for x_i, y_i in zip(X, y):
linear_output = np.dot(x_i, self.weights) + self.bias
y_predicted = self.activation_func(linear_output)
update = self.lr * (y_i - y_predicted)
self.weights += update * x_i
self.bias += update
#current_error = np.mean(np.abs(y - self.predict(X)))
#if current_error < self.best_error:
# self.best_weights = self.weights.copy()
# self.best_bias = self.bias
# self.best_error = current_error
def predict(self, X):
linear_output = np.dot(X, self.weights) + self.bias
y_predicted = self.activation_func(linear_output)
return y_predicted
我尝试了
unit_step_func
和sigmoid
、激活函数和落袋算法,看看是否有任何区别。我是菜鸟,所以我不确定这是否正确实施。
这就是我训练这些感知器的方式:
import numpy as np
from PIL import Image
from Perceptron import Perceptron
import os
def load_images_from_folder(folder, digit):
images = []
labels = []
for filename in os.listdir(folder):
img = Image.open(os.path.join(folder, filename))
if img is not None:
images.append(np.array(img).flatten())
label = 1 if filename.startswith(f"{digit}_") else 0
labels.append(label)
return np.array(images), np.array(labels)
digits_to_recognize = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
perceptrons = []
for digit_to_recognize in digits_to_recognize:
X, y = load_images_from_folder("data", digit_to_recognize)
p = Perceptron()
p.fit(X, y)
perceptrons.append(p)
简而言之:
训练数据文件名的格式为
digit
_variant
。正如我之前所说,每个数字都有 3 个变体,
所以对于数字
0
它是 0_0
, 0_1
, 0_2
,
对于数字
1
,它是:1_0
、1_1
、1_2
、
等等...
load_images_from_folder
函数加载30张图像并检查名称。如果名称的 digit
部分与 digit
输入相同,则它会在标签中附加 1
,以便感知器知道它是所需的数字。
我知道最好加载一次这些图像并将它们保存在一些
tuples
数组中,例如,但我现在不关心性能(以后也不会关心)。
对于数字
0
标签数组是 [1, 1, 1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
对于数字
1
标签数组是 [0,0,0, 1, 1, 1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
等等...
然后我使用这些数据训练 10 个感知器。
这个练习还需要某种 GUI 来让我画一个数字。我选择了
pygame
,可以用pyQT
,其实没关系。
这是代码,你可以跳过它,它不是那么重要(除了
on_rec_button
函数,但我会解决它):
import pygame
import sys
pygame.init()
cols, rows = 5, 7
square_size = 50
width, height = cols * square_size, (rows + 2) * square_size
screen = pygame.display.set_mode((width, height))
pygame.display.set_caption("Zad1")
rec_button_color = (0, 255, 0)
rec_button_rect = pygame.Rect(0, rows * square_size, width, square_size)
clear_button_color = (255, 255, 0)
clear_button_rect = pygame.Rect(0, (rows + 1) * square_size + 1, width, square_size)
mouse_pressed = False
drawing_matrix = np.zeros((rows, cols), dtype=int)
def color_square(x, y):
col = x // square_size
row = y // square_size
if 0 <= row < rows and 0 <= col < cols:
drawing_matrix[row, col] = 1
def draw_button(color, rect):
pygame.draw.rect(screen, color, rect)
def on_rec_button():
np_array_representation = drawing_matrix.flatten()
for digit_to_recognize in digits_to_recognize:
p = perceptrons[digit_to_recognize]
predicted_number = p.predict(np_array_representation)
if predicted_number == digit_to_recognize:
print(f"Image has been recognized as number {digit_to_recognize}")
def on_clear_button():
drawing_matrix.fill(0)
while True:
for event in pygame.event.get():
if event.type == pygame.QUIT:
pygame.quit()
sys.exit()
elif event.type == pygame.MOUSEBUTTONDOWN and event.button == 3:
mouse_pressed = True
elif event.type == pygame.MOUSEBUTTONUP and event.button == 3:
mouse_pressed = False
elif event.type == pygame.MOUSEMOTION:
mouse_x, mouse_y = event.pos
if mouse_pressed:
color_square(mouse_x, mouse_y)
elif event.type == pygame.MOUSEBUTTONDOWN and event.button == 1:
if rec_button_rect.collidepoint(event.pos):
on_rec_button()
if clear_button_rect.collidepoint(event.pos):
on_clear_button()
for i in range(rows):
for j in range(cols):
if drawing_matrix[i, j] == 1:
pygame.draw.rect(screen, (255, 0, 0), (j * square_size, i * square_size, square_size, square_size))
else:
pygame.draw.rect(screen, (0, 0, 0), (j * square_size, i * square_size, square_size, square_size))
draw_button(rec_button_color, rec_button_rect)
draw_button(clear_button_color, clear_button_rect)
pygame.display.flip()
所以,现在我运行应用程序,绘制数字
3
,然后单击运行on_rec_button
功能的绿色按钮,我希望看到Image has been recognized as number 3
,但我得到了Image has been recognized as number 0
。
这是我画的:
这些是训练数据:
由于练习中所需的分辨率
5x7
,这些非常小。
当我画数字
1
时,我得到2个结果:
Image has been recognized as number 0
Image has been recognized as number 1
我应该怎样做才能让它按照我想要的方式工作?我不希望它 100% 准确,但我想它可能会更好。
我认为错误出在
on_rec_button
函数中。 p.predict(array_representation)
调用将为您提供 0 到 1 之间的数字,这与 predicted_number
没有可比性。相反,对于每个 digit_to_recognize
,您应该计算所有感知器的 predict
输出,并根据最高值进行最终猜测,因为您在训练样本中将标签设置为 1 以正确识别给定数字。因此,在我看来,使用 sigmoid
似乎是比 unit_step_func
更好的选择,以避免处理关系。