Skip to content

Instantly share code, notes, and snippets.

@qiaoxu123
Last active June 15, 2025 12:07
Show Gist options
  • Select an option

  • Save qiaoxu123/d35d35414df45158ac06699e9fea13cf to your computer and use it in GitHub Desktop.

Select an option

Save qiaoxu123/d35d35414df45158ac06699e9fea13cf to your computer and use it in GitHub Desktop.
手写数字识别 (handwritten digit recognition)
import torch
import torch.nn as nn
import torch.nn.functional as F
class MyModel(nn.Module):
    """CNN classifier for 1x28x28 grayscale digit images (e.g. MNIST).

    Two conv+pool stages reduce the input to 64x5x5 feature maps
    (28 -> conv3/pad0 26 -> pool 13 -> conv3/pad0 11 -> pool 5),
    followed by a two-layer fully-connected head.
    """

    def __init__(self, num_classes=10):
        # BUG FIX: the original called super(SequentialModel, self).__init__(),
        # but no class named SequentialModel exists, so constructing the model
        # raised NameError. Use this class's own name.
        super(MyModel, self).__init__()
        # Feature extractor.
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=0),   # 1 -> 32 channels, 3x3 kernel
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),        # halve spatial size
            nn.Conv2d(32, 64, kernel_size=3, padding=0),  # 32 -> 64 channels, 3x3 kernel
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),        # halve spatial size
        )
        # Classifier head: flatten 64*5*5 features down to num_classes logits.
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 5 * 5, 64),
            nn.ReLU(inplace=True),
            nn.Linear(64, num_classes),
        )

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes)."""
        x = self.features(x)
        x = self.classifier(x)
        return x
def train(dataloader, model, loss_fn, optimizer):
    """Run one training epoch and return (accuracy, mean batch loss).

    Args:
        dataloader: yields (inputs, labels) batches.
        model: the network to optimize (caller sets train/eval mode).
        loss_fn: criterion comparing model output to labels.
        optimizer: optimizer over the model's parameters.

    Returns:
        (train_acc, train_loss): fraction of correct predictions over the
        whole dataset, and the loss averaged over batches.
    """
    size = len(dataloader.dataset)  # total number of samples
    num_batches = len(dataloader)   # number of batches
    # PERF: hoist the device query out of the loop — the original called
    # choose_device() twice per batch for a value that never changes.
    device = choose_device()
    train_loss, train_acc = 0, 0
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)
        # Forward pass and loss; note loss_fn(prediction, target) order.
        pred = model(X)
        loss = loss_fn(pred, y)
        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Accumulate metrics: count correct predictions, sum batch losses.
        train_acc += (pred.argmax(1) == y).type(torch.float).sum().item()
        train_loss += loss.item()
    train_acc /= size
    train_loss /= num_batches
    return train_acc, train_loss
def test(dataloader, model, loss_fn):
    """Evaluate the model and return (accuracy, mean batch loss).

    Mirrors train() but performs no optimizer steps; the caller should
    have put the model in eval() mode.
    """
    size = len(dataloader.dataset)  # total number of samples
    num_batches = len(dataloader)   # number of batches
    # PERF: hoist the device query out of the loop (invariant across batches).
    device = choose_device()
    test_loss, test_acc = 0, 0
    # No gradients during evaluation — saves memory and computation.
    with torch.no_grad():
        for imgs, target in dataloader:
            imgs, target = imgs.to(device), target.to(device)
            target_pred = model(imgs)
            loss = loss_fn(target_pred, target)
            test_loss += loss.item()
            test_acc += (target_pred.argmax(1) == target).type(torch.float).sum().item()
    test_acc /= size
    test_loss /= num_batches
    return test_acc, test_loss
# Hyperparameters.
epochs = 20
loss_function = nn.CrossEntropyLoss()
learning_rate = 0.1
opt = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Per-epoch history, consumed later by the plotting code.
train_loss = []
train_acc = []
test_loss = []
test_acc = []

# PERF: the format string is loop-invariant; the original rebuilt it every epoch.
template = ('Epoch:{:2d}, Train_acc:{:.1f}%, Train_loss:{:.3f}, Test_acc:{:.1f}%,Test_loss:{:.3f}')

# Training loop: one optimization pass then one evaluation pass per epoch.
for epoch in range(epochs):
    model.train()  # enable training behavior (dropout/batch-norm, if any)
    epoch_train_acc, epoch_train_loss = train(train_dl, model, loss_function, opt)
    model.eval()   # switch to inference behavior for evaluation
    epoch_test_acc, epoch_test_loss = test(test_dl, model, loss_function)
    train_acc.append(epoch_train_acc)
    train_loss.append(epoch_train_loss)
    test_acc.append(epoch_test_acc)
    test_loss.append(epoch_test_loss)
    print(template.format(epoch + 1, epoch_train_acc * 100, epoch_train_loss, epoch_test_acc * 100, epoch_test_loss))
print('Done')
import matplotlib.pyplot as plt
# Hide warnings
import warnings

warnings.filterwarnings("ignore")             # suppress warning output
plt.rcParams['font.sans-serif'] = ['SimHei']  # render CJK labels correctly
plt.rcParams['axes.unicode_minus'] = False    # render the minus sign correctly
plt.rcParams['figure.dpi'] = 100              # figure resolution

epochs_range = range(epochs)

# One row, two panels: accuracy on the left, loss on the right.
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 3))

ax_acc.plot(epochs_range, train_acc, label='Training Accuracy')
ax_acc.plot(epochs_range, test_acc, label='Test Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_title('Training and Validation Accuracy')

ax_loss.plot(epochs_range, train_loss, label='Training Loss')
ax_loss.plot(epochs_range, test_loss, label='Test Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_title('Training and Validation Loss')

plt.show()
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torchvision.transforms import functional as TF
import tkinter as tk
from PIL import Image, ImageDraw, ImageOps
import numpy as np
# Device setup: prefer the GPU when CUDA is available, else fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Path where trained weights are saved to / loaded from.
model_path = "lenet5_mnist.pth"
# LeNet-5 style network for 1x28x28 inputs.
class LeNet5(nn.Module):
    """Two conv/pool stages followed by a three-layer fully-connected head.

    Spatial geometry for a 28x28 input: conv1(5x5) -> 24, pool -> 12,
    conv2(5x5) -> 8, pool -> 4, giving 16*4*4 = 256 flattened features.
    """

    def __init__(self):
        super(LeNet5, self).__init__()
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Fully-connected classifier: 256 -> 120 -> 84 -> 10 logits.
        self.fc1 = nn.Linear(256, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return class logits of shape (batch, 10)."""
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, 256)  # flatten the 16x4x4 feature maps
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)
# Instantiate the model.
model = LeNet5().to(device)

# Normalization pipeline shared by training and evaluation data.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# BUG FIX: the test dataset/loader were originally created only inside the
# training (else) branch, so when a saved model existed, the accuracy loop
# below crashed with NameError on test_loader. Define them unconditionally.
# download=True is a no-op when the data is already present, and makes a
# fresh run with a saved model but no data work too.
test_dataset = datasets.MNIST('', train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1000, shuffle=False)

# Load cached weights if available; otherwise train from scratch.
if os.path.exists(model_path):
    # NOTE(review): torch.load on an untrusted file can execute arbitrary
    # code via pickle — only load checkpoints you created yourself.
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    print("模型已加载,无需重新训练。")
else:
    # Training data.
    train_dataset = datasets.MNIST('', train=True, download=True, transform=transform)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=256, shuffle=True)
    # Optimizer and loss.
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    # Train the model.
    print("开始训练模型...")
    epochs = 5
    for epoch in range(epochs):
        running_loss = 0.0
        model.train()
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f'Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}')
    # Persist the trained weights for future runs.
    torch.save(model.state_dict(), model_path)
    print("模型已训练并保存。")

# Measure accuracy on the held-out test set.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f'测试准确率: {100 * correct / total:.2f}%')
# Handwriting recognition helper for the Tkinter GUI.
def predict_digit(img):
    """Classify a PIL image of a handwritten digit and return the digit.

    Converts the image to MNIST conventions (28x28, grayscale, white digit
    on black), normalizes it, and runs the module-level model.
    """
    # Resize, grayscale, and invert (canvas is black-on-white; MNIST is not).
    small = ImageOps.invert(img.resize((28, 28)).convert('L'))
    # Build a normalized 1x1x28x28 tensor on the model's device.
    tensor = TF.normalize(TF.to_tensor(small).unsqueeze(0), [0.1307], [0.3081]).to(device)
    with torch.no_grad():
        logits = model(tensor)
    return torch.argmax(logits, dim=1).item()
# Drawing-pad window: draw a digit with the mouse, then classify it.
class App(tk.Tk):
    """Tkinter GUI with a canvas, a recognize button, and a clear button.

    An off-screen PIL image mirrors every stroke drawn on the canvas; that
    image is what predict_digit() consumes.
    """

    def __init__(self):
        super().__init__()
        self.title("手写数字识别")
        # On-screen drawing surface.
        self.canvas = tk.Canvas(self, width=200, height=200, bg="white")
        self.canvas.pack()
        # Off-screen mirror of the canvas, kept in sync in paint().
        self.image = Image.new("RGB", (200, 200), "white")
        self.draw = ImageDraw.Draw(self.image)
        self.canvas.bind("<B1-Motion>", self.paint)
        tk.Button(self, text="识别", command=self.recognize).pack()
        tk.Button(self, text="清除", command=self.clear).pack()
        self.result = tk.Label(self, text="", font=("Helvetica", 20))
        self.result.pack()

    def paint(self, event):
        """Draw a black dot at the cursor on both canvas and backing image."""
        radius = 8
        box = (event.x - radius, event.y - radius, event.x + radius, event.y + radius)
        self.canvas.create_oval(*box, fill='black')
        self.draw.ellipse(list(box), fill='black')

    def recognize(self):
        """Classify the current drawing and show the predicted digit."""
        digit = predict_digit(self.image)
        self.result.config(text=f"识别结果:{digit}")

    def clear(self):
        """Wipe canvas, backing image, and result label."""
        self.canvas.delete("all")
        self.draw.rectangle([0, 0, 200, 200], fill="white")
        self.result.config(text="")
# Launch the GUI event loop (blocks until the window is closed).
App().mainloop()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment