% 本课题旨在设计并实现一个基于BP(反向传播)神经网络的英文字母识别系统,
% 实现对手写或打印的26个英文字母(A-Z)的自动分类识别。
% 项目首先对字母图像进行预处理(如灰度化、归一化、二值化和特征提取),
% 随后将图像特征作为神经网络输入,通过训练BP网络识别字母类别。
% BP神经网络具有结构简单、可调性强的优点,通过调整隐含层数量、学习率和
% 训练函数可有效提高分类准确率。
% 本系统可应用于OCR(光学字符识别)、人机交互和教育辅助等领域。
% 最终在 MATLAB 平台上完成网络构建、训练和测试,并通过混淆矩阵等方式评估识别性能。
% Reset the session so the script always starts from a clean workspace.
clc
clear
close all

%% Data loading and preprocessing
% Paths of the gzip-compressed EMNIST "letters" IDX files.
train_images_file = '英文字母数据集/emnist-letters-train-images-idx3-ubyte.gz';
train_labels_file = '英文字母数据集/emnist-letters-train-labels-idx1-ubyte.gz';
test_images_file = '英文字母数据集/emnist-letters-test-images-idx3-ubyte.gz';
test_labels_file = '英文字母数据集/emnist-letters-test-labels-idx1-ubyte.gz';

% Decompress the archives. The reads below expect each decompressed file at
% the same path as its archive, minus the '.gz' suffix.
gunzip(train_images_file);
gunzip(train_labels_file);
gunzip(test_images_file);
gunzip(test_labels_file);

% Read images (one row per sample) and their labels.
[train_images, train_labels] = readEMNIST(strrep(train_images_file, '.gz', ''), ...
                                          strrep(train_labels_file, '.gz', ''));
[test_images, test_labels] = readEMNIST(strrep(test_images_file, '.gz', ''), ...
                                        strrep(test_labels_file, '.gz', ''));

% The training and test sets are deliberately kept separate. An earlier
% revision appended the test samples to the training set, which meant the
% network was trained on the very samples later used to report "test"
% accuracy (data leakage), making that figure meaningless.
%% Hand-crafted feature extraction (HOG + LBP)
fprintf('开始手工特征提取...\n');

% ---- HOG configuration ----
hogCellSize  = [4 4];   % pixels per cell
hogBlockSize = [2 2];   % cells per block
hogNumBins   = 9;       % orientation histogram bins

% Expected HOG vector length. The "./[1 1]" term is the block stride in
% cells, i.e. neighbouring blocks are assumed to be shifted by one cell.
% NOTE(review): this must agree with the block overlap extractHOGFeatures
% actually uses for the options passed later - confirm against its docs.
imgSize        = [28 28];
cellsPerBlock  = prod(hogBlockSize);
blocksPerImage = floor((imgSize ./ hogCellSize - hogBlockSize) ./ [1 1] + 1);
hogSize        = prod(blocksPerImage) * cellsPerBlock * hogNumBins;

% ---- LBP configuration ----
lbpRadius    = 2;
lbpNeighbors = 8;
lbpSize      = 59;      % fixed at 59 bins for 8-neighbour uniform LBP

% Extract features for the training set
% Build the training feature matrix: one row per image, holding the HOG
% descriptor followed by the LBP histogram.
train_features = zeros(size(train_images,1), hogSize + lbpSize);
for i = 1:size(train_images,1)
    % Each sample is stored as a flat row; restore the 28x28 image.
    img = reshape(train_images(i,:), [28 28]);

    % HOG descriptor
    hog = extractHOGFeatures(img, ...
        'CellSize', hogCellSize, ...
        'BlockSize', hogBlockSize, ...
        'NumBins', hogNumBins);

    % LBP histogram
    lbp = extractLBPFeatures(img, ...
        'Radius', lbpRadius, ...
        'NumNeighbors', lbpNeighbors);

    train_features(i,:) = [hog, lbp];

    % Report progress every 1000 samples
    if mod(i,1000) == 0
        fprintf('已处理 %d/%d 训练样本\n', i, size(train_images,1));
    end
end

% Extract features for the test set
% Build the test feature matrix with the same column layout as the
% training features (HOG descriptor followed by LBP histogram).
numTestSamples = size(test_images, 1);
test_features = zeros(numTestSamples, size(train_features, 2));
for i = 1:numTestSamples
    img = reshape(test_images(i, :), [28 28]);

    hog = extractHOGFeatures(img, ...
        'CellSize', hogCellSize, ...
        'BlockSize', hogBlockSize, ...
        'NumBins', hogNumBins);

    lbp = extractLBPFeatures(img, ...
        'Radius', lbpRadius, ...
        'NumNeighbors', lbpNeighbors);

    test_features(i, :) = [hog, lbp];
end

% Feature normalisation
% Z-score normalisation. The per-column mean and standard deviation are
% computed from train_features and then applied to both feature matrices,
% so the test features are scaled with the same statistics.
% The dimension is given explicitly so a single-row matrix is still
% reduced column-wise instead of being collapsed to a scalar.
mu = mean(train_features, 1);
sigma = std(train_features, 0, 1);

% A column that is constant over train_features has zero standard
% deviation; dividing by it would produce NaN/Inf and poison the training.
% Use 1 instead so such columns simply become (x - mu).
sigma(sigma == 0) = 1;

train_features = (train_features - mu) ./ sigma;
test_features = (test_features - mu) ./ sigma;

%% Keep letter classes A-Z (labels 1-26)
% Keep only samples whose label lies in 1..26, in both sets.
% test_indices is kept as an index vector because the sample gallery later
% uses it to map filtered rows back to rows of test_images.
trainInRange = (train_labels >= 1) & (train_labels <= 26);
uppercase_indices = find(trainInRange);
train_features = train_features(uppercase_indices, :);
train_labels = train_labels(uppercase_indices);

testInRange = (test_labels >= 1) & (test_labels <= 26);
test_indices = find(testInRange);
test_features = test_features(test_indices, :);
test_labels = test_labels(test_indices);

% One-hot encode the labels: one row per sample, 26 columns.
trainTargetsSparse = ind2vec(train_labels', 26);
train_labels_onehot = full(trainTargetsSparse)';
testTargetsSparse = ind2vec(test_labels', 26);
test_labels_onehot = full(testTargetsSparse)';

%% Build the BP neural network
% Pattern-recognition network: input -> 256 -> 128 -> 26.
inputSize = size(train_features, 2);
hiddenLayerSizes = [256 128];
net = patternnet(hiddenLayerSizes);
net.inputs{1}.size = inputSize;

% Training function and its parameters. The trainParam fields are assigned
% after trainFcn, matching the original order of assignments.
net.trainFcn = 'trainscg';
net.trainParam.epochs = 200;
net.trainParam.showCommandLine = true;
net.trainParam.max_fail = 15;               % early stopping (validation failures)

% Performance function and its regularization setting (assigned after
% performFcn, as in the original).
net.performFcn = 'crossentropy';
net.performParam.regularization = 0.01;

% Split of the data handed to train(): 80% train / 10% validation / 10% test.
net.divideParam.trainRatio = 0.8;
net.divideParam.valRatio = 0.1;
net.divideParam.testRatio = 0.1;

%% Train the network
fprintf('开始训练BP神经网络(HOG+LBP特征)...\n');
% train() expects one column per sample, hence the transposes.
[net, tr] = train(net, train_features', train_labels_onehot');

%% Evaluate on the test features
test_output = net(test_features');

% The predicted class is the row index of the largest output per column.
[~, predicted_labels] = max(test_output, [], 1);
true_labels = test_labels';

% Overall accuracy = fraction of matching predictions.
numCorrect = nnz(predicted_labels == true_labels);
accuracy = numCorrect / numel(true_labels);
fprintf('整体测试集准确率: %.2f%%\n', accuracy*100);

%% Per-letter accuracy
letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';

% Confusion matrix: rows index the true class, columns the predicted class.
confusion_mat = accumarray([true_labels(:), predicted_labels(:)], 1, [26 26]);

% Number of test samples per letter (row sums of the confusion matrix).
sample_counts = sum(confusion_mat, 2)';

class_accuracies = zeros(1, 26);
top_errors = cell(1, 26);   % per letter: up to three most frequent wrong predictions

for i = 1:26
    top_errors{i} = {};

    % Letters without any test sample get NaN accuracy and no error list.
    if sample_counts(i) == 0
        class_accuracies(i) = NaN;
        continue
    end

    class_accuracies(i) = confusion_mat(i, i) / sample_counts(i);

    % Rank the predicted classes for this letter by frequency, then keep the
    % first three that are wrong (not the letter itself) and occurred at least once.
    [sorted_err, err_idx] = sort(confusion_mat(i, :), 'descend');
    isMistake = (err_idx ~= i) & (sorted_err > 0);
    error_list = {};
    for j = find(isMistake, 3)
        error_list{end+1} = sprintf('%c(%.1f%%)', ...
            letters(err_idx(j)), ...
            100*sorted_err(j)/sample_counts(i)); %#ok<SAGROW>
    end
    top_errors{i} = error_list;
end

%% Formatted console report
% Print one line per letter: accuracy, number of test samples and the most
% frequent misclassifications.
fprintf('\n%-5s %-8s %-12s %s\n', '字母', '准确率', '测试样本数', '错误分析');
fprintf('--------------------------------------------\n');

for i = 1:26
    % Letters without test samples get a placeholder row.
    if ~(sample_counts(i) > 0)
        fprintf('%-5c %-8s %-12d (无测试样本)\n', ...
            letters(i), 'N/A', 0);
        continue
    end

    % An empty error list means every sample of this letter was correct.
    if isempty(top_errors{i})
        err_str = '无错误样本';
    else
        err_str = strjoin(top_errors{i}, ', ');
    end

    fprintf('%-5c %-8.2f%% %-12d %s\n', ...
        letters(i), ...
        class_accuracies(i)*100, ...
        sample_counts(i), ...
        err_str);
end

%% Visualisation
% Assemble the result table as a 26x4 cell array:
% letter | accuracy | sample count | main errors.
results = cell(26, 4);
for i = 1:26
    results{i,1} = letters(i);

    % Placeholder row for letters that have no test samples.
    if ~(sample_counts(i) > 0)
        results(i, 2:4) = {'N/A', 0, '无样本'};
        continue
    end

    results{i,2} = sprintf('%.2f%%', class_accuracies(i)*100);
    results{i,3} = sample_counts(i);
    if isempty(top_errors{i})
        results{i,4} = '无错误';
    else
        results{i,4} = strjoin(top_errors{i}, ', ');
    end
end

% Show the table
% Show the per-letter results in a uitable.
% The caption is set as the figure window name. Calling title() here would
% implicitly create an empty axes in this figure (title operates on the
% current axes), cluttering the window that is meant to hold only the table.
f = figure('Position', [100 100 700 500], ...
    'Name', '字母识别详细结果', ...
    'NumberTitle', 'off');
t = uitable(f, 'Data', results, ...
    'ColumnName', {'字母', '准确率', '样本数', '主要错误'}, ...
    'ColumnWidth', {50, 80, 80, 300}, ...
    'Position', [20 20 660 460]);

% 2. Confusion matrix
% Heat map of the confusion matrix with every row divided by its row sum,
% so each row shows how one true letter is distributed over predictions.
figure;
rowTotals = sum(confusion_mat, 2);
imagesc(confusion_mat ./ rowTotals);
colormap(jet);
colorbar;
caxis([0 1]);

% Label both axes with the letters A-Z.
tickNames = cellstr(letters');
xticks(1:26);
yticks(1:26);
xticklabels(tickNames);
yticklabels(tickNames);

xlabel('预测标签');
ylabel('真实标签');
title('归一化混淆矩阵');

% 3. Random sample gallery
% Show 20 randomly chosen test samples with their true and predicted letter;
% the title is green for a correct prediction and red for a wrong one.
figure('Position', [100, 100, 1000, 800]);
for i = 1:20
    % idx addresses the filtered test set; test_indices maps it back to the
    % corresponding row of the unfiltered test_images matrix.
    idx = randi(length(test_indices));
    original_idx = test_indices(idx);
    img = reshape(test_images(original_idx,:), [28,28]);

    subplot(4,5,i);
    % readEMNIST returns the pixels as doubles in 0..255 (fread with 'uint8'
    % precision yields double). imshow assumes the range [0 1] for double
    % data, so without an explicit display range every non-zero pixel is
    % rendered as pure white.
    imshow(img, [0 255]);

    true_label = letters(test_labels(idx));
    pred_label = letters(predicted_labels(idx));
    if true_label == pred_label
        color = 'g';
    else
        color = 'r';
    end
    title(sprintf('True: %s\nPred: %s', true_label, pred_label), 'Color', color);
end

%% Save the model
% mu and sigma are stored alongside the network: new inputs must be
% normalised with the same statistics before they can be classified.
save('emnist_hog_lbp_bp.mat', 'net', 'tr', 'class_accuracies', 'mu', 'sigma');

%% Helper functions
function [images, labels] = readEMNIST(imageFile, labelFile)
%READEMNIST Read an IDX image file and its matching IDX label file.
%   [images, labels] = readEMNIST(imageFile, labelFile)
%
%   Inputs:
%     imageFile - path of an uncompressed IDX3 image file (big-endian header:
%                 magic 2051, image count, rows, columns; then uint8 pixels)
%     labelFile - path of an uncompressed IDX1 label file (big-endian header:
%                 magic 2049, label count; then uint8 labels)
%
%   Outputs:
%     images - numImages x (numRows*numCols) double matrix, one image per row
%     labels - numLabels x 1 double column vector
%
%   Errors (all with a 'readEMNIST:' identifier) are raised when a file
%   cannot be opened, has an unexpected magic number, is truncated, or when
%   the image and label counts differ.
%
%   NOTE(review): a caller that reshapes a returned row to [numRows numCols]
%   obtains the image exactly as laid out in the file (first stored pixel row
%   along the top). Confirm that this is the upright orientation for the
%   EMNIST files in use; if not, the axis swap below is the place to change.

    IMAGE_MAGIC = 2051;
    LABEL_MAGIC = 2049;

    % ---- image file ----
    imgFid = fopen(imageFile, 'r', 'b');
    if imgFid == -1
        error('readEMNIST:fileOpenFailed', 'Cannot open image file: %s', imageFile);
    end
    % onCleanup closes the file even if an error is thrown below.
    imgCloser = onCleanup(@() fclose(imgFid)); %#ok<NASGU>

    imgHeader = fread(imgFid, 4, 'int32');
    if numel(imgHeader) < 4 || imgHeader(1) ~= IMAGE_MAGIC
        error('readEMNIST:badImageHeader', ...
            'Not a valid IDX3 image file: %s', imageFile);
    end
    numImages = imgHeader(2);
    numRows = imgHeader(3);
    numCols = imgHeader(4);

    % One column per image, pixels in file order; returned as double.
    images = fread(imgFid, [numCols*numRows, numImages], 'uint8');
    if size(images, 1) ~= numRows*numCols || size(images, 2) ~= numImages
        error('readEMNIST:truncatedImageFile', ...
            'Image file is truncated: %s', imageFile);
    end
    images = images';
    clear imgCloser   % close the image file before opening the label file

    % ---- label file ----
    lblFid = fopen(labelFile, 'r', 'b');
    if lblFid == -1
        error('readEMNIST:fileOpenFailed', 'Cannot open label file: %s', labelFile);
    end
    lblCloser = onCleanup(@() fclose(lblFid)); %#ok<NASGU>

    lblHeader = fread(lblFid, 2, 'int32');
    if numel(lblHeader) < 2 || lblHeader(1) ~= LABEL_MAGIC
        error('readEMNIST:badLabelHeader', ...
            'Not a valid IDX1 label file: %s', labelFile);
    end
    numLabels = lblHeader(2);

    labels = fread(lblFid, numLabels, 'uint8');
    if numel(labels) ~= numLabels
        error('readEMNIST:truncatedLabelFile', ...
            'Label file is truncated: %s', labelFile);
    end
    if numLabels ~= numImages
        error('readEMNIST:countMismatch', ...
            'Image count (%d) and label count (%d) differ.', numImages, numLabels);
    end

    % ---- orientation ----
    % The original code rotated every image by -90 degrees and then flipped
    % it left-right. That combination is exactly a transpose, so it is done
    % here for all images at once by swapping the two image axes, which
    % avoids a per-image imrotate call.
    images = reshape(images, [size(images,1), numRows, numCols]);
    images = permute(images, [1 3 2]);
    images = reshape(images, [size(images,1), numRows*numCols]);
end