BERT Sentiment Analysis
I. Dataset Loading and Model Training
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset
import torch
import numpy as np
from sklearn.metrics import accuracy_score
model_name_or_path = '/root/autodl-tmp/bert-base-uncased'

# 1. Load the SST-2 dataset
dataset = load_dataset("glue", "sst2")
model = BertForSequenceClassification.from_pretrained("bert-base-uncased").to('cuda')
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model.save_pretrained(model_name_or_path)
tokenizer.save_pretrained(model_name_or_path)
# 2. Data preprocessing (tokenization)
def preprocess(example):
    return tokenizer(example["sentence"], truncation=True, padding="max_length", max_length=128)

encoded_dataset = dataset.map(preprocess, batched=True)
encoded_dataset = encoded_dataset.rename_column("label", "labels")
encoded_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

# 4. Training arguments
training_args = TrainingArguments(
    output_dir="./output",
    # evaluation_strategy="epoch",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    num_train_epochs=1,
    logging_dir="./logs",
)

# 5. Define the metric
def compute_metrics(p):
    preds = np.argmax(p.predictions, axis=1)
    return {"accuracy": accuracy_score(p.label_ids, preds)}

print(encoded_dataset["train"])
# 6. Set up and launch the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    compute_metrics=compute_metrics,
)
trainer.train()
trainer.evaluate()
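After training, a quick single-sentence check confirms the classifier head behaves sensibly. A minimal sketch (assumes the model and tokenizer from above are still in scope; in SST-2, label 1 is positive and 0 is negative):

model.eval()
inputs = tokenizer("This movie is great!", return_tensors="pt", truncation=True).to(model.device)
with torch.no_grad():
    pred = model(**inputs).logits.argmax(dim=-1).item()
print("positive" if pred == 1 else "negative")  # SST-2: 1 = positive, 0 = negative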
II. Hugging Face Quantization
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
from analyze import *

# 1. Load the fine-tuned BERT model and tokenizer
model_name = "./output/checkpoint-2105/"
tokenizer = AutoTokenizer.from_pretrained(model_name)
original_model = AutoModelForSequenceClassification.from_pretrained(model_name).to('cuda')

# 2. Load the model with 8-bit quantization
quantized_model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    device_map="auto",    # automatically place weights on available devices
    load_in_8bit=True,    # enable 8-bit quantization
)
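As a side note, recent transformers releases route 8-bit loading through a quantization config object rather than the bare flag. A roughly equivalent sketch, assuming a transformers version that ships BitsAndBytesConfig:

from transformers import BitsAndBytesConfig

quantized_model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),  # replaces the deprecated load_in_8bit=True flag
)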
- Comparing model size
The model size drops substantially, by roughly 3.3x (417.655 MB → 127.269 MB);
def print_model_size(model, model_name):
    param_size = 0
    for param in model.parameters():
        param_size += param.nelement() * param.element_size()
    buffer_size = 0
    for buffer in model.buffers():
        buffer_size += buffer.nelement() * buffer.element_size()
    size_all_mb = (param_size + buffer_size) / 1024**2
    print(f"{model_name} size: {size_all_mb:.3f}MB")

print_model_size(original_model, "Original BERT")
print_model_size(quantized_model, "Quantized 8-bit BERT")
#Original BERT size: 417.655MB
#Quantized 8-bit BERT size: 127.269MB
- Comparing model accuracy
While the size shrinks, accuracy on the validation set also drops sharply;
def evaluate(model, dataset, labels):
    model.eval()
    preds = []
    with torch.no_grad():
        for i in range(0, len(dataset), 32):  # batch size = 32
            batch = dataset[i:i + 32]
            input_ids = batch["input_ids"].to(model.device)
            attention_mask = batch["attention_mask"].to(model.device)
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            batch_preds = torch.argmax(logits, dim=1).cpu()
            preds.extend(batch_preds.tolist())
    correct = sum(int(p == t) for p, t in zip(preds, labels))
    acc = correct / len(labels)
    return acc
def preprocess(example):
    return tokenizer(example["sentence"], truncation=True, padding="max_length", max_length=128)

from datasets import load_dataset
dataset = load_dataset("glue", "sst2")
val_dataset = dataset["validation"]
encoded_val_dataset = val_dataset.map(preprocess, batched=True)
encoded_val_dataset.set_format(type="torch", columns=["input_ids", "attention_mask"])
labels = torch.tensor(val_dataset["label"])

acc_fp32 = evaluate(original_model, encoded_val_dataset, labels)
acc_int8 = evaluate(quantized_model, encoded_val_dataset, labels)

print(f"Original FP32 model accuracy: {acc_fp32:.4f}")
print(f"Quantized INT8 model accuracy: {acc_int8:.4f}")

#Original FP32 model accuracy: 0.9300
#Quantized INT8 model accuracy: 0.5482
- Quantization analysis
👉 While this quantization approach is simple, it has one obvious limitation. It is a lightweight scheme that Hugging Face implements on top of the bitsandbytes library; under the hood it relies on bitsandbytes' 8-bit inference kernels (LLM.int8()), not PyTorch's quantization stack:
- Weights are stored as FP16 or INT8, but they are not PyTorch quantized tensors (QTensor)
- The goal is purely to save GPU memory and host RAM
🎯 Consequently, this method cannot be analyzed layer by layer via tensor.q_scale() and tensor.q_zero_point().
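To make the distinction concrete, here is a minimal sketch contrasting a genuine PyTorch quantized tensor, which does expose q_scale()/q_zero_point(), with the ordinary tensors the 8-bit-loaded model holds (the loop over quantized_model is illustrative; exact parameter dtypes depend on the bitsandbytes version):

import torch

x = torch.randn(4, 4)
qx = torch.quantize_per_tensor(x, scale=0.1, zero_point=0, dtype=torch.qint8)
print(qx.q_scale(), qx.q_zero_point())  # works: qx is a real QTensor

for name, param in quantized_model.named_parameters():
    print(name, param.dtype)  # int8 / fp16 storage, but an ordinary tensor:
    # calling param.q_scale() here would raise, since these are not QTensors
    break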
III. PyTorch Eager Mode Quantization
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# 1. Load the original model
model_name = "./output/checkpoint-2105/"
model = AutoModelForSequenceClassification.from_pretrained(model_name).eval()
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 2. Move the model to CPU (Eager-mode quantization is best run on CPU)
model.to('cpu')

# 3. Apply the quantization config (dynamic quantization)
quantized_model = torch.quantization.quantize_dynamic(
    model,
    {torch.nn.Linear},   # module types to quantize
    dtype=torch.qint8    # quantized dtype
)
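Unlike the bitsandbytes path in section II, eager-mode dynamic quantization produces genuine quantized weight tensors, so the per-layer analysis that was impossible above works here. A minimal sketch (assumes the default dynamic qconfig; on older PyTorch the class lives at torch.nn.quantized.dynamic.Linear):

import torch
import torch.ao.nn.quantized.dynamic as nnqd

for name, module in quantized_model.named_modules():
    if isinstance(module, nnqd.Linear):
        w = module.weight()  # a true QTensor
        if w.qscheme() in (torch.per_tensor_affine, torch.per_tensor_symmetric):
            print(f"{name}: scale={w.q_scale():.6f}, zero_point={w.q_zero_point()}")
        else:  # per-channel weight quantization
            print(f"{name}: first channel scale={w.q_per_channel_scales()[0].item():.6f}")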
- Size comparison after quantization: the result comes out slightly larger than with the Hugging Face quantization approach
# Size comparison
# Original BERT size: 417.655MB
# Quantized 8-bit BERT size: 127.269MB

# Accuracy comparison
# Original FP32 model accuracy: 0.9300
# Quantized INT8 model accuracy: 0.5482 (unchanged)
IV. PyTorch Export Quantization (known bug)
This quantization approach still has a bug that I have not been able to track down, so help from anyone who has seen this would be appreciated. The main symptom: the model quantizes successfully, but inference with the quantized model raises an error, and the reported size of the quantized model is also strange: Original BERT size: 417.655MB; Quantized 8-bit BERT size: 0.001MB.
import torch
from torch.export import export
from torch.ao.quantization.quantize_pt2e import prepare_pt2e, convert_pt2e
from torch.ao.quantization.quantizer.xnnpack_quantizer import get_symmetric_quantization_config
from torch.ao.quantization.quantizer.x86_inductor_quantizer import X86InductorQuantizer
from transformers import AutoModelForSequenceClassification, AutoTokenizer
model_name = "./output/checkpoint-2105/"
# 1. 加载原始模型
model = AutoModelForSequenceClassification.from_pretrained(model_name).eval().cuda()
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 2. Prepare an example input
example = tokenizer("This movie is great!", return_tensors="pt", padding="max_length", max_length=128)
example = {k: v.cuda() for k, v in example.items()}
example_inputs = (example["input_ids"], example["attention_mask"])

# 3. Export the model
ep = export(model, args=example_inputs, dynamic_shapes=None)
gm = ep.graph_module

# 4. Set up the quantizer
quantizer = X86InductorQuantizer()
quantizer.set_global(get_symmetric_quantization_config(is_per_channel=True))

# 5. Insert observers
prepared = prepare_pt2e(gm, quantizer)
quantized_model = convert_pt2e(prepared)

# Error message:
# forward() missing 203 required positional arguments: 'p_bert_embeddings_position_embeddings_weight', 'p_bert_embeddings_layernorm_weight', 'p_bert_embeddings_layernorm_bias', ...
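One plausible cause (an assumption based on how torch.export behaves, not verified against this exact checkpoint): export() lifts every parameter and buffer into graph inputs, so ep.graph_module owns no parameters itself. That would explain both symptoms at once: forward() demands 203 extra positional arguments because the lifted parameters (p_bert_embeddings_..., etc.) must now be passed in explicitly, and the size check reports ~0 MB because parameters() on the bare graph module is empty. A sketch of the commonly documented PT2E flow, which keeps parameters bound to the module and adds a calibration pass before conversion (export_for_training requires a recent PyTorch release; other names as above):

from torch.export import export_for_training

gm = export_for_training(model, example_inputs).module()  # parameters stay attached to the module
prepared = prepare_pt2e(gm, quantizer)
prepared(*example_inputs)                 # calibration pass so the observers record statistics
quantized_model = convert_pt2e(prepared)

A second thing worth checking: get_symmetric_quantization_config belongs to the XNNPACK quantizer, while the quantizer object here is an X86InductorQuantizer, which is normally configured with its own get_default_x86_inductor_quantization_config.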