保留 HTML/XML 标签结构,仅翻译文本内容,避免破坏富文本格式。采用「HTML 解析 → 文本提取 → 批量翻译 → 回填」的流程。
百度翻译集成方案:富文本内容翻译系统
HTML 解析 + 百度翻译 API 集成
文件结构
app/
├── Controller/
│   └── TranslationController.php
├── Service/
│   ├── BaiduTranslator.php
│   └── HtmlParser.php
├── Job/
│   └── TranslateContentJob.php
└── Model/
    └── Article.php
config/
└── autoload/
    └── translation.php
代码实现
- 配置文件 config/autoload/translation.php
<?php

declare(strict_types=1);

/**
 * Baidu Translate settings consumed by the HTML parser and the translator service.
 */
return [
    'baidu' => [
        // Credentials issued by the Baidu Translate open platform; supplied through .env.
        'appid' => env('BAIDU_TRANSLATE_APPID', ''),
        'secret' => env('BAIDU_TRANSLATE_SECRET', ''),

        // General text-translation endpoint. The client signs requests as
        // md5(appid + q + salt + secret) and sends no "domain" field, which is the
        // contract of this endpoint rather than of the domain-specific "fieldtranslate" one.
        'api_url' => 'https://fanyi-api.baidu.com/api/trans/vip/translate',

        // Number of text segments sent per request.
        'chunk_size' => 30,

        // Maximum number of bytes sent in a single request.
        'max_length' => 5000,

        // Tags whose structure is kept as-is around translated text.
        'preserve_tags' => 'p,div,span,h1,h2,h3,h4,h5,h6,ul,ol,li,table,tr,td,th,img,video,a,strong,em,b,i,u',

        // Tags whose content must never be translated.
        'ignore_tags' => 'code,pre,script,style',
    ],
];
- 模型 app/Model/Article.php
<?php

declare(strict_types=1);

namespace App\Model;

use Hyperf\DbConnection\Model\Model;

/**
 * Article row holding the source content and its English translation.
 *
 * @property int $id
 * @property string $title
 * @property string $content
 * @property string $en_content
 * @property int $translation_status 0 = pending, 1 = in progress, 2 = completed, 3 = failed
 * @property \Carbon\Carbon $created_at
 * @property \Carbon\Carbon $updated_at
 */
class Article extends Model
{
    /** Translation has not started yet. */
    public const STATUS_PENDING = 0;

    /** Translation is currently running. */
    public const STATUS_PROCESSING = 1;

    /** Translation finished and en_content was stored. */
    public const STATUS_COMPLETED = 2;

    /** Translation was attempted and failed. */
    public const STATUS_FAILED = 3;

    /** Backing database table. */
    protected ?string $table = 'articles';

    /** Attributes that may be mass-assigned. */
    protected array $fillable = [
        'title',
        'content',
        'en_content',
        'translation_status',
    ];

    /** Attribute type casts. */
    protected array $casts = [
        'id' => 'integer',
        'translation_status' => 'integer',
        'created_at' => 'datetime',
        'updated_at' => 'datetime',
    ];
}
- HTML 解析器 app/Service/HtmlParser.php
<?php

declare(strict_types=1);

namespace App\Service;

use DOMAttr;
use DOMDocument;
use DOMElement;
use DOMNode;
use DOMText;
use DOMXPath;
use voku\helper\HtmlDomParser;

/**
 * Extracts the translatable pieces of an HTML fragment and writes translations back
 * into the same DOM, so every tag, attribute and ignored element survives untouched.
 *
 * Translatable pieces are non-blank text nodes, <img alt="..."> and <video title="...">.
 * Anything located inside one of the ignored tags (at any depth) is skipped.
 */
class HtmlParser
{
    private const DEFAULT_IGNORE_TAGS = 'code,pre,script,style';

    /** ASCII whitespace that HTML treats as collapsible around inline text. */
    private const HTML_WHITESPACE = " \t\n\r\f";

    /** @var list<string>|null Lower-cased tag names to skip; null until resolved from config. */
    private ?array $ignoreTags;

    /**
     * @param list<string>|null $ignoreTags Tag names whose content must not be translated.
     *                                      When null, "translation.baidu.ignore_tags" is read lazily.
     */
    public function __construct(?array $ignoreTags = null)
    {
        $this->ignoreTags = $ignoreTags === null ? null : self::normalizeTags($ignoreTags);
    }

    /**
     * Collects every translatable piece of the fragment in document order.
     *
     * Each entry contains:
     *  - node:       the DOMText node, or the owning DOMElement for attribute entries
     *  - text:       the text to translate (surrounding whitespace removed, inner runs collapsed)
     *  - is_attr:    attribute name, present only for attribute entries
     *  - outer_html: serialised parent/owner element, kept for backward compatibility
     *  - leading / trailing: original surrounding whitespace of a text node
     *
     * @return list<array<string, mixed>> Empty when the input is blank or has nothing to translate.
     */
    public function extractTextNodes(string $html): array
    {
        if (trim($html) === '') {
            return [];
        }

        $document = $this->loadFragment($html);
        $body = $document->getElementsByTagName('body')->item(0);
        if (!$body instanceof DOMElement) {
            return [];
        }

        // An XPath union yields nodes in document order, so entries follow the reading order.
        $candidates = (new DOMXPath($document))->query('.//text() | .//img/@alt | .//video/@title', $body);
        if ($candidates === false) {
            return [];
        }

        $ignored = $this->ignoredTags();
        $entries = [];

        foreach ($candidates as $candidate) {
            if ($candidate instanceof DOMAttr) {
                $owner = $candidate->ownerElement;
                if ($owner === null || trim($candidate->value) === '' || $this->isIgnored($owner, $ignored)) {
                    continue;
                }

                $entries[] = [
                    'node' => $owner,
                    'text' => $candidate->value,
                    'is_attr' => $candidate->name,
                    'outer_html' => (string) $document->saveHTML($owner),
                ];
                continue;
            }

            if (!$candidate instanceof DOMText || $this->isIgnored($candidate->parentNode, $ignored)) {
                continue;
            }

            $raw = $candidate->data;
            $core = trim($raw, self::HTML_WHITESPACE);
            if ($core === '') {
                continue;
            }

            // Remember the surrounding whitespace so "Hello <b>world</b>" keeps its space
            // after the translated text is written back.
            $leadingLength = strspn($raw, self::HTML_WHITESPACE);
            $parent = $candidate->parentNode;

            $entries[] = [
                'node' => $candidate,
                // Collapse inner whitespace: a line break inside one segment would otherwise be
                // indistinguishable from a segment separator once segments are joined by "\n".
                'text' => (string) preg_replace('/[ \t\n\r\f]+/', ' ', $core),
                'outer_html' => $parent instanceof DOMElement ? (string) $document->saveHTML($parent) : $raw,
                'leading' => substr($raw, 0, $leadingLength),
                'trailing' => substr($raw, $leadingLength + strlen($core)),
            ];
        }

        return $entries;
    }

    /**
     * Writes translations into the nodes returned by extractTextNodes() and serialises
     * the complete fragment, including all elements that were not translated.
     *
     * Entry N receives $translatedTexts[N]; a missing or blank translation leaves the
     * original content in place. Text is assigned as DOM character data, so the
     * serialiser escapes it and no manual htmlspecialchars() call is required.
     * Serialisation is delegated to libxml, which may normalise insignificant
     * whitespace between block-level elements.
     *
     * @param list<array<string, mixed>> $originalNodes   Entries from extractTextNodes().
     * @param list<string>               $translatedTexts Translations in the same order.
     *
     * @return string The translated fragment, or '' when no usable entry was supplied.
     */
    public function replaceTranslatedText(array $originalNodes, array $translatedTexts): string
    {
        $translatedTexts = array_values($translatedTexts);
        $document = null;

        foreach (array_values($originalNodes) as $index => $entry) {
            $node = $entry['node'] ?? null;
            if (!$node instanceof DOMNode) {
                continue;
            }
            $document ??= $node->ownerDocument;

            $translation = $translatedTexts[$index] ?? null;
            if (!is_string($translation) || trim($translation) === '') {
                continue;
            }

            if (isset($entry['is_attr'])) {
                if ($node instanceof DOMElement) {
                    $node->setAttribute((string) $entry['is_attr'], $translation);
                }
                continue;
            }

            if ($node instanceof DOMText) {
                $node->data = ($entry['leading'] ?? '') . $translation . ($entry['trailing'] ?? '');
            }
        }

        if (!$document instanceof DOMDocument) {
            return '';
        }

        $body = $document->getElementsByTagName('body')->item(0);
        if (!$body instanceof DOMElement) {
            return '';
        }

        $html = '';
        foreach ($body->childNodes as $child) {
            $html .= (string) $document->saveHTML($child);
        }

        return $html;
    }

    /**
     * Parses a fragment inside a UTF-8 declared document shell so multi-byte text is read correctly.
     */
    private function loadFragment(string $html): DOMDocument
    {
        $document = new DOMDocument('1.0', 'UTF-8');

        // Rich-text input is rarely valid markup; collect parser warnings instead of emitting them.
        $previous = libxml_use_internal_errors(true);
        try {
            $document->loadHTML(
                '<!DOCTYPE html><html><head>'
                . '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
                . '</head><body>' . $html . '</body></html>'
            );
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
        }

        return $document;
    }

    /**
     * Tells whether the node, or any of its element ancestors, is one of the ignored tags.
     *
     * @param list<string> $ignored Lower-cased tag names.
     */
    private function isIgnored(?DOMNode $node, array $ignored): bool
    {
        for ($current = $node; $current instanceof DOMElement; $current = $current->parentNode) {
            if (in_array(strtolower($current->nodeName), $ignored, true)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Returns the ignore list, resolving it from configuration once when none was injected.
     *
     * @return list<string>
     */
    private function ignoredTags(): array
    {
        if ($this->ignoreTags === null) {
            $configured = (string) config('translation.baidu.ignore_tags', self::DEFAULT_IGNORE_TAGS);
            $this->ignoreTags = self::normalizeTags(explode(',', $configured));
        }

        return $this->ignoreTags;
    }

    /**
     * Trims and lower-cases tag names and drops empty entries.
     *
     * @param array<int|string, mixed> $tags
     *
     * @return list<string>
     */
    private static function normalizeTags(array $tags): array
    {
        $normalized = [];
        foreach ($tags as $tag) {
            $name = strtolower(trim((string) $tag));
            if ($name !== '') {
                $normalized[] = $name;
            }
        }

        return $normalized;
    }
}
- 百度翻译服务 app/Service/BaiduTranslator.php
<?php

declare(strict_types=1);

namespace App\Service;

use Hyperf\Di\Annotation\Inject;
use Hyperf\Guzzle\ClientFactory;
use Hyperf\HttpServer\Contract\RequestInterface;
use Hyperf\Utils\Arr;
use Hyperf\Utils\Codec\Json;

/**
 * Translates the text content of an HTML fragment from Chinese to English through the
 * Baidu Translate HTTP API while leaving the markup to HtmlParser.
 */
class BaiduTranslator
{
    #[Inject]
    private ClientFactory $clientFactory;

    #[Inject]
    private HtmlParser $htmlParser;

    /**
     * Translates every text segment found by the parser and returns the rebuilt HTML.
     *
     * @param string $html Source fragment.
     *
     * @return string Translated fragment; the input itself when nothing is translatable.
     */
    public function translate(string $html): string
    {
        $textNodes = $this->htmlParser->extractTextNodes($html);
        if (empty($textNodes)) {
            return $html;
        }

        $texts = array_column($textNodes, 'text');

        // Each batch returns exactly as many items as it received, so appending the
        // batches keeps index N aligned with text node N.
        $translatedTexts = [];
        foreach ($this->batchTexts($texts) as $batch) {
            foreach ($this->translateChunk($batch) as $translated) {
                $translatedTexts[] = $translated;
            }
        }

        return $this->htmlParser->replaceTranslatedText($textNodes, $translatedTexts);
    }

    /**
     * Splits the segments into request-sized batches.
     *
     * A batch is closed when it reaches "chunk_size" segments or when adding the next
     * segment would exceed "max_length" bytes. A single segment larger than the byte
     * limit is sent on its own.
     *
     * @param list<string> $texts
     *
     * @return list<list<string>>
     */
    private function batchTexts(array $texts): array
    {
        // Guard against zero or negative configuration values.
        $maxItems = max(1, (int) config('translation.baidu.chunk_size', 30));
        $maxBytes = max(1, (int) config('translation.baidu.max_length', 5000));

        $batches = [];
        $current = [];
        $currentBytes = 0;

        foreach ($texts as $text) {
            // +1 accounts for the "\n" separator used when the batch is joined.
            $size = strlen((string) $text) + 1;

            if ($current !== [] && (count($current) >= $maxItems || $currentBytes + $size > $maxBytes)) {
                $batches[] = $current;
                $current = [];
                $currentBytes = 0;
            }

            $current[] = $text;
            $currentBytes += $size;
        }

        if ($current !== []) {
            $batches[] = $current;
        }

        return $batches;
    }

    /**
     * Translates one batch and returns a list of the same length and order.
     *
     * Segments are joined with "\n" into a single query; the API answers with one
     * result per line. Line breaks inside a segment are therefore flattened to spaces
     * and blank segments are not sent at all. If the request fails, the API reports an
     * error, or the number of results differs from the number of lines sent, the
     * failure is logged and the original texts are returned as a fallback.
     *
     * @param list<string> $texts
     *
     * @return list<string>
     */
    private function translateChunk(array $texts): array
    {
        $texts = array_values($texts);

        $lines = [];
        foreach ($texts as $position => $text) {
            $line = trim(str_replace(["\r\n", "\r", "\n"], ' ', (string) $text));
            if ($line !== '') {
                $lines[$position] = $line;
            }
        }

        if ($lines === []) {
            return $texts;
        }

        try {
            $appId = (string) config('translation.baidu.appid', '');
            $secret = (string) config('translation.baidu.secret', '');
            if ($appId === '' || $secret === '') {
                throw new \RuntimeException('百度翻译 appid 或 secret 未配置');
            }

            $salt = (string) time();
            $combinedText = implode("\n", $lines);

            // The payload is plain text taken out of the DOM, so only the basic
            // translation fields are sent and no markup-handling switches are needed.
            $params = [
                'q' => $combinedText,
                'from' => 'zh',
                'to' => 'en',
                'appid' => $appId,
                'salt' => $salt,
                // The signature is computed over the exact query string that is sent.
                'sign' => md5($appId . $combinedText . $salt . $secret),
            ];

            $response = $this->clientFactory->create()->post(config('translation.baidu.api_url'), [
                'form_params' => $params,
                'timeout' => 15,
            ]);
            $result = Json::decode($response->getBody()->getContents());

            // "52000" is the platform's success code; anything else is a failure.
            if (isset($result['error_code']) && (string) $result['error_code'] !== '52000') {
                $errorMessage = $result['error_msg'] ?? '';
                throw new \RuntimeException("百度翻译错误: {$result['error_code']} - {$errorMessage}");
            }

            $translations = array_column($result['trans_result'] ?? [], 'dst');
            if (count($translations) !== count($lines)) {
                // A mismatch would shift every following translation onto the wrong node.
                throw new \RuntimeException(sprintf(
                    '百度翻译返回结果数量不匹配: 期望 %d 条, 实际 %d 条',
                    count($lines),
                    count($translations)
                ));
            }

            $merged = $texts;
            foreach (array_keys($lines) as $offset => $position) {
                $translated = $translations[$offset];
                if (is_string($translated) && $translated !== '') {
                    $merged[$position] = $translated;
                }
            }

            return $merged;
        } catch (\Throwable $e) {
            // Best-effort degradation: log the failure and keep the source text.
            logger()->error('百度翻译失败: ' . $e->getMessage(), [
                'texts' => $texts,
                'exception' => $e,
            ]);

            return $texts;
        }
    }
}
- 异步任务 app/Job/TranslateContentJob.php
<?php

declare(strict_types=1);

namespace App\Job;

use App\Model\Article;
use App\Service\BaiduTranslator;
use Hyperf\AsyncQueue\Job;

/**
 * Queue job that translates one article's content and stores the result in en_content.
 */
class TranslateContentJob extends Job
{
    /** Primary key of the article to translate. */
    public int $articleId;

    /**
     * @param int $articleId Primary key of the article to translate.
     */
    public function __construct(int $articleId)
    {
        $this->articleId = $articleId;
    }

    /**
     * Runs the translation and records the outcome in translation_status.
     *
     * The status moves to STATUS_PROCESSING before the translator is called, then to
     * STATUS_COMPLETED on success or STATUS_FAILED when anything throws. A missing
     * article is logged and the job ends without further work.
     */
    public function handle(): void
    {
        $article = Article::find($this->articleId);
        if (!$article) {
            logger()->warning("文章不存在: {$this->articleId}");

            return;
        }

        try {
            // Mark the article as in progress so status polling reflects the running job.
            $article->translation_status = Article::STATUS_PROCESSING;
            $article->save();

            $translator = make(BaiduTranslator::class);
            $enContent = $translator->translate($article->content);

            $article->en_content = $enContent;
            $article->translation_status = Article::STATUS_COMPLETED;
            $article->save();
        } catch (\Throwable $e) {
            $article->translation_status = Article::STATUS_FAILED;
            $article->save();

            logger()->error("文章翻译失败: {$e->getMessage()}", [
                'article_id' => $this->articleId,
                'exception' => $e,
            ]);
        }
    }
}
- 控制器 app/Controller/TranslationController.php
<?php

declare(strict_types=1);

namespace App\Controller;

use App\Job\TranslateContentJob;
use App\Model\Article;
use App\Service\BaiduTranslator;
use Hyperf\AsyncQueue\Driver\DriverFactory;
use Hyperf\Di\Annotation\Inject;
use Hyperf\HttpServer\Annotation\AutoController;
use Hyperf\HttpServer\Annotation\Controller;
use Hyperf\HttpServer\Annotation\GetMapping;
use Hyperf\HttpServer\Annotation\PostMapping;
use Hyperf\HttpServer\Contract\RequestInterface;
use Hyperf\HttpServer\Contract\ResponseInterface;
use Hyperf\Utils\ApplicationContext;

/**
 * HTTP endpoints for on-demand translation and for articles translated in the background.
 *
 * Routes are declared explicitly per method, which requires the Controller attribute;
 * AutoController derives routes from method names and does not read the mapping attributes.
 */
#[Controller]
class TranslationController
{
    #[Inject]
    private BaiduTranslator $translator;

    #[Inject]
    private RequestInterface $request;

    #[Inject]
    private ResponseInterface $response;

    /**
     * Translates the posted "content" field synchronously.
     *
     * @return \Psr\Http\Message\ResponseInterface JSON body with code 0 and the translated
     *                                             content plus elapsed time, code 400 when the
     *                                             content is missing, or code 500 on failure.
     */
    #[PostMapping(path: '/translate')]
    public function translate()
    {
        $html = $this->request->post('content', '');

        // Reject non-string payloads (e.g. arrays) and blank input before calling the typed service.
        if (!is_string($html) || trim($html) === '') {
            return $this->response->json([
                'code' => 400,
                'message' => '内容不能为空',
            ]);
        }

        try {
            $start = microtime(true);
            $translatedContent = $this->translator->translate($html);
            $time = round(microtime(true) - $start, 3);

            return $this->response->json([
                'code' => 0,
                'message' => '翻译成功',
                'data' => [
                    'content' => $translatedContent,
                    'time' => $time . 's',
                ],
            ]);
        } catch (\Throwable $e) {
            return $this->response->json([
                'code' => 500,
                'message' => '翻译失败: ' . $e->getMessage(),
            ]);
        }
    }

    /**
     * Stores a new article and queues its translation.
     *
     * @return \Psr\Http\Message\ResponseInterface JSON body with code 0 and the new article id,
     *                                             code 400 when title or content is missing,
     *                                             or code 500 on failure.
     */
    #[PostMapping(path: '/article')]
    public function createArticle()
    {
        $title = $this->request->post('title', '');
        $content = $this->request->post('content', '');

        if (!is_string($title) || !is_string($content) || trim($title) === '' || trim($content) === '') {
            return $this->response->json([
                'code' => 400,
                'message' => '标题和内容不能为空',
            ]);
        }

        try {
            $article = Article::create([
                'title' => $title,
                'content' => $content,
                'translation_status' => Article::STATUS_PENDING,
            ]);

            // Hand the translation to the "default" async queue so the request returns immediately.
            $queue = ApplicationContext::getContainer()->get(DriverFactory::class)->get('default');
            $queue->push(new TranslateContentJob($article->id));

            return $this->response->json([
                'code' => 0,
                'message' => '文章创建成功,翻译任务已提交',
                'data' => [
                    'id' => $article->id,
                    'translation_status' => $article->translation_status,
                ],
            ]);
        } catch (\Throwable $e) {
            return $this->response->json([
                'code' => 500,
                'message' => '文章创建失败: ' . $e->getMessage(),
            ]);
        }
    }

    /**
     * Reports the translation status of an article together with its English content.
     *
     * @param int $id Article primary key taken from the route.
     *
     * @return \Psr\Http\Message\ResponseInterface JSON body with code 0 and the status data,
     *                                             or code 404 when the article does not exist.
     */
    #[GetMapping(path: '/article/status/{id}')]
    public function getTranslationStatus(int $id)
    {
        $article = Article::find($id);
        if (!$article) {
            return $this->response->json([
                'code' => 404,
                'message' => '文章不存在',
            ]);
        }

        $statusMap = [
            Article::STATUS_PENDING => '等待翻译',
            Article::STATUS_PROCESSING => '翻译中',
            Article::STATUS_COMPLETED => '翻译完成',
            Article::STATUS_FAILED => '翻译失败',
        ];

        return $this->response->json([
            'code' => 0,
            'data' => [
                'id' => $article->id,
                'status' => $article->translation_status,
                'status_text' => $statusMap[$article->translation_status] ?? '未知状态',
                'en_content' => $article->en_content,
            ],
        ]);
    }
}
- 环境配置 .env
BAIDU_TRANSLATE_APPID=your_app_id
BAIDU_TRANSLATE_SECRET=your_secret_key
完结!