import asyncio
import os

import uvicorn
from fastapi import FastAPI, HTTPException
from mlx_lm import load, generate
from pydantic import BaseModel

# Remove Flask-related environment variables (avoids environment pollution).
for _flask_var in ("FLASK_APP", "FLASK_ENV"):
    os.environ.pop(_flask_var, None)
del _flask_var  # do not leave the loop variable behind as a module global

# Create the FastAPI application object (pure FastAPI, no Flask mixed in).
app = FastAPI(
    title="MLX 翻译服务",
    version="1.0",
)

# Preload the model once at import time so every request reuses it.
print("正在加载翻译模型...")
try:
    # Keep the try body to the single call that can fail.
    model, tokenizer = load("alexgusevski/HY-MT1.5-1.8B-q4-mlx")
except Exception as e:
    # Chain the original error so the root cause stays visible in the traceback.
    raise RuntimeError(f"模型加载失败:{e}") from e
else:
    print("模型加载完成!")


# Request body model for the POST /translate endpoint.
class TranslationRequest(BaseModel):
    # Name of the source language as inserted into the prompt; defaults to "英文" (English).
    source_term: str = "英文"
    # Name of the target language as inserted into the prompt; defaults to "中文" (Chinese).
    target_term: str = "中文"
    # Text to translate; required (no default).
    source_text: str


# Translation endpoint: the synchronous generation call is handed to an
# executor thread so the event loop is not blocked while the model runs.
@app.post("/translate")
async def translate(request: TranslationRequest):
    """Translate ``request.source_text`` into the requested target language.

    Args:
        request: Validated request body with the source/target language
            names and the text to translate.

    Returns:
        A dict with ``success``, the original ``source_text``, the stripped
        model output as ``target_text``, and the echoed language names as
        ``source_language`` / ``target_language``.

    Raises:
        HTTPException: Status 500 when prompt construction or generation
            raises any exception; the original error is chained as the cause.
    """
    try:
        # Build the instruction prompt
        prompt = f"""{request.source_term} 翻译成 {request.target_term}
将以下文本翻译为{request.target_term},注意只需要输出翻译结果,不要额外解释:
{request.source_text}"""

        # Wrap the prompt in the tokenizer's chat template when one is set.
        # getattr() avoids an AttributeError for tokenizers that expose
        # apply_chat_template but have no chat_template attribute.
        if (
            hasattr(tokenizer, "apply_chat_template")
            and getattr(tokenizer, "chat_template", None) is not None
        ):
            messages = [{"role": "user", "content": prompt}]
            prompt = tokenizer.apply_chat_template(
                messages, tokenize=False, add_generation_prompt=True
            )

        # mlx_lm.generate is synchronous; run it in the loop's default
        # executor. get_running_loop() is the documented way to obtain the
        # loop from inside a coroutine.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None,
            lambda: generate(
                model,
                tokenizer,
                prompt=prompt,
                verbose=False,
                max_tokens=1024,
            ),
        )

        return {
            "success": True,
            "source_text": request.source_text,
            "target_text": response.strip(),
            "source_language": request.source_term,
            "target_language": request.target_term,
        }
    except Exception as e:
        # Report any failure as HTTP 500 while keeping the original cause chained.
        raise HTTPException(status_code=500, detail=f"翻译失败:{e}") from e


# Health-check endpoint: returns a fixed payload saying the service is running.
@app.get("/health")
async def health_check():
    payload = dict(status="healthy", message="翻译服务正常运行")
    return payload


# Start the server when this file is executed directly.
if __name__ == "__main__":
    # Pass the app object instead of an import string such as "main:app".
    # An import string makes uvicorn import the module a second time (so the
    # model would be loaded twice) and only works when the file is named
    # main.py. Passing the object is valid because reload is off and a single
    # worker is used.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8000,
        reload=False,  # keep off: reload requires an import string
        workers=1,  # single process, to suit the MLX model
    )
# Last modified: 2026-01-11 08:14 PM
# If you found this article useful, feel free to leave a tip.