Noonisy
Trying out Yi-34B-Chat-4bits
2023-12-30



Dependencies

torch       2.1.2    
vllm        0.2.6    
xformers    0.0.23.post1
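
If you want to reproduce this environment, the versions above can be pinned directly with pip (a sketch; it assumes a CUDA-capable machine, and installing vllm will normally pull in compatible torch and xformers on its own):
pip install torch==2.1.2 vllm==0.2.6 xformers==0.0.23.post1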

Download

First, install the modelscope Python library:
pip install modelscope
Then download the model from ModelScope:
from modelscope.hub.snapshot_download import snapshot_download

model_dir = snapshot_download('01ai/Yi-34B-Chat-4bits', cache_dir='LLMs', revision='master', ignore_file_pattern='.bin')
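
snapshot_download returns the local directory the weights are saved to; printing it gives the path you can pass to vLLM's --model argument in the next step (the exact layout under cache_dir depends on your modelscope version, so treat it as an assumption):
print(model_dir)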

Install the vLLM library

pip install vllm

Run

python -m vllm.entrypoints.openai.api_server \
    --model Yi-34B-Chat-4bits/checkpoints/01ai/Yi-34B-Chat-4bits \
    --served-model-name 01ai/Yi-34B-Chat-4bits \
    --trust-remote-code \
    --max-model-len 2048 -q awq
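
Once the server is up, a quick sanity check is to list the models it exposes (the /v1/models endpoint is part of vLLM's OpenAI-compatible API):
curl http://localhost:8000/v1/models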

Test

curl http://localhost:8000/v1/completions \
    -H "Content-Type: application/json" \
    -d '{
        "model": "01ai/Yi-34B-Chat-4bits",
        "prompt": "你是谁?",
        "max_tokens": 100,
        "temperature": 0
    }'
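
The same request can be issued from Python with the openai>=1.x client (a minimal sketch; "EMPTY" is a placeholder key, since vLLM's server does not check it by default):
from openai import OpenAI

client = OpenAI(api_key="EMPTY", base_url="http://localhost:8000/v1")
resp = client.completions.create(
    model="01ai/Yi-34B-Chat-4bits",
    prompt="你是谁?",
    max_tokens=100,
    temperature=0,
)
print(resp.choices[0].text)
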
Benchmark test, using benchmark_throughput.py from the benchmarks/ directory of the vLLM repository:
python benchmark_throughput.py \
    --backend vllm \
    --input-len 128 \
    --output-len 512 \
    --model Yi-34B-Chat-4bits/checkpoints/01ai/Yi-34B-Chat-4bits \
    --trust-remote-code \
    --max-model-len 2048 -q awq --seed 1100 --num-prompts 100

Gradio test

Version dependencies
# openai==1.6.1
pip install openai -U
pip install gradio==3.41
A simple demo
chat.py
from openai import OpenAI
import gradio as gr

openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"

client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)


def predict(message, history):
    history_openai_format = [{"role": "system", "content": "你是一个靠谱的 AI 助手,尽量详细的解答用户的提问。"}]
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    stream = client.chat.completions.create(
        model='01ai/Yi-34B-Chat-4bits',
        messages=history_openai_format,
        temperature=0,
        stream=True,
        extra_body={'repetition_penalty': 1, 'stop_token_ids': [7]}  # this model needs stop_token_ids=[7] set manually, otherwise generation never stops
    )

    partial_message = ""
    for chunk in stream:
        partial_message += (chunk.choices[0].delta.content or "")
        yield partial_message


if __name__ == '__main__':
    gr.ChatInterface(predict).queue().launch()
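
Start the demo and open the local URL Gradio prints (by default it serves on http://127.0.0.1:7860):
python chat.py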
Older openai version
# openai==0.28.1
pip install openai==0.28.1
pip install gradio==3.41
A simple demo
chat.py
import openai
import gradio as gr

openai.api_key = "EMPTY"
openai.api_base = "http://localhost:8000/v1"


def predict(message, history):
    history_openai_format = [{"role": "system", "content": "你是一个靠谱的 AI 助手,尽量详细的解答用户的提问。"}]
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    stream = openai.ChatCompletion.create(
        model='01ai/Yi-34B-Chat-4bits',
        messages=history_openai_format,
        temperature=0,
        stop_token_ids=[7],
        # stream=True,
    )

    yield stream['choices'][0]['message']['content']

# Streaming variant
def predict_s(message, history):
    history_openai_format = [{"role": "system", "content": "你是一个靠谱的 AI 助手,尽量详细的解答用户的提问。"}]
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    stream = openai.ChatCompletion.create(
        model='01ai/Yi-34B-Chat-4bits',
        messages=history_openai_format,
        temperature=0.8,
        stop_token_ids=[7],
        stream=True,
    )

    partial_message = ""
    for chunk in stream:
        # the delta of the first/last chunks may carry no 'content' (e.g. role-only or finish chunks)
        try:
            partial_message += chunk['choices'][0]['delta']['content']
        except (KeyError, IndexError):
            pass

        yield partial_message


if __name__ == '__main__':
    gr.ChatInterface(predict).queue().launch()
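
The __main__ block above launches the non-streaming predict; pass predict_s to gr.ChatInterface instead if you want token-by-token streaming output.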


Last edited: 2023-12-31 05:26