# [Code template] Load a tokenizer and model from Hugging Face and run inference.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Model repository on the Hugging Face Hub; hoisted so it is spelled once.
MODEL_NAME = "PowerInfer/SmallThinker-3B-Preview"

# NOTE(review): trust_remote_code executes custom modeling code shipped with the
# repo — only enable it for model repos you trust.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype="auto", trust_remote_code=True)

# Fix: the original hard-coded device=0 (first CUDA GPU), which raises on
# CPU-only machines. Fall back to CPU (-1) when no GPU is available.
device = 0 if torch.cuda.is_available() else -1
qa_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=device,
)

question = "什么是激活稀疏性(activation sparsity)"
# Simple Chinese "Q: ... / A:" prompt format (runtime strings kept as-is).
prompt = f"问:{question}\n答:"

# do_sample=True makes the output stochastic; eos_token_id lets generation
# stop early at the tokenizer's end-of-sequence token.
result = qa_pipeline(
    prompt,
    max_new_tokens=256,
    do_sample=True,
    eos_token_id=tokenizer.eos_token_id,
)
# pipeline returns a list of dicts; "generated_text" includes the prompt prefix.
print(result[0]["generated_text"])