# Sharing Python File, Please Convert it into a Notebook

!pip install transformers einops accelerate bitsandbytes

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
import torch
import base64

# Load the LaMini-T5 checkpoint and its tokenizer from the Hugging Face Hub
checkpoint = "MBZUAI/LaMini-T5-738M"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
base_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

!pip install langchain langchain-community langchain-huggingface

from langchain_huggingface import HuggingFacePipeline

def slm_pipeline():
    # Wrap the local model in a transformers pipeline and expose it to LangChain
    pipe = pipeline(
        "text2text-generation",
        model=base_model,
        tokenizer=tokenizer,
        max_length=256,
        do_sample=True,
        temperature=0.3,
        top_p=0.95,
    )
    local_slm = HuggingFacePipeline(pipeline=pipe)
    return local_slm

# Quick local sanity check before deploying anything
input_prompt = "Write an article about Blockchain and its benefits"
model = slm_pipeline()
gen_text = model.invoke(input_prompt)
print(gen_text)

import json
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

# Resolve the execution role (falls back to a named IAM role when run outside SageMaker)
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

# Hub model configuration. https://huggingface.co/models
hub = {
    'HF_MODEL_ID': 'MBZUAI/LaMini-T5-738M',
    'HF_TASK': 'text2text-generation',
    'device_map': 'auto',
    'torch_dtype': 'torch.float32',
}

# Create the Hugging Face Model class
huggingface_model = HuggingFaceModel(
    image_uri=get_huggingface_llm_image_uri("huggingface", version="3.2.3"),
    env=hub,
    role=role,
)

# Deploy the model to SageMaker Inference
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.xlarge",
    container_startup_health_check_timeout=300,
)

# Send a test request through the predictor
predictor.predict({
    "inputs": "Write an article about Cyber Security",
})

# Paste the deployed endpoint name here (also available as predictor.endpoint_name)
ENDPOINT = "huggingface-pytorch-tgi-inference-XXXXXXX"

sagemaker_runtime = boto3.client("sagemaker-runtime", region_name='us-east-1')
endpoint_name = ENDPOINT

# API payload
prompt = "Write an article on Deep learning"
payload = {
    'inputs': prompt,
    'parameters': {
        'max_new_tokens': 256,
        'do_sample': True,
        'temperature': 0.3,
        'top_p': 0.7,
        'top_k': 50,
        'repetition_penalty': 1.03,
    }
}

# Invoke the endpoint directly via the SageMaker runtime client
response = sagemaker_runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="application/json",
    Body=json.dumps(payload),
)

predictions = json.loads(response['Body'].read().decode('utf-8'))
final_result = predictions[0]['generated_text']
print(final_result)
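
# The deployed endpoint keeps billing until it is deleted. A minimal cleanup
# sketch using the standard sagemaker Predictor API, assuming `predictor` is
# still in scope from the deploy step above:
predictor.delete_model()     # remove the SageMaker model object
predictor.delete_endpoint()  # tear down the endpoint and its endpoint config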
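
# Since the goal is to turn this .py file into a notebook, one option is
# jupytext. A minimal sketch, assuming this script is saved as
# "lamini_sagemaker.py" (hypothetical filename); the shell equivalent is
# `jupytext --to notebook lamini_sagemaker.py`:
import jupytext

nb = jupytext.read("lamini_sagemaker.py")     # parse the script into a notebook object
jupytext.write(nb, "lamini_sagemaker.ipynb")  # write it out as an .ipynb file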