First Custom Model: AlexNet Inference
More information about the AlexNet service can be found at 🔗link.
- Implement a custom model using the KServe API and save it as model.py
import argparse
import base64
import io
import time

from fastapi.middleware.cors import CORSMiddleware
from torchvision import models, transforms
from typing import Dict
import torch
from PIL import Image

import kserve
from kserve import Model, ModelServer, logging
from kserve.model_server import app
from kserve.utils.utils import generate_uuid


class AlexNetModel(Model):
    def __init__(self, name: str):
        super().__init__(name, return_response_headers=True)
        self.name = name
        self.ready = False
        self.load()

    def load(self):
        self.model = models.alexnet(weights=models.AlexNet_Weights.DEFAULT)
        self.model.eval()
        # The ready flag is used by the model ready endpoint for readiness probes;
        # it is set to True when the model is loaded successfully without exceptions.
        self.ready = True

    async def predict(
        self,
        payload: Dict,
        headers: Dict[str, str] = None,
        response_headers: Dict[str, str] = None,
    ) -> Dict:
        start = time.time()
        # Input follows the TensorFlow V1 HTTP API for binary values
        # https://www.tensorflow.org/tfx/serving/api_rest#encoding_binary_values
        img_data = payload["instances"][0]["image"]["b64"]
        raw_img_data = base64.b64decode(img_data)
        input_image = Image.open(io.BytesIO(raw_img_data))
        preprocess = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
        input_tensor = preprocess(input_image).unsqueeze(0)
        output = self.model(input_tensor)
        scores = torch.nn.functional.softmax(output, dim=1)
        values, top_5 = torch.topk(scores, 5)
        result = values.flatten().tolist()
        end = time.time()
        response_id = generate_uuid()

        # Custom response headers can be added to the inference response
        if response_headers is not None:
            response_headers.update(
                {"prediction-time-latency": f"{round((end - start) * 1000, 9)}"}
            )

        return {"predictions": result}


parser = argparse.ArgumentParser(parents=[kserve.model_server.parser])
args, _ = parser.parse_known_args()

if __name__ == "__main__":
    # Configure the kserve and uvicorn loggers
    if args.configure_logging:
        logging.configure_logging(args.log_config_file)
    model = AlexNetModel(args.model_name)
    model.load()
    # Custom middlewares can be added to the model server's FastAPI app
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )
    ModelServer().start([model])
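Before containerizing, you can optionally smoke-test the server locally by running python model.py --model_name=custom-model, which serves HTTP on port 8080 by default. The following is a minimal sketch (not part of the official example; the image path is a placeholder) that base64-encodes a local image into the TensorFlow V1 binary payload format that predict() expects and posts it to the local server:

import base64
import json
import urllib.request

IMAGE_PATH = "cat.jpg"  # placeholder: any local JPEG/PNG works

# Encode the image the same way the predict() method decodes it
with open(IMAGE_PATH, "rb") as f:
    b64_image = base64.b64encode(f.read()).decode("utf-8")

payload = {"instances": [{"image": {"b64": b64_image}}]}

req = urllib.request.Request(
    "http://localhost:8080/v1/models/custom-model:predict",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(resp.read().decode("utf-8"))  # e.g. {"predictions": [...]}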
- Create requirements.txt with the following dependencies:
kserve
torchvision==0.18.0
pillow>=10.3.0,<11.0.0
- Create a Dockerfile:
FROM m.daocloud.io/docker.io/library/python:3.11-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY model.py .
CMD ["python", "model.py", "--model_name=custom-model"]
- Build and push the custom Docker image
docker build -t ay-custom-model .
docker tag ay-custom-model docker-registry.lab.zverse.space/ay/ay-custom-model:latest
docker push docker-registry.lab.zverse.space/ay/ay-custom-model:latest
- Create a namespace
kubectl create namespace kserve-test
- Deploy a sample custom-model service
kubectl apply -n kserve-test -f - <<EOF
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: ay-custom-model
spec:
  predictor:
    containers:
      - name: kserve-container
        image: docker-registry.lab.zverse.space/ay/ay-custom-model:latest
EOF
- Check the InferenceService status
kubectl -n kserve-test get inferenceservices ay-custom-model
After all pods are ready, you can access the service through the Istio ingress gateway.
- Determine the ingress host and port
kubectl -n istio-system get service istio-ingressgateway
If the EXTERNAL-IP value is set, your environment has an external load balancer that you can use for the ingress gateway:
export INGRESS_HOST=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
export INGRESS_PORT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}')
If the EXTERNAL-IP value is none (or perpetually pending), your environment does not provide an external load balancer for the ingress gateway. In this case, you can access the gateway using the service’s node port.
export INGRESS_HOST=$(minikube ip)
export INGRESS_PORT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}')
Alternatively, you can port-forward the ingress gateway to your local machine and access it via localhost:
kubectl port-forward --namespace istio-system svc/istio-ingressgateway 30080:80
export INGRESS_HOST=localhost
export INGRESS_PORT=30080
- Perform a prediction
First, prepare your inference input request inside a file:
wget -O ./alex-net-input.json https://kserve.github.io/website/0.15/modelserving/v1beta1/custom/custom_model/input.json
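If you prefer to build the request body from your own image instead of downloading the sample, a minimal sketch (the image path is a placeholder) that writes an equivalent file is:

import base64
import json

IMAGE_PATH = "my-image.jpg"  # placeholder: substitute any local image

with open(IMAGE_PATH, "rb") as f:
    b64_image = base64.b64encode(f.read()).decode("utf-8")

# Same {"instances": [{"image": {"b64": ...}}]} shape as the sample input.json
with open("alex-net-input.json", "w") as f:
    json.dump({"instances": [{"image": {"b64": b64_image}}]}, f)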
- Invoke the service
export SERVICE_HOSTNAME=$(kubectl -n kserve-test get inferenceservice ay-custom-model -o jsonpath='{.status.url}' | cut -d "/" -f 3)
# http://ay-custom-model.kserve-test.example.com
curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" -X POST "http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/custom-model:predict" -d @./alex-net-input.json
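For reference, a Python equivalent of the curl call above (a sketch assuming the INGRESS_HOST, INGRESS_PORT, and SERVICE_HOSTNAME environment variables are exported as shown) looks like:

import os
import urllib.request

host = os.environ["INGRESS_HOST"]
port = os.environ["INGRESS_PORT"]
service_hostname = os.environ["SERVICE_HOSTNAME"]

with open("alex-net-input.json", "rb") as f:
    body = f.read()

# The Host header routes the request to the ay-custom-model InferenceService
req = urllib.request.Request(
    f"http://{host}:{port}/v1/models/custom-model:predict",
    data=body,
    headers={"Content-Type": "application/json", "Host": service_hostname},
)
with urllib.request.urlopen(req) as resp:
    print(resp.read().decode("utf-8"))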