Source code for neurio.devices.virtual.tensorflow.tflite

#!/usr/bin/env python

"""
Author: Simon Narduzzi
Email: simon.narduzzi@csem.ch
Copyright: CSEM, 2022
Creation: 06.02.23
Description: Evaluator for the TFLite models
"""

# Imports
import json
import multiprocessing
import time
import numpy as np
import tensorflow as tf
from tqdm import tqdm
from neurio.helpers import NpEncoder
from neurio.devices.device import Device
from neurio.common import Prediction
from neurio.converters.tflite_utils import keras_to_tflite
import os
import platform

import psutil


# Supported model types
supported_types = [
    "tflite",
    "h5",
]


class TFLiteVirtual(Device):
    # Benchmark function
    def __init__(self, port: any, name: str = "", log_dir: str = None, options: dict = {'is_quantized': True}):
        super().__init__(port, name, log_dir)
        self.is_quantized = options['is_quantized']
        self.model = None
        self.is_ready_for_inference = False
        raise Exception("TFLiteVirtual is not supported - old implementation")

    def is_alive(self):
        if self.interpreter is None:
            return False
        else:
            return True

    def create_log_dirs(self):
        pass

    def prepare_for_inference(self, model: tf.keras.Model, options: dict = {}):
        self.model_datetime = time.strftime("%a %b %d %H:%M:%S %Y")
        self.model = model
        if isinstance(model, tf.keras.Model):
            self.model_name = os.path.join(self.log_dir, model.name)
            model.save(self.model_name, save_format="h5")
        elif isinstance(model, str):
            self.model_name = model

        if not self.is_ready_for_inference:
            self.tflite_model = keras_to_tflite(model, self.model_name.replace(".h5", ".tflite"))
            self.compile_datetime = time.strftime("%a %b %d %H:%M:%S %Y")
            self.interpreter = tf.lite.Interpreter(model_path=self.tflite_model)
            self.interpreter.allocate_tensors()

            # Get input and output tensors.
            self.input_details = self.interpreter.get_input_details()
            self.output_details = self.interpreter.get_output_details()
            self.input_shape = self.input_details[0]['shape']
            self.is_ready_for_inference = True
        else:
            print("Already ready for inference")

    def save_data(self, data: any, location=None):
        raise NotImplementedError()

    def detach(self):
        if self.is_ready_for_inference:
            self.tflite_model = None
            self.interpreter = None
            self.input_details = None
            self.output_details = None
            self.input_shape = None
            self.is_ready_for_inference = False
        else:
            print("Already detached")

    def predict(self, data: any, batch_size: int = 32, verbose: bool = True) -> Prediction:
        """
        Predicts the data.

        :param data: input data to predict (path to a .npy file, or None to use random data)
        :param batch_size: batch size for the prediction (only 1 is supported)
        :param verbose: whether to print the progress bar
        :return: Prediction object containing the outputs and the profiling statistics
        """
        input_dataset = data
        try:
            # Load the dataset
            if input_dataset is None:
                # Test the model on random input data.
                input_data = np.array(np.random.random_sample(self.input_shape), dtype=np.float32)
                inputs = np.repeat(input_data, 100, axis=0)
            else:
                if self.is_quantized:
                    inputs = np.load(input_dataset).astype(np.uint8)
                else:
                    inputs = np.load(input_dataset).astype(np.float32)

            # Loop over input data
            if batch_size != 1:
                print("Only batch size of 1 is supported. Switching to batch size of 1.")
                batch_size = 1
            nb = batch_size

            all_outputs = []
            times = []
            for i in tqdm(range(0, len(inputs), nb)):
                input_data = inputs[i:i + nb]
                self.interpreter.set_tensor(self.input_details[0]['index'], input_data)

                # Perform the inference
                start = time.time()
                self.interpreter.invoke()
                end = time.time()
                times.append(end - start)

                # The function `get_tensor()` returns a copy of the tensor data.
                # Use `tensor()` in order to get a pointer to the tensor.
                output_data = self.interpreter.get_tensor(self.output_details[0]['index'])
                all_outputs.append(output_data)

            y_pred = np.asarray(all_outputs)

            #########################################################################################
            #
            # Post-process results
            #
            #########################################################################################

            # Retrieve latency metrics
            c_durations = np.array(times)
            mean_latency = c_durations.mean()
            std_latency = c_durations.std()

            profiler = {}
            profiler['latency'] = mean_latency
            profiler['std_latency'] = std_latency

            # Add other metrics
            profiler['model_name'] = self.model_name.split('.')[0]
            profiler['model_type'] = self.model_name.split('.')[1]
            profiler['device_name'] = self.name
            profiler['whole_dataset'] = True
            profiler['batch_size'] = nb

            # Add details about inputs and outputs: drop the non-serializable dtype entries,
            # then add the remaining details to the profiler
            [self.input_details[i].pop("dtype") for i in range(len(self.input_details))]
            [self.output_details[i].pop("dtype") for i in range(len(self.output_details))]
            profiler['inputs'] = [self.input_details[i] for i in range(len(self.input_details))]
            profiler['outputs'] = [self.output_details[i] for i in range(len(self.output_details))]

            profiler['model_datetime'] = self.model_datetime
            profiler['compile_datetime'] = self.compile_datetime
            # profiler['weights'] = TODO self.model.count_params()
            # profiler['activations'] = # TODO
            # profiler['macc'] = # TODO
            profiler['size'] = os.path.getsize(self.model_name)

            profiler["runtime"] = {
                "name": "CSEM TFlite pipeline",
                "version": "0.1",
                "tools_version": {
                    "tensorflow": tf.__version__,
                    "keras": tf.keras.__version__,
                    "numpy": np.__version__
                }
            }

            uname = platform.uname()
            profiler["device"] = {
                "dev_type": uname.system,
                "platform": platform.platform(),
                "release": platform.release(),
                "version": platform.version(),
                "machine": platform.machine(),
                "processor": platform.processor(),
                "sys_clock": int(psutil.cpu_freq().max * 1e6),
                "attr": {
                    "cpu_count": multiprocessing.cpu_count(),
                    "memory": psutil.virtual_memory().total,
                    "disk": psutil.disk_usage('/').total
                }
            }

            # Save results in output file
            result_file = os.path.join(self.results_dir, "results.json")
            with open(result_file, 'w') as f:
                json.dump(profiler, f, cls=NpEncoder)

            print("\n\nTest completed!\nTest report available at: " + str(result_file))

            return Prediction(y_pred, profiler)

        # If an exception occurred, print it and re-raise it so the caller can handle it.
        except Exception as e:
            print(e)
            raise e
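
For reference, a minimal usage sketch of this evaluator is shown below. It is illustrative only: the constructor above currently raises an exception because the implementation is marked as deprecated, and the toy Keras model and the file name "inputs.npy" are placeholders, not part of this module.

# Illustrative sketch only, assuming a working TFLiteVirtual device.
# The model definition and "inputs.npy" are placeholders.
import tensorflow as tf
from neurio.devices.virtual.tensorflow.tflite import TFLiteVirtual

# A small placeholder Keras model (any Keras model or .h5 path would do).
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(28, 28, 1)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10, activation="softmax"),
])

# Instantiate the virtual device; there is no physical port for a virtual target.
device = TFLiteVirtual(port=None, name="tflite-virtual", log_dir="./logs",
                       options={"is_quantized": False})

# Convert the model to TFLite and load it into the interpreter.
device.prepare_for_inference(model)

# predict() expects the path of a .npy file with the input samples;
# only batch_size=1 is effectively supported by this implementation.
prediction = device.predict("inputs.npy", batch_size=1)

# Release the interpreter and associated tensors when done.
device.detach()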