diff --git a/server/clip_server/executors/clip_onnx.py b/server/clip_server/executors/clip_onnx.py index 14bfe8a7f..6afaef9c0 100644 --- a/server/clip_server/executors/clip_onnx.py +++ b/server/clip_server/executors/clip_onnx.py @@ -78,16 +78,24 @@ def __init__( self._model.start_sessions(sess_options=sess_options, providers=providers) - @monitor() + @monitor(name='preprocess_images_seconds') def _preproc_images(self, docs: 'DocumentArray'): return preproc_image( docs, preprocess_fn=self._preprocess_tensor, return_np=True ) - @monitor() + @monitor(name='preprocess_texts_seconds') def _preproc_texts(self, docs: 'DocumentArray'): return preproc_text(docs, return_np=True) + @monitor(name='encode_images_seconds') + def _encode_images(self, docs: 'DocumentArray'): + docs.embeddings = self._model.encode_image(docs.tensors) + + @monitor(name='encode_texts_seconds') + def _encode_texts(self, docs: 'DocumentArray'): + docs.embeddings = self._model.encode_text(docs.tensors) + @requests(on='/rank') async def rank(self, docs: 'DocumentArray', parameters: Dict, **kwargs): await self.encode(docs['@r,m']) @@ -108,7 +116,8 @@ async def encode(self, docs: 'DocumentArray', **kwargs): batch_size=self._minibatch_size, pool=self._pool, ): - minibatch.embeddings = self._model.encode_image(minibatch.tensors) + self._encode_images(minibatch) + # recover original content try: _ = iter(_contents) @@ -124,7 +133,7 @@ async def encode(self, docs: 'DocumentArray', **kwargs): batch_size=self._minibatch_size, pool=self._pool, ): - minibatch.embeddings = self._model.encode_text(minibatch.tensors) + self._encode_texts(minibatch) # recover original content try: _ = iter(_contents) diff --git a/server/clip_server/executors/clip_tensorrt.py b/server/clip_server/executors/clip_tensorrt.py index e1099116e..a05df304a 100644 --- a/server/clip_server/executors/clip_tensorrt.py +++ b/server/clip_server/executors/clip_tensorrt.py @@ -46,7 +46,7 @@ def __init__( self._model.start_engines() - @monitor() + @monitor(name='preprocess_images_seconds') def _preproc_images(self, docs: 'DocumentArray'): return preproc_image( docs, @@ -55,10 +55,30 @@ def _preproc_images(self, docs: 'DocumentArray'): return_np=False, ) - @monitor() + @monitor(name='preprocess_texts_seconds') def _preproc_texts(self, docs: 'DocumentArray'): return preproc_text(docs, device=self._device, return_np=False) + @monitor(name='encode_images_seconds') + def _encode_images(self, docs: 'DocumentArray'): + docs.embeddings = ( + self._model.encode_image(docs.tensors) + .detach() + .cpu() + .numpy() + .astype(np.float32) + ) + + @monitor(name='encode_texts_seconds') + def _encode_texts(self, docs: 'DocumentArray'): + docs.embeddings = ( + self._model.encode_text(docs.tensors) + .detach() + .cpu() + .numpy() + .astype(np.float32) + ) + @requests(on='/rank') async def rank(self, docs: 'DocumentArray', parameters: Dict, **kwargs): await self.encode(docs['@r,m']) @@ -79,13 +99,8 @@ async def encode(self, docs: 'DocumentArray', **kwargs): batch_size=self._minibatch_size, pool=self._pool, ): - minibatch.embeddings = ( - self._model.encode_image(minibatch.tensors) - .detach() - .cpu() - .numpy() - .astype(np.float32) - ) + self._encode_images(minibatch) + # recover original content try: _ = iter(_contents) @@ -101,13 +116,8 @@ async def encode(self, docs: 'DocumentArray', **kwargs): batch_size=self._minibatch_size, pool=self._pool, ): - minibatch.embeddings = ( - self._model.encode_text(minibatch.tensors) - .detach() - .cpu() - .numpy() - .astype(np.float32) - ) + self._encode_texts(minibatch) + # recover original content try: _ = iter(_contents) diff --git a/server/clip_server/executors/clip_torch.py b/server/clip_server/executors/clip_torch.py index a51eb9ee6..2648efa32 100644 --- a/server/clip_server/executors/clip_torch.py +++ b/server/clip_server/executors/clip_torch.py @@ -59,7 +59,7 @@ def __init__( self._pool = ThreadPool(processes=num_worker_preprocess) - @monitor() + @monitor(name='preprocess_images_seconds') def _preproc_images(self, docs: 'DocumentArray'): return preproc_image( docs, @@ -68,10 +68,22 @@ def _preproc_images(self, docs: 'DocumentArray'): return_np=False, ) - @monitor() + @monitor(name='preprocess_texts_seconds') def _preproc_texts(self, docs: 'DocumentArray'): return preproc_text(docs, device=self._device, return_np=False) + @monitor(name='encode_images_seconds') + def _encode_images(self, docs: 'DocumentArray'): + docs.embeddings = ( + self._model.encode_image(docs.tensors).cpu().numpy().astype(np.float32) + ) + + @monitor(name='encode_texts_seconds') + def _encode_texts(self, docs: 'DocumentArray'): + docs.embeddings = ( + self._model.encode_text(docs.tensors).cpu().numpy().astype(np.float32) + ) + @requests(on='/rank') async def rank(self, docs: 'DocumentArray', parameters: Dict, **kwargs): await self.encode(docs['@r,m']) @@ -93,12 +105,8 @@ async def encode(self, docs: 'DocumentArray', **kwargs): batch_size=self._minibatch_size, pool=self._pool, ): - minibatch.embeddings = ( - self._model.encode_image(minibatch.tensors) - .cpu() - .numpy() - .astype(np.float32) - ) + + self._encode_images(minibatch) # recover original content try: @@ -115,12 +123,7 @@ async def encode(self, docs: 'DocumentArray', **kwargs): batch_size=self._minibatch_size, pool=self._pool, ): - minibatch.embeddings = ( - self._model.encode_text(minibatch.tensors) - .cpu() - .numpy() - .astype(np.float32) - ) + self._encode_texts(minibatch) # recover original content try: diff --git a/server/clip_server/onnx-flow.yml b/server/clip_server/onnx-flow.yml index d4bffef8a..3368c2188 100644 --- a/server/clip_server/onnx-flow.yml +++ b/server/clip_server/onnx-flow.yml @@ -2,8 +2,6 @@ jtype: Flow version: '1' with: port: 51000 - monitoring: True - port_monitoring: 9090 executors: - name: clip_o uses: @@ -11,5 +9,3 @@ executors: metas: py_modules: - executors/clip_onnx.py - monitoring: true - port_monitoring: 9091 diff --git a/server/clip_server/tensorrt-flow.yml b/server/clip_server/tensorrt-flow.yml index f8e2393a6..5b9c0dc27 100644 --- a/server/clip_server/tensorrt-flow.yml +++ b/server/clip_server/tensorrt-flow.yml @@ -2,8 +2,6 @@ jtype: Flow version: '1' with: port: 51000 - monitoring: True - port_monitoring: 9090 executors: - name: clip_r uses: @@ -11,5 +9,3 @@ executors: metas: py_modules: - executors/clip_tensorrt.py - monitoring: true - port_monitoring: 9091 diff --git a/server/clip_server/torch-flow.yml b/server/clip_server/torch-flow.yml index a1dfb5d6e..e7ba01c5a 100644 --- a/server/clip_server/torch-flow.yml +++ b/server/clip_server/torch-flow.yml @@ -2,8 +2,6 @@ jtype: Flow version: '1' with: port: 51000 - monitoring: True - port_monitoring: 9090 executors: - name: clip_t uses: @@ -11,5 +9,3 @@ executors: metas: py_modules: - executors/clip_torch.py - monitoring: true - port_monitoring: 9091