Skip to content

Commit

Permalink
Fix a bug that causes embedding regression for indexes with normalize_embeddings set to False
Browse files Browse the repository at this point in the history
  • Loading branch information
papa99do committed Oct 9, 2024
1 parent 9d2452f commit cee2d13
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 32 deletions.
2 changes: 1 addition & 1 deletion src/marqo/s2_inference/multimodal_model_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def __init__(self, model):
self.model = model

def encode(self, content, modality, **kwargs):
return self.model.encode(content)
return self.model.encode(content, **kwargs)


@contextmanager
Expand Down
44 changes: 13 additions & 31 deletions tests/s2_inference/test_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

_load_model = functools.partial(og_load_model, calling_func = "unit_test")


class TestEncoding(unittest.TestCase):

def setUp(self) -> None:
Expand Down Expand Up @@ -48,13 +49,15 @@ def test_vectorize(self):
model = _load_model(model_properties['name'], model_properties=model_properties, device=device)

for sentence in sentences:
output_v = vectorise(name, sentence, model_properties, device, normalize_embeddings=True)
for normalize_embeddings in [True, False]:
output_v = vectorise(name, sentence, model_properties, device,
normalize_embeddings=normalize_embeddings)

assert _check_output_type(output_v)
assert _check_output_type(output_v)

output_m = model.encode(sentence, normalize=True)
output_m = model.encode(sentence, normalize=normalize_embeddings)

assert abs(torch.FloatTensor(output_m) - torch.FloatTensor(output_v)).sum() < eps
assert abs(torch.FloatTensor(output_m) - torch.FloatTensor(output_v)).sum() < eps

clear_loaded_models()

Expand Down Expand Up @@ -252,29 +255,6 @@ def test_model_un_normalization(self):

clear_loaded_models()

def test_onnx_clip_vectorise(self):
names = ["onnx16/open_clip/ViT-B-32/laion400m_e32", 'onnx32/open_clip/ViT-B-32-quickgelu/laion400m_e32']

sentences = ['hello', 'this is a test sentence. so is this.',
['hello', 'this is a test sentence. so is this.']]
device = 'cpu'
eps = 1e-9

for name in names:
model_properties = get_model_properties_from_registry(name)
model = _load_model(model_properties['name'], model_properties=model_properties, device=device)

for sentence in sentences:
output_v = vectorise(name, sentence, model_properties, device, normalize_embeddings=True)

assert _check_output_type(output_v)

output_m = model.encode(sentence, normalize=True)

assert abs(torch.FloatTensor(output_m) - torch.FloatTensor(output_v)).sum() < eps

clear_loaded_models()


class TestOpenClipModelEncoding(unittest.TestCase):
'''
Expand Down Expand Up @@ -307,13 +287,15 @@ def test_open_clip_vectorize(self):
model = _load_model(model_properties['name'], model_properties=model_properties, device=device)

for sentence in sentences:
output_v = vectorise(name, sentence, model_properties, device, normalize_embeddings=True)
for normalize_embeddings in [True, False]:
output_v = vectorise(name, sentence, model_properties, device,
normalize_embeddings=normalize_embeddings)

assert _check_output_type(output_v)
assert _check_output_type(output_v)

output_m = model.encode(sentence, normalize=True)
output_m = model.encode(sentence, normalize=normalize_embeddings)

assert abs(torch.FloatTensor(output_m) - torch.FloatTensor(output_v)).sum() < eps
assert abs(torch.FloatTensor(output_m) - torch.FloatTensor(output_v)).sum() < eps

clear_loaded_models()

Expand Down

0 comments on commit cee2d13

Please sign in to comment.