def build_batch(d_emb: int = 128, n_emb: int = 100, seed: int = 42):
    """Build a one-query ``Batch`` of random embeddings plus expected norm scores.

    ``n_emb + 1`` standard-normal vectors of width ``d_emb`` are drawn. The
    last vector becomes the query embedding; the first ``n_emb`` vectors
    become the query's ``Item`` results, with ids ``0 .. n_emb - 1``.

    Args:
        d_emb: width of every embedding vector.
        n_emb: number of result items attached to the query.
        seed: value handed to ``np.random.seed`` before sampling.

    Returns:
        A ``(batch, expected_scores)`` tuple. ``expected_scores[i]`` is the
        Euclidean norm of the embedding of result ``i``.
    """
    # NOTE: this seeds NumPy's *global* RNG, so any later ``np.random`` call
    # in the same process continues from this seeded stream.
    np.random.seed(seed)
    embeddings = np.random.randn(n_emb + 1, d_emb)

    query = Query.from_minimal(embedding=embeddings[-1])
    results = [Item.from_minimal(id=i, embedding=embeddings[i]) for i in range(n_emb)]
    query.add_query_results(results)
    batch = Batch(queries=[query])

    expected_scores = [np.linalg.norm(item.embedding) for item in results]
    return batch, expected_scores
class NormScore():
    """Score function that scores each item by the norm of its embedding.

    Two flags switch the responses into failure modes:

    * ``test_fails``: every response is returned with ``valid=False``.
    * ``test_nulls``: responses are ``valid=True`` but roughly half of them
      (chosen with ``np.random.uniform() < 0.5``) carry ``score=None``.

    ``test_fails`` takes precedence when both flags are set. With neither
    flag set, every response is valid and carries the norm as its score.
    """

    def __init__(self, test_fails, test_nulls):
        self.test_fails = test_fails
        self.test_nulls = test_nulls

    def __call__(self, inputs: List[Item]) -> List[ScoreResponse]:
        """Return one ``ScoreResponse`` per input item, in input order."""
        # An empty list would become a shape-(0,) array whose norm along the
        # last axis is a 0-d scalar; iterating that raises TypeError.
        if not inputs:
            return []

        embeddings = np.array([i.embedding for i in inputs])
        norms = np.linalg.norm(embeddings, axis=-1)

        if self.test_fails:
            results = [ScoreResponse(valid=False, score=i, data={'norm':i}) for i in norms]
        elif self.test_nulls:
            # One uniform draw per item decides whether its score is nulled.
            results = [ScoreResponse(valid=True, score=None if np.random.uniform()<0.5 else i,
                                     data={'norm':i}) for i in norms]
        else:
            results = [ScoreResponse(valid=True, score=i, data={'norm':i}) for i in norms]
        return results
# Case 1: all responses are valid, so each result's score should equal the
# expected embedding norm.
batch, scores = build_batch()
score_func = NormScore(False, False)
score_module = ScoreModule(score_func)
batch = score_module(batch)
assert np.allclose([i.score for i in batch[0]], scores)

# Case 2: every response comes back with valid=False; the first entry of the
# batch is then expected to be flagged as removed.
batch, scores = build_batch()
score_func = NormScore(True, False)
score_module = ScoreModule(score_func)
batch = score_module(batch)
assert batch[0].internal.removed

# Case 3: responses are valid but some carry score=None. A result must be
# flagged as removed exactly when its score is None.
batch, scores = build_batch()
score_func = NormScore(False, True)
score_module = ScoreModule(score_func)
batch = score_module(batch)
for _, result in batch.enumerate_query_results(skip_removed=False):
    if result.score is None:
        assert result.internal.removed
    else:
        assert not result.internal.removed

# Score  (page heading; it had been fused onto the end of the assert above)
The Score step assigns a numeric score to each Item result. This score is used to drive the hill climbing algorithm. The score step is formalized by the ScoreFunction schema, which maps inputs List[Item] to outputs List[ScoreResponse].
The ScoreModule manages execution of a ScoreFunction. The ScoreModule gathers valid items, sends them to the ScoreFunction, and processes the results.
ScoreModule
ScoreModule (function:Callable[[List[emb_opt.schemas.Item]],List[emb_opt.schemas.ScoreResponse]])
Module - module base class
Given an input Batch, the Module:
1. gathers inputs to the function
2. executes the function
3. validates the results of the function with output_schema
4. scatters results back into the Batch
| | Type | Details |
|---|---|---|
| function | typing.Callable[[typing.List[emb_opt.schemas.Item]], typing.List[emb_opt.schemas.ScoreResponse]] | score function |
ScorePlugin
ScorePlugin ()
ScorePlugin - documentation for plugin functions to ScoreFunction
A valid ScoreFunction is any function that maps List[Item] to List[ScoreResponse]. The inputs will be given as Item objects. The outputs can be either a list of ScoreResponse objects or a list of valid json dictionaries that match the ScoreResponse schema.
Item schema:
{
    'id' : Optional[Union[str, int]],
    'item' : Optional[Any],
    'embedding' : List[float],
    'score' : None, # will be None at this stage
    'data' : Optional[Dict],
}
Input schema:
List[Item]
ScoreResponse schema:
{
    'valid' : bool,
    'score' : Optional[float], # can be None if valid=False
    'data' : Optional[Dict],
}
Output schema:
List[ScoreResponse]
The CompositeScorePlugin can be used to chain together a list of valid ScoreFunctions.
CompositeScorePlugin
CompositeScorePlugin (functions:List[Callable[[List[emb_opt.schemas.Item]],List[emb_opt.schemas.ScoreResponse]]])
Initialize self. See help(type(self)) for accurate signature.
| | Type | Details |
|---|---|---|
| functions | typing.List[typing.Callable[[typing.List[emb_opt.schemas.Item]], typing.List[emb_opt.schemas.ScoreResponse]]] | list of score functions |
# Fixture for the composite-score example: one query with n_emb random
# result items, all embeddings of width d_emb.
d_emb = 128
n_emb = 100

# Fixed seed so the sampled embeddings are reproducible.
np.random.seed(42)
embeddings = np.random.randn(n_emb + 1, d_emb)

# The final row is the query embedding; the rows before it become results.
query = Query.from_minimal(embedding=embeddings[n_emb])
results = [
    Item.from_minimal(id=idx, embedding=vector)
    for idx, vector in enumerate(embeddings[:n_emb])
]
query.add_query_results(results)

batch = Batch(queries=[query])
def norm_score(inputs: List[Item]) -> List[ScoreResponse]:
    """Score each item by the Euclidean norm of its embedding.

    Args:
        inputs: items to score; each must expose an ``embedding``.

    Returns:
        One valid ``ScoreResponse`` per input, in input order, with the norm
        as ``score`` and also stored under ``data['norm']``. An empty input
        list yields an empty list.
    """
    # An empty list would become a shape-(0,) array whose norm along the last
    # axis is a 0-d scalar; iterating that raises TypeError.
    if not inputs:
        return []

    embeddings = np.array([i.embedding for i in inputs])
    norms = np.linalg.norm(embeddings, axis=-1)
    return [ScoreResponse(valid=True, score=i, data={'norm':i}) for i in norms]
def sum_score(inputs: List[Item]) -> List[ScoreResponse]:
    """Score each item by the sum of its embedding components.

    Args:
        inputs: items to score; each must expose an ``embedding``.

    Returns:
        One valid ``ScoreResponse`` per input, in input order, with the sum
        as ``score`` and also stored under ``data['sum']``. An empty input
        list yields an empty list.
    """
    # An empty list would become a shape-(0,) array whose sum over the last
    # axis is a 0-d scalar; iterating that raises TypeError.
    if not inputs:
        return []

    embeddings = np.array([i.embedding for i in inputs])
    sums = embeddings.sum(-1)
    return [ScoreResponse(valid=True, score=i, data={'sum':i}) for i in sums]
# Chain the two score functions into a single composite score function.
score_function = CompositeScorePlugin(functions=[norm_score, sum_score])

# Wrap the composite in a ScoreModule and run it over the batch.
score_module = ScoreModule(function=score_function)
batch = score_module(batch)