Source code for search.core

"""
Search API core module

Contains the main functions to perform a search.
"""
from search import utils, config
from search.matchers import lazy_match as matcher


[docs]class SearchEngine: """ Creates callable objects to perform lazy search in iterables of objects. The engine is customizable upon creation, and when the function is called. Search can be performed calling directly the object >>> search = SearchEngine(['attr_name'], limit=10) >>> result = search('john doe', [people]) or through the search method >>> search_engine = SearchEngine(['attr_name'], limit=10) >>> result = search_engine.search('john doe') For actual documentation on the search functionality and parameters refer to the :any:`SearchEngine.search` method documentation. """ def __init__(self, attributes, limit=-1, threshold=config.THRESHOLD, weights=None): self.attributes = attributes self.limit = limit self.threshold = threshold self.weights = weights if not self.weights or len(attributes) != len(weights): self.weights = utils.generate_weights(attributes) def __call__(self, query, dataset, attributes=None, limit=None, threshold=None, weights=None): return self.search( query=query, dataset=dataset, attributes=attributes, limit=limit, threshold=threshold, weights=weights, )
[docs] def search( self, query, dataset, attributes=None, limit=None, threshold=None, weights=None): """ Main function of the package, allows to do a fuzzy full-text search on the rows of the given `table` model, looking up the value on the given `attributes` list against the passed `query` string. Extra arguments allows customization on the search results (see below) .. note:: Since this function implements the core functionality, it has the shortcut import >>> from search.core import search >>> from search import search Will have the same effect Arguments: query (str): String to search for attributes (list): The names of thetable columns to search into. dataset (iterable): iterable of `objects` to lookup. All objects in the dataset **must** have the specified attribute(s) limit (int): max number of results to return. if ``-1`` will return everything. threshold (float): paragon for validating match results. weights (list): matching `attributes` argument, describes the attributes weights. if not provided **or** if different length the weight will generated automatically, considering the index of the attribute name, reversed. Returns: list: A list containing ``[0:limit]`` resources from the given dataset, sorted by relevance. Raises: AttributeError: if one of the object does not have one of the given attribute(s). Example: Assuming a random number of items in the Item table, that defines `name`, `category`, `description`, `availability`, one can do: >>> from models import Item >>> from search import search >>> results = search('aweso', ['name', 'category'], Item.select()) [ <Item name: 'awesome item' cat: 'generic'>, <Item name: 'normal item', cat: 'awesome'>, ] Note that even though the category is a perfect match, it's ranked lower priority, so it comes after. """ attributes = attributes or self.attributes weights = weights or self.weights limit = limit or self.limit threshold = threshold or self.threshold matches = [] if not weights or len(weights) != len(attributes): # list of integers of the same length of `attributes` as in # [3, 2, 1] for attributes = ['a', 'b', 'c'] weights = list(range(len(attributes), 0, -1)) weights = utils.scale_to_one(weights) weights = {attr: w for attr, w in zip(attributes, weights)} for obj in dataset: partial_matches = [] for attr in attributes: attrval = getattr(obj, attr) match = matcher(query, attrval) partial_matches.append({'attr': attr, 'match': match}) # get the highest match for each attribute and multiply it by the # attribute weight, so we can get the weighted average to return match = max(partial_matches, key=lambda m: m['match']) # match = match['match'] # * weights[match['attr']] match, attr_weight = match['match'], weights[match['attr']] rating = match + attr_weight if match >= threshold: result_data = {'data': obj, 'match': match, 'rating': rating} matches.append(result_data) matches.sort(key=lambda m: m['rating'], reverse=True) if limit > 0: matches = matches[:limit] return [m['data'] for m in matches]