From 9e3d8b317dbaa9a18a3e0bcc670b36695812d71a Mon Sep 17 00:00:00 2001 From: Eevee Date: Mon, 24 Aug 2009 22:04:55 -0700 Subject: [PATCH] Wildcard lookup! #89 Test suite no longer reloads the entire database. Takes too long. Factored out some magic numbers in lookup(). --- pokedex/lookup.py | 20 +++++++++++++------- pokedex/tests/__init__.py | 3 +-- pokedex/tests/test_lookup.py | 13 +++++++++++++ 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/pokedex/lookup.py b/pokedex/lookup.py index cb6d5f6..35c98e2 100644 --- a/pokedex/lookup.py +++ b/pokedex/lookup.py @@ -20,6 +20,9 @@ from pokedex.roomaji import romanize __all__ = ['open_index', 'lookup'] +INTERMEDIATE_LOOKUP_RESULTS = 25 +MAX_LOOKUP_RESULTS = 10 + # Dictionary of table name => table class. # Need the table name so we can get the class from the table name after we # retrieve something from the index @@ -242,10 +245,13 @@ def lookup(input, valid_types=[], session=None, indices=None, exact_only=False): # provided valid_types = prefixes - # If the input provided is a number, match it as an id. Otherwise, name. - # Term objects do an exact match, so we don't have to worry about a query - # parser tripping on weird characters in the input - if rx_is_number.match(name): + # Do different things depending on what the query looks like + # Note: Term objects do an exact match, so we don't have to worry about a + # query parser tripping on weird characters in the input + if '*' in name or '?' in name: + exact_only = True + query = whoosh.query.Wildcard(u'name', name) + elif rx_is_number.match(name): # Don't spell-check numbers! exact_only = True query = whoosh.query.Term(u'row_id', name) @@ -285,14 +291,14 @@ def lookup(input, valid_types=[], session=None, indices=None, exact_only=False): searcher.weighting = LanguageWeighting() # XXX kosher? 
docs say search() # takes a weighting kw but it # certainly does not - results = searcher.search(query) + results = searcher.search(query, limit=INTERMEDIATE_LOOKUP_RESULTS) # Look for some fuzzy matches if necessary if not exact_only and not results: exact = False results = [] - for suggestion in speller.suggest(name, 25): + for suggestion in speller.suggest(name, INTERMEDIATE_LOOKUP_RESULTS): query = whoosh.query.Term('name', suggestion) results.extend(searcher.search(query)) @@ -319,4 +325,4 @@ def lookup(input, valid_types=[], session=None, indices=None, exact_only=False): # should have more than 10 here and lost a few. The speller returns 25 to # give us some padding, and should avoid that problem. Not a big deal if # we lose the 25th-most-likely match anyway. - return objects[:10] + return objects[:MAX_LOOKUP_RESULTS] diff --git a/pokedex/tests/__init__.py b/pokedex/tests/__init__.py index 29b7a56..7565cc7 100644 --- a/pokedex/tests/__init__.py +++ b/pokedex/tests/__init__.py @@ -7,9 +7,8 @@ from pokedex.db.load import load def setup(): # Reload data just in case session = connect() - load(session, verbose=False, drop_tables=True) open_index(session=session, recreate=True) def teardown(): - print "teardown" + pass diff --git a/pokedex/tests/test_lookup.py b/pokedex/tests/test_lookup.py index b6bef60..703cde9 100644 --- a/pokedex/tests/test_lookup.py +++ b/pokedex/tests/test_lookup.py @@ -89,3 +89,16 @@ def test_fuzzy_lookup(): top_names = [_.object.name for _ in results[0:2]] assert_true(u'Nidoran♂' in top_names, u'Nidoran♂ is a top result for "Nidoran"') assert_true(u'Nidoran♀' in top_names, u'Nidoran♀ is a top result for "Nidoran"') + +def test_wildcard_lookup(): + tests = [ + (u'pokemon:*meleon', u'Charmeleon'), + (u'item:master*', u'Master Ball'), + (u'ee?ee', u'Eevee'), + ] + + for wildcard, name in tests: + results = pokedex.lookup.lookup(wildcard) + first_result = results[0] + assert_equal(first_result.object.name, name, + u'Wildcards work correctly')