mirror of
https://github.com/veekun/pokedex.git
synced 2024-08-20 18:16:34 +00:00
Speed up import pokedex.db
slightly.
Importing pokedex can take several seconds due to its rather large dependencies—in particular, sqlalchemy, whoosh, and pkg_resources seem to be the largest offenders. Normally, it would be possible to import only the submodules one needs (pokedex.db, say), but pokedex.__init__ brings in all the submodules, for use by the command-line interface. The fix is rather obvious: - Move the command-line stuff into pokedex.main. Note: because the submodules are no longer imported by default, any script which expects `import pokedex` to be useful will likely break. Note: the `pokedex` command will not work until you re-run `python setup.py develop`, to update entry_points.txt. - Don't import pkg_resources until necessary.
This commit is contained in:
parent
629d99885c
commit
e7c40a08af
4 changed files with 290 additions and 289 deletions
|
@ -1,287 +0,0 @@
|
|||
# encoding: utf8
|
||||
from optparse import OptionParser
|
||||
import os
|
||||
import sys
|
||||
|
||||
# XXX importing pokedex.whatever should not import all these
|
||||
import pokedex.db
|
||||
import pokedex.db.load
|
||||
import pokedex.db.tables
|
||||
import pokedex.lookup
|
||||
from pokedex import defaults
|
||||
|
||||
def main():
|
||||
if len(sys.argv) <= 1:
|
||||
command_help()
|
||||
|
||||
command = sys.argv[1]
|
||||
args = sys.argv[2:]
|
||||
|
||||
# XXX there must be a better way to get Unicode argv
|
||||
# XXX this doesn't work on Windows durp
|
||||
enc = sys.stdin.encoding or 'utf8'
|
||||
args = [_.decode(enc) for _ in args]
|
||||
|
||||
# Find the command as a function in this file
|
||||
func = globals().get("command_%s" % command, None)
|
||||
if func:
|
||||
func(*args)
|
||||
else:
|
||||
command_help()
|
||||
|
||||
|
||||
def get_parser(verbose=True):
|
||||
"""Returns an OptionParser prepopulated with the global options.
|
||||
|
||||
`verbose` is whether or not the options should be verbose by default.
|
||||
"""
|
||||
parser = OptionParser()
|
||||
parser.add_option('-e', '--engine', dest='engine_uri', default=None)
|
||||
parser.add_option('-i', '--index', dest='index_dir', default=None)
|
||||
parser.add_option('-q', '--quiet', dest='verbose', default=verbose, action='store_false')
|
||||
parser.add_option('-v', '--verbose', dest='verbose', default=verbose, action='store_true')
|
||||
return parser
|
||||
|
||||
def get_session(options):
|
||||
"""Given a parsed options object, connects to the database and returns a
|
||||
session.
|
||||
"""
|
||||
|
||||
engine_uri = options.engine_uri
|
||||
got_from = 'command line'
|
||||
|
||||
if engine_uri is None:
|
||||
engine_uri, got_from = defaults.get_default_db_uri_with_origin()
|
||||
|
||||
session = pokedex.db.connect(engine_uri)
|
||||
|
||||
if options.verbose:
|
||||
print "Connected to database %(engine)s (from %(got_from)s)" \
|
||||
% dict(engine=session.bind.url, got_from=got_from)
|
||||
|
||||
return session
|
||||
|
||||
def get_lookup(options, session=None, recreate=False):
|
||||
"""Given a parsed options object, opens the whoosh index and returns a
|
||||
PokedexLookup object.
|
||||
"""
|
||||
|
||||
if recreate and not session:
|
||||
raise ValueError("get_lookup() needs an explicit session to regen the index")
|
||||
|
||||
index_dir = options.index_dir
|
||||
got_from = 'command line'
|
||||
|
||||
if index_dir is None:
|
||||
index_dir, got_from = defaults.get_default_index_dir_with_origin()
|
||||
|
||||
if options.verbose:
|
||||
print "Opened lookup index %(index_dir)s (from %(got_from)s)" \
|
||||
% dict(index_dir=index_dir, got_from=got_from)
|
||||
|
||||
lookup = pokedex.lookup.PokedexLookup(index_dir, session=session)
|
||||
|
||||
if recreate:
|
||||
lookup.rebuild_index()
|
||||
|
||||
return lookup
|
||||
|
||||
def get_csv_directory(options):
|
||||
"""Prints and returns the csv directory we're about to use."""
|
||||
|
||||
if not options.verbose:
|
||||
return
|
||||
|
||||
csvdir = options.directory
|
||||
got_from = 'command line'
|
||||
|
||||
if csvdir is None:
|
||||
csvdir, got_from = defaults.get_default_csv_dir_with_origin()
|
||||
|
||||
print "Using CSV directory %(csvdir)s (from %(got_from)s)" \
|
||||
% dict(csvdir=csvdir, got_from=got_from)
|
||||
|
||||
return csvdir
|
||||
|
||||
|
||||
### Plumbing commands
|
||||
|
||||
def command_dump(*args):
|
||||
parser = get_parser(verbose=True)
|
||||
parser.add_option('-d', '--directory', dest='directory', default=None)
|
||||
options, tables = parser.parse_args(list(args))
|
||||
|
||||
session = get_session(options)
|
||||
get_csv_directory(options)
|
||||
|
||||
pokedex.db.load.dump(session, directory=options.directory,
|
||||
tables=tables,
|
||||
verbose=options.verbose)
|
||||
|
||||
def command_load(*args):
|
||||
parser = get_parser(verbose=True)
|
||||
parser.add_option('-d', '--directory', dest='directory', default=None)
|
||||
parser.add_option('-D', '--drop-tables', dest='drop_tables', default=False, action='store_true')
|
||||
parser.add_option('-S', '--safe', dest='safe', default=False, action='store_true',
|
||||
help="Do not use backend-specific optimalizations.")
|
||||
options, tables = parser.parse_args(list(args))
|
||||
|
||||
if not options.engine_uri:
|
||||
print "WARNING: You're reloading the default database, but not the lookup index. They"
|
||||
print " might get out of sync, and pokedex commands may not work correctly!"
|
||||
print "To fix this, run `pokedex reindex` when this command finishes. Or, just use"
|
||||
print "`pokedex setup` to do both at once."
|
||||
print
|
||||
|
||||
session = get_session(options)
|
||||
get_csv_directory(options)
|
||||
|
||||
pokedex.db.load.load(session, directory=options.directory,
|
||||
drop_tables=options.drop_tables,
|
||||
tables=tables,
|
||||
verbose=options.verbose,
|
||||
safe=options.safe)
|
||||
|
||||
def command_reindex(*args):
|
||||
parser = get_parser(verbose=True)
|
||||
options, _ = parser.parse_args(list(args))
|
||||
|
||||
session = get_session(options)
|
||||
lookup = get_lookup(options, session=session, recreate=True)
|
||||
|
||||
print "Recreated lookup index."
|
||||
|
||||
|
||||
def command_setup(*args):
|
||||
parser = get_parser(verbose=False)
|
||||
options, _ = parser.parse_args(list(args))
|
||||
|
||||
options.directory = None
|
||||
|
||||
session = get_session(options)
|
||||
get_csv_directory(options)
|
||||
pokedex.db.load.load(session, directory=None, drop_tables=True,
|
||||
verbose=options.verbose,
|
||||
safe=False)
|
||||
|
||||
lookup = get_lookup(options, session=session, recreate=True)
|
||||
|
||||
print "Recreated lookup index."
|
||||
|
||||
|
||||
def command_status(*args):
|
||||
parser = get_parser(verbose=True)
|
||||
options, _ = parser.parse_args(list(args))
|
||||
options.verbose = True
|
||||
options.directory = None
|
||||
|
||||
# Database, and a lame check for whether it's been inited at least once
|
||||
session = get_session(options)
|
||||
print " - OK! Connected successfully."
|
||||
|
||||
if pokedex.db.tables.Pokemon.__table__.exists(session.bind):
|
||||
print " - OK! Database seems to contain some data."
|
||||
else:
|
||||
print " - WARNING: Database appears to be empty."
|
||||
|
||||
# CSV; simple checks that the dir exists
|
||||
csvdir = get_csv_directory(options)
|
||||
if not os.path.exists(csvdir):
|
||||
print " - ERROR: No such directory!"
|
||||
elif not os.path.isdir(csvdir):
|
||||
print " - ERROR: Not a directory!"
|
||||
else:
|
||||
print " - OK! Directory exists."
|
||||
|
||||
if os.access(csvdir, os.R_OK):
|
||||
print " - OK! Can read from directory."
|
||||
else:
|
||||
print " - ERROR: Can't read from directory!"
|
||||
|
||||
if os.access(csvdir, os.W_OK):
|
||||
print " - OK! Can write to directory."
|
||||
else:
|
||||
print " - WARNING: Can't write to directory! " \
|
||||
"`dump` will not work. You may need to sudo."
|
||||
|
||||
# Index; the PokedexLookup constructor covers most tests and will
|
||||
# cheerfully bomb if they fail
|
||||
lookup = get_lookup(options, recreate=False)
|
||||
print " - OK! Opened successfully."
|
||||
|
||||
|
||||
### User-facing commands
|
||||
|
||||
def command_lookup(*args):
|
||||
parser = get_parser(verbose=False)
|
||||
options, words = parser.parse_args(list(args))
|
||||
|
||||
name = u' '.join(words)
|
||||
|
||||
session = get_session(options)
|
||||
lookup = get_lookup(options, session=session, recreate=False)
|
||||
|
||||
results = lookup.lookup(name)
|
||||
if not results:
|
||||
print "No matches."
|
||||
elif results[0].exact:
|
||||
print "Matched:"
|
||||
else:
|
||||
print "Fuzzy-matched:"
|
||||
|
||||
for result in results:
|
||||
if hasattr(result.object, 'full_name'):
|
||||
name = result.object.full_name
|
||||
else:
|
||||
name = result.object.name
|
||||
|
||||
print "%s: %s" % (result.object.__tablename__, name),
|
||||
if result.language:
|
||||
print "(%s in %s)" % (result.name, result.language)
|
||||
else:
|
||||
print
|
||||
|
||||
|
||||
def command_help():
|
||||
print u"""pokedex -- a command-line Pokédex interface
|
||||
usage: pokedex {command} [options...]
|
||||
Run `pokedex setup` first, or nothing will work!
|
||||
See http://bugs.veekun.com/projects/pokedex/wiki/CLI for more documentation.
|
||||
|
||||
Commands:
|
||||
help Displays this message.
|
||||
lookup [thing] Look up something in the Pokédex.
|
||||
|
||||
System commands:
|
||||
load Load Pokédex data into a database from CSV files.
|
||||
dump Dump Pokédex data from a database into CSV files.
|
||||
reindex Rebuilds the lookup index from the database.
|
||||
setup Combines load and reindex.
|
||||
status No effect, but prints which engine, index, and csv
|
||||
directory would be used for other commands.
|
||||
|
||||
Global options:
|
||||
-e|--engine=URI By default, all commands try to use a SQLite database
|
||||
in the pokedex install directory. Use this option (or
|
||||
a POKEDEX_DB_ENGINE environment variable) to specify an
|
||||
alternate database.
|
||||
-i|--index=DIR By default, all commands try to put the lookup index in
|
||||
the pokedex install directory. Use this option (or a
|
||||
POKEDEX_INDEX_DIR environment variable) to specify an
|
||||
alternate loction.
|
||||
-q|--quiet Don't print system output. This is the default for
|
||||
non-system commands and setup.
|
||||
-v|--verbose Print system output. This is the default for system
|
||||
commands, except setup.
|
||||
|
||||
System options:
|
||||
-d|--directory=DIR By default, load and dump will use the CSV files in the
|
||||
pokedex install directory. Use this option to specify
|
||||
a different directory.
|
||||
-D|--drop-tables With load, drop all tables before loading data.
|
||||
|
||||
Additionally, load and dump accept a list of table names (possibly with
|
||||
wildcards) and/or csv fileames as an argument list.
|
||||
""".encode(sys.getdefaultencoding(), 'replace')
|
||||
|
||||
sys.exit(0)
|
|
@ -1,13 +1,13 @@
|
|||
""" pokedex.defaults - logic for finding default paths """
|
||||
|
||||
import os
|
||||
import pkg_resources
|
||||
|
||||
def get_default_db_uri_with_origin():
|
||||
uri = os.environ.get('POKEDEX_DB_ENGINE', None)
|
||||
origin = 'environment'
|
||||
|
||||
if uri is None:
|
||||
import pkg_resources
|
||||
sqlite_path = pkg_resources.resource_filename('pokedex',
|
||||
'data/pokedex.sqlite')
|
||||
uri = 'sqlite:///' + sqlite_path
|
||||
|
@ -20,6 +20,7 @@ def get_default_index_dir_with_origin():
|
|||
origin = 'environment'
|
||||
|
||||
if index_dir is None:
|
||||
import pkg_resources
|
||||
index_dir = pkg_resources.resource_filename('pokedex',
|
||||
'data/whoosh-index')
|
||||
origin = 'default'
|
||||
|
@ -27,6 +28,7 @@ def get_default_index_dir_with_origin():
|
|||
return index_dir, origin
|
||||
|
||||
def get_default_csv_dir_with_origin():
|
||||
import pkg_resources
|
||||
csv_dir = pkg_resources.resource_filename('pokedex', 'data/csv')
|
||||
origin = 'default'
|
||||
|
||||
|
|
286
pokedex/main.py
Normal file
286
pokedex/main.py
Normal file
|
@ -0,0 +1,286 @@
|
|||
# encoding: utf8
|
||||
from optparse import OptionParser
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pokedex.db
|
||||
import pokedex.db.load
|
||||
import pokedex.db.tables
|
||||
import pokedex.lookup
|
||||
from pokedex import defaults
|
||||
|
||||
def main():
|
||||
if len(sys.argv) <= 1:
|
||||
command_help()
|
||||
|
||||
command = sys.argv[1]
|
||||
args = sys.argv[2:]
|
||||
|
||||
# XXX there must be a better way to get Unicode argv
|
||||
# XXX this doesn't work on Windows durp
|
||||
enc = sys.stdin.encoding or 'utf8'
|
||||
args = [_.decode(enc) for _ in args]
|
||||
|
||||
# Find the command as a function in this file
|
||||
func = globals().get("command_%s" % command, None)
|
||||
if func:
|
||||
func(*args)
|
||||
else:
|
||||
command_help()
|
||||
|
||||
|
||||
def get_parser(verbose=True):
|
||||
"""Returns an OptionParser prepopulated with the global options.
|
||||
|
||||
`verbose` is whether or not the options should be verbose by default.
|
||||
"""
|
||||
parser = OptionParser()
|
||||
parser.add_option('-e', '--engine', dest='engine_uri', default=None)
|
||||
parser.add_option('-i', '--index', dest='index_dir', default=None)
|
||||
parser.add_option('-q', '--quiet', dest='verbose', default=verbose, action='store_false')
|
||||
parser.add_option('-v', '--verbose', dest='verbose', default=verbose, action='store_true')
|
||||
return parser
|
||||
|
||||
def get_session(options):
|
||||
"""Given a parsed options object, connects to the database and returns a
|
||||
session.
|
||||
"""
|
||||
|
||||
engine_uri = options.engine_uri
|
||||
got_from = 'command line'
|
||||
|
||||
if engine_uri is None:
|
||||
engine_uri, got_from = defaults.get_default_db_uri_with_origin()
|
||||
|
||||
session = pokedex.db.connect(engine_uri)
|
||||
|
||||
if options.verbose:
|
||||
print "Connected to database %(engine)s (from %(got_from)s)" \
|
||||
% dict(engine=session.bind.url, got_from=got_from)
|
||||
|
||||
return session
|
||||
|
||||
def get_lookup(options, session=None, recreate=False):
|
||||
"""Given a parsed options object, opens the whoosh index and returns a
|
||||
PokedexLookup object.
|
||||
"""
|
||||
|
||||
if recreate and not session:
|
||||
raise ValueError("get_lookup() needs an explicit session to regen the index")
|
||||
|
||||
index_dir = options.index_dir
|
||||
got_from = 'command line'
|
||||
|
||||
if index_dir is None:
|
||||
index_dir, got_from = defaults.get_default_index_dir_with_origin()
|
||||
|
||||
if options.verbose:
|
||||
print "Opened lookup index %(index_dir)s (from %(got_from)s)" \
|
||||
% dict(index_dir=index_dir, got_from=got_from)
|
||||
|
||||
lookup = pokedex.lookup.PokedexLookup(index_dir, session=session)
|
||||
|
||||
if recreate:
|
||||
lookup.rebuild_index()
|
||||
|
||||
return lookup
|
||||
|
||||
def get_csv_directory(options):
|
||||
"""Prints and returns the csv directory we're about to use."""
|
||||
|
||||
if not options.verbose:
|
||||
return
|
||||
|
||||
csvdir = options.directory
|
||||
got_from = 'command line'
|
||||
|
||||
if csvdir is None:
|
||||
csvdir, got_from = defaults.get_default_csv_dir_with_origin()
|
||||
|
||||
print "Using CSV directory %(csvdir)s (from %(got_from)s)" \
|
||||
% dict(csvdir=csvdir, got_from=got_from)
|
||||
|
||||
return csvdir
|
||||
|
||||
|
||||
### Plumbing commands
|
||||
|
||||
def command_dump(*args):
|
||||
parser = get_parser(verbose=True)
|
||||
parser.add_option('-d', '--directory', dest='directory', default=None)
|
||||
options, tables = parser.parse_args(list(args))
|
||||
|
||||
session = get_session(options)
|
||||
get_csv_directory(options)
|
||||
|
||||
pokedex.db.load.dump(session, directory=options.directory,
|
||||
tables=tables,
|
||||
verbose=options.verbose)
|
||||
|
||||
def command_load(*args):
|
||||
parser = get_parser(verbose=True)
|
||||
parser.add_option('-d', '--directory', dest='directory', default=None)
|
||||
parser.add_option('-D', '--drop-tables', dest='drop_tables', default=False, action='store_true')
|
||||
parser.add_option('-S', '--safe', dest='safe', default=False, action='store_true',
|
||||
help="Do not use backend-specific optimalizations.")
|
||||
options, tables = parser.parse_args(list(args))
|
||||
|
||||
if not options.engine_uri:
|
||||
print "WARNING: You're reloading the default database, but not the lookup index. They"
|
||||
print " might get out of sync, and pokedex commands may not work correctly!"
|
||||
print "To fix this, run `pokedex reindex` when this command finishes. Or, just use"
|
||||
print "`pokedex setup` to do both at once."
|
||||
print
|
||||
|
||||
session = get_session(options)
|
||||
get_csv_directory(options)
|
||||
|
||||
pokedex.db.load.load(session, directory=options.directory,
|
||||
drop_tables=options.drop_tables,
|
||||
tables=tables,
|
||||
verbose=options.verbose,
|
||||
safe=options.safe)
|
||||
|
||||
def command_reindex(*args):
|
||||
parser = get_parser(verbose=True)
|
||||
options, _ = parser.parse_args(list(args))
|
||||
|
||||
session = get_session(options)
|
||||
lookup = get_lookup(options, session=session, recreate=True)
|
||||
|
||||
print "Recreated lookup index."
|
||||
|
||||
|
||||
def command_setup(*args):
|
||||
parser = get_parser(verbose=False)
|
||||
options, _ = parser.parse_args(list(args))
|
||||
|
||||
options.directory = None
|
||||
|
||||
session = get_session(options)
|
||||
get_csv_directory(options)
|
||||
pokedex.db.load.load(session, directory=None, drop_tables=True,
|
||||
verbose=options.verbose,
|
||||
safe=False)
|
||||
|
||||
lookup = get_lookup(options, session=session, recreate=True)
|
||||
|
||||
print "Recreated lookup index."
|
||||
|
||||
|
||||
def command_status(*args):
|
||||
parser = get_parser(verbose=True)
|
||||
options, _ = parser.parse_args(list(args))
|
||||
options.verbose = True
|
||||
options.directory = None
|
||||
|
||||
# Database, and a lame check for whether it's been inited at least once
|
||||
session = get_session(options)
|
||||
print " - OK! Connected successfully."
|
||||
|
||||
if pokedex.db.tables.Pokemon.__table__.exists(session.bind):
|
||||
print " - OK! Database seems to contain some data."
|
||||
else:
|
||||
print " - WARNING: Database appears to be empty."
|
||||
|
||||
# CSV; simple checks that the dir exists
|
||||
csvdir = get_csv_directory(options)
|
||||
if not os.path.exists(csvdir):
|
||||
print " - ERROR: No such directory!"
|
||||
elif not os.path.isdir(csvdir):
|
||||
print " - ERROR: Not a directory!"
|
||||
else:
|
||||
print " - OK! Directory exists."
|
||||
|
||||
if os.access(csvdir, os.R_OK):
|
||||
print " - OK! Can read from directory."
|
||||
else:
|
||||
print " - ERROR: Can't read from directory!"
|
||||
|
||||
if os.access(csvdir, os.W_OK):
|
||||
print " - OK! Can write to directory."
|
||||
else:
|
||||
print " - WARNING: Can't write to directory! " \
|
||||
"`dump` will not work. You may need to sudo."
|
||||
|
||||
# Index; the PokedexLookup constructor covers most tests and will
|
||||
# cheerfully bomb if they fail
|
||||
lookup = get_lookup(options, recreate=False)
|
||||
print " - OK! Opened successfully."
|
||||
|
||||
|
||||
### User-facing commands
|
||||
|
||||
def command_lookup(*args):
|
||||
parser = get_parser(verbose=False)
|
||||
options, words = parser.parse_args(list(args))
|
||||
|
||||
name = u' '.join(words)
|
||||
|
||||
session = get_session(options)
|
||||
lookup = get_lookup(options, session=session, recreate=False)
|
||||
|
||||
results = lookup.lookup(name)
|
||||
if not results:
|
||||
print "No matches."
|
||||
elif results[0].exact:
|
||||
print "Matched:"
|
||||
else:
|
||||
print "Fuzzy-matched:"
|
||||
|
||||
for result in results:
|
||||
if hasattr(result.object, 'full_name'):
|
||||
name = result.object.full_name
|
||||
else:
|
||||
name = result.object.name
|
||||
|
||||
print "%s: %s" % (result.object.__tablename__, name),
|
||||
if result.language:
|
||||
print "(%s in %s)" % (result.name, result.language)
|
||||
else:
|
||||
print
|
||||
|
||||
|
||||
def command_help():
|
||||
print u"""pokedex -- a command-line Pokédex interface
|
||||
usage: pokedex {command} [options...]
|
||||
Run `pokedex setup` first, or nothing will work!
|
||||
See http://bugs.veekun.com/projects/pokedex/wiki/CLI for more documentation.
|
||||
|
||||
Commands:
|
||||
help Displays this message.
|
||||
lookup [thing] Look up something in the Pokédex.
|
||||
|
||||
System commands:
|
||||
load Load Pokédex data into a database from CSV files.
|
||||
dump Dump Pokédex data from a database into CSV files.
|
||||
reindex Rebuilds the lookup index from the database.
|
||||
setup Combines load and reindex.
|
||||
status No effect, but prints which engine, index, and csv
|
||||
directory would be used for other commands.
|
||||
|
||||
Global options:
|
||||
-e|--engine=URI By default, all commands try to use a SQLite database
|
||||
in the pokedex install directory. Use this option (or
|
||||
a POKEDEX_DB_ENGINE environment variable) to specify an
|
||||
alternate database.
|
||||
-i|--index=DIR By default, all commands try to put the lookup index in
|
||||
the pokedex install directory. Use this option (or a
|
||||
POKEDEX_INDEX_DIR environment variable) to specify an
|
||||
alternate loction.
|
||||
-q|--quiet Don't print system output. This is the default for
|
||||
non-system commands and setup.
|
||||
-v|--verbose Print system output. This is the default for system
|
||||
commands, except setup.
|
||||
|
||||
System options:
|
||||
-d|--directory=DIR By default, load and dump will use the CSV files in the
|
||||
pokedex install directory. Use this option to specify
|
||||
a different directory.
|
||||
-D|--drop-tables With load, drop all tables before loading data.
|
||||
|
||||
Additionally, load and dump accept a list of table names (possibly with
|
||||
wildcards) and/or csv fileames as an argument list.
|
||||
""".encode(sys.getdefaultencoding(), 'replace')
|
||||
|
||||
sys.exit(0)
|
2
setup.py
2
setup.py
|
@ -16,7 +16,7 @@ setup(
|
|||
|
||||
entry_points = {
|
||||
'console_scripts': [
|
||||
'pokedex = pokedex:main',
|
||||
'pokedex = pokedex.main:main',
|
||||
],
|
||||
},
|
||||
)
|
||||
|
|
Loading…
Add table
Reference in a new issue