mirror of
https://github.com/veekun/pokedex.git
synced 2024-08-20 18:16:34 +00:00
Fixed a slew of foreign key import problems. #29
Curse's type_id was 0, which is bogus; this has been fixed by creating a real ????? type. Fourth-gen moves all had zero as a contest effect id, which was also bogus. Pokémon 494 and 495 were junk and have been scrapped entirely. pokemon_form_groups's description column was too short. pokedex's connect() now takes kwargs passed to sessionmaker(). A more major change: some tables, like pokemon, are self-referential and contain rows that refer to rows later in the table (for example, Pikachu evolves from Pichu, which has a higher id). At the moment such a row is loaded, the foreign key is thus bogus. I solved this by turning on autocommit and wrapping add() in a try block, then attempting to re-add every failed row again after the rest of the table is finished. This slows the import down a bit, but makes it work perfectly with foreign key checks on.
This commit is contained in:
parent
185264a288
commit
634ef3ed1e
9 changed files with 156 additions and 140 deletions
pokedex
|
@ -1,6 +1,7 @@
|
|||
# encoding: utf8
|
||||
import sys
|
||||
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
import sqlalchemy.types
|
||||
|
||||
from .db import connect, metadata, tables as tables_module
|
||||
|
@ -25,7 +26,8 @@ def csvimport(engine_uri, directory='.'):
|
|||
|
||||
from sqlalchemy.orm.attributes import instrumentation_registry
|
||||
|
||||
session = connect(engine_uri)
|
||||
# Use autocommit in case rows fail due to foreign key incest
|
||||
session = connect(engine_uri, autocommit=True, autoflush=False)
|
||||
|
||||
metadata.create_all()
|
||||
|
||||
|
@ -59,6 +61,7 @@ def csvimport(engine_uri, directory='.'):
|
|||
|
||||
# Print the table name but leave the cursor in a fixed column
|
||||
print table_name + '...', ' ' * (40 - len(table_name)),
|
||||
sys.stdout.flush()
|
||||
|
||||
try:
|
||||
csvfile = open("%s/%s.csv" % (directory, table_name), 'rb')
|
||||
|
@ -70,6 +73,12 @@ def csvimport(engine_uri, directory='.'):
|
|||
reader = csv.reader(csvfile, lineterminator='\n')
|
||||
column_names = [unicode(column) for column in reader.next()]
|
||||
|
||||
# Self-referential tables may contain rows with foreign keys of
|
||||
# other rows in the same table that do not yet exist. We'll keep
|
||||
# a running list of these and try inserting them again after the
|
||||
# rest are done
|
||||
failed_rows = []
|
||||
|
||||
for csvs in reader:
|
||||
row = table_class()
|
||||
|
||||
|
@ -91,11 +100,33 @@ def csvimport(engine_uri, directory='.'):
|
|||
|
||||
setattr(row, column_name, value)
|
||||
|
||||
session.add(row)
|
||||
try:
|
||||
session.add(row)
|
||||
session.flush()
|
||||
except IntegrityError as e:
|
||||
failed_rows.append(row)
|
||||
|
||||
session.commit()
|
||||
print 'loaded'
|
||||
# Loop over the failed rows and keep trying to insert them. If a loop
|
||||
# doesn't manage to insert any rows, bail.
|
||||
do_another_loop = True
|
||||
while failed_rows and do_another_loop:
|
||||
do_another_loop = False
|
||||
|
||||
for i, row in enumerate(failed_rows):
|
||||
try:
|
||||
session.add(row)
|
||||
session.flush()
|
||||
|
||||
# Success!
|
||||
del failed_rows[i]
|
||||
do_another_loop = True
|
||||
except IntegrityError as e:
|
||||
pass
|
||||
|
||||
if failed_rows:
|
||||
print len(failed_rows), "rows failed"
|
||||
else:
|
||||
print 'loaded'
|
||||
|
||||
def csvexport(engine_uri, directory='.'):
|
||||
import csv
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue