veekun_pokedex/pokedex/extract/oras.py

932 lines
32 KiB
Python
Raw Normal View History

2016-02-26 10:05:51 -08:00
"""Dumps data from Omega Ruby and Alpha Sapphire.
Filesystem reference: http://www.projectpokemon.org/wiki/ORAS_File_System
"""
import argparse
from collections import Counter
2016-02-26 10:05:51 -08:00
from collections import OrderedDict
from collections import defaultdict
2016-02-26 10:05:51 -08:00
from contextlib import contextmanager
import io
2016-02-26 10:05:51 -08:00
import itertools
import math
from pathlib import Path
import re
2016-02-26 10:05:51 -08:00
import shutil
import struct
from construct import Array, BitField, Bitwise, Magic, OptionalGreedyRange, Padding, Pointer, Struct, SLInt8, SLInt16, ULInt8, ULInt16, ULInt32
import png
2016-02-26 10:05:51 -08:00
import yaml
from .lib.garc import GARCFile, decrypt_xy_text
from .lib.text import merge_japanese_texts
# TODO fix some hardcoding in here
# TODO finish converting garc parsing to use construct, if possible, i think (i would not miss substream)
# way way more sprite work in here...
CANON_LANGUAGES = ('ja', 'en', 'fr', 'it', 'de', 'es', 'ko')
ORAS_SCRIPT_FILES = {
'ja-kana': 'rom/a/0/7/1',
'ja-kanji': 'rom/a/0/7/2',
'en': 'rom/a/0/7/3',
'fr': 'rom/a/0/7/4',
'it': 'rom/a/0/7/5',
'de': 'rom/a/0/7/6',
'es': 'rom/a/0/7/7',
'ko': 'rom/a/0/7/8',
}
ORAS_SCRIPT_ENTRIES = {
'form-names': 5,
# TODO these might be backwards, i'm just guessing
'species-flavor-alpha-sapphire': 6,
'species-flavor-omega-ruby': 7,
'move-contest-flavor': 13,
'move-names': 14,
# Note: table 15 is also a list of move names, but with a few at the end
# missing? XY leftovers?
'move-flavor': 16,
'type-names': 18,
'ability-flavor': 36,
'ability-names': 37,
'nature-names': 51,
'species-names': 98,
}
# The first element in each list is the name of the BASE form -- if it's not
# None, the base form will be saved under two filenames
ORAS_EXTRA_SPRITE_NAMES = {
# Cosplay Pikachu
25: (None, 'rockstar', 'belle', 'popstar', 'phd', 'libre', 'cosplay'),
# Unown
201: tuple('abcdefghijklmnopqrstuvwxyz') + ('exclamation', 'question'),
# Castform
351: (None, 'sunny', 'rainy', 'snowy'),
# Kyogre and Groudon
382: (None, 'primal',),
383: (None, 'primal',),
# Deoxys
386: ('normal', 'attack', 'defense', 'speed'),
# Burmy and Wormadam
412: ('plant', 'sandy', 'trash'),
413: ('plant', 'sandy', 'trash'),
# Cherrim
421: ('overcast', 'sunshine',),
# Shellos and Gastrodon
422: ('west', 'east',),
423: ('west', 'east',),
# Rotom
479: (None, 'heat', 'wash', 'frost', 'fan', 'mow'),
# Giratina
487: ('altered', 'origin',),
# Shaymin
492: ('land', 'sky',),
# Arceus
493: (
'normal', 'fighting', 'flying', 'poison', 'ground', 'rock', 'bug',
'ghost', 'steel', 'fire', 'water', 'grass', 'electric', 'psychic',
'ice', 'dragon', 'dark', 'fairy',
),
# Basculin
550: ('red-striped', 'blue-striped',),
# Darmanitan
555: ('standard', 'zen',),
# Deerling and Sawsbuck
585: ('spring', 'summer', 'autumn', 'winter'),
586: ('spring', 'summer', 'autumn', 'winter'),
2016-02-26 10:05:51 -08:00
# Tornadus, Thundurus, and Landorus
641: ('incarnate', 'therian'),
642: ('incarnate', 'therian'),
645: ('incarnate', 'therian'),
# Kyurem
646: (None, 'white', 'black'),
# Keldeo
647: ('ordinary', 'resolute'),
# Meloetta
648: ('aria', 'pirouette'),
# Genesect
649: (None, 'douse', 'shock', 'burn', 'chill'),
# Vivillon
666: (
'icy-snow', 'polar', 'tundra', 'continental', 'garden', 'elegant',
'meadow', 'modern', 'marine', 'archipelago', 'high-plains',
'sandstorm', 'river', 'monsoon', 'savanna', 'sun', 'ocean', 'jungle',
'fancy', 'poke-ball',
),
# Flabébé/Floette/Florges
669: ('red', 'yellow', 'orange', 'blue', 'white'),
670: ('red', 'yellow', 'orange', 'blue', 'white', 'eternal'),
671: ('red', 'yellow', 'orange', 'blue', 'white'),
# Furfrou
676: (
'natural', 'heart', 'star', 'diamond', 'debutante', 'matron', 'dandy',
'la-reine', 'kabuki', 'pharaoh',
),
# Meowstic
# TODO uh oh, this is handled as forms in boxes but as gender in sprites, maybe?
678: ('male', 'female'),
2016-02-26 10:05:51 -08:00
# Aegislash
681: ('shield', 'blade'),
# Pumpkaboo/Gourgeist
710: ('average', 'small', 'large', 'super'),
711: ('average', 'small', 'large', 'super'),
# Xerneas
716: ('neutral', 'active'),
# Hoopa
720: ('confined', 'unbound'),
}
pokemon_struct = Struct(
'pokemon',
ULInt8('stat_hp'),
ULInt8('stat_atk'),
ULInt8('stat_def'),
ULInt8('stat_speed'),
ULInt8('stat_spatk'),
ULInt8('stat_spdef'),
ULInt8('type1'),
ULInt8('type2'),
ULInt8('catch_rate'),
ULInt8('stage'),
ULInt16('effort'),
ULInt16('held_item1'),
ULInt16('held_item2'),
ULInt16('held_item3'), # dark grass from bw, unused in oras?
ULInt8('gender_rate'),
ULInt8('steps_to_hatch'),
ULInt8('base_happiness'),
ULInt8('exp_curve'),
ULInt8('egg_group1'),
ULInt8('egg_group2'),
ULInt8('ability1'),
ULInt8('ability2'),
ULInt8('ability_dream'),
ULInt8('safari_escape'),
ULInt16('form_species_start'),
ULInt16('form_sprite_start'),
ULInt8('form_count'),
ULInt8('color'),
ULInt16('base_exp'),
ULInt16('height'),
ULInt16('weight'),
Bitwise(
BitField('machines', 14 * 8, swapped=True),
),
Padding(2),
ULInt32('tutors'),
ULInt16('mystery1'),
ULInt16('mystery2'),
ULInt32('bp_tutors1'),
ULInt32('bp_tutors2'),
ULInt32('bp_tutors3'),
ULInt32('bp_tutors4'),
)
pokemon_mega_evolutions_struct = Array(
3,
Struct(
'pokemon_mega_evolutions',
ULInt16('number'),
ULInt16('mode'),
ULInt16('mega_stone_itemid'),
Padding(2),
)
)
egg_moves_struct = Struct(
'egg_moves',
ULInt16('count'),
Array(
lambda ctx: ctx.count,
ULInt16('moveids'),
),
)
level_up_moves_struct = OptionalGreedyRange(
Struct(
'level_up_pair',
SLInt16('moveid'),
SLInt16('level'),
),
)
move_struct = Struct(
'move',
ULInt8('type'),
ULInt8('category'),
ULInt8('damage_class'),
ULInt8('power'),
ULInt8('accuracy'),
ULInt8('pp'),
SLInt8('priority'),
ULInt8('min_max_hits'),
SLInt16('caused_effect'),
ULInt8('effect_chance'),
ULInt8('status'),
ULInt8('min_turns'),
ULInt8('max_turns'),
ULInt8('crit_rate'),
ULInt8('flinch_chance'),
ULInt16('effect'),
SLInt8('recoil'),
ULInt8('healing'),
ULInt8('range'), # ok
Bitwise(
BitField('stat_change', 24),
),
Bitwise(
BitField('stat_amount', 24),
),
Bitwise(
BitField('stat_chance', 24),
),
ULInt8('padding0'), # ok
ULInt8('padding1'), # ok
ULInt16('flags'),
ULInt8('padding2'), # ok
ULInt8('extra'),
)
move_container_struct = Struct(
'move_container',
Magic(b'WD'), # waza... descriptions?
ULInt16('record_ct'),
Array(
lambda ctx: ctx.record_ct,
Struct(
'records',
ULInt32('offset'),
Pointer(lambda ctx: ctx.offset, move_struct),
),
),
)
pokemon_sprite_struct = Struct(
'pokemon_sprite_config',
ULInt16('index'),
ULInt16('female_index'),
ULInt32('form_index_offset'),
ULInt32('right_index_offset'),
ULInt16('form_count'),
ULInt16('right_count'),
)
# There are 63 tutor move bits in use, but only 60 move tutors -- the moves
# appear to be largely inherited from B2W2 but these are just not exposed in
# ORAS
ORAS_UNUSED_MOVE_TUTORS = {'dark-pulse', 'roost', 'sleep-talk'}
# Unsure where this is in the binary
ORAS_NORMAL_MOVE_TUTORS = (
'grass-pledge',
'fire-pledge',
'water-pledge',
'frenzy-plant',
'blast-burn',
'hydro-cannon',
'draco-meteor',
'dragon-ascent',
)
@contextmanager
def read_garc(path):
with path.open('rb') as f:
yield GARCFile(f)
# XXX christ lol. taken from SO. fodder for camel maybe
def represent_ordereddict(dumper, data):
value = []
for item_key, item_value in data.items():
node_key = dumper.represent_data(item_key)
node_value = dumper.represent_data(item_value)
value.append((node_key, node_value))
return yaml.nodes.MappingNode(u'tag:yaml.org,2002:map', value)
yaml.add_representer(OrderedDict, represent_ordereddict)
def represent_tuple(dumper, data):
return yaml.nodes.SequenceNode(
u'tag:yaml.org,2002:seq',
[dumper.represent_data(item) for item in data],
flow_style=True,
)
yaml.add_representer(tuple, represent_tuple)
def dump_to_yaml(data, f):
# TODO gonna need a better way to handle flow style
yaml.dump(
data, f,
default_flow_style=False,
allow_unicode=True,
)
def extract_data(root, out):
# TODO big conceptual question for the yaml thing: how do we decide how the
# identifiers work in the per-version data? the "global" identifiers are
# in theory based on the names from the latest version, and the game dump
# scripts shouldn't have to care about what the latest version is
# 1. make the canon data not be keyed by identifier (makes it hard to
# follow what's going on in flavor text files etc, and unclear how to match
# up items across versions)
# 2. make each version's data keyed by its own identifiers (makes it hard
# to align them all when loading everything, and unclear how to match up
# items whose names change across versions)
# 3. hardcode a mapping of version+identifier pairs to their current
# identifiers, when they changed, which is a little ugly but also solves
# all the match-up problems and is what we'd basically have to do anyway
# -------------------------------------------------------------------------
# Names and flavor text
texts = {}
for lang, fn in ORAS_SCRIPT_FILES.items():
texts[lang] = {}
with read_garc(root / fn) as garc:
for entryname, entryid in ORAS_SCRIPT_ENTRIES.items():
entry = garc[entryid][0]
texts[lang][entryname] = decrypt_xy_text(entry.read())
# Japanese text is special! It's written in both kanji and kana, and we
# want to combine them
texts['ja'] = {}
for entryname in ORAS_SCRIPT_ENTRIES:
kanjis = texts['ja-kanji'][entryname]
kanas = texts['ja-kana'][entryname]
# But not if they're names of things.
# (TODO this might not be true in the case of, say, towns? in which
# case, what do we do? we want to ultimately put these in urls and
# whatnot, right, but we don't want furigana there :S do we need a
# separate "identifier" field /per language/?)
if entryname.endswith('names'):
assert kanjis == kanas
texts['ja'][entryname] = kanjis
else:
assert len(kanas) == len(kanjis)
texts['ja'][entryname] = [
merge_japanese_texts(kanji, kana)
for (kanji, kana) in zip(kanjis, kanas)
]
del texts['ja-kanji']
del texts['ja-kana']
identifiers = {}
identifiers['species'] = [
# TODO better identifier creation, to be determined later, but surely
# want to lose . and '
# TODO handling forms here is awkward since the form names are
# contained in the personal struct
((species_name or '') + '-' + form_name).lower().replace(' ', '-')
for (species_name, form_name) in itertools.zip_longest(
texts['en']['species-names'],
texts['en']['form-names'],
)
]
identifiers['move'] = [
# TODO better identifier creation, to be determined later, but surely
# want to lose . and '
name.lower().replace(' ', '-')
for name in texts['en']['move-names']
]
textdir = out / 'script'
if not textdir.exists():
textdir.mkdir()
for lang in CANON_LANGUAGES:
with (textdir / (lang + '.yaml')).open('w') as f:
# TODO this should use identifiers, not be lists
# TODO need to skip slot 0 which is junk
dump_to_yaml(texts[lang], f)
# -------------------------------------------------------------------------
# Scrape some useful bits from the binary
with (root / 'exe/code.bin').open('rb') as f:
# Tutored moves
tutor_moves = dict(tutors=ORAS_NORMAL_MOVE_TUTORS)
f.seek(0x004960f8)
for n in range(1, 5):
key = "bp_tutors{}".format(n)
moves = tutor_moves[key] = []
while True:
moveid, = struct.unpack('<H', f.read(2))
if moveid >= len(identifiers['move']):
break
moves.append(identifiers['move'][moveid])
# TMs
machines = []
f.seek(0x004a67ee)
machineids = struct.unpack('<107H', f.read(2 * 107))
# Order appears to be based on some gen 4 legacy: TMs 1 through 92, HMs
# 1 through 6, then the other eight TMs and the last HM. But the bits
# in the Pokémon structs are in the expected order of 1 through 100, 1
# through 7
machines = [
identifiers['move'][moveid]
for moveid in
machineids[0:92] +
machineids[98:106] +
machineids[92:98] +
machineids[106:]
]
# -------------------------------------------------------------------------
# Pokémon structs
pokemon_data = []
with read_garc(root / 'rom/a/1/9/5') as garc:
personals = [subfile[0].read() for subfile in garc]
_pokemon_forms = {} # "real" species id => (base species id, form name id)
_next_name_form_id = 723
for i, personal in enumerate(personals[:-1]):
record = pokemon_struct.parse(personal)
# TODO transform to an OD somehow probably
pokemon_data.append(record)
#print("{:3d} {:15s} {} {:5d} {:5d}".format(
# i,
# identifiers['species'][baseid],
# ('0'*16 + bin(record.mystery1)[2:])[-16:],
# record.mystery2,
# record.stage,
#))
# TODO some pokemon have sprite starts but no species start, because their sprites vary obv
if record.form_count > 1:
# The form names appear to be all just jammed at the end in order,
# completely unrelated to either of the "start" offsets here
for offset in range(record.form_count - 1):
#form_name = texts['en']['form-names'][_next_name_form_id]
if record.form_species_start:
# TODO still no idea how "intangible" forms are being
# handled in the new schema
_pokemon_forms[record.form_species_start + offset] = i, _next_name_form_id
_next_name_form_id += 1
if record.form_species_start:
for offset in range(record.form_count - 1):
# TODO grab the form names argh
identifiers['species'][record.form_species_start + offset] = identifiers['species'][i]
#for i in range(723, 825 + 1):
# base_species_id, form_name_id = _pokemon_forms[i]
# species_name = texts['en']['species-names'][base_species_id]
# form_name = texts['en']['form-names'][form_name_id]
# print(i, species_name, '/', form_name)
# -------------------------------------------------------------------------
# Move stats
movesets = OrderedDict()
with read_garc(root / 'rom/a/1/8/9') as garc:
# Only one subfile
data = garc[0][0].read()
container = move_container_struct.parse(data)
for n, record in enumerate(container.records):
m = record.move
# TODO with the release of oras all moves have contest types and effects again! where are they??
#print("{:3d} {:20s} | {m.type:3d} {m.power:3d} {m.pp:2d} {m.accuracy:3d} / {m.priority:2d} {m.range:2d} {m.damage_class:1d} / {m.effect:3d} {m.caused_effect:3d} {m.effect_chance:3d} -- {m.status:3d} {m.min_turns:3d} {m.max_turns:3d} {m.crit_rate:3d} {m.flinch_chance:3d} {m.recoil:4d} {m.healing:3d} / {m.stat_change:06x} {m.stat_amount:06x} {m.stat_chance:06x} / {m.padding0:3d} {m.padding1:3d} {m.flags:04x} {m.padding2:3d} {m.extra:3d}".format(
# n,
# identifiers['move'][n],
# m=record.move,
#))
# Egg moves
with read_garc(root / 'rom/a/1/9/0') as garc:
for i, subfile in enumerate(garc):
ident = identifiers['species'][i]
data = subfile[0].read()
if not data:
continue
container = egg_moves_struct.parse(data)
moveset = movesets.setdefault(ident, OrderedDict())
eggset = moveset['egg'] = []
for moveid in container.moveids:
eggset.append(identifiers['move'][moveid])
# Level-up moves
with read_garc(root / 'rom/a/1/9/1') as garc:
for i, subfile in enumerate(garc):
ident = identifiers['species'][i]
level_up_moves = subfile[0].read()
moveset = movesets.setdefault(ident, OrderedDict())
levelset = moveset['level'] = []
lastlevel = None
order = 1
for pair in level_up_moves_struct.parse(level_up_moves):
# End is indicated with -1, -1
if pair.moveid <= 0:
break
levelset.append((
pair.level,
identifiers['move'][pair.moveid],
))
if pair.level == lastlevel:
order += 1
else:
lastlevel = pair.level
order = 1
# Evolution
#with read_garc(root / 'rom/a/1/9/2') as garc:
# for subfile in garc:
# evolution = subfile[0].read()
# print(repr(evolution))
# Mega evolution
#with read_garc(root / 'rom/a/1/9/3') as garc:
# for subfile in garc:
# evolution = subfile[0].read()
# print(repr(evolution))
# TODO what is a/1/9/4? 8 files of 404 bytes each
# Baby Pokémon
#with read_garc(root / 'rom/a/1/9/6') as garc:
# for subfile in garc:
# baby_pokemon = subfile[0].read()
# print(repr(baby_pokemon))
# Item stats
#with read_garc(root / 'rom/a/1/9/7') as garc:
# for subfile in garc:
# item_stats = subfile[0].read()
# print(repr(item_stats))
# Tutor moves (from the personal structs)
for i, datum in enumerate(pokemon_data):
ident = identifiers['species'][i]
moveset = movesets.setdefault(ident, OrderedDict())
tutorset = moveset['tutor'] = []
for key, tutors in tutor_moves.items():
for bit, moveident in enumerate(tutors):
if moveident in ORAS_UNUSED_MOVE_TUTORS:
continue
if not datum[key] & (1 << bit):
continue
tutorset.append(moveident)
# TMs
machineset = moveset['machine'] = []
for bit, moveident in enumerate(machines):
if not datum['machines'] & (1 << bit):
continue
machineset.append(moveident)
with (out / 'movesets.yaml').open('w') as f:
dump_to_yaml(movesets, f)
def get_mega_counts(root):
"""Return a dict mapping Pokémon ids to how many mega evolutions each one
has.
"""
mega_counts = {} # pokemonid => number of mega evos
with read_garc(root / 'rom/a/1/9/3') as garc:
for pokemonid, subfile in enumerate(garc):
mega_evos = pokemon_mega_evolutions_struct.parse_stream(subfile[0])
mega_counts[pokemonid] = max(
mega_evo.number for mega_evo in mega_evos)
return mega_counts
class SpriteFileNamer:
"""Do you have a big set of sprites, and a separate list of stuff
identifying them, as happens in XY and ORAS? I will sort that all out for
you.
"""
def __init__(self, out, mega_counts, form_names):
self.out = out
self.mega_counts = mega_counts
self.form_names = form_names
self.index_to_filenames = defaultdict(list)
self.seen = set()
def add(self, index, pokemonid, formid=0, right=False, back=False, shiny=False, female=False):
# Check that we don't try to do the same one twice
if index in self.index_to_filenames:
raise ValueError("Index {} is already {}".format(
index, self.index_to_filenames[index]))
key = (pokemonid, formid, right, back, shiny, female)
if key in self.seen:
raise ValueError("Duplicate sprite: {!r}".format(key))
self.seen.add(key)
# Figure out the form name
# TODO this assumes a Pokémon cannot have both forms and mega
# evolutions, which is true... for now
if pokemonid in self.form_names:
form = self.form_names[pokemonid][formid]
elif formid == 0:
form = None
elif self.mega_counts[pokemonid]:
if self.mega_counts[pokemonid] == 1:
form = ['mega'][formid - 1]
elif self.mega_counts[pokemonid] == 2:
form = ['mega-x', 'mega-y'][formid - 1]
else:
raise ValueError(
"Don't know how to name {} mega evolutions for Pokémon {}"
.format(self.mega_counts[pokemonid], pokemonid))
else:
raise ValueError("Pokemon {} doesn't have forms".format(pokemonid))
# Construct the directory
parts = []
if right:
parts.append('right')
if back:
parts.append('back')
if shiny:
parts.append('shiny')
if female:
parts.append('female')
# Build the final filename
bare_filename = "{}.png".format(pokemonid)
if form:
parts.append("{}-{}.png".format(pokemonid, form))
else:
parts.append(bare_filename)
filename = '/'.join(parts)
self.index_to_filenames[index].append(filename)
# For named "default" forms, create two output files
if form and formid == 0:
parts[-1] = bare_filename
self.index_to_filenames[index].append('/'.join(parts))
# Special case for Meowstic: duplicate its female form as a formless
# female sprite
if form == 'female' and not female:
parts.insert(-1, 'female')
parts[-1] = bare_filename
self.index_to_filenames[index].append('/'.join(parts))
def inject(self, index, filename):
"""Manually specify the filename for an index. Helpful for edge cases
like egg sprites.
"""
if index in self.index_to_filenames:
raise ValueError("Index {} is already {}".format(
index, self.index_to_filenames[index]))
self.index_to_filenames[index].append(filename)
# TODO we oughta create aliases for any that are missing?
# pumpkaboo/gourgeist and arceus don't have separate box icons, for
# example.
@contextmanager
def open(self, index, prefix=None):
out = self.out
if prefix:
out /= prefix
filenames = self.index_to_filenames[index]
if len(filenames) == 0:
raise RuntimeError("Don't have filenames for index {}".format(index))
fn = out / filenames[0]
if not fn.parent.exists():
fn.parent.mkdir(parents=True)
with fn.open('wb') as f:
yield f
for path in filenames[1:]:
fn2 = out / path
# TODO this duplication is annoying and we can probably do it in
# one fell swoop instead of constantly rechecking, maybe during the
# same timeframe that we fill in missing forms
if not fn2.parent.exists():
fn2.parent.mkdir(parents=True)
shutil.copyfile(str(fn), str(fn2))
2016-02-26 10:05:51 -08:00
def extract_box_sprites(root, out):
namer = SpriteFileNamer(
out, get_mega_counts(root), ORAS_EXTRA_SPRITE_NAMES)
2016-02-26 10:05:51 -08:00
with (root / 'exe/code.bin').open('rb') as f:
# Form configuration, used to put sprites in the right order
# NOTE: in x/y the address is 0x0043ea98
f.seek(0x0047d650)
# TODO magic number
for n in range(722):
sprite = pokemon_sprite_struct.parse_stream(f)
namer.add(sprite.index, n)
2016-02-26 10:05:51 -08:00
if sprite.female_index != sprite.index:
namer.add(sprite.female_index, n, female=True)
2016-02-26 10:05:51 -08:00
# Note that these addresses are relative to RAM, and the binary is
# loaded into RAM starting at 0x100000, so we need to subtract that
# to get a file position
pos = f.tell()
form_indices = ()
right_indices = ()
if sprite.form_index_offset:
f.seek(sprite.form_index_offset - 0x100000)
form_indices = struct.unpack(
"<{}H".format(sprite.form_count),
f.read(2 * sprite.form_count),
)
for form, form_idx in enumerate(form_indices):
# Ignore the first form, since it's the default and thus
# covered by `index` already
if form == 0:
continue
if form_idx == sprite.index:
continue
namer.add(form_idx, n, form)
2016-02-26 10:05:51 -08:00
if sprite.right_index_offset:
f.seek(sprite.right_index_offset - 0x100000)
right_indices = struct.unpack(
"<{}H".format(sprite.right_count),
f.read(2 * sprite.right_count),
)
if sprite.form_count:
assert sprite.right_count == sprite.form_count
for form, (form_idx, right_idx) in enumerate(zip(form_indices, right_indices)):
if form_idx == right_idx:
continue
namer.add(right_idx, n, form, right=True)
2016-02-26 10:05:51 -08:00
else:
assert sprite.right_count == 2
assert right_indices[0] == right_indices[1]
if right_indices[0] != sprite.index:
namer.add(right_indices[0], n, right=True)
2016-02-26 10:05:51 -08:00
f.seek(pos)
pokemon_sprites_dir = out
if not pokemon_sprites_dir.exists():
pokemon_sprites_dir.mkdir()
with read_garc(root / 'rom/a/0/9/1') as garc:
from .lib.clim import decode_clim
for i, subfile in enumerate(garc):
if i == 0:
# Dummy blank sprite, not interesting to us
continue
elif i == 333:
# Duplicate Entei sprite that's not used
continue
2016-02-26 10:05:51 -08:00
elif i == len(garc) - 1:
# Very last one is egg
namer.inject(i, 'egg.png')
2016-02-26 10:05:51 -08:00
data = subfile[0].read()
width, height, color_depth, palette, pixels = decode_clim(data)
2016-02-26 10:05:51 -08:00
png_writer = png.Writer(
width=width,
height=height,
palette=palette,
2016-02-26 10:05:51 -08:00
)
# TODO this is bad.
if 'right/' in namer.index_to_filenames[i][0]:
for row in pixels:
row.reverse()
# I want to preserve Zhorken's good idea of indicating the original
# bit depth with an sBIT chunk, but PyPNG can't do that directly,
# so we need to do a bit of nonsense.
buf = io.BytesIO()
png_writer.write(buf, pixels)
# Read the PNG as chunks, and manually add an sBIT chunk
buf.seek(0)
png_reader = png.Reader(buf)
chunks = list(png_reader.chunks())
sbit = bytes([color_depth] * 3)
chunks.insert(1, ('sBIT', sbit))
# Write chunks to an actual file
with namer.open(i) as f:
png.write_chunks(f, chunks)
2016-02-26 10:05:51 -08:00
def extract_dex_sprites(root, out):
# Some Pokémon have dex sprites for their forms, too, and they're all
# clustered together, so we have to do a little work to fix the numbering.
# Luckily the dex sprites are in the same order as the models
# (unsurprising, as they're just model renders), which also tells us what
# Pokémon have female forms. The mega evolution map tells us which forms
# are megas, and the rest are listed manually above as
# ORAS_EXTRA_SPRITE_NAMES.
namer = SpriteFileNamer(
out, get_mega_counts(root), ORAS_EXTRA_SPRITE_NAMES)
2016-02-26 10:05:51 -08:00
with read_garc(root / 'rom/a/0/0/8') as garc:
f = garc[0][0]
# TODO magic number
for n in range(721):
# Unlike /virtually everywhere else/, Pokémon are zero-indexed here
pokemonid = n + 1
# Index of the first model (also zero-indexed), how many models the
# Pokémon has, and some flags
start, count, flags = struct.unpack('<HBB', f.read(4))
model_num = start + 1
# For some asinine reason, Xerneas is counted as two separate
# Pokémon in the dex sprites but not the models, so we have to
# shift everything after it back by 1
if pokemonid == 716:
count = 2
elif pokemonid >= 717:
model_num += 1
namer.add(model_num, pokemonid)
2016-02-26 10:05:51 -08:00
form_count = count - 1 # discount "base" form
total_model_count = model_num + count - 1
# Don't know what flag 1 is; everything has it.
# Flag 2 means the first alternate form is a female variant.
if flags & 2:
assert form_count > 0
form_count -= 1
model_num += 1
namer.add(model_num, pokemonid, female=True)
2016-02-26 10:05:51 -08:00
# Flag 4 just means there are more forms?
if flags & 4:
assert form_count
for formid in range(1, form_count + 1):
2016-02-26 10:05:51 -08:00
model_num += 1
namer.add(model_num, pokemonid, formid)
2016-02-26 10:05:51 -08:00
# And now, do the ripping
pokemon_sprites_dir = out
with read_garc(root / 'rom/a/2/6/3') as garc:
from .lib.clim import decode_clim
for i, subfile in enumerate(garc):
shiny_prefix = None
2016-02-26 10:05:51 -08:00
if i > total_model_count:
i -= total_model_count
# TODO this should be a real feature, as should the 'right'
# hack in the other code
2016-02-26 10:05:51 -08:00
shiny_prefix = 'shiny/'
if i == 0:
# Dummy blank sprite, not interesting to us
continue
elif 37 <= i <= 41:
# Cosplay Pikachu's outfits -- the sprites are blank, so saving
# these is not particularly useful
continue
data = subfile[0].read()
width, height, color_depth, palette, pixels = decode_clim(data)
assert not palette
2016-02-26 10:05:51 -08:00
png_writer = png.Writer(
width=width,
height=height,
alpha=True,
)
with namer.open(i, prefix=shiny_prefix) as f:
2016-02-26 10:05:51 -08:00
png_writer.write(f, (itertools.chain(*row) for row in pixels))
def _munge_source_arg(strpath):
path = Path(strpath)
if not path.is_dir():
raise argparse.ArgumentTypeError(
"{!r} is not a directory".format(strpath))
# TODO something something romfs, exefs
return path
def make_arg_parser():
p = argparse.ArgumentParser()
p.add_argument('what', choices=('data', 'dex-sprites', 'box-sprites'), help='what to extract')
# TODO should verify that this is an actual game dump, and find the rom/exe
p.add_argument('source', type=_munge_source_arg, help='path to an unpacked game image')
p.add_argument('dest', type=_munge_source_arg, help='directory to dump the results into')
return p
def main(args):
parser = make_arg_parser()
args = parser.parse_args(args)
# TODO support 'all', and just make some subdirectories per thing
# TODO or maybe merge all the sprite things together since stuff will need moving around anyway idk
if args.what == 'data':
extract_data(args.source, args.dest)
elif args.what == 'dex-sprites':
extract_dex_sprites(args.source, args.dest)
elif args.what == 'box-sprites':
extract_box_sprites(args.source, args.dest)
if __name__ == '__main__':
import sys
main(sys.argv[1:])