import io
import struct
import zlib
from .hexdump import tohex, toout
import crodump.koddecoder


class Datafile:
    """Represent a single .dat with it's .tad index file"""

    def __init__(self, name, dat, tad, compact, kod):
        """
        name    -- label used in diagnostic output.
        dat     -- file object for the .dat data file.
        tad     -- file object for the .tad index file.
        compact -- when True, .tad entries are read from disk on demand
                   instead of being cached in memory.
        kod     -- optional KOD decoder instance.
        """
        self.name = name
        self.dat = dat
        self.tad = tad
        self.compact = compact

        self.readdathdr()
        self.readtad()

        self.dat.seek(0, io.SEEK_END)
        self.datsize = self.dat.tell()

        # NOTE(review): a caller-supplied `kod` is kept only when falsy or the
        # file is encrypted; otherwise a default decoder replaces it — confirm
        # this precedence is intended.
        self.kod = kod if not kod or self.isencrypted() else crodump.koddecoder.new()

    def isencrypted(self):
        # versions 01.04 / 01.05 and all v4 files are presumed encrypted.
        return self.version in (b'01.04', b'01.05') or self.isv4()

    def isv3(self):
        # 01.02: 32 bit file offsets
        # 01.03: 64 bit file offsets
        # 01.04: encrypted?, 32bit
        # 01.05: encrypted?, 64bit
        return self.version in (b'01.02', b'01.03', b'01.04', b'01.05')

    def isv4(self):
        # 01.11  v4 ( 64bit )
        # 01.14  v4 ( 64bit ), encrypted?
        # 01.13  ?? I have not seen this version anywhere yet.
        return self.version in (b'01.11', b'01.13', b'01.14')

    def isv7(self):
        # 01.19  ?? I have not seen this version anywhere yet.
        return self.version in (b'01.19',)

    def readdathdr(self):
        """
        Read the .dat file header.

        Note that the 19 byte header is followed by 0xE9 random bytes,
        generated by 'srand(time())' followed by 0xE9 times obfuscate(rand())
        """
        self.dat.seek(0)
        hdrdata = self.dat.read(19)

        (
            magic,           # +00  8 bytes
            self.hdrunk,     # +08  uint16
            self.version,    # +0a  5 bytes
            self.encoding,   # +0f  uint16
            self.blocksize,  # +11  uint16
        ) = struct.unpack("<8sH5sHH", hdrdata)

        if magic != b"CroFile\x00":
            print("unknown magic: ", magic)
            raise Exception("not a Crofile")
        self.use64bit = self.version in (b"01.03", b"01.05", b"01.11")

        # blocksize
        #   0040 -> Bank
        #   0400 -> Index or Sys
        #   0200 -> Stru or Sys
        # encoding
        #   bit0 = 'KOD encoded'
        #   bit1 = compressed

    def readtad(self):
        """
        read and decode the .tad file header, and cache (or measure) the
        index entries that follow it.
        """
        self.tad.seek(0)
        if self.isv3():
            hdrdata = self.tad.read(2 * 4)
            self.nrdeleted, self.firstdeleted = struct.unpack("<2L", hdrdata)
        elif self.isv4():
            hdrdata = self.tad.read(4 * 4)
            unk1, self.nrdeleted, self.firstdeleted, unk2 = struct.unpack("<4L", hdrdata)
        else:
            raise Exception("unsupported .tad version")
        self.tadhdrlen = self.tad.tell()
        self.tadentrysize = 16 if self.use64bit else 12

        if self.compact:
            # leave entries on disk; tadidx_seek() will read them on demand.
            self.tad.seek(0, io.SEEK_END)
        else:
            self.idxdata = self.tad.read()
        self.tadsize = self.tad.tell() - self.tadhdrlen

        self.nrofrecords = self.tadsize // self.tadentrysize
        if self.tadsize % self.tadentrysize:
            print("WARN: leftover data in .tad")

    def tadidx(self, idx):
        """
        Return the (offset, length, checksum) index entry for record `idx`
        (0-based), either from the in-memory cache or from disk.
        """
        # If we're supposed to be more compact but slower,
        # lookup from the file instead of the cached .tad
        if self.compact:
            return self.tadidx_seek(idx)
        if self.use64bit:
            # 01.03 and 01.11 have 64 bit file offsets
            return struct.unpack_from("<QLL", self.idxdata, 16 * idx)
        else:
            # NOTE(review): this branch was lost in the mangled source and has
            # been reconstructed (32-bit offset entries) — verify upstream.
            return struct.unpack_from("<LLL", self.idxdata, 12 * idx)

    def tadidx_seek(self, idx):
        """
        Read a single .tad index entry directly from the file (compact mode).

        NOTE(review): this method was lost in the mangled source and has been
        reconstructed from the entry layout used by tadidx() — verify upstream.
        """
        self.tad.seek(self.tadhdrlen + idx * self.tadentrysize)
        entry = self.tad.read(self.tadentrysize)
        if self.use64bit:
            return struct.unpack("<QLL", entry)
        else:
            return struct.unpack("<LLL", entry)

    def readdata(self, ofs, size):
        """
        Read `size` bytes at offset `ofs` from the .dat file.

        NOTE(review): reconstructed — the original text of this method was
        lost in the mangled source; verify upstream.
        """
        self.dat.seek(ofs)
        return self.dat.read(size)

    def readrec(self, idx):
        """
        Read and decode a single record, `idx` is 1-based (matching the
        numbering used by dump()).  Follows extended-record chains, then
        applies KOD decoding and decompression as needed.

        NOTE(review): the head and tail of this method were lost in the
        mangled source; only the middle (flag handling, first extrec block)
        is original text — verify the reconstruction upstream.
        """
        ofs, ln, chk = self.tadidx(idx - 1)
        if ln == 0xFFFFFFFF:
            # deleted record
            return

        if self.isv3():
            flags = ln >> 24
            ln &= 0xFFFFFFF
        elif self.isv4():
            flags = ofs >> 56
            ofs &= (1 << 56) - 1

        dat = self.readdata(ofs, ln)

        if not dat:
            # empty record
            encdat = dat
        elif not flags:
            # extended record: first block holds (next-block offset, total length)
            if self.use64bit:
                extofs, extlen = struct.unpack("<QL", dat[:12])
                o = 12
            else:
                extofs, extlen = struct.unpack("<LL", dat[:8])
                o = 8
            encdat = dat[o:]
            while len(encdat) < extlen:
                dat = self.readdata(extofs, self.blocksize)
                if self.use64bit:
                    (extofs,) = struct.unpack("<Q", dat[:8])
                    o = 8
                else:
                    (extofs,) = struct.unpack("<L", dat[:4])
                    o = 4
                encdat += dat[o:]
            encdat = encdat[:extlen]
        else:
            encdat = dat

        if self.kod:
            encdat = self.kod.decode(idx, encdat)
        if self.iscompressed(encdat):
            encdat = self.decompress(encdat)
        return encdat

    def enumunreferenced(self, ranges, filesize):
        """
        Yield (offset, length) for every gap in the .dat file not covered by
        one of the (start, end, desc) tuples in `ranges`.

        NOTE(review): the loop header was lost in the mangled source and has
        been reconstructed — verify upstream.
        """
        o = 0
        for start, end, desc in sorted(ranges):
            if start > o:
                yield o, start - o
            o = end
        if o < filesize:
            yield o, filesize - o

    def dump(self, args):
        """
        Dump decodes all data referenced from the .tad file.
        And optionally print out all unreferenced byte ranges
        in the .dat file.

        This function is mostly useful for reverse-engineering
        the database format.

        the `args` object controls how data is decoded.
        """
        print("hdr: %-6s dat: %04x %s enc:%04x bs:%04x, tad: %08x %08x" % (
            self.name, self.hdrunk, self.version, self.encoding,
            self.blocksize, self.nrdeleted, self.firstdeleted))

        ranges = []  # keep track of used bytes in the .dat file.

        for i in range(self.nrofrecords):
            (ofs, ln, chk) = self.tadidx(i)
            idx = i + 1
            if args.maxrecs and i == args.maxrecs:
                break
            if ln == 0xFFFFFFFF:
                # deleted record
                print("%5d: %08x %08x %08x" % (idx, ofs, ln, chk))
                continue
            if self.isv3():
                flags = ln >> 24
                ln &= 0xFFFFFFF
            elif self.isv4():
                flags = ofs >> 56
                # 04     --> data, v3compdata
                # 02,03  --> deleted
                # 00     --> extrec
                ofs &= (1 << 56) - 1

            dat = self.readdata(ofs, ln)
            ranges.append((ofs, ofs + ln, "item #%d" % i))

            decflags = [" ", " "]
            infostr = ""
            tail = b""

            if not dat:
                # empty record
                encdat = dat
            elif not flags:
                if self.use64bit:
                    extofs, extlen = struct.unpack("<QL", dat[:12])
                    o = 12
                else:
                    extofs, extlen = struct.unpack("<LL", dat[:8])
                    o = 8
                # NOTE(review): everything from here to iscompressed() was
                # lost in the mangled source; reconstructed from readrec()'s
                # extrec logic — verify upstream.
                infostr = "%08x;%08x" % (extofs, extlen)
                encdat = dat[o:]
                while len(encdat) < extlen:
                    dat = self.readdata(extofs, self.blocksize)
                    ranges.append((extofs, extofs + self.blocksize, "item #%d ext" % i))
                    if self.use64bit:
                        (extofs,) = struct.unpack("<Q", dat[:8])
                        o = 8
                    else:
                        (extofs,) = struct.unpack("<L", dat[:4])
                        o = 4
                    encdat += dat[o:]
                tail = encdat[extlen:]
                encdat = encdat[:extlen]
                decflags[0] = "+"
            else:
                encdat = dat
                decflags[0] = "*"

            decdat = self.kod.decode(idx, encdat) if self.kod else encdat
            if self.iscompressed(decdat):
                decdat = self.decompress(decdat)
                decflags[1] = "@"

            print("%5d: %08x-%08x: (%02x:%08x) %s%s %s %s" % (
                idx, ofs, ofs + ln, flags, chk, "".join(decflags),
                infostr, toout(args, decdat), tohex(tail)))

        if args.verbose:
            # print the byte ranges of the .dat file no record refers to.
            for o, l in self.enumunreferenced(ranges, self.datsize):
                dat = self.readdata(o, min(l, 64))
                print("%08x-%08x: %s" % (o, o + l, toout(args, dat)))

    def iscompressed(self, data):
        """
        Return True when `data` looks like a compressed record: a chain of
        chunks, each introduced by a big-endian size and a flag word of
        0x800 or 0x008.

        NOTE(review): the method header and initial length/terminator checks
        were lost in the mangled source and have been reconstructed from the
        chunk format documented in decompress() — verify upstream.
        """
        if len(data) < 11:
            return
        if data[-3:] != b"\x00\x00\x02":
            return
        o = 0
        while o < len(data) - 3:
            size, flag = struct.unpack_from(">HH", data, o)
            if flag != 0x800 and flag != 0x008:
                return
            o += size + 2
        return True

    def decompress(self, data):
        """
        Decompress a record. Compressed records can have several chunks of
        compressed data. Note that the compression header uses a mix of
        big-endian and little endian numbers.

        each chunk has the following format:
            size - big endian uint16, size of flag + crc + compdata
            flag - big endian uint16 - always 0x800
            crc - little endian uint32, crc32 of the decompressed data

        the final chunk has only 3 bytes: a zero size followed by a 2.

        the crc algorithm is the one labeled 'crc-32' on this page:
            http://crcmod.sourceforge.net/crcmod.predefined.html
        """
        result = b""
        o = 0
        while o < len(data) - 3:
            # note the mix of bigendian and little endian numbers here.
            size, flag = struct.unpack_from(">HH", data, o)
            storedcrc, = struct.unpack_from("<L", data, o + 4)
            # NOTE(review): the tail of this method was lost in the mangled
            # source; reconstructed — raw deflate stream (wbits=-15), the
            # stored crc is not verified here. Verify upstream.
            compdata = data[o + 8 : o + 2 + size]
            result += zlib.decompress(compdata, -15)
            o += size + 2
        return result