# Casio dictionary ROM image dumper
#
# This software is provided ‘as-is’, without any express or implied
# warranty. In no event will the authors be held liable for any damages
# arising from the use of this software.
# 
# Permission is granted to anyone to use this software for any purpose,
# including commercial applications, and to alter it and redistribute it
# freely, subject to the following restrictions:
# 
# 1. The origin of this software must not be misrepresented; you must not
# claim that you wrote the original software. If you use this software
# in a product, an acknowledgment in the product documentation would be
# appreciated but is not required.
# 
# 2. Altered source versions must be plainly marked as such, and must not be
# misrepresented as being the original software.
# 
# 3. This notice may not be removed or altered from any source
# distribution.


import struct, ctypes, sys, os, os.path, array
from optparse import OptionParser
from pprint import pprint
from structs import *

# (checksum, offset) pairs for the images this tool knows a patch site in.
# Every entry swaps the same two bytes (ED 01 -> ED 00) at its offset.
_PATCH_SITES = (
    (0xae9afce, 0x7f428),
    (0xae9ff05, 0x7f340),
    (0x90d3e9b, 0x7f914),
    (0x90e5116, 0x7fb34),
    (0xae891ee, 0x7ecfa),
    (0xa78438c, 0x9216c),  # XD-D9800
    (0xa793d30, 0x92194),  # E-D800
    (0x9a9f689, 0x8f4c4),  # XD-D1000
    (0x90d3ead, 0x7f914),  # XD-B9800
)

# One dict per patch site: the checksum it is listed under, the file offset
# of the two-byte sequence, and the original / replacement bytes.
patch_info = [
    {
        'checksum': site_checksum,
        'offset': site_offset,
        'original': array.array('B', [0xed, 0x01]),
        'new': array.array('B', [0xed, 0x00]),
    }
    for site_checksum, site_offset in _PATCH_SITES
]

def read_struct(f, struct):
    """Build a ctypes structure of type *struct* from raw bytes.

    *f* is either a ``bytes`` object, used directly, or a file-like object
    from which ``sizeof(struct)`` bytes are read.  If fewer bytes are
    available than the structure needs, only that many are copied and the
    remainder of the structure keeps its default (zeroed) contents.

    Returns the populated structure instance.
    """
    instance = struct()
    wanted = ctypes.sizeof(instance)
    raw = f if isinstance(f, bytes) else f.read(wanted)
    # Never copy more than the structure can hold, nor more than we have.
    ctypes.memmove(ctypes.addressof(instance), raw, min(wanted, len(raw)))
    return instance

def print_line_or_lines(results, indent):
    """Print a single result inline, or several results on indented lines.

    With exactly one item the item is printed as-is (continuing the current
    line).  Otherwise the current line is ended and every item is printed on
    its own line prefixed by *indent*.
    """
    if len(results) != 1:
        print()
        for line in results:
            print(indent + line)
        return
    print(results[0])

def ctypes_pprint(cstruct, indent=""):
    """Pretty-print every field of a ctypes Structure or Union.

    Each field name is printed (prefixed by *indent*), followed by its value
    rendered by the first applicable strategy:

    1. ``cstruct.pprint_<field>(indent)``  - legacy per-field printer
    2. ``cstruct.pformat_<field>()``       - per-field formatter returning lines
    3. ``value.pformat()``                 - formatter on the value itself
    4. ``value.pprint(indent)``            - printer on the value itself
    5. recursion, when the value has ``_fields_``
    6. hexadecimal for integers, plain ``print`` for anything else
    """
    next_indent = indent + "    "
    for field_name, _field_ctype in cstruct._fields_:
        field_value = getattr(cstruct, field_name)
        print(indent + field_name, end=' ')
        pprint_name = "pprint_%s" % field_name
        pformat_name = "pformat_%s" % field_name
        if hasattr(cstruct, pprint_name):
            # no longer used
            getattr(cstruct, pprint_name)(next_indent)
        elif hasattr(cstruct, pformat_name):
            # counted-array and other common cases
            print_line_or_lines(getattr(cstruct, pformat_name)(), next_indent)
        elif hasattr(field_value, "pformat"):
            # common for small compound types
            print_line_or_lines(field_value.pformat(), next_indent)
        elif hasattr(field_value, "pprint"):
            # useful for Union selectors
            field_value.pprint(next_indent)
        elif hasattr(field_value, "_fields_"):
            # generic recursion
            print()
            ctypes_pprint(field_value, next_indent)
        else:
            # generic simple (or unknown/uninteresting) value
            try:
                text = "0x%X" % field_value
            except (TypeError, ValueError):
                # Not an integer (bytes, arrays, floats, ...): print as-is.
                # Only formatting errors are caught so that Ctrl-C and real
                # bugs are not silently swallowed.
                print(field_value)
            else:
                print(text)

def find_last_non_ff(f, filesize):
    """Return the offset just past the last byte of *f* that is not 0xFF.

    The file is scanned backwards from *filesize* in 4096-byte chunks.

    Args:
        f: seekable binary file-like object.
        filesize: offset at which the backwards scan starts.

    Returns:
        ``index_of_last_non_ff_byte + 1``, or 0 when the scanned range is
        empty or consists solely of 0xFF bytes.  The file position is left
        wherever the scan stopped.
    """
    chunk_size = 4096
    end = filesize
    while end > 0:
        start = max(end - chunk_size, 0)
        f.seek(start)
        chunk = f.read(end - start)
        # rstrip works on whatever was actually read, so a short read (file
        # smaller than *filesize*) cannot index past the end of the chunk.
        kept = chunk.rstrip(b'\xff')
        if kept:
            return start + len(kept)
        end = start
    return 0

def analyze_file_trimming(f, flash_size):
    """Report whether the image is trimmed relative to its expected flash size.

    Compares the size of *f* with *flash_size* and prints one of:
    trimmed (smaller), oversized (larger), or full size.  For a full-size
    file it also looks for trailing 0xFF padding and prints a recommended
    trim size, rounded up to the next 16-byte boundary after the last
    non-0xFF byte.

    Args:
        f: seekable binary file-like object.
        flash_size: expected size of the image in bytes.

    The file position of *f* is restored before returning, including after
    the trailing-padding scan.
    """
    print("\n--- Trimming Analysis ---")
    original_pos = f.tell()
    try:
        f.seek(0, 2)
        filesize = f.tell()

        if filesize != flash_size:
            if filesize < flash_size:
                print("File appears to be trimmed.")
            else:
                print("Warning: File size is larger than expected.")
            print("  - Actual size:   0x%X (%d bytes)" % (filesize, filesize))
            print("  - Expected size: 0x%X (%d bytes)" % (flash_size, flash_size))
        else:
            print("File is not trimmed (full size)." )
            # find_last_non_ff returns "index + 1"; convert back to an index.
            last_non_ff_addr = find_last_non_ff(f, filesize) - 1

            if last_non_ff_addr >= 0:
                # Round up to the next 16-byte boundary past the last data byte.
                trim_offset = (last_non_ff_addr & ~0xF) + 0x10

                if trim_offset < filesize:
                    print("  - Recommended trim size: 0x%X (%d bytes)" % (trim_offset, trim_offset))
                    print("  - This would save: %d bytes" % (filesize - trim_offset))
                else:
                    print("  - No trailing 0xFF data found to trim.")
            else:
                print("  - File appears to be empty or contain only 0xFF bytes.")
    finally:
        # The size query and the padding scan both move the file position.
        f.seek(original_pos)
    print("-------------------------\n")

def analyze_patch_status(f):
    """Report whether the image looks patched, original, or unknown.

    For each entry of the module-level ``patch_info`` table, the bytes at the
    entry's offset are compared with its 'new' and 'original' sequences.  The
    first entry that matches either sequence decides the printed verdict; the
    entry's checksum is only reported, not verified against the file.

    Args:
        f: seekable binary file-like object.

    The file position of *f* is restored before returning, even when a probe
    fails part-way.
    """
    print("--- Patch Analysis ---")
    found_status = False
    original_pos = f.tell()
    try:
        for info in patch_info:
            original_bytes = info['original'].tobytes()
            patched_bytes = info['new'].tobytes()
            try:
                f.seek(info['offset'])
                read_bytes = f.read(len(original_bytes))
            except Exception:
                # Best-effort probe: an offset that cannot be read (for
                # example beyond the end of a wrapped stream) just means
                # this entry does not apply.
                continue

            if read_bytes == patched_bytes:
                print("File appears to be PATCHED.")
                print("  - Patch found for original checksum 0x%X." % info['checksum'])
                print("  - Bytes at offset 0x%X match the patched sequence." % info['offset'])
                found_status = True
                break
            elif read_bytes == original_bytes:
                print("File appears to be ORIGINAL (not patched)." )
                print("  - Found patchable bytes for checksum 0x%X at offset 0x%X." % (info['checksum'], info['offset']))
                found_status = True
                break
    finally:
        f.seek(original_pos)

    if not found_status:
        print("File patch status is UNKNOWN.")
        print("  - No known original or patched byte sequences found.")
    print("----------------------\n")

def get_tail(f, r_offset, hdr_r):
  """Estimate where the data at the end of the image stops, rounded up to an alignment boundary.

  The last 5 bytes of the file serve as a reference pattern.  If the file is
  shorter than ``hdr_r.flash_size`` and the 5 bytes at ``filesize - 0x10``
  differ from that pattern, the file size itself is used.  Otherwise the file
  is scanned backwards in 5-byte steps for as long as the bytes read equal
  the pattern, and the position just after the first non-matching read is
  used.

  Args:
    f: seekable binary file-like object.
    r_offset: offset of the R header; 0xff80 selects 0x1000 alignment,
      any other value selects 0x10 alignment.
    hdr_r: R header; only its ``flash_size`` field is read here.

  Returns:
    The computed tail offset, rounded up to the selected alignment.

  The file position of *f* is not restored.
  """
  f.seek(0,2)
  filesize = f.tell()
  # Reference pattern: the final 5 bytes of the file.
  f.seek(filesize - 0x05, 0)
  thetail = f.read(0x05)
  f.seek(filesize - 0x10, 0)
  # presumably this detects a trimmed image that ends in real data rather
  # than in repeated filler - TODO confirm
  if ((filesize < hdr_r.flash_size) and (f.read(0x05) != thetail)):
    tail = filesize
  else:
    # Walk backwards while each 5-byte read equals the reference pattern.
    # NOTE(review): the scan starts at filesize - 6 (not - 5), and because
    # i is advanced after the seek, the first two comparisons both read at
    # filesize - 6; later reads are at filesize - 11, - 16, ...
    i = 6
    f.seek(filesize - i, 0)
    while (f.read(0x05) == thetail):
      f.seek(filesize - i,0)
      i= i + 5
    # Position just after the first read that did not match.
    tail=f.tell()
  # Round up to a multiple of 0x1000 or 0x10 (adds 0 when already aligned).
  if r_offset == 0xff80:
    tail = tail + (-tail & 0xfff)
  else:
    tail = tail + (-tail & 0xf)
  return tail

def analyze_checksums(f, r_offset, hdr_r, hdr_b, hdr_a):
    """Recompute the image's checksums and print them next to the stored ones.

    Checksums computed (each printed with an OK/Fail verdict):

    * R/B/A header: 16-bit sum over the header bytes minus the last two.
    * Boot: bytes [0, r_offset - mod), then ``mod`` bytes of 0xFF in place
      of the file data; when r_offset is 0xff80, also the bytes from
      r_offset + 0x80 up to b_offset - 0xa0.  ``mod`` is 0x300 for
      r_offset 0xff80 and 0x500 otherwise.
    * Data 1: hdr_b.datalen bytes at hdr_b.entrypoint & 0x7fffffff.
    * Data 2: from hdr_b.datalen + hdr_b.farea_off up to the tail offset
      from get_tail(), padded with 0xFF when the tail lies past the file end.
    * Data A: hdr_a.datalen bytes at hdr_a.entrypoint & 0x7fffffff.
    * ROM: the boot prefix as above, then [r_offset, b_offset), then
      everything from b_offset + 0x80 to the end of the file, padded with
      0xFF up to hdr_r.flash_size.

    The file position of *f* is restored before the closing rule is printed.
    """
    def show(fmt, calculated, stored):
        # One result line: calculated value, stored value, verdict.
        print(fmt % (calculated, stored, "OK" if stored == calculated else "Fail"))

    print("--- Checksum Analysis ---")
    saved_pos = f.tell()

    # Size of the region before the R header that is summed as 0xFF.
    mod = 0x300 if r_offset == 0xff80 else 0x500
    blank = b"\xff" * mod

    f.seek(0, 2)
    filesize = f.tell()

    # --- header checksums (trailing two bytes excluded) ---
    r_sum = CheckSum16()
    b_sum = CheckSum16()
    a_sum = CheckSum16()
    for summer, header in ((r_sum, hdr_r), (b_sum, hdr_b), (a_sum, hdr_a)):
        summer.update(bytes(header)[:-2])

    # --- boot checksum ---
    boot_sum = CheckSum32()
    f.seek(0, 0)
    boot_sum.update(f.read(r_offset - mod))
    boot_sum.update(blank)
    if r_offset == 0xff80:
        f.seek(r_offset + 0x80)
        boot_sum.update(f.read(hdr_r.b_offset - f.tell() - 0xa0))

    # --- data 1 ---
    data1_sum = CheckSum32()
    f.seek(hdr_b.entrypoint & 0x7fffffff, 0)
    data1_sum.update(f.read(hdr_b.datalen))

    # --- data 2 ---
    data2_sum = CheckSum32()
    tail = get_tail(f, r_offset, hdr_r)
    data2_start = hdr_b.datalen + hdr_b.farea_off
    f.seek(data2_start, 0)
    data2_sum.update(f.read(tail - hdr_b.datalen - hdr_b.farea_off))
    if tail > filesize:
        data2_sum.update(b"\xff" * (tail - filesize))

    # --- data A ---
    data_a_sum = CheckSum32()
    f.seek(hdr_a.entrypoint & 0x7fffffff, 0)
    data_a_sum.update(f.read(hdr_a.datalen))

    # --- whole-ROM checksum ---
    rom_sum = CheckSum32()
    f.seek(0)
    rom_sum.update(f.read(r_offset - mod))
    rom_sum.update(blank)
    f.seek(r_offset, 0)
    rom_sum.update(f.read(hdr_r.b_offset - f.tell()))
    f.seek(hdr_r.b_offset + 0x80, 0)
    rom_sum.update(f.read(filesize - f.tell()))
    if filesize < hdr_r.flash_size:
        rom_sum.update(b"\xff" * (hdr_r.flash_size - filesize))

    show("R Header: 0x%X (calculated) vs 0x%X (file) - %s", r_sum.chksum, hdr_r.checksum)
    show("B Header: 0x%X (calculated) vs 0x%X (file) - %s", b_sum.chksum, hdr_b.checksum)
    show("A Header: 0x%X (calculated) vs 0x%X (file) - %s", a_sum.chksum, hdr_a.checksum)
    show("Boot    : 0x%X (calculated) vs 0x%X (file) - %s", boot_sum.chksum, hdr_r.boot_cksum)
    show("Data 1  : 0x%X (calculated) vs 0x%X (file) - %s", data1_sum.chksum, hdr_b.datasum1)
    show("Data 2  : 0x%X (calculated) vs 0x%X (file) - %s", data2_sum.chksum, hdr_b.datasum2)
    show("Data A  : 0x%X (calculated) vs 0x%X (file) - %s", data_a_sum.chksum, hdr_a.datasum)
    show("ROM     : 0x%X (calculated) vs 0x%X (file) - %s", rom_sum.chksum, hdr_b.rom_cksum)

    f.seek(saved_pos)
    print("-------------------------\n")

def dump_pvos(f, extract = True, short = False, output_dir=None):
    """Print the headers and file directory of a PVOS image; optionally extract files.

    Args:
        f: seekable binary file-like object positioned at the first header.
        extract: when true (and *output_dir* is set) every listed file is
            written below *output_dir*, recreating its directory path.
        short: print one-line header summaries only and return before the
            directory is read.
        output_dir: root directory for extracted files; nothing is written
            when it is None/empty.

    Directory entries whose ``dir_id`` is not 0xFFFF are treated as
    directories; all others as files.  A file's path is built by following
    its ``parent`` links until 0xFFFF is reached.
    """
    hdr1_offset = f.tell()
    hdr1 = read_struct(f, PVOSHeader1)
    if not short:
        print("Casio PVOS binary detected")
        print("Header1: (at 0x%X)" % hdr1_offset)
        ctypes_pprint(hdr1)
    else:
        print("[hdr1] %s: %s" % (hdr1.signature, hdr1.model))

    flash_size = hdr1.flash_nblks * hdr1.blocksize
    if flash_size > 0:
        analyze_file_trimming(f, flash_size)

    newFormat = hdr1.signature[:9] == b"CASIOPVOS"
    hdr1_2_offset = hdr1.blk2off(hdr1.hdr1_2_blk)
    f.seek(hdr1_2_offset)
    if newFormat:
        hdr1_2 = read_struct(f, PVOSHeader1)
        if not short:
            print("\nHeader1_2: (at 0x%X)" % hdr1_2_offset)
            ctypes_pprint(hdr1_2)
        else:
            print("[hdr1_2] %s: %s" % (hdr1_2.signature, hdr1_2.model))
        f.seek(hdr1.blk2off(hdr1.hdr2_blk))
    hdr2_offset = f.tell()
    hdr2 = read_struct(f, PVOSHeader2)
    if not short:
        print("\nHeader2: (at 0x%X)" % hdr2_offset)
        ctypes_pprint(hdr2)
    else:
        print("[hdr2] %s: %s; ver %02X.%02X" % (hdr2.signature, hdr2.datetime, hdr2.version >> 8, hdr2.version & 0xFF))
        return

    # Note: the offset printed here is the position right after Header2; the
    # directory itself is read from the block offsets selected below.
    dir_offset = f.tell()
    print("\nFile directory: (at 0x%X)" % dir_offset)
    if newFormat:
        f.seek(hdr1.blk2off(hdr1.dir_blk+1))
        etype = PVDirEntryNew
        elen = ctypes.sizeof(etype())
        entries_cnt = hdr1.blk2off(hdr1.dir_nblks-1) // elen
    else:
        f.seek(hdr1.blk2off(hdr1.dir_blk))
        etype = PVDirEntry
        elen = ctypes.sizeof(etype())
        entries_cnt = hdr1.blk2off(hdr1.hdr2_blk) // elen
    dummy = b'\xFF'*elen  # an erased (all-0xFF) slot is skipped
    dirs = {}
    files = []
    for i in range(entries_cnt-1):
        s = f.read(elen)
        if s == dummy:
            continue
        print()
        print(i)
        entry = read_struct(s, etype)
        ctypes_pprint(entry)
        if entry.dir_id != 0xFFFF:
            dirs[entry.dir_id] = entry
        else:
            files.append(entry)

    print("\nFile list:")
    for fe in files:
        if newFormat:
            name = fe.filename # bytes
        else:
            name = fe.filename.tostr() # str
        # Prepend each ancestor directory until the root marker is reached.
        parent = fe.parent
        while parent != 0xFFFF:
            pe = dirs[parent]
            if newFormat:
                name = os.path.join(pe.filename, name) # bytes
            else:
                name = os.path.join(pe.filename.tostr(), name) # str
            parent = pe.parent

        if isinstance(name, bytes):
            name = name.decode('ascii', 'ignore').rstrip('\x00')

        off = hdr1.blk2off(fe.block_no)
        print(name, "%08X" % off, "%X" % fe.size)
        if extract and output_dir:
            # NOTE(review): *name* comes straight from the image and is not
            # sanitised; an absolute or "../" name would escape output_dir.
            output_path = os.path.join(output_dir, name)
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            f.seek(off)
            # Close the output file deterministically instead of leaking it.
            with open(output_path, "wb") as out:
                out.write(f.read(fe.size))

def dump_dics(f, extract = True, short = False, output_dir=None, metadata_dir=None):
    """Print the headers and file directory of a DICS image; optionally extract it.

    Args:
        f: seekable binary file-like object positioned at the R header.
        extract: when true (and *output_dir* is set) every listed file is
            written below *output_dir*, recreating its directory path.
        short: print one-line header summaries only and return after the
            A header, before checksums and the directory are processed.
        output_dir: root directory for extracted files.
        metadata_dir: when set, the image is additionally split into
            consecutive raw blocks (boot area, headers, gaps, file area,
            content, tail) written to this directory together with a
            ``manifest.txt`` describing each block.

    Directory entries whose ``dir_id`` is not 0xFFFF are treated as
    directories; all others as files.  A file's path is built by following
    its ``parent`` links until 0xFFFF is reached.
    """
    r_offset = f.tell()
    hdr_r = read_struct(f, CasioDicHeaderR)

    if not short:
        print("Casio DICS binary detected")
        print("Header R: (at 0x%X)" % r_offset)
        ctypes_pprint(hdr_r)
        print()
    else:
        print("%s: %s" % (hdr_r.signature, hdr_r.model))
        # NOTE(review): if extmodel is a bytes value, extmodel[0] is an int
        # in Python 3 and never equals b'\xFF', so this test would always
        # pass - confirm against the field type declared in structs.
        if hdr_r.extmodel[0] != b'\xFF':
            print("Ext. model: %s" % hdr_r.extmodel)

    if hdr_r.flash_size > 0:
        analyze_file_trimming(f, hdr_r.flash_size)

    b_offset = hdr_r.b_offset
    f.seek(b_offset)
    hdr_b = read_struct(f, CasioDicHeaderB)

    if not short:
        print("Header B: (at 0x%X)" % b_offset)
        ctypes_pprint(hdr_b)
        print()
    else:
        print("%s: %s; %s; ver %02X.%02X" % (hdr_b.signature, hdr_b.datetime1, hdr_b.datetime2, hdr_b.version >> 8, hdr_b.version & 0xFF))
        s = str(hdr_b.extmodel)
        if s:
            print("Ext. model: %s" % hdr_b.extmodel)

    analyze_patch_status(f)

    a_offset = hdr_b.a_offset
    f.seek(a_offset)
    hdr_a = read_struct(f, CasioDicHeaderA)

    if not short:
        print("Header A: (at 0x%X)" % a_offset)
        ctypes_pprint(hdr_a)
        print()
    else:
        print("%s: %s; %s; ver %02X.%02X" % (hdr_a.signature, hdr_a.name, hdr_a.datetime, hdr_a.version >> 8, hdr_a.version & 0xFF))
        # NOTE(review): same int-vs-bytes concern as for hdr_r.extmodel.
        if hdr_a.extmodel[0] != b'\xFF':
            print("Ext. model: %s" % hdr_a.extmodel)
        return

    analyze_checksums(f, r_offset, hdr_r, hdr_b, hdr_a)

    farea_offset = hdr_b.farea_off
    f.seek(farea_offset)
    fhdr = read_struct(f, DicsFareaHeader)
    if fhdr.count != 0:
        print("Preload files: (at 0x%X)" % farea_offset)
        for _ in range(fhdr.count):
            pf = read_struct(f, DicsPreloadFile)
            print("%s (%d)" % (pf.filename, pf.idx))
        etype = DicsDirEntry
    else:
        # No preload table: use the old entry layout and re-read from the
        # start of the file area.
        etype = DicsDirEntryOld
        f.seek(farea_offset)

    dir_offset = f.tell()
    print("\nFile directory: (at 0x%X)" % dir_offset)
    dirs = {}
    files = []
    elen = ctypes.sizeof(etype())
    dummy = b'\xFF'*elen  # an erased (all-0xFF) slot is skipped
    entries_cnt = (hdr_b.farea_len - (fhdr.count*32 + 16)) // elen
    for i in range(entries_cnt):
        s = f.read(elen)
        if s == dummy:
            continue
        print()
        print(i)
        entry = read_struct(s, etype)
        ctypes_pprint(entry)
        if entry.dir_id != 0xFFFF:
            dirs[entry.dir_id] = entry
        else:
            files.append(entry)

    if metadata_dir:
        f.seek(0, 2)
        filesize = f.tell()
        tail_offset = get_tail(f, r_offset, hdr_r)

        manifest_lines = []

        def write_block(start, end, filename):
            # Dump bytes [start, end) to metadata_dir/filename and record it
            # in the manifest; empty or inverted ranges are skipped.
            if start >= end:
                return

            f.seek(start)
            data = f.read(end - start)
            if not data:
                return

            with open(os.path.join(metadata_dir, filename), "wb") as bf:
                bf.write(data)
            manifest_lines.append(f"{filename}: start=0x{start:X}, end=0x{end:X}, size=0x{len(data):X}")

        hdr_r_size = ctypes.sizeof(hdr_r)
        hdr_b_size = ctypes.sizeof(hdr_b)
        hdr_a_size = ctypes.sizeof(hdr_a)

        blocks = [
            ('block_01_boot.bin', 0, r_offset),
            ('block_02_header_R.bin', r_offset, r_offset + hdr_r_size),
            ('block_03_after_R_before_B.bin', r_offset + hdr_r_size, b_offset),
            ('block_04_header_B.bin', b_offset, b_offset + hdr_b_size),
            ('block_05_after_B_before_A.bin', b_offset + hdr_b_size, a_offset),
            ('block_06_header_A.bin', a_offset, a_offset + hdr_a_size),
            ('block_07_after_A_before_farea.bin', a_offset + hdr_a_size, farea_offset),
            ('block_08_farea.bin', farea_offset, farea_offset + hdr_b.farea_len),
            ('block_09_content_data.bin', farea_offset + hdr_b.farea_len, tail_offset),
            ('block_10_final_tail.bin', tail_offset, filesize),
        ]

        for name, start, end in blocks:
            write_block(start, end, name)

        with open(os.path.join(metadata_dir, "manifest.txt"), "w") as mf:
            mf.write("\n".join(manifest_lines))

    print("File list:")
    for fe in files:
        name = fe.name # bytes
        # Prepend each ancestor directory until the root marker is reached.
        parent = fe.parent
        while parent != 0xFFFF:
            pe = dirs[parent]
            name = os.path.join(pe.name, name) # bytes
            parent = pe.parent

        name_str = name.decode('ascii', 'ignore').rstrip('\x00')

        print(name_str, "%08X" % fe.offset, "%X" % fe.size)
        if extract and output_dir:
            # NOTE(review): the name comes straight from the image and is not
            # sanitised; an absolute or "../" name would escape output_dir.
            output_path = os.path.join(output_dir, name_str)
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            f.seek(fe.offset)
            # Close the output file deterministically instead of leaking it.
            with open(output_path, "wb") as out:
                out.write(f.read(fe.size))

# class for transparent decryption of newer Casio updates
class EncWrapper:
    """File-like wrapper that XORs everything read with a repeating key.

    The key is loaded from ``enckey.bin`` located next to this script.  The
    key byte applied to a data byte is selected by the byte's absolute file
    offset masked with 0x3FF, so the key repeats every 0x400 bytes.

    Only ``seek``, ``tell`` and ``read`` are provided; they delegate to the
    wrapped file object.
    """

    def __init__(self, f):
        """Wrap *f*; raises OSError if ``enckey.bin`` cannot be read."""
        keyfname = os.path.join(os.path.dirname(os.path.abspath(__file__)), "enckey.bin")
        with open(keyfname, "rb") as keyfile:
            self.keydata = array.array('B', keyfile.read())
        self.file = f

    def seek(self, off, whence = 0):
        """Seek the wrapped file; same arguments and result as its ``seek``."""
        return self.file.seek(off, whence)

    def tell(self):
        """Return the current position of the wrapped file."""
        return self.file.tell()

    def read(self, size = -1):
        """Read up to *size* bytes and return them XORed with the key.

        Fewer bytes than requested are returned at end of file; with the
        default size of -1 the wrapped file's "read everything" behaviour is
        used.
        """
        curoff = self.file.tell()
        raw = self.file.read(size)
        key = self.keydata
        # Iterate over what was actually read, not over *size*, so a short
        # read at EOF cannot index past the end of the data.
        return bytes(b ^ key[(curoff + i) & 0x3FF] for i, b in enumerate(raw))

def get_dics(f):
    """Locate the "CASIODICS" signature and return a reader positioned on it.

    Offsets 0xFF80 and 0x1FF80 are probed in that order; at each offset the
    plain file is checked first, then the same bytes as seen through an
    EncWrapper.

    Args:
        f: seekable binary file-like object.

    Returns:
        *f* itself when the plain signature matched, an EncWrapper around
        *f* when the signature only matched through the wrapper, or None
        when no probe matched.  The returned reader is positioned at the
        start of the signature.
    """
    signature = b"CASIODICS"
    encw = None
    for offs in (0xFF80, 0x1FF80):
        f.seek(offs)
        if f.read(len(signature)) == signature:
            f.seek(offs)
            return f
        # Build the wrapper only once the plain probe has failed, so an
        # image matching at the first offset does not need the key file.
        if encw is None:
            encw = EncWrapper(f)
        encw.seek(offs)
        if encw.read(len(signature)) == signature:
            encw.seek(offs)
            return encw
    return None

# Command-line entry point: parse options, prepare output directories, detect
# the image type from its signature and hand over to the matching dumper.
parser = OptionParser(usage="usage: %prog [options] filename")
parser.add_option('-e', '--extract', dest="extract", action='store_true', help='extract the bundled files', default=False)
parser.add_option('-s', '--short',   dest="short",   action='store_true', help='print short info (model ID)', default=False)

(options, args) = parser.parse_args()
if len(args) != 1:
    parser.print_help()
    sys.exit(1)

input_filename = args[0]
print("Parsing file %s" % input_filename)

output_dir = None
metadata_dir = None
if options.extract:
    # Output goes to tmp-output/<input name without extension>/{content,metadata}
    base_filename = os.path.basename(input_filename)
    base_filename_no_ext = os.path.splitext(base_filename)[0]

    base_output_dir = os.path.join("tmp-output", base_filename_no_ext)

    content_dir = os.path.join(base_output_dir, "content")
    os.makedirs(content_dir, exist_ok=True)
    print("Extracting files to: %s" % content_dir)

    metadata_dir = os.path.join(base_output_dir, "metadata")
    os.makedirs(metadata_dir, exist_ok=True)
    print("Extracting metadata to: %s" % metadata_dir)

    output_dir = content_dir

# The input file is closed when the block exits, including on errors.
with open(input_filename, "rb") as f:
    sig = f.read(9)
    if sig == b"CASIOPVOS":
        f.seek(0)
        dump_pvos(f, options.extract, options.short, output_dir)
    else:
        f2 = get_dics(f)
        if f2 is not None:
            r_offset = f2.tell()
            check_offset = 0
            check_len = 0

            if r_offset == 0x1FF80: # C series (dp5)
                check_offset = 0x1F000
                check_len = r_offset - check_offset
            elif r_offset == 0xFF80: # L series (dp4)
                check_offset = 0xfb90
                check_len = r_offset - check_offset

            if check_len > 0:
                # NOTE(review): this reads through the raw file object even
                # when get_dics returned a decrypting wrapper, so for such
                # images the check sees undecrypted bytes - confirm intent.
                current_pos = f.tell() # Save current position
                f.seek(check_offset)
                pre_header_data = f.read(check_len)
                f.seek(current_pos) # Restore position

                is_empty = all(b == 0xFF for b in pre_header_data)

                if is_empty:
                    print("Pre-header data at 0x%X (length 0x%X) is empty (0xFF filled)." % (check_offset, check_len))
                else:
                    print("Pre-header data at 0x%X (length 0x%X) contains data." % (check_offset, check_len))
            dump_dics(f2, options.extract, options.short, output_dir, metadata_dir)
        else:
            print("not a Casio update?")
