#!/usr/bin/env python
"""
 * dump-child.py:
 *
 * Copyright (C) 2006 Danny Milosavljevic, Fabasoft (danny.milosavljevic@fabalabs.org)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 or later of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
"""

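# struct format characters used in this file and the Windows types they decode:
# B  BYTE  (1 byte)
# H  WORD  (2 bytes)
# I  DWORD (4 bytes)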

"""typedef struct tagGUID
{
  unsigned long Data1;
  unsigned short Data2;
  unsigned short Data3;
  unsigned char Data4[8];
} GUID;
IHHBBBBBBBB
"""

class VT:
  """Numeric property type tags, with lookups between tag names and values."""

  VT_EMPTY           = 0
  VT_NULL            = 1
  VT_I2              = 2
  VT_I4              = 3
  VT_R4              = 4
  VT_R8              = 5
  VT_CY              = 6
  VT_DATE            = 7
  VT_BSTR            = 8
  VT_DISPATCH        = 9
  VT_ERROR           = 10
  VT_BOOL            = 11
  VT_VARIANT         = 12
  VT_UNKNOWN         = 13
  VT_DECIMAL         = 14
  VT_I1              = 16
  VT_UI1             = 17
  VT_UI2             = 18
  VT_UI4             = 19
  VT_I8              = 20
  VT_UI8             = 21
  VT_INT             = 22
  VT_UINT            = 23
  VT_VOID            = 24
  VT_HRESULT         = 25
  VT_PTR             = 26
  VT_SAFEARRAY       = 27
  VT_CARRAY          = 28
  VT_USERDEFINED     = 29
  VT_LPSTR           = 30
  VT_LPWSTR          = 31
  VT_FILETIME        = 64
  VT_BLOB            = 65
  VT_STREAM          = 66
  VT_STORAGE         = 67
  VT_STREAMED_OBJECT = 68
  VT_STORED_OBJECT   = 69
  VT_BLOB_OBJECT     = 70
  VT_CF              = 71
  VT_CLSID           = 72
  VT_VECTOR          = 0x1000 # flag, "bit-ored" with one of the tags above

  @classmethod
  def from_string(klass, name):
    """Return the attribute of the class called *name*.

    Raises AttributeError when the class has no attribute of that name.
    """
    return getattr(klass, name)

  @classmethod
  def to_string(klass, value):
    """Return the name of the class attribute that equals *value*.

    Attributes are examined in the order dir() lists them and the first
    match wins.  When nothing matches (a VT_VECTOR combination, for example)
    str(value) is returned instead.
    """
    matching_names = [attribute_name
                      for attribute_name in dir(klass)
                      if getattr(klass, attribute_name) == value]
    if matching_names:
      return matching_names[0]

    return str(value)

def guess_property_size(property_raw):
  fixed_sizes = {
   VT.VT_EMPTY: 0,
   VT.VT_NULL: 0,
   VT.VT_I2             :2, 
   VT.VT_I4             :4, 
   VT.VT_R4             :4, 
   VT.VT_R8             :8, 
   VT.VT_CY             :8,  # 	8-byte two's complement integer (scaled by 10,000).
   VT.VT_DATE           :8, # A 64-bit floating-point number representing the number of days (not seconds) since December 31, 1899. (That is, January 1, 1900, is 2.0, January 2, 1900, is 3.0, and so on.) -> VT_R8. 
   #VT.VT_BSTR           :, 
   #VT.VT_DISPATCH       :, 
   #VT.VT_ERROR          :, 
   VT.VT_BOOL           :4,  # 0; -1
   #VT.VT_VARIANT        :, 
   #VT.VT_UNKNOWN        :, 
   #VT.VT_DECIMAL        :, 
   VT.VT_I1             :1, 
   VT.VT_UI1            :1, 
   VT.VT_UI2            :2, 
   VT.VT_UI4            :4, 
   VT.VT_I8             :8, 
   VT.VT_UI8            :8, 
   VT.VT_INT            :4, 
   VT.VT_UINT           :4, 
   VT.VT_VOID           :0, 
   VT.VT_HRESULT        :4, 
   #VT.VT_PTR            :, 
   #VT.VT_SAFEARRAY      :, 
   #VT.VT_CARRAY         :, 
   #VT.VT_USERDEFINED    :, 
   #VT.VT_LPSTR          :, 
   #VT.VT_LPWSTR         :, 
   VT.VT_FILETIME       :8,
   #VT.VT_BLOB           :, 
   #VT.VT_STREAM         :, 
   #VT.VT_STORAGE        :, 
   #VT.VT_STREAMED_OBJECT:, 
   #VT.VT_STORED_OBJECT  :, 
   #VT.VT_BLOB_OBJECT    :, 
   #VT.VT_CF             :,  # clipboard: DWORD(size), ...
   VT.VT_CLSID          : 16, 
   # VT.VT_VECTOR:
  }

  property_type, = struct.unpack("I", property_raw[:4])
  if property_type in fixed_sizes:
    return fixed_sizes[property_type]

  if property_type == VT.VT_BSTR:
    property_value_size, = struct.unpack("I", property_raw[4:8])

    return property_value_size + 4

  if property_type == VT.VT_LPSTR:
    property_value_size, = struct.unpack("I", property_raw[4:8])
    return property_value_size + 4

  if property_type == VT.VT_LPWSTR:
    property_value_size, = struct.unpack("I", property_raw[4:8])
    return property_value_size * 2  + 4


  if property_type == VT.VT_BLOB:
    property_value_size, = struct.unpack("I", property_raw[4:8])

    return property_value_size + 4

  if property_type == VT.VT_VARIANT:
    return None

  if property_type == VT.VT_CF:
    property_value_size, = struct.unpack("I", property_raw[4:8])

    # this wrecks havoc in diff, so disable for now.

    return 4

    return property_value_size + 4

  if property_type == VT.VT_VECTOR | VT.VT_LPSTR:
    property_num_elements, = struct.unpack("I", property_raw[4:8])

    print "#elements", property_num_elements,

    offset = 8
    for i in range(property_num_elements):
      string_byte_count, = struct.unpack("I", property_raw[offset:offset+4])

      offset = offset + 4

      stringout("string #%d: " % i, property_raw[offset:offset+string_byte_count])

      offset = offset + string_byte_count

  if property_type == VT.VT_VECTOR | VT.VT_VARIANT:
    property_num_elements, = struct.unpack("I", property_raw[4:8])
    offset = 8
    for i in range(property_num_elements):
      variant_type, = struct.unpack("I", property_raw[offset:offset+4])
      item_size = guess_property_size(property_raw[offset:])
      if item_size == None:
        hexout("item #%d (%s) (partial): " % (i, VT.to_string(variant_type)), property_raw[offset:])
        print "breaking..."
        break
      else:
        hexout("item #%d (%s): " % (i, VT.to_string(variant_type)), property_raw[offset:offset + item_size + 4])
        #variant_type, = struct.unpack("I", property_raw[offset:offset+4])
      
      offset = offset + item_size + 4

    return 4 #offset
  
  return None

import struct
import sys

data = file(sys.argv[1], "rb").read()

#print struct.calcsize("HHIIHHBBBBBBBBI")

# BOM, Format, OSVersion, GUID, Reserved

bom, format, osversion, guid_data1, guid_data2, guid_data3, guid_data4_1, guid_data4_2, guid_data4_3, guid_data4_4, guid_data4_5, guid_data4_6, guid_data4_7, guid_data4_8, cnt_sections = struct.unpack("HHIIHHBBBBBBBBI", data[:28])
assert(bom == 0xFFFE)
assert(format == 0)
osversion_h = int(osversion / 65536)
osversion_l = int(osversion & 65535)
#print osversion
#print osversion_h

assert(osversion_h == 0 or osversion_h == 1 or osversion_h == 2) # Win16, mac, Win32

section_position_by_id = {}

print "#sections", cnt_sections

for i in range(cnt_sections):
  start = 28 + 20 * i
  fmt_id_1, fmt_id_2, fmt_id_3, fmt_id_4_1, fmt_id_4_2, fmt_id_4_3, fmt_id_4_4, fmt_id_4_5, fmt_id_4_6, fmt_id_4_7, fmt_id_4_8, fmt_offset = struct.unpack("IHHBBBBBBBBI", data[start:start + 20])

  #print "U", fmt_id_1, fmt_id_2, fmt_id_3, fmt_id_4_1, fmt_id_4_2, fmt_id_4_3, fmt_id_4_4, fmt_id_4_5, fmt_id_4_6, fmt_id_4_7, fmt_id_4_8, 

  fmt_id_4 = fmt_id_4_1 * 256 + fmt_id_4_2
  fmt_id_5 = 0
  for fmt_id_4_part in fmt_id_4_3, fmt_id_4_4, fmt_id_4_5, fmt_id_4_6, fmt_id_4_7, fmt_id_4_8:
    fmt_id_5 = fmt_id_5 * 256 + fmt_id_4_part 

  # chr(fmt_id_1) + chr(fmt_id_2) + chr(fmt_id_3) + chr(fmt_id_4) + chr(fmt_id_5) + chr(fmt_id_6) + chr(fmt_id_7) + chr(fmt_id_8) + chr(fmt_id_9) + chr(fmt_id_10) + chr(fmt_id_11) + chr(fmt_id_12) + chr(fmt_id_13) + chr(fmt_id_14) + chr(fmt_id_15) + chr(fmt_id_16)

  fmt_id = "{%08X-%04X-%04X-%04X-%012X}" % (fmt_id_1, fmt_id_2, fmt_id_3, fmt_id_4, fmt_id_5)
  # fmt_offset

  print fmt_id
  section_position_by_id[fmt_id] = fmt_offset

#print "format and offset", fmt_id_1, fmt_id_2, fmt_id_3, fmt_id_4, fmt_offset


def hexout(header, data):
  i = -100
  if len(data) < 30 or not sys.stdout.isatty():
    print header, ":",
  else:
    i = 0

    print header
    print "====="
    print "  ",


  for c in data:
    print "%02X" % ord(c), 
    i = i + 1
    if i == 10 and sys.stdout.isatty():
      i = 0
      print
      print "  ",

  s = ""
  for c in data:
    if ord(c) < 32 or ord(c) > 127:
      s = s + "."
    else:
      s = s + c

  print " str: ", s

  print

  if len(data) >= 30 and sys.stdout.isatty():
    print "===== end %s" % header

def stringout(header, data):
  print header, ":", 
  for c in data:
    c_ord = ord(c)
    if c_ord >= 32 and c_ord < 128:
      sys.stdout.write(c)
    else:
      sys.stdout.write(".") # % c_ord,

  print


for fmt_id, fmt_offset in section_position_by_id.items():
  print "SECTION", fmt_id # , fmt_offset

  section = data[fmt_offset:]

  section_size, = struct.unpack("I", section[:4])
  assert(section_size >= 4)
  num_properties, = struct.unpack("I", section[4:8])

  section_raw = section[:section_size]

  section_2 = section[section_size:]
  #file("section2", "wb").write(section_2)

  codepage = None
  
  property_id_sorted = []
  properties = {}
  property_descriptions = {}
  
  print "#properties %d" % num_properties
  #hexout("SECTION", section_raw)
  #num_properties = 9
  
  for i in range(num_properties):
    property_id, property_offset = struct.unpack("II", section_raw[(i + 1) * 8: (i + 1) * 8 + 8]) 
    #print "property %d offset %d" % (property_id, property_offset)
    assert(property_offset >= 16)
    # offset: from the start of the section to that
  
    property_id_sorted.append(property_id)
    properties[property_id] = property_offset
  
  property_id_sorted.sort()
 
  for property_id in property_id_sorted:
    property_offset = properties[property_id]

    #section_raw = section_raw[8:]

    property_raw = section_raw[property_offset:]

    #hexout("property_raw of %d (partial)" % property_id, property_raw[:50])

    property_type, = struct.unpack("I", property_raw[:4])

    if property_id == 1: # code page indicator
      assert(property_type == VT.VT_I2)

      codepage, = struct.unpack("H", property_raw[4:6])
      print "code page indicator", codepage
 
  for property_id in property_id_sorted:
    property_offset = properties[property_id] 
  
    #section_raw = section_raw[8:]
  
    property_raw = section_raw[property_offset:]
  
    property_type, = struct.unpack("I", property_raw[:4])
    #sys.stdout.write("property %s (type %s), " % (property_id, VT.to_string(property_type)))

    if property_id in property_descriptions:
      name = property_descriptions[property_id]
    else:
      name = "%d" % property_id

    sys.stdout.write("property %s (type %s), " % (name, VT.to_string(property_type)))
  
    #print "property_type", property_type
  
    if property_id == 1: # code page indicator 
      print "(cached)", codepage
      pass # already done
  
    elif property_id == 0: # property displaystring dictionary
      # untested
      # assert(False)
      is_double = codepage == None or codepage == 1200 or codepage == 1202

      print "is_double", is_double
      #hexout("descr", property_raw[4:100])
 
      cnt_descriptions, = struct.unpack("I", property_raw[0:4]) # misuses type field for len
      print "#descriptions", cnt_descriptions
 
      start_offset = 0
      property_c = property_raw[4:]

      for j in range(cnt_descriptions): 
        description_property_id, description_byte_count = struct.unpack("II", property_c[start_offset:start_offset+8])

        if is_double:
          description_byte_count = description_byte_count * 2

        property_descriptions[description_property_id] = property_c[start_offset+8:start_offset+8+description_byte_count]
        #print "description[%d:%d]" % (start_offset, start_offset + description_byte_count), is_double, description_property_id, description_byte_count, property_c[start_offset+8:start_offset+8+description_byte_count]
	print "description[%d]=" % (description_property_id), description_byte_count, property_c[start_offset+8:start_offset+8+description_byte_count]
        start_offset = start_offset + 8 + description_byte_count

        if is_double:
          # DWORD-align

          if (start_offset % 4) != 0: 
            start_offset = start_offset + 4 - (start_offset % 4)
          pass
     
  
    else:
      property_size = guess_property_size(property_raw)
  
      if property_size == None:
        hexout("property content (partial)", property_raw[4:100])
      else:
        hexout("property content (complete)", property_raw[4:4 + property_size])

