#!/usr/bin/env python
# Module metadata: description, author, version and date strings for this tool.
__description__ = 'Analyze OLE files (Compound Binary Files)'
__author__ = 'Didier Stevens'
__version__ = '0.0.56'
__date__ = '2020/12/04'
"""
Source code put in public domain by Didier Stevens, no Copyright
https://DidierStevens.com
Use at your own risk
# http://www.wordarticles.com/Articles/Formats/StreamCompression.php
History:
2014/08/21: start
2014/08/22: added ZIP support
2014/08/23: added stdin support
2014/08/25: added options extract and info
2014/08/26: bugfix pipe
2014/09/01: added * as selection option
2014/09/15: exception handling for import OleFileIO_PL
2014/11/12: added plugins
2014/11/15: continued plugins
2014/11/21: added pluginoptions
2014/12/14: 0.0.3: Added YARA support; added decoders
2014/12/19: 0.0.4: fixed bug when file was not OLE
2014/12/24: 0.0.5: fixed storage bug and added MacrosContainsOnlyAttributes
2014/12/25: 0.0.6: added support for ZIP containers with OLE files, like .docx
2014/12/26: added printing of filename OLE files inside ZIP
2014/12/31: suppressed printing of filename when selecting
2015/02/09: 0.0.7: added handling of .docx, ... inside ZIP file; Added option yarastrings
2015/02/10: 0.0.8: added YARACompile
2015/02/19: 0.0.9: added option -q
2015/02/23: 0.0.10: handle errors in compressed macros
2015/02/24: continue
2015/03/02: 0.0.11: added option -M
2015/03/05: added support for .xml files
2015/03/11: 0.0.12: added code pages identification
2015/03/13: Fixed oElement.firstChild.nodeValue UnicodeEncodeError bug
2015/03/19: 0.0.13: added option -c
2015/03/24: added man page
2015/03/25: added option --decompress
2015/03/26: changed --raw option
2015/04/10: 0.0.14: fixed bug SearchAndDecompressSub
2015/05/08: 0.0.15: added direct support for ActiveMime files
2015/05/13: 0.0.16: changed HeuristicDecompress with findall; renamed MacrosContainsOnlyAttributes to MacrosContainsOnlyAttributesOrOptions
2015/06/08: 0.0.17: Fix HexAsciiDump
2015/06/14: Added exit code
2015/07/26: 0.0.18: Added option --vbadecompresscorrupt
2015/09/12: added option --cut
2015/09/13: changed exit code to 2 when macros detected
2015/09/16: Rename old OleFileIO_PL to new olefile so that local copy of the module can be used
2015/09/17: added help for pip install olefile
2015/09/22: fixed os.path.isfile(filename) bug
2015/10/30: 0.0.19 added option -E and environment variable OLEDUMP_EXTRA; added MD5 to option -i
2015/11/08: 0.0.20 added man text for option -E; changed OptionsEnvironmentVariables so option takes precedence over environment variable
2015/11/09: continued -E
2015/11/12: 0.0.21 added dslsimulationdb
2015/11/17: added support for :-number in --cut option
2015/12/16: 0.0.22 some enhancements for --raw option
2015/12/22: 0.0.23 updated cut syntax
2016/06/08: 0.0.24 option -v works with option -E
2016/08/01: 0.0.25 added Magic to info
2016/10/16: decompressed.replace('\r\n', '\n'); added plugindir and decoderdir options by Remi Pointel
2016/12/11: 0.0.26 added indicator O for OLE10Native
2017/03/04: 0.0.27 added externals for YARA rules
2017/07/20: 0.0.28 added # to option -y
2017/10/14: 0.0.29 added options -t, -S; and \x00Attribut bugfix provided by Charles Smutz
2017/11/01: 0.0.30 replaced hexdump and hexasciidump with cDump
2017/11/04: added return codes -1 and 1
2017/12/13: 0.0.31 corrected man
2017/12/16: 0.0.32 added indexQuiet to cPlugin
2018/02/18: 0.0.33 added option -j
2018/05/06: 0.0.34 -s is more userfriendly
2018/07/01: 0.0.35 rename option --json to --jsonoutput
2018/07/01: fix for json output with OOXML files
2018/07/07: 0.0.36: updated to version 2 of jsonoutput
2018/08/04: 0.0.37 added option --vbadecompressskipattributes
2018/08/13: 0.0.38 changed output processing of plugins like plugin_ppt: if a plugin returns a string, that string is dumped with option -q
2018/11/25: 0.0.39 started VBA/dir parsing for modules, to display with option -i
2018/11/26: continued VBA/dir parsing for modules; added c and s selection; added selection warning; added option -A and option -T; added yara #x#
2018/11/30: added yara #r#; updated ParseCutTerm
2018/12/18: 0.0.40 added option --password
2019/02/16: 0.0.41 updated Cut
2019/03/12: 0.0.42 added warning for ZIP container without ole file; fixed selection warning
2019/07/21: 0.0.43 added option --storages, %CLSID%, stream UNICODE name
2019/11/04: fixed plugin path when compiled with pyinstaller
2019/11/05: Python 3 support
2019/11/24: changed HeuristicDecompress; Python 3 fixes
2019/12/18: 0.0.44 added option -f
2020/01/06: 0.0.45 added verbose YARACompile
2020/03/06: 0.0.46 added %CLSIDDESC% and Root Entry to --storages
2020/03/08: 0.0.47 updated man
2020/03/09: 0.0.48 Python 3 bug fix
2020/03/28: 0.0.49 -s (selection) is no longer case sensitive with letter prefixes
2020/05/21: 0.0.50 fixed typos man page
2020/07/18: 0.0.51 small fix ASCII dump: 0x7F is not printable
2020/07/25: 0.0.52 added support for pyzipper
2020/08/??: 0.0.53 added ole plugin class
2020/08/28: added support to select streams by name
2020/08/30: fixed & updated raw VBA decompression
2020/09/05: 0.0.54 added extra info parameter %MODULEINFO%
2020/09/29: bugfix for Python 2 (mro)
2020/11/08: 0.0.55: added support for -v with --jsonoutput; added ! indicator
2020/12/04: 0.0.56 Python 3 Fixes
Todo:
add support for pyzipper
"""
import optparse
import sys
import math
import os
import binascii
import xml.dom.minidom
import zlib
import hashlib
import textwrap
import re
import string
import codecs
import json
import struct
# Pick the in-memory text buffer (StringIO) and binary buffer (DataIO) classes
# in a single pass, based on the major version of the running interpreter.
if sys.version_info[0] < 3:
    # Python 2: cStringIO.StringIO is bound to both names.
    from cStringIO import StringIO
    from cStringIO import StringIO as DataIO
else:
    # Python 3: text and bytes need separate buffer classes.
    from io import StringIO
    from io import BytesIO as DataIO
# dslsimulationdb is an optional module: when it cannot be imported, the name
# is bound to None (presumably so later code can test for its absence).
try:
import dslsimulationdb
except ImportError:
dslsimulationdb = None
# yara is optional as well; when the import fails nothing is bound, so the
# name yara stays undefined for the rest of the module.
try:
import yara
except ImportError:
pass
# olefile is the one mandatory third-party dependency: when it is missing,
# print installation help and terminate with exit code -1.
try:
    import olefile
except ImportError:
    print('This program requires module olefile.\nhttp://www.decalage.info/python/olefileio\n')
    # Compare version tuples, not version strings: as text, '2.7.10' sorts
    # before '2.7.9', which wrongly suppressed the pip hint on later 2.7.x
    # releases.
    if sys.version_info >= (2, 7, 9):
        print("You can use PIP to install olefile like this: pip install olefile\npip is located in Python's Scripts folder.\n")
    # Use sys.exit: the bare exit() builtin is injected by the site module and
    # can be absent (python -S, frozen executables).
    sys.exit(-1)
# KNOWN_CLSIDS is taken from oletools when that package can be imported;
# otherwise an empty dict is used in its place.
try:
from oletools.common.clsid import KNOWN_CLSIDS
except ImportError:
KNOWN_CLSIDS = {}
# Prefer pyzipper when it can be imported, falling back to the standard zipfile
# module; either way the module is bound to the name zipfile.
try:
import pyzipper as zipfile
except ImportError:
import zipfile
# Module-level constants.
# presumably the number of bytes shown per line in hex/ASCII dumps -- confirm against the dump code
dumplinelength = 16
# presumably the default password tried on password-protected ZIP containers -- confirm against the ZIP handling code
MALWARE_PASSWORD = 'infected'
# First four bytes (D0 CF 11 E0) of the OLE / Compound File Binary header signature
OLEFILE_MAGIC = b'\xD0\xCF\x11\xE0'
# ASCII marker 'ActiveMime'; presumably matched against file content to recognise ActiveMime files -- confirm
ACTIVEMIME_MAGIC = b'ActiveMime'
# Bytes regex character class: one tab (0x09) or one printable ASCII byte (0x20-0x7E)
REGEX_STANDARD = b'[\x09\x20-\x7E]'
def PrintManual():
manual = '''
Manual:
oledump is a tool to analyze OLE files (officially: Compound File Binary Format, CFBF). Many file formats are in fact OLE files, like Microsoft Office files, MSI files, ... Even the new Microsoft Office Open XML (OOXML) format uses OLE files for VBA macros.
oledump can analyze OLE files directly, or indirectly when they are contained in some file format (like .docm, .xml, ...).
oledump uses 2 modules that are not part of Python 2: olefile (http://www.decalage.info/python/olefileio) and YARA.
You need to install the olefile module for this program to work.
The YARA module is not mandatory if you don't use YARA rules.
Running oledump with a spreadsheet (.xls binary format) lists al the streams found in the OLE file (an OLE file is a virtual filesystem with folders and files, known as streams), like this:
C:\Demo>oledump.py Book1.xls
1: 4096 '\\x05DocumentSummaryInformation'
2: 4096 '\\x05SummaryInformation'
3: 4096 'Workbook'
The first column is an index assigned to the stream by oledump. This index is used to select streams. The second column is the size of the stream (number of bytes inside the stream), and the last column is the name of the stream.
To select a stream for analysis, use option -s with the index (number of the stream, or a for all