Analyzing a build system on Windows with the Process Monitor and Groovy

It’s often a challenge to understand how a big software stack is built. To help me in this task on a new project, I did the following:

  • Start recording the system events using the Process Monitor. This includes which processes are started, by which parent process, and which files they access
  • Run the build
  • When the build is finished, export the results from the Process Monitor in XML
  • Parse the XML
  • Analyse the data and print the results

You will find below a Groovy script that does the parsing and the analysis. For a C/C++ program, the output could look like:

build.exe
  codegenerator.exe ...
  codegenerator.exe ...
  gmake.exe ...
    compiler.exe ...
    compiler.exe ...
    linker.exe
  gmake.exe ...
    compiler.exe ...
    compiler.exe ...
    compiler.exe ...
    compiler.exe ...
    linker.exe ...
  cp.exe ...

The more complex the project is, the more interesting the output is, of course.

Here are some possible command lines:

groovy procmon-parsing.groovy --help
groovy procmon-parsing.groovy --xml Logfile.XML
groovy procmon-parsing.groovy --xml Logfile.XML -p ProcessIndex,ProcessName,CommandLine -r build.exe

And here is the code. It’s my first Groovy program, and kind of a quick hack.

import javax.xml.parsers.SAXParserFactory
import org.xml.sax.helpers.DefaultHandler
import org.xml.sax.*

/**
 * One process record read from the XML export.
 *
 * The property names are capitalised on purpose: ProcessHandler assigns a
 * value to the property whose name equals the XML child element name.
 * Values are stored as BigInteger when the element text is an integer,
 * otherwise as String.
 */
class Process {
    // Identity of the process and of its parent
    def ProcessIndex
    def ProcessId
    def ParentProcessId
    def ParentProcessIndex

    // Lifetime
    def CreateTime
    def FinishTime

    // What was executed
    def ProcessName
    def ImagePath
    def CommandLine
}

/**
 * One event record read from the XML export.
 *
 * As with Process, each property is named exactly like the XML child
 * element it is filled from.
 */
class Event {
    // Which process produced the event
    def ProcessIndex
    def Process_Name
    def PID

    // What happened and when
    def Time_of_Day
    def Operation
    def Path
    def Result
    def Detail
}

/**
 * Top-level SAX handler. It ignores everything except the opening tags of
 * "process" and "event" elements; for those it installs a ProcessHandler
 * that fills a fresh record and later switches the reader back to this
 * handler.
 */
class RootHandler extends DefaultHandler {
    /** Reader whose content handler is swapped while a record is being read. */
    XMLReader reader
    /** Completed records, grouped in lists keyed by element name. */
    def objectsByType = [:]

    RootHandler(XMLReader reader) {
        this.reader = reader
    }

    void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
        def record
        switch (name) {
            case "process":
                record = new Process()
                break
            case "event":
                record = new Event()
                break
            default:
                // Any other element is of no interest at this level
                return
        }
        reader.setContentHandler(new ProcessHandler(reader, this, name, record))
    }
}

/**
 * SAX handler that fills a single record object (a Process or an Event)
 * from the child elements of one "process" or "event" element.
 *
 * Each child element whose name matches a property of the record is stored
 * in that property. When the enclosing element closes, the record is
 * appended to the parent's objectsByType list and the reader is handed
 * back to the parent handler.
 */
class ProcessHandler extends DefaultHandler {
    XMLReader reader
    RootHandler parent
    /** Record being filled. */
    Object object
    /** Text collected since the most recent start tag. */
    StringBuilder content
    /** Name of the element this handler was created for. */
    String elementName

    /**
     * @param reader       reader to hand back to the parent when done
     * @param parent       handler that collects the finished records
     * @param elementName  name of the element being read ("process" or "event")
     * @param object       empty record to fill
     */
    ProcessHandler(XMLReader reader, RootHandler parent, String elementName, Object object) {
        this.reader = reader
        this.parent = parent
        this.content = new StringBuilder()
        this.elementName = elementName
        this.object = object
        // Make sure the result list for this element type exists
        if (!parent.objectsByType[elementName]) {
            parent.objectsByType[elementName] = []
        }
    }

    void characters(char[] ch, int start, int length) throws SAXException {
        // May be called several times per element, hence the accumulation
        content.append(ch, start, length)
    }

    void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
        // A new child element starts: discard text gathered so far
        content.setLength(0)
    }

    void endElement(String uri, String localName, String name) throws SAXException {
        if (name.equals(this.elementName)) {
            // The record is complete: store it and switch back to the parent handler
            parent.objectsByType[name].add(this.object)
            reader.setContentHandler(parent)
        }
        else if (this.object.metaClass.getMetaProperty(name)) {
            // Look the property up on the record's own class, so that Event
            // fields are filled as well as Process fields.
            String text = content.toString()
            // Store integers as BigInteger, everything else as the raw text
            def value = text.isBigInteger() ? text.toBigInteger() : text
            this.object.setProperty(name, value)
        }
    }
}

// File name of this script, shown in the usage banner
def scriptName = new File(getClass().protectionDomain.codeSource.location.path).name

def cli = new CliBuilder(
   usage: "$scriptName -x  [options]",
   header: '\nAvailable options (use -h for help):\n')
import org.apache.commons.cli.Option

// The opening brace must be on the same line as "with" so that the closure
// is passed as its argument.
cli.with {
   h(longOpt: 'help', 'Help', args: 0, required: false)
   p(longOpt: 'property', 'Properties to print out, comma-separated', args: Option.UNLIMITED_VALUES, valueSeparator: ',')
   // Not declared as required here: a required option would make the parser
   // reject "--help" on its own. Presence is checked explicitly below.
   x(longOpt: 'xml', 'XML file from ProcessMonitor', args: 1, required: false)
   r(longOpt: 'rootproc', 'Name of the process to start at', args: 1, required: false)
}
def options = cli.parse(args)
// parse() has already reported the problem when it returns nothing
if (!options) return
if (options.h) {
    cli.usage()
    System.exit(0)
}
if (!options.x) {
    println "error: Missing required option: x"
    cli.usage()
    return
}

// Script-level globals (no "def") so that the functions below can read them.
// options.ps is the list of all values given to -p; default to the process name.
propertiesToPrint = options.ps ?: ["ProcessName"]
// null means: start at every process whose parent is not in the log
rootProcessName = options.r ?: null

// Parse the exported XML; RootHandler collects the records in objectsByType
def reader = SAXParserFactory.newInstance().newSAXParser().XMLReader
def handler = new RootHandler(reader)
reader.setContentHandler(handler)

// Feed the parser the raw byte stream and close the file when parsing is
// finished, also when it fails.
new File(options.x).withInputStream { stream ->
    reader.parse(new InputSource(stream))
}

// Script-level global (no "def"): the print functions below look processes up here
processesByIndex = [:]
handler.objectsByType['process'].each { proc ->
    processesByIndex[proc.ProcessIndex] = proc
}

// Collect the ProcessIndex of every process a tree should start at:
// - with a root process name given: every process with that name
// - otherwise: every process whose parent is not part of the log
def rootProcesses = []
processesByIndex.values().each { proc ->
    def isRoot
    if (rootProcessName) {
        isRoot = proc.ProcessName.equals(rootProcessName)
    }
    else {
        isRoot = !processesByIndex[proc.ParentProcessIndex]
    }
    if (isRoot) {
        rootProcesses.add(proc.ProcessIndex)
    }
}

/**
 * Prints one line for a process: two spaces of indentation per depth level,
 * followed by the values of the properties listed in the script-level
 * propertiesToPrint, separated by tabs.
 *
 * @param process  record whose properties are printed
 * @param depth    nesting level, used for the indentation
 */
def printProcessLine(process, depth) {
    // Locals are declared with "def" so they do not end up as script globals
    def indent = "  " * depth
    def columns = propertiesToPrint.collect { process.getProperty(it) }
    println indent + columns.join("\t")
}

/**
 * Prints the process with the given index and, indented one level deeper,
 * every process that names it as its parent, recursively.
 *
 * @param index    ProcessIndex of the process to start at
 * @param depth    nesting level of that process
 * @param visited  indexes already printed during this traversal; callers
 *                 normally leave it out
 */
def printProcessTreeRecursively(index, depth, visited = new HashSet()) {
    // A process that is (directly or indirectly) its own parent would
    // otherwise make the recursion run forever.
    if (!visited.add(index)) {
        return
    }
    def current = processesByIndex[index]
    printProcessLine(current, depth)
    def children = processesByIndex.values().findAll { it.ParentProcessIndex == index }
    children.each { child ->
        printProcessTreeRecursively(child.ProcessIndex, depth + 1, visited)
    }
}

// Print one indented tree per selected root process
for (rootIndex in rootProcesses) {
    printProcessTreeRecursively(rootIndex, 0)
}

Leave a comment

This site uses Akismet to reduce spam. Learn how your comment data is processed.