Skip to content

Instantly share code, notes, and snippets.

@JodiTheTigger
Created January 14, 2014 01:52
Show Gist options
  • Select an option

  • Save JodiTheTigger/8411686 to your computer and use it in GitHub Desktop.

Select an option

Save JodiTheTigger/8411686 to your computer and use it in GitHub Desktop.

Revisions

  1. JodiTheTigger created this gist Jan 14, 2014.
    219 changes: 219 additions & 0 deletions gdbBacktraceToJson.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,219 @@
    #!/usr/bin/python2
    #
    # gdbBacktraceToJson.py. Parses gdb backtraces into json.
    # Copyright (C) 2014 Richard Maxwell <jodi.the.tigger@gmail.com>
    #
    # This program is free software: you can redistribute it and/or modify
    # it under the terms of the GNU General Public License as published by
    # the Free Software Foundation, either version 3 of the License, or
    # (at your option) any later version.
    #
    # This program is distributed in the hope that it will be useful,
    # but WITHOUT ANY WARRANTY; without even the implied warranty of
    # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    # GNU General Public License for more details.
    #
    # You should have received a copy of the GNU General Public License
    # along with this program. If not, see <http://www.gnu.org/licenses/>
    #
    # Description:
    # gdbBacktraceToJson.py parses the output of the command "thread apply all bt full" and turns it into a json array.
    # Useful for automating the analysis of coredump files generated when an application crashes. Use the tokenised json
    # to search a database of crashes for similar crashes or make a nice web interface for viewing back traces. You could
    # make a backtrace diff tool. It's much easier to use and write tools using a standard data format.

    # Usage:
    # python2 gdbBacktraceToJson.py <backtrace file>
    # It will parse the file and output the backtrace as a json array to stdout.
    # You can get the backtrace file from a core dump file by running gdb in the following way:
    # gdb [app with debug symbols] [core file] --eval-command "thread apply all bt full" --eval-command "quit" > mybacktrace.txt

    import sys
    import os
    import re
    import json
    import string
    import datetime

    def parseLocals(lines):
        """Parse the local-variable lines of one backtrace frame into a dict.

        Every ``name = value`` line becomes a string entry (surrounding
        whitespace and a trailing comma are stripped from the value).  A line
        that opens more braces than it closes starts a block; the lines up to
        the one that brings the brace depth back to zero are parsed
        recursively and stored as a nested dict under that name.  A value whose
        braces are balanced on its own line (``p = {x = 1, y = 2}``) is kept as
        a plain string.  Lines without `` = `` are ignored.

        If a block is never closed, an error is written to stderr and the
        remaining lines are parsed as the body of that block.

        lines: list of strings, one per line, without line endings.
        Returns a dict mapping names to strings or nested dicts.
        """
        result = {}
        total = len(lines)
        index = 0

        while index < total:
            line = lines[index]

            # Non-greedy name: split on the FIRST " = " so that values which
            # themselves contain " = " (e.g. '"a = b"') stay intact.
            assignment = re.match(r'\s*(.*?)\s=\s(.*)', line)

            # Net number of braces this line leaves open.  Counting both kinds
            # on every line keeps one-line "{...}" values from being mistaken
            # for the start of a multi-line block.
            depth = line.count('{') - line.count('}')

            if depth <= 0:
                if assignment:
                    result[assignment.group(1)] = assignment.group(2).strip().strip(',')
                index += 1
                continue

            # Walk forward to the line that closes the block opened here.
            closing = index + 1
            while closing < total:
                depth += lines[closing].count('{') - lines[closing].count('}')
                if depth < 1:
                    break
                closing += 1

            if depth >= 1:
                # Ran out of lines; closing == total, so the slice below simply
                # takes everything that is left.  Report on stderr so the JSON
                # written to stdout stays valid.
                sys.stderr.write("*ERROR* Coreline: parseLocals: Can't find closing brace.\n")

            if assignment:
                # deal with nested braces using recursion.
                result[assignment.group(1)] = parseLocals(lines[index + 1:closing])

            # Resume after the closing line (an unnamed block is skipped so the
            # locals that follow it are not lost).
            index = closing + 1

        return result

    def coreLinesToObject(coreLine):
        """Turn the text of one backtrace frame into a dict.

        coreLine: the frame header, in the form
            ``#frame [0x12345678 in] function (args) [at|from file|library]``,
            optionally followed (after newlines) by a wrapped ``at``/``from``
            line and the frame's local-variable lines.

        Returns a dict with the keys 'frame', 'address', 'function', 'source'
        and 'arguments' ('address' and 'source' are None when absent), plus
        'locals' when non-blank lines follow the header.  If the header does
        not match, an error is written to stderr and an empty dict is returned.
        """
        coreObject = {}

        # line format is:
        # #frame [0x12345678] in (<function>) [from|at] [library|file]
        # (?:....) means don't capture that group (?:)
        matchResult = re.match( r'\#(\d+)\s+(?:(0x(?:[0-9A-F])*) in |)(\S+) (\((?:.|\n|\r)*\))(?: (?:at|from) (.*)|$)', coreLine, re.I|re.M)

        if not matchResult:
            # Report on stderr so the JSON written to stdout stays valid.
            sys.stderr.write("*ERROR* Coreline mismatch: %s\n" % coreLine)
            return coreObject

        # matches are:
        # 1: frame
        # 2: address or no match
        # 3: function name
        # 4: argument list (including braces)
        # 5: source / library
        coreObject['frame'] = matchResult.group(1)
        coreObject['address'] = matchResult.group(2)
        coreObject['function'] = matchResult.group(3)
        coreObject['source'] = matchResult.group(5)

        # arguments can have the @ symbol in them 'this@entry=0x12345678'
        coreObject['arguments'] = dict(
            re.findall(r'([\w@]+)=(\w+|<optimized out>)', matchResult.group(4), re.I|re.M))

        # Everything after the header line: possibly a wrapped source line,
        # then the stack variables.  Blank lines carry no information.
        extraLines = [text for text in coreLine.split('\n')[1:] if text.strip()]
        if extraLines:
            if coreObject['source'] is None:
                # (.*\S) keeps the whole location; ending the pattern with a
                # separate \w would cut off its last character.
                sourceMatch = re.match(r'\s+(?:at|from) (.*\S)', extraLines[0], re.I)
                if sourceMatch:
                    coreObject['source'] = sourceMatch.group(1)
                    # Only drop the first line when it really was the source
                    # line; otherwise it is the first local variable.
                    extraLines = extraLines[1:]

            coreObject['locals'] = parseLocals(extraLines)

        return coreObject

    def textToList(filePath, fileText):
        """Build the dict describing a whole backtrace dump.

        filePath: path of the backtrace file; stored as 'filePath', and its
            base name without extension as 'fileName'.
        fileText: iterable of the file's lines (as returned by readlines()).

        Returns a dict with 'filePath', 'fileName', 'jsonCreationTimeUtc' and
        'threads' (one dict per "Thread ..." header, each with a 'stackTrace'
        list of frame dicts, a 'threadId' and, when the header could be
        parsed, a 'threadNumber'), plus 'commandLine' and 'coreReason' when
        the corresponding lines are present.
        """
        core = {}
        core['filePath'] = filePath
        core['fileName'] = os.path.splitext(os.path.basename(filePath))[0]
        core['threads'] = []
        core['jsonCreationTimeUtc'] = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")

        # Text of the frame currently being collected (header + following lines).
        pendingFrame = ""

        for line in fileText:
            # Search for core dump global meta
            # (command line and termination reason)
            #   Core was generated by `.....'.
            #   Program terminated with ...
            # ---------------------------------------
            if 'commandLine' not in core and line.startswith("Core was generated by"):
                quoted = re.match(r"Core was generated by `(.*)'\.", line)
                # Fall back to the fixed slice when the quoting is unexpected.
                core['commandLine'] = quoted.group(1) if quoted else line[23:-3]

            if 'coreReason' not in core and line.startswith("Program terminated with"):
                core['coreReason'] = line.rstrip('\r\n')

            # Parse core dumps per thread.
            # ---------------------------------------
            if line.startswith("Thread"):
                # make sure we purge the last frame of the previous stack trace.
                if pendingFrame:
                    core['threads'][-1]['stackTrace'].append(coreLinesToObject(pendingFrame))
                    pendingFrame = ""

                thread = {'stackTrace': []}
                core['threads'].append(thread)

                # Header without its line ending and trailing colon.
                threadId = line.rstrip().rstrip(':')

                # Accepts "Thread N (LWP M)" and "Thread N (Thread 0x... (LWP M))".
                threadResult = re.match(
                    r'Thread\s+(\d+)\s+\((?:Thread\s+0x[0-9a-f]+\s+\()?LWP\s+(\d+)\)',
                    threadId, re.I)
                if threadResult:
                    thread['threadId'] = threadResult.group(2)
                    thread['threadNumber'] = threadResult.group(1)
                else:
                    # Report on stderr so the JSON written to stdout stays valid.
                    sys.stderr.write("*ERROR* ThreadId mismatch: %s\n" % threadId)
                    thread['threadId'] = threadId
                continue

            if not core['threads']:
                # Nothing before the first thread header belongs to a frame.
                continue

            if line.startswith('#'):
                # A new frame starts: store the one collected so far.
                if pendingFrame:
                    core['threads'][-1]['stackTrace'].append(coreLinesToObject(pendingFrame))
                pendingFrame = line
            elif not line:
                if pendingFrame:
                    core['threads'][-1]['stackTrace'].append(coreLinesToObject(pendingFrame))
                    pendingFrame = ""
            elif pendingFrame:
                # Continuation of the current frame (wrapped source, locals).
                pendingFrame += line

        # The input usually ends right after the last frame; without this the
        # final frame of the final thread would be dropped.
        if pendingFrame:
            core['threads'][-1]['stackTrace'].append(coreLinesToObject(pendingFrame))

        return core


    # the filename is the name of the textual output of gdb's "thread apply all bt"
    def process(argList):
        """Read the backtrace file named by argList[1] and print it as JSON.

        argList: argument list in sys.argv layout; element 1 is the path of
            the file holding the textual gdb backtrace.

        The file is parsed with textToList() and the result is printed to
        stdout as JSON with sorted keys and a 4-space indent.
        """
        fileName = argList[1]

        # "with" closes the file even if reading fails.
        with open(fileName, 'r') as coreDump:
            lines = coreDump.readlines()

        coreDumpObject = textToList(fileName, lines)

        # right, dump the json
        # (single parenthesised argument: valid as both a Python 2 print
        # statement and a Python 3 print() call)
        print(json.dumps(coreDumpObject, sort_keys=True, indent=4))

    # decode the first passed filename
    # Only run when executed as a script, so importing the module to reuse
    # its functions does not trigger a parse of sys.argv.
    if __name__ == '__main__':
        if len(sys.argv) < 2:
            sys.stderr.write("Usage: python2 gdbBacktraceToJson.py <backtrace file>\n")
            sys.exit(1)
        process(sys.argv)