JodiTheTigger · January 14, 2014 01:52 · Jan 14, 2014
diff --git a/gdbBacktraceToJson.py b/gdbBacktraceToJson.py
@@ -0,0 +1,219 @@
+#!/usr/bin/python2
+#
+# gdbBacktraceToJson.py.  Parses gdb backtraces into json.
+# Copyright (C) 2014 Richard Maxwell <jodi.the.tigger@gmail.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>
+#
+# Description:
+# gdbBacktraceToJson.py parses the output of the command "thread apply all bt full" and turns it into a JSON object.
+# Useful for automating the analysis of coredump files generated when an application crashes. Use the tokenised JSON
+# to search a database of crashes for similar crashes or make a nice web interface for viewing backtraces. You could
+# make a backtrace diff tool. It's much easier to use and write tools using a standard data format.
+
+# Usage:
+# python2 gdbBacktraceToJson.py <backtrace file>
+# It will parse the file and output the backtrace as a JSON object to stdout.
+# You can get the backtrace file from a core dump file by running gdb in the following way:
+# gdb [app with debug symbols] [core file] --eval-command "thread apply all bt full" --eval-command "quit" > mybacktrace.txt
+
+import sys
+import os
+import re
+import json
+import string
+import datetime
+
+def parseLocals(lines):
+    """Parse the locals section of one "bt full" frame into a dictionary.
+
+    Each "name = value" line becomes a string entry. A value that opens a
+    brace block spanning several lines (a pretty-printed struct or array)
+    becomes a nested dictionary, built by recursing on the lines between the
+    opening line and the line that closes the block.
+
+    Args:
+        lines: list of text lines, one per line of gdb output, without the
+            frame header line.
+
+    Returns:
+        dict mapping variable names to value strings or nested dicts. If a
+        brace block is never closed, an error is written to stderr and the
+        entries parsed so far are returned.
+    """
+    result = {}
+
+    index = 0
+    while index < len(lines):
+        line = lines[index]
+
+        # Non-greedy name so the split happens at the first " = "; values
+        # such as {a = 1, b = 2} or "x = y" contain further " = " sequences.
+        assignment = re.match(r'\s*(.*?)\s=\s(.*)', line)
+
+        # Net number of braces left open by this line. A line whose braces
+        # balance (for example arr = {1, 2, 3}) is a complete value and
+        # must not swallow the lines that follow it.
+        # NOTE: braces inside quoted string values are counted as well.
+        depth = line.count('{') - line.count('}')
+
+        if depth <= 0:
+            if assignment:
+                result[assignment.group(1)] = assignment.group(2).strip().strip(',')
+            index = index + 1
+            continue
+
+        # Find the line on which the block opened here is closed again.
+        closingIndex = index + 1
+        while closingIndex < len(lines):
+            depth = depth + lines[closingIndex].count('{') - lines[closingIndex].count('}')
+            if depth < 1:
+                break
+            closingIndex = closingIndex + 1
+
+        if closingIndex >= len(lines):
+            # Diagnostics go to stderr so they cannot end up inside the
+            # json document that is printed on stdout.
+            sys.stderr.write("*ERROR* Coreline: parseLocals: Can't find closing brace.\n")
+            return result
+
+        # Deal with nested braces using recursion. A block without a
+        # "name = " prefix has nothing to be stored under and is skipped.
+        if assignment:
+            result[assignment.group(1)] = parseLocals(lines[index + 1:closingIndex])
+
+        index = closingIndex + 1
+
+    return result
+
+def coreLinesToObject(coreLine):
+    """Convert the text of one backtrace frame into a dictionary.
+
+    Args:
+        coreLine: the frame header line (starting with '#') followed by any
+            continuation and "bt full" locals lines, newline separated.
+
+    Returns:
+        dict with the keys 'frame', 'address' (None when the header has no
+        address), 'function', 'source' (None when not found) and
+        'arguments', plus 'locals' when non-blank lines follow the header.
+        An empty dict is returned, and an error written to stderr, when the
+        header cannot be parsed.
+    """
+    coreObject = {}
+
+    # line format is:
+    # #frame [0x12345678] in (<function>) [from|at] [library|file]
+    # (?:....) means don't capture that group (?:)
+    # NOTE: the argument-list group is greedy and may span lines.
+    matchResult = re.match( r'\#(\d+)\s+(?:(0x(?:[0-9A-F])*) in |)(\S+) (\((?:.|\n|\r)*\))(?: (?:at|from) (.*)|$)', coreLine, re.I|re.M)
+
+    if not matchResult:
+        # Diagnostics go to stderr so they cannot end up inside the json
+        # document that is printed on stdout.
+        sys.stderr.write("*ERROR* Coreline mismatch: %s\n" % coreLine.rstrip())
+        return coreObject
+
+    # matches are:
+    # 1: frame
+    # 2: address or no match
+    # 3: function name
+    # 4: argument list (including braces)
+    # 5: source / library
+    coreObject['frame'] = matchResult.group(1)
+    coreObject['address'] = matchResult.group(2)
+    coreObject['function'] = matchResult.group(3)
+    coreObject['source'] = matchResult.group(5)
+    coreObject['arguments'] = {}
+
+    # right, parse in the argument list
+    # arguments can have the @ symbol in them 'this@entry=0x12345678'
+    argSearch = re.findall( r'([\w@]+)=(\w+|<optimized out>)', matchResult.group(4), re.I|re.M)
+    for (argKey, argValue) in argSearch:
+        coreObject['arguments'][argKey] = argValue
+
+    # bt full stuff will come here. Stack variables and source files too.
+    # Blank lines (including the empty tail left by a trailing newline)
+    # carry no information and are dropped.
+    extraLines = [extra for extra in coreLine.split('\n')[1:] if extra.strip()]
+    if extraLines:
+        if coreObject['source'] is None:
+            # The "at file:line" part may sit on its own line below the
+            # header. (?!=\s) keeps a local that happens to be called
+            # "at" or "from" from being mistaken for it, and (.*\S)
+            # captures the whole location up to its last visible character.
+            sourceMatch = re.match( r'\s+(?:at|from)\s+(?!=\s)(.*\S)', extraLines[0], re.I)
+
+            if sourceMatch:
+                coreObject['source'] = sourceMatch.group(1)
+                # Only a line consumed as the source is removed; any other
+                # first line is already a local variable.
+                extraLines = extraLines[1:]
+
+        # parse the locals.
+        coreObject['locals'] = parseLocals(extraLines)
+
+    return coreObject
+
+def textToList(filePath, fileText):
+    """Turn the lines of a gdb "thread apply all bt full" dump into a dict.
+
+    Args:
+        filePath: path of the backtrace file; stored in the result and used
+            to derive 'fileName' (base name without extension).
+        fileText: iterable of the file's lines, each normally still ending
+            in its line terminator (as returned by readlines()).
+
+    Returns:
+        dict with 'filePath', 'fileName', 'jsonCreationTimeUtc' and
+        'threads' (a list of dicts holding 'threadId', 'threadNumber' when
+        the header could be parsed, and 'stackTrace'), plus 'commandLine'
+        and 'coreReason' when the corresponding lines are present.
+    """
+    core = {}
+    core['filePath'] = filePath
+    core['fileName'] = os.path.splitext(os.path.basename(filePath))[0]
+    core['threads'] = []
+    core['jsonCreationTimeUtc'] = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
+
+    # Raw lines of the frame currently being collected. A list (mutated in
+    # place) lets the helper below clear it without rebinding the name.
+    pendingFrame = []
+
+    def flushPendingFrame():
+        # Parse the collected frame text and attach it to the newest thread.
+        if pendingFrame and core['threads']:
+            core['threads'][-1]['stackTrace'].append(coreLinesToObject(''.join(pendingFrame)))
+        del pendingFrame[:]
+
+    for line in fileText:
+        # Search for core dump global meta
+        # (command line and termination reason)
+        # Core was generated by `.....'.
+        # Program terminated with ...
+        # ---------------------------------------
+        if 'commandLine' not in core and line.startswith("Core was generated by"):
+            # Keep what is between the quotes; fall back to fixed offsets
+            # when the line does not have the expected `...'. shape.
+            commandMatch = re.match(r"Core was generated by `(.*)'\.", line)
+            if commandMatch:
+                core['commandLine'] = commandMatch.group(1)
+            else:
+                core['commandLine'] = line[23:-3]
+
+        if 'coreReason' not in core and line.startswith("Program terminated with"):
+            # Remove the line ending, whichever form it takes.
+            core['coreReason'] = line.rstrip('\r\n')
+
+        # Parse core dumps per thread.
+        # ---------------------------------------
+        if line.startswith("Thread"):
+            # right, make sure we purge the last line of the last stack trace please.
+            flushPendingFrame()
+
+            threadId = line.rstrip().rstrip(':')
+            thread = {'stackTrace': []}
+            core['threads'].append(thread)
+
+            # Accepts both "Thread 1 (LWP 42)" and the longer
+            # "Thread 1 (Thread 0x7f... (LWP 42))" header form.
+            threadResult = re.match( r'Thread\s+(\d+)\s+\((?:Thread\s+\S+\s+\()?LWP\s+(\d+)\)', threadId, re.I)
+            if threadResult:
+                thread['threadId'] = threadResult.group(2)
+                thread['threadNumber'] = threadResult.group(1)
+            else:
+                # Diagnostics go to stderr so they cannot end up inside
+                # the json document that is printed on stdout.
+                sys.stderr.write("*ERROR* ThreadId mismatch: %s\n" % threadId)
+                thread['threadId'] = threadId
+
+        elif not core['threads']:
+            # Nothing before the first thread header belongs to a frame.
+            continue
+
+        elif not line.strip():
+            # A blank line ends the current frame. Lines read from a file
+            # still carry their terminator, so test the stripped text.
+            flushPendingFrame()
+
+        elif line[0] == '#':
+            # A new frame header: finish the previous frame, start this one.
+            flushPendingFrame()
+            pendingFrame.append(line)
+
+        elif pendingFrame:
+            # Continuation or locals line of the frame being collected.
+            pendingFrame.append(line)
+
+    # The input may end without a blank line; do not lose the last frame.
+    flushPendingFrame()
+
+    return core
+
+
+# the filename is the name of the textual output of gdb's "thread apply all bt"
+def process(argList):
+    """Parse the backtrace file named by argList[1] and print it as json.
+
+    Args:
+        argList: argument vector in sys.argv layout; element 1 is the path
+            of the file holding gdb's textual backtrace output.
+
+    The parsed result is written to stdout as indented json with sorted
+    keys.
+    """
+    fileName = argList[1]
+
+    # The with-block closes the file even if reading it raises.
+    with open(fileName, 'r') as coreDump:
+        lines = coreDump.readlines()
+
+    coreDumpObject = textToList(fileName, lines)
+
+    # right, dump the json
+    print(json.dumps(coreDumpObject, sort_keys=True, indent=4))
+# Decode the first passed filename. Guarded so that importing this module
+# (for example to reuse the parsing functions) does not read sys.argv or
+# print anything.
+if __name__ == '__main__':
+    if len(sys.argv) < 2:
+        # Without a file name there is nothing to parse; explain instead
+        # of failing with an IndexError.
+        sys.stderr.write("Usage: python2 gdbBacktraceToJson.py <backtrace file>\n")
+        sys.exit(1)
+
+    process(sys.argv)
+
No results found