kinnala · August 29, 2013 09:50 · Aug 29, 2013
diff --git a/gistfile1.py b/gistfile1.py
@@ -0,0 +1,68 @@
+import urllib2
+import json
+import re
+
class SoccerwayTeamMatches:
    """
    Fetch, parse and cache the match table of a single team from the
    Soccerway block endpoint (nr.soccerway.com/a/block_team_matches).

    Attributes:
        teamId: The team identifier, stored as a string so it can be
                spliced directly into the request URL.
        data:   Per-filter cache of parsed results, keyed by
                'all', 'home' and 'away'.
    """

    # The only values accepted for the ``matchType`` filter.
    MATCH_TYPES = ('all', 'away', 'home')

    # Compiled once for the whole class instead of on every call.
    # Note that each pattern requires at least one character between the
    # tag name and the closing '>', so bare '<tr>' / '<td>' do not match.
    _ROW_TAG = re.compile(r'<tr[^<]+?>')
    _CELL_TAG = re.compile(r'<td[^<]+?>')
    _ANY_TAG = re.compile(r'<[^<]+?>')

    def __init__(self, teamId):
        self.teamId = str(teamId)
        self.data = {'all': [], 'home': [], 'away': []}

    def parseJson(self, jsonStr):
        """
        Handles the parsing of the JSON object returned
        by Soccerway API (nr.soccerway.com/a/block_team_matches).

        Input: The JSON object returned by the API as a string.
        The HTML table is read from
        ['commands'][0]['parameters']['content'].

        Return: Two dimensional list of the match results; one inner
        list of stripped, tag-free cell texts per table row.

        Raises: ValueError if jsonStr is not valid JSON, and
        KeyError / IndexError if the payload does not contain the
        expected keys or a '<thead' marker.

        """
        jsonPy = json.loads(jsonStr)

        # Fetch the interesting part of inputted JSON obj
        content = jsonPy['commands'][0]['parameters']['content']

        # Keep only what lies between '<thead' and the first '</tbody>'
        beforeFooter = content.split('</tbody>', 1)[0]
        cleanContent = beforeFooter.split('<thead', 1)[1]

        # Split content by <tr> -tags (tr is shorthand for table row).
        # Piece 0 is the remainder of the <thead tag and piece 1 is the
        # header row, so match rows start at index 2. The final piece is
        # discarded as well.
        # NOTE(review): presumably the last row is not a match row;
        # confirm against the live markup.
        rows = self._ROW_TAG.split(cleanContent)[2:-1]

        return [self._parseRow(row) for row in rows]

    def _parseRow(self, rowHtml):
        """Return the stripped, tag-free cell texts of one table row."""
        # Split content by <td> -tags (table columns). Piece 0 precedes
        # the first cell; the last two cells are dropped.
        # NOTE(review): presumably the trailing cells carry no result
        # data; confirm against the live markup.
        cells = self._CELL_TAG.split(rowHtml)[1:-2]
        return [self._ANY_TAG.sub('', cell).strip() for cell in cells]

    def _buildUrl(self, matchType):
        """Return the request URL for this team and the given filter."""
        return "http://nr.soccerway.com/a/block_team_matches" \
               "?block_id=page_team_1_block_team_matches_5" \
               "&callback_params=%7B%22page%22%3A0%2C%22" \
               "bookmaker_urls%22%3A%5B%5D%2C%22block_service_id" \
               "%22%3A%22team_matches_block_teammatches%22%2C%22" \
               "team_id%22%3A" + self.teamId + "%2C%22competition_id" \
               "%22%3A0%2C%22filter%22%3A%22all%22%7D" \
               "&action=filterMatches&params=%7B%22" \
               "filter%22%3A%22" + matchType + "%22%7D"

    def getData(self, matchType):
        """
        Return the cleaned match data in 2D list.
        Does simple caching of the GET queries, that is,
        a non-empty result is not queried twice. An empty
        result is not treated as cached and is fetched again
        on the next call.

        Input: Type of the matches, must be one of
        the following strings: 'all', 'away' or 'home'.

        Output: 2d list of match results, or an empty list
        when matchType is not one of the accepted strings.

        """
        if matchType not in self.MATCH_TYPES:
            return []

        if not self.data[matchType]:
            # Imported here so the block does not depend on a new
            # file-level import.
            from contextlib import closing

            # closing() guarantees the HTTP response is released even
            # when read() raises; the urllib2 response object is not a
            # context manager by itself.
            with closing(urllib2.urlopen(self._buildUrl(matchType))) as response:
                jsonStr = response.read()
            self.data[matchType] = self.parseJson(jsonStr)

        return self.data[matchType]
No results found