import re


class ElasticsearchHelpers(object):
    """
    Sanitize a query string for Elasticsearch / Lucene.

    Based on:
    http://stackoverflow.com/questions/16205341/symbols-in-query-string-for-elasticsearch
    https://gist.github.com/bcoe/6505434
    """

    # Characters that get a backslash prefix:
    #   \ + - & | ! ( ) { } [ ] ^ ~ * ? : /
    # http://lucene.apache.org/core/old_versioned_docs/versions/2_9_1/queryparsersyntax.html#Escaping Special Characters
    _SPECIAL_CHARS_RE = re.compile(r'([\\+\-&|!(){}\[\]^~*?:/])')

    # Upper-case words Lucene interprets as boolean operators.
    _OPERATORS = ('AND', 'OR', 'NOT')

    @staticmethod
    def sanitize_string(text):
        r"""Return ``text`` with Lucene query syntax neutralised.

        Three passes are applied, in this order:

        1. Every special character (``\ + - & | ! ( ) { } [ ] ^ ~ * ? : /``)
           is prefixed with a backslash.
        2. Each stand-alone, upper-case ``AND``, ``OR`` or ``NOT`` has every
           letter prefixed with a backslash (``AND`` -> ``\A\N\D``). Any
           whitespace around the word is replaced by exactly one space on
           each side, so a bare ``"AND"`` becomes ``" \A\N\D "``.
        3. If the text contains an odd number of double quotes, the last one
           is escaped as ``\"``.

        :param text: the raw query string.
        :return: the sanitized query string.
        """
        # 1. Escape special characters.
        text = ElasticsearchHelpers._SPECIAL_CHARS_RE.sub(r'\\\1', text)

        # 2. AND, OR and NOT are used by lucene as logical operators. We need
        #    to escape them. The words are processed one after another (not
        #    with a single alternation) so that the whitespace inserted around
        #    one escaped operator can still be absorbed by the next one.
        for word in ElasticsearchHelpers._OPERATORS:
            escaped_word = ' ' + ''.join('\\' + letter for letter in word) + ' '
            # The replacement is supplied through a callable so it is inserted
            # verbatim: as a template string, "\A", "\N", ... are rejected by
            # Python 3.7+ with "re.error: bad escape".
            text = re.sub(
                r'\s*\b{}\b\s*'.format(word),
                lambda _match, replacement=escaped_word: replacement,
                text
            )

        # 3. Escape odd quotes. Only the very last quote is touched, which
        #    also holds when the text spans several lines.
        if text.count('"') % 2 == 1:
            head, quote, tail = text.rpartition('"')
            text = head + '\\' + quote + tail
        return text