Skip to content

Instantly share code, notes, and snippets.

@AndrewO
Created April 16, 2014 20:38
Show Gist options
  • Select an option

  • Save AndrewO/10930544 to your computer and use it in GitHub Desktop.

Select an option

Save AndrewO/10930544 to your computer and use it in GitHub Desktop.

Revisions

  1. AndrewO renamed this gist Apr 16, 2014. 1 changed file with 0 additions and 0 deletions.
  2. AndrewO created this gist Apr 16, 2014.
    211 changes: 211 additions & 0 deletions multi_index_elastic_search
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,211 @@
    # We have a couple of indices that have been created independently.
    # Now we want to search across them, with highlighting (and ideally
    # suggesters as well) without breaking existing services.
    #
    # The mappings do not have consistent field names, but they have fields that can
    # be semantically mapped into "title" or "body" (and some others) fields.
    #
    # From the size of the data, I'm told that creating a new purpose-built index is
    # out of the question--even something that would duplicate data in another field
    # in the same index is a stretch.
    #
    # Which is what led me to look at `index_name` and `copy_to`.

    PUT /restaurants
    # `fast_food` type: `name` and `description` correspond 1:1 to the shared
    # `title` and `body` index names.
    PUT /restaurants/fast_food/_mapping
    {
      "fast_food": {
        "properties": {
          "name": {
            "type": "string",
            "index_name": "title"
          },
          "description": {
            "type": "string",
            "index_name": "body"
          }
        }
      }
    }
    # Two sample fast-food documents.
    PUT /restaurants/fast_food/1
    {
      "name": "Trough King",
      "description": "French fries injected directly into your heart!"
    }
    PUT /restaurants/fast_food/2
    {
      "name": "Charles Cheese Family Fun Fest",
      "description": "Lots and lots of cheese. Also, robotic rats wearing funny hats."
    }
    # The `fancy` type has two fields that should both be treated as `body`:
    # `summary` acts as the "main" `body` field (through `index_name`), while
    # `letter_from_chef` is a "secondary" one (through `copy_to`).
    PUT /restaurants/fancy/_mapping
    {
      "fancy": {
        "properties": {
          "name": {
            "type": "string",
            "index_name": "title"
          },
          "summary": {
            "type": "string",
            "index_name": "body"
          },
          "letter_from_chef": {
            "type": "string",
            "copy_to": "body"
          }
        }
      }
    }
    # One sample fancy-restaurant document.
    PUT /restaurants/fancy/1
    {
      "name": "Chez Fromage",
      "summary": "French food masterfully cooked by some guy in a funny hat. Also, great cheese.",
      "letter_from_chef": "Bonjour! Enjoy my cheese!"
    }

    # A second index, present only so that multi-index searching can be tested.
    PUT /hotels
    PUT /hotels/hotel/_mapping
    {
      "hotel": {
        "properties": {
          "hotelName": {
            "type": "string",
            "index_name": "title"
          },
          "hotelDescription": {
            "type": "string",
            "index_name": "body"
          },
          "hotelRating": {
            "type": "integer"
          }
        }
      }
    }
    # One sample hotel document.
    PUT /hotels/hotel/1
    {
      "hotelName": "Sleep Already",
      "hotelDescription": "Okay, I guess. Room smelled like cheese, but no rats, so can't complain, right?",
      "hotelRating": 2
    }

    # Sanity check: make sure the mappings of both indices look right...
    GET /restaurants,hotels/_mapping
    # Observation: every field is returned under its mapping name rather than its index name (which makes sense).
    GET /restaurants,hotels/_search
    {
      "query": {
        "match_all": {}
      }
    }

    # Regression check: the pre-existing hotel search, which addresses fields by
    # their mapping names, should keep working.
    GET /hotels/_search
    {
      "query": {
        "filtered": {
          "query": {
            "multi_match": {
              "query": "cheese",
              "fields": ["hotelName^2", "hotelDescription"]
            }
          },
          "filter": {
            "range": {
              "hotelRating": {
                "gte": 2
              }
            }
          }
        }
      },
      "highlight": {
        "fields": {
          "hotelName": {},
          "hotelDescription": {}
        }
      }
    }
    # Good. As expected, searching by mapping names still works.

    # Search both indices at once through the shared `title`/`body` names,
    # asking for highlights on those same names.
    GET /restaurants,hotels/_search
    {
      "query": {
        "multi_match": {
          "query": "cheese",
          "fields": ["title^2", "body"]
        }
      },
      "highlight": {
        "fields": {
          "title": {},
          "body": {
            "number_of_fragments": 3
          }
        }
      }
    }
    # Pretty good: it queries our fields by "index_name" and highlights them.
    # However, restaurants/fancy/1's body highlight only contains a match from `summary`.

    # Is the copy_to perhaps not working? Probe with a term ("bonjour") that
    # occurs only in `letter_from_chef`.
    GET /restaurants,hotels/_search
    {
      "query": {
        "multi_match": {
          "query": "bonjour",
          "fields": ["title^2", "body"]
        }
      },
      "highlight": {
        "fields": {
          "title": {},
          "body": {
            "number_of_fragments": 3
          }
        }
      }
    }
    # Nope, it matched on the `letter_from_chef` field, but no highlighting...

    # Is it possible to restrict the returned source fields using the index_name values?
    GET /restaurants,hotels/_search
    {
      "query": {
        "multi_match": {
          "query": "cheese",
          "fields": ["title^2", "body"]
        }
      },
      "highlight": {
        "fields": {
          "title": {},
          "body": {
            "number_of_fragments": 3
          }
        }
      },
      "_source": ["title", "body"]
    }
    # No. Neither the "copy_to" nor the "index_name" fields show up.

    # So highlighting doesn't behave the way I expected here. What about suggesters?
    # Suggesters are able to use copied fields.
    GET /restaurants,hotels/_search
    {
      "query": {
        "multi_match": {
          "query": "eat my ches",
          "fields": ["title^2", "body"]
        }
      },
      "highlight": {
        "fields": {
          "title": {},
          "body": {
            "number_of_fragments": 3
          }
        }
      },
      "suggest": {
        "text": "eat my ches",
        "my-suggestor": {
          "phrase": {
            "size": 3,
            "field": "body"
          }
        }
      }
    }
    # Interesting. The suggester works fine with a copy_to field. (Not shown: it works with index_name ones too. Try 'funn hat'.)