Skip to content

Instantly share code, notes, and snippets.

@insekticid
Created July 24, 2020 10:17
Show Gist options
  • Select an option

  • Save insekticid/452fc7e5619d3a2dff2d9c6d924c50b9 to your computer and use it in GitHub Desktop.

Select an option

Save insekticid/452fc7e5619d3a2dff2d9c6d924c50b9 to your computer and use it in GitHub Desktop.

Revisions

  1. insekticid created this gist Jul 24, 2020.
    76 changes: 76 additions & 0 deletions sitemaper.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,76 @@
    import sitemapModule from 'sitemap';
    import axios from 'axios';

    const { buildSitemapIndex, createSitemapsAndIndex } = sitemapModule;

    // Shared axios client for talking to Elasticsearch over HTTP. Host and port
    // are read from the ELASTIC_SERVER / ELASTIC_PORT environment variables, and
    // axios is configured with a JSON response type.
    const elasticBaseUrl = `http://${process.env.ELASTIC_SERVER}:${process.env.ELASTIC_PORT}`;

    const elasticsearch = axios.create({
      responseType: 'json',
      baseURL: elasticBaseUrl,
    });

    // Pass-through interceptors: each hook returns its argument unchanged.
    // They are the place to add request/response logging while debugging.
    const passThrough = (value) => value;

    elasticsearch.interceptors.request.use(passThrough);
    elasticsearch.interceptors.response.use(passThrough);

    /**
     * Build the search request body used to aggregate on a single field.
     *
     * The body asks for no hits (`size: 0`) and one terms aggregation named
     * `group_by_state` over `field`, with the bucket count capped at 500000.
     *
     * @param {string} field - Field name handed to the terms aggregation.
     * @returns {object} A fresh request body object on every call.
     */
    const elasticData = (field) => {
      const termsAggregation = { terms: { field, size: 500000 } };

      return {
        _source: { includes: ['name', 'url'] },
        size: 0,
        aggs: { group_by_state: termsAggregation },
      };
    };

    // Accumulates the sitemap entries ({ url } objects) pushed by prepareUrl;
    // getData passes the whole list to createIndex after both queries have run.
    const sitemapPaths = [];

    /**
     * Hand the collected URLs to `createSitemapsAndIndex` together with this
     * script's fixed settings (target folder, hostname from SITEMAP_URL, ...).
     *
     * @param {Array<object>} urls - Sitemap entries to pass through as `urls`.
     * @returns {*} Whatever `createSitemapsAndIndex` returns.
     */
    const createIndex = (urls) => {
      const options = {
        urls,
        lastmod: new Date().toISOString(),
        targetFolder: 'sitemap-search',
        hostname: process.env.SITEMAP_URL,
        cacheTime: 600,
        sitemapName: 'sitemap',
        sitemapSize: 10000, // number of urls to allow in each sitemap
        gzip: true, // whether to gzip the files
      };

      return createSitemapsAndIndex(options);
    };

    /**
     * Append one sitemap entry to `sitemapPaths` for the given bucket.
     *
     * The entry's `url` is SITEMAP_KEY_URL (from the environment) followed by
     * the bucket's `key`.
     *
     * @param {{key: *}} bucket - Aggregation bucket; only `key` is read.
     * @returns {void}
     */
    const prepareUrl = (bucket) => {
      const url = process.env.SITEMAP_KEY_URL + bucket.key;
      sitemapPaths.push({ url });
    };

    /**
     * Query Elasticsearch for the terms buckets of `name.keyword` and then
     * `name.analyzed`, register every bucket through `prepareUrl`, and finally
     * hand the accumulated `sitemapPaths` to `createIndex`.
     *
     * The two searches run one after the other, so keyword buckets are always
     * registered before analyzed ones.
     *
     * @returns {Promise<void>} Resolves once `createIndex` has completed.
     * @throws {Error} If a search response carries no `group_by_state.buckets`
     *   array. Request and `createIndex` failures propagate unchanged.
     */
    const getData = async () => {
      // Each entry: [field to aggregate on, label printed in the progress log].
      const queries = [
        ['name.keyword', 'done keyword'],
        ['name.analyzed', 'done analyzed'],
      ];

      for (const [field, label] of queries) {
        const response = await elasticsearch.post(
          `${process.env.ELASTIC_INDEX}/_search`,
          elasticData(field)
        );

        // Fail with a readable message instead of an opaque TypeError when the
        // response does not have the expected aggregation shape.
        const aggregations = response && response.data && response.data.aggregations;
        const grouped = aggregations && aggregations.group_by_state;
        if (!grouped || !Array.isArray(grouped.buckets)) {
          throw new Error(`Elasticsearch response for "${field}" has no group_by_state buckets`);
        }

        // prepareUrl is synchronous and works purely by side effect, so a plain
        // loop is enough; no Promise.all / map is needed.
        for (const bucket of grouped.buckets) {
          prepareUrl(bucket);
        }
        console.log(label, grouped.buckets.length, sitemapPaths.length);
      }

      await createIndex(sitemapPaths);
      console.log('done all', sitemapPaths.length);
    };

    // Script entry point. The promise is not left floating: a failure is
    // logged and turned into a non-zero exit code rather than surfacing as an
    // unhandled rejection.
    getData().catch((error) => {
      console.error('sitemap generation failed', error);
      process.exitCode = 1;
    });