//------------START------------------------------------------------------------------------------------------------
/**
 * Entry point of the remote page function: dispatches on the request label.
 *
 * `context.customData.site` / `context.customData.typeDeCrawl` take precedence
 * over the `site` / `typeDeCrawl` arguments when they are truthy.
 *
 * @param {object} context - Crawler context (request, customData, log, enqueueRequest, waitFor).
 * @param {Function} $ - jQuery-like selector function for the current page.
 * @param {string} site - Marketplace name written into the produced records.
 * @param {string} typeDeCrawl - 'simple', 'profond' or 'produit'.
 * @returns {Promise<object|object[]|undefined>} The handler result, or undefined for an unknown label.
 */
async function start(context, $, site, typeDeCrawl) {
  context.log.info('remote file => start');
  const customData = context.customData;
  const effectiveSite = (customData && customData.site) ? customData.site : site;
  const effectiveType = (customData && customData.typeDeCrawl) ? customData.typeDeCrawl : typeDeCrawl;

  switch (context.request.userData.label) {
    case 'home':
      return case_home(context, effectiveSite, effectiveType);
    case 'search':
      return case_search(context, $, effectiveSite, effectiveType);
    case 'product':
      return case_product(context, $, effectiveSite, effectiveType);
    default:
      return undefined;
  }
}

//------------HOME------------------------------------------------------------------------------------------------
/**
 * Seeds the request queue from the comma-separated `context.customData.initialList`.
 *
 * - 'simple' / 'profond': every entry is a keyword, enqueued as a 'search' request.
 * - 'produit': every entry is a product URL, enqueued as a 'product' request.
 *
 * Entries that are empty after trimming (e.g. produced by a trailing comma) are skipped.
 *
 * @param {object} context - Crawler context.
 * @param {string} site - Marketplace name attached to product requests.
 * @param {string} typeDeCrawl - Crawl mode.
 * @returns {Promise<object|undefined>} A `{ debugInfo }` object on misconfiguration, otherwise undefined.
 */
async function case_home(context, site, typeDeCrawl) {
  const initialList = (context.customData && context.customData.initialList)
    ? context.customData.initialList
    : '';
  if (initialList === '') {
    return { debugInfo: 'case_home: initialList empty' };
  }

  const entries = initialList.split(',').filter((entry) => entry.trim() !== '');

  switch (typeDeCrawl) {
    case 'simple':
    case 'profond':
      await Promise.all(entries.map((keyword) => {
        const encodedKeyword = encodeURI(keyword.trim().replace(/(\s{1,})/g, '+'));
        const searchUrl = 'https://www.cdiscount.com/search/10/' + encodedKeyword + '.html';
        // c01_keyword keeps the entry exactly as it was written in initialList.
        return enqueueLabel(context, 'search', searchUrl, { c01_keyword: keyword });
      }));
      return undefined;
    case 'produit':
      await Promise.all(entries.map((url) => (
        enqueueLabel(context, 'product', url.trim(), { p03_marketplaceName: site })
      )));
      return undefined;
    default:
      return { debugInfo: 'case_home: bug in typeDeCrawl' };
  }
}

//------------SEARCH----------------------------------------------------------------------------------------------
/**
 * Scrapes a search-result page.
 *
 * For each product row in the top 15 positions: in 'profond' mode the product page is
 * enqueued, otherwise a record is pushed to the result. Headline offers (`.skwOffer`)
 * are always pushed to the result.
 *
 * @param {object} context - Crawler context.
 * @param {Function} $ - jQuery-like selector function (also provides `$.extend`).
 * @param {string} site - Marketplace name written into the records.
 * @param {string} typeDeCrawl - Crawl mode.
 * @returns {Promise<object[]>} The collected records.
 */
async function case_search(context, $, site, typeDeCrawl) {
  const rowSelector = ".lpMain .jsPrdBlocContainer form";
  const maxPosition = 15;
  const baseData = context.request.userData.interceptRequestData;
  const result = [];
  const pendingEnqueues = [];
  let sponsored = 0;
  let position = 0;

  // A jQuery object is always truthy, so the wait has to test `.length`.
  // A timeout is not fatal: the page may simply have no product rows.
  try {
    await context.waitFor(() => $(rowSelector).length > 0, { timeoutMillis: 10000 });
  } catch (err) {
    context.log.info('case_search: no product row found before timeout: ' + (err && err.message));
  }

  const productCountRaw = $(".c-heading__title > span").text();
  const numberOfResults = parseInt(productCountRaw.replace(/[^0-9]/g, ''), 10);

  $(rowSelector).each(function () {
    const row = $(this);
    const obj = $.extend({}, baseData);
    obj.c02_marketplaceName = site;
    obj.c03_NumberofResults = numberOfResults;
    obj.c06_itemURL = row.find('.prdtBILDetails a:eq(0)').attr('href');
    obj.c04_asin = reg(obj.c06_itemURL);
    if (obj.c04_asin.length === 0) {
      // Not a product row. Plain `return` continues; `return false` would stop `.each`.
      return;
    }

    obj.c26_sponsoredBrand = false;
    if (row.find(".c-sponsoredMentions").length > 0) {
      sponsored++;
      obj.c23_sponsoredProduct = true;
      obj.c07_position = sponsored;
    } else {
      // Skipped rows never increment `position`, so it already is the organic rank.
      position++;
      obj.c23_sponsoredProduct = false;
      obj.c07_position = position;
    }

    obj.c05_itemTitle = row.find('.prdtBILA').text().trim();
    obj.c14_priceRaw = row.find('.prdtBILPrice .price:eq(0)').text().trim();
    const nbrOfCom = row.find(".prdtBILStar").text().trim();
    obj.c08_numberofcomments = nbrOfCom ? nbrOfCom.replace(/[^0-9]/g, '') : 0;

    addBooleansCdiscount($, this, obj);

    if (obj.c07_position > maxPosition) {
      return;
    }
    if (typeDeCrawl === 'profond') {
      pendingEnqueues.push(
        enqueueLabel(context, 'product', obj.c06_itemURL, { p03_marketplaceName: site })
      );
    } else {
      result.push($.extend({}, obj));
    }
  });

  // Headline products
  $(".skwOffer").each(function (i) {
    const offer = $(this);
    const obj = $.extend({}, baseData);
    obj.c02_marketplaceName = site;
    obj.c23_sponsoredProduct = false;
    obj.c26_sponsoredBrand = true;
    obj.c06_itemURL = offer.find('a[href]:eq(0)').attr('href');
    obj.c07_position = i + 1;
    obj.c04_asin = reg(obj.c06_itemURL);
    obj.c05_itemTitle = offer.find('.skwOfferTitle').text().trim();
    const rateText = offer.find(".skwRateContent").text().trim();
    obj.c08_numberofcomments = rateText
      ? tr(rateText.replace(",", "").replace(/[\(\)]/g, ""))
      : 0;

    addBooleansCdiscount($, this, obj);

    if (obj.c07_position <= maxPosition) {
      result.push($.extend({}, obj));
    }
  });

  await Promise.all(pendingEnqueues);
  return result;
}

//------------PRODUCT-----------------------------------------------------------------------------------
/**
 * Scrapes a product page once an `h1` element is present.
 *
 * Polls for the `h1`; if it has not appeared when the timeout elapses, the record is
 * returned with a `debugInfo` message that includes the captcha check result.
 *
 * @param {object} context - Crawler context.
 * @param {Function} $ - jQuery-like selector function.
 * @param {string} site - Unused here; kept for a uniform handler signature.
 * @param {string} typeDeCrawl - Unused here; kept for a uniform handler signature.
 * @param {{timeoutMillis?: number, pollIntervalMillis?: number}} [options] - Polling
 *   settings; defaults to a 10000 ms timeout and a 1000 ms interval.
 * @returns {Promise<object>} The product record (the request's interceptRequestData, enriched).
 */
async function case_product(context, $, site, typeDeCrawl, options) {
  const settings = options || {};
  const timeoutMillis = (typeof settings.timeoutMillis === 'number') ? settings.timeoutMillis : 10000;
  const pollIntervalMillis = (typeof settings.pollIntervalMillis === 'number') ? settings.pollIntervalMillis : 1000;

  const obj = context.request.userData.interceptRequestData
    ? context.request.userData.interceptRequestData
    : {};
  const startedAt = Date.now();

  // The wait must be awaited: re-scheduling through setTimeout would let this
  // function resolve with undefined before the page is ready.
  while ($("h1").length === 0) {
    if (Date.now() - startedAt > timeoutMillis) {
      obj.debugInfo = 'case_product: timeout after ' + (timeoutMillis / 1000)
        + ' seconds - check h1#title ? or is captcha true or false:' + checkCaptcha($);
      return obj;
    }
    await new Promise((resolve) => setTimeout(resolve, pollIntervalMillis));
  }

  obj.p16_ImageURL = $(".fpImg img:eq(0)").attr('src');
  obj.p10_sellerTechnicalBrand = $("#ProductSheetAccordion table tr:contains('Marque')")
    .text()
    .replace(/\s/g, '')
    .replace('Marque', '');
  obj.p08_description1 = truncateWithEllipsis(tr($(".c-productHighlights p").text()), 1900);
  obj.p04_code = reg(context.request.url);
  obj.p05_itemTitle = tr($("h1").text());
  obj.p06_numberofcomments = Number(
    tr($(".c-stars-rating__label:contains('avis'):eq(0)").text().replace(/[^0-9]/g, ''))
  );
  obj.p09_description2 = undefined;
  obj.p02_sellerOfficial = resolveSeller($);

  // Stays NaN when the page shows no rating.
  const starText = tr($("span.c-stars-rating__note:eq(0)").text().replace('/ 5', '').replace(',', '.'));
  obj.p07_star = starText ? Number(starText) : NaN;

  obj.p14_reviews = extractReviews($);
  obj.p15_QuestionReponses = extractQuestions($);
  return obj;
}

/** Resolves the seller name, trying each known page layout in turn. */
function resolveSeller($) {
  const official = tr($(".c-sellerBy a").text());
  if (official) {
    return official;
  }
  if ($(".outOfStock").length > 0) {
    return 'out of stock';
  }
  const soldBy = $("#fpSellBy");
  const aVolonte = soldBy.filter(function () {
    return /Cdiscount\sà\svolonté/.test($(this).text());
  }).text();
  if (aVolonte.length > 0) {
    return 'Cdiscount à volonté';
  }
  return tr(soldBy.text());
}

/** Collects the customer reviews (`.infoCli`) of the product page. */
function extractReviews($) {
  const reviews = [];
  $(".infoCli").each(function () {
    const node = $(this);
    const helpful = tr(node.find(".jsYesRat:contains('Oui') span").text());
    reviews.push({
      p05_title: tr(node.find('.title').text()),
      p02_note: getStarFromClasses(node),
      p04_texte: truncateWithEllipsis(tr(node.find('> p').text()), 2900),
      p01_helpfulReview: (helpful && /\([0-9]*\)/.test(helpful)) ? helpful.replace(/[\(\)]/g, '') : 'NA',
      p06_verified: node.find(".achatCert").length > 0,
    });
  });
  return reviews;
}

/** Collects the question/answer pairs (`.fpFAQQuestion`) of the product page. */
function extractQuestions($) {
  const questions = [];
  // Queried from the document: there is no meaningful `this` element here.
  $(".fpFAQQuestion").each(function () {
    const node = $(this);
    questions.push({
      p01_question: tr(node.find('div:eq(0) .fpFAQQuestionText').text()),
      p02_reponse: tr(node.find('div:eq(0) .fpAnswerContent p:eq(0)').text()),
    });
  });
  return questions;
}

//-----------------------------------function-----------------------------------------------------
/**
 * Enqueues a request carrying `label` and `interceptRequestData` in its userData.
 *
 * @returns {*} Whatever `context.enqueueRequest` returns, so callers can await it.
 */
function enqueueLabel(context, label, url, interceptRequestData) {
  return context.enqueueRequest({
    userData: {
      label: label,
      interceptRequestData: interceptRequestData,
    },
    url: url,
  });
}

/**
 * Collapses runs of two or more whitespace characters (and literal "\n" sequences)
 * into one space and trims. Non-string values are returned unchanged.
 */
function tr(text) {
  return (typeof text === 'string')
    ? text.replace(/(\s\s+|\\n)/gi, ' ').trim()
    : text;
}

/**
 * Cuts a string to `maxLength` characters and appends '...' only when something
 * was actually removed. Non-string values are returned unchanged.
 */
function truncateWithEllipsis(text, maxLength) {
  if (typeof text !== 'string' || text.length <= maxLength) {
    return text;
  }
  return text.slice(0, maxLength) + '...';
}

/**
 * Captcha alert: true when a `div` contains one of the known captcha phrases.
 */
function checkCaptcha($) {
  const captchaPhrases = [
    "make sure you're not a robot.",
    "ne suis pas un robot",
    "n'êtes pas un robot",
    "caractères que vous voyez",
    "the characters you see",
    "les caractères affichés",
  ];
  return captchaPhrases.some((phrase) => $('div:contains("' + phrase + '")').length !== 0);
}

/**
 * Adds the Cdiscount boolean flags to `interceptRequestData` (mutated in place)
 * based on markers found inside `thisObject`, and returns it.
 */
function addBooleansCdiscount($, thisObject, interceptRequestData) {
  const row = $(thisObject);
  interceptRequestData.c31_isFirstChoice = row.find(".prdtBILLabel").length > 0;
  interceptRequestData.c32_isCouponAvailable = row.find(".prdtBILSpecial > div").length > 0;
  return interceptRequestData;
}

/**
 * Reads the rating from the `stN1`..`stN5` class of the `.ratingPosition` element.
 *
 * @returns {number|false} The highest matching rating, or false when none matches.
 */
function getStarFromClasses(jQueryElement) {
  const rating = jQueryElement.find('.ratingPosition');
  for (let i = 5; i >= 1; i--) {
    if (rating.hasClass('stN' + i)) {
      return i;
    }
  }
  return false;
}

/**
 * Extracts the product code from a URL: the "*.html" path segment and/or the
 * "idOffre=..." parameter, concatenated.
 *
 * @param {*} url - URL to parse; anything that is not a string yields ''.
 * @returns {string} The extracted code, or '' when nothing matches.
 */
function reg(url) {
  if (typeof url !== 'string') {
    return '';
  }
  const matches = url.match(/[^\/]*.html|\??idOffre=[^&#]*/g);
  return matches ? matches.join('') : '';
}