Last active
July 13, 2021 21:13
-
-
Save dnewber/6bea2150df6cace60d260bbd72ea687a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import os
import random
import time

import requests
import yaml
# Config Variables
# The Zyte API key is taken from the ZYTE_KEY environment variable so the real
# secret never has to be written into this file. The original placeholder is
# kept as the fallback when the variable is unset or empty.
ZYTE_KEY = os.environ.get("ZYTE_KEY") or "ADD API KEY HERE"

# Both URL schemes go through the same proxy endpoint; the API key is the
# user name and the password is left empty.
proxies = {
    'http': f'http://{ZYTE_KEY}:@proxy.zyte.com:8011/',
    'https': f'http://{ZYTE_KEY}:@proxy.zyte.com:8011/',
}

# Product and store identifiers sent as GraphQL variables.
item_id = '310654540'
store_id = '117'

# Set to False to send the request directly instead of through the proxy.
use_proxies = True
| # Helper Functions | |
def random_user_agent():
    """Return one desktop browser User-Agent string picked at random."""
    candidates = (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36 OPR/43.0.2442.991",
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.63 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
        "Mozilla/5.0 (Windows NT 5.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586",
        "Mozilla/5.0 (Windows NT 6.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586",
        "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.10240",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8",
        "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36 Edge/15.15063",
    )
    picked = random.choice(candidates)
    return picked
def get_zyte_sessions(timeout=30):
    """Fetch and print the sessions reported by the proxy's ``sessions`` endpoint.

    Args:
        timeout: Seconds to wait for the endpoint before giving up, so a stalled
            proxy cannot hang the script indefinitely.

    Returns:
        The decoded JSON body. It is iterated with ``.items()`` and each pair is
        printed as a session id and an IP.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If no answer arrives within ``timeout`` seconds.
    """
    response = requests.get(proxies["http"] + "sessions", timeout=timeout)
    # Fail loudly on an error status instead of printing an error payload as
    # if it were a list of sessions.
    response.raise_for_status()
    zyte_sessions = response.json()
    for _id, ip in zyte_sessions.items():
        print(f"ID:{_id} IP: {ip}")
    return zyte_sessions
def create_zyte_session(timeout=30):
    """Create a new proxy session and return its id.

    Args:
        timeout: Seconds to wait for the ``sessions`` endpoint before giving up.

    Returns:
        The value of the ``X-Crawlera-Session`` response header.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If no answer arrives within ``timeout`` seconds.
        KeyError: If a successful response carries no ``X-Crawlera-Session``
            header.
    """
    response = requests.post(proxies["http"] + "sessions", timeout=timeout)
    # Surface authentication or quota failures as an HTTP error rather than as
    # an opaque KeyError on the missing header.
    response.raise_for_status()
    return response.headers["X-Crawlera-Session"]
def delete_zyte_session(session_id, timeout=30):
    """Delete the proxy session identified by ``session_id``.

    Args:
        session_id: Id previously obtained when the session was created.
        timeout: Seconds to wait for the endpoint before giving up.

    Returns:
        The raw ``requests.Response``; the status is left for the caller to
        inspect.

    Raises:
        requests.Timeout: If no answer arrives within ``timeout`` seconds.
    """
    response = requests.delete(
        proxies["http"] + f"sessions/{session_id}",
        timeout=timeout,
    )
    return response
def build_session(use_proxies=True):
    """Create the ``requests.Session`` used to send the product request.

    Args:
        use_proxies: When true, route traffic through the module-level
            ``proxies`` mapping.

    Returns:
        A new ``requests.Session``. It is left at its defaults when
        ``use_proxies`` is false.
    """
    session = requests.Session()
    if use_proxies:
        # Copy the mapping so changes made on this session never leak back
        # into the module-level configuration.
        session.proxies = dict(proxies)
        # TLS verification is switched off only on the proxy path, so direct
        # requests keep normal certificate checks.
        # NOTE(review): presumably needed because the proxy re-signs HTTPS
        # traffic -- confirm; pointing `verify` at the proxy's CA bundle would
        # be the safer option.
        session.verify = False
    return session
def build_request(item_id:str, store_id:str):
    """Build the unprepared POST request for the product GraphQL operation.

    Args:
        item_id: Value sent as the ``itemId`` GraphQL variable.
        store_id: Value sent as the ``storeId`` GraphQL variable.

    Returns:
        A ``requests.Request`` whose body is the JSON-encoded GraphQL payload.
        The caller is responsible for preparing and sending it.
    """
    url = "https://www.homedepot.com/product-information/model?opname=productClientOnlyProduct"

    request_headers = {
        'User-Agent': random_user_agent(),
        'X-Crawlera-Cookies': 'disable',
        'X-Crawlera-Profile': 'desktop',
        'X-Crawlera-Profile-Pass': 'User-Agent',
        'Accept-Encoding': 'gzip, deflate',
        'Accept': '*/*',
        'Connection': 'keep-alive',
        'Accept-Language': 'en-US,en;q=0.5',
        'Referer': 'https://www.homedepot.com/',
        'content-type': 'application/json',
        'X-Experience-Name': 'general-merchandise',
        'Origin': 'https://www.homedepot.com',
    }

    # The query is one long single-line string; it is split here into adjacent
    # literals (one per top-level product field) purely for readability.
    graphql_query = (
        "query productClientOnlyProduct($storeId: String, $zipCode: String, $itemId: String!, $dataSource: String, $loyaltyMembershipInput: LoyaltyMembershipInput, $skipSpecificationGroup: Boolean = false, $skipKPF: Boolean = false) { "
        "product(itemId: $itemId, dataSource: $dataSource, loyaltyMembershipInput: $loyaltyMembershipInput) { "
        "fulfillment(storeId: $storeId, zipCode: $zipCode) { backordered fulfillmentOptions { type fulfillable services { type locations { isAnchor inventory { isLimitedQuantity isOutOfStock isInStock quantity isUnavailable maxAllowedBopisQty minAllowedBopisQty __typename } type storeName locationId curbsidePickupFlag isBuyInStoreCheckNearBy distance state storePhone __typename } deliveryTimeline deliveryDates { startDate endDate __typename } deliveryCharge dynamicEta { hours minutes __typename } hasFreeShipping freeDeliveryThreshold totalCharge __typename } __typename } anchorStoreStatus anchorStoreStatusType backorderedShipDate bossExcludedShipStates excludedShipStates seasonStatusEligible onlineStoreStatus onlineStoreStatusType inStoreAssemblyEligible __typename } "
        "itemId dataSources "
        "identifiers { canonicalUrl brandName itemId modelNumber productLabel storeSkuNumber upcGtin13 specialOrderSku toolRentalSkuNumber rentalCategory rentalSubCategory upc isSuperSku parentId productType sampleId __typename } "
        "availabilityType { discontinued status type buyable __typename } "
        "details { description collection { url collectionId __typename } highlights __typename } "
        "media { images { url sizes type subType __typename } video { shortDescription thumbnail url videoStill link { text url __typename } title type videoId longDescription __typename } threeSixty { id url __typename } augmentedRealityLink { usdz image __typename } __typename } "
        "pricing(storeId: $storeId) { promotion { dates { end start __typename } type description { shortDesc longDesc __typename } dollarOff percentageOff savingsCenter savingsCenterPromos specialBuySavings specialBuyDollarOff specialBuyPercentageOff experienceTag subExperienceTag anchorItemList itemList reward { tiers { minPurchaseAmount minPurchaseQuantity rewardPercent rewardAmountPerOrder rewardAmountPerItem rewardFixedPrice __typename } __typename } __typename } value alternatePriceDisplay alternate { bulk { pricePerUnit thresholdQuantity value __typename } unit { caseUnitOfMeasure unitsOriginalPrice unitsPerCase value __typename } __typename } original mapAboveOriginalPrice message preferredPriceFlag specialBuy unitOfMeasure __typename } "
        "reviews { ratingsReviews { averageRating totalReviews __typename } __typename } "
        "seo { seoKeywords seoDescription __typename } "
        "specificationGroup @skip(if: $skipSpecificationGroup) { specifications { specName specValue __typename } specTitle __typename } "
        "taxonomy { breadCrumbs { label url browseUrl creativeIconUrl deselectUrl dimensionName refinementKey __typename } brandLinkUrl __typename } "
        "favoriteDetail { count __typename } "
        "info { hidePrice ecoRebate quantityLimit sskMin sskMax unitOfMeasureCoverage wasMaxPriceRange wasMinPriceRange fiscalYear productDepartment classNumber forProfessionalUseOnly globalCustomConfigurator { customButtonText customDescription customExperience customExperienceUrl customTitle __typename } movingCalculatorEligible label recommendationFlags { visualNavigation reqItems __typename } replacementOMSID hasSubscription minimumOrderQuantity projectCalculatorEligible subClassNumber calculatorType isLiveGoodsProduct protectionPlanSku hasServiceAddOns consultationType __typename } "
        "sizeAndFitDetail { attributeGroups { attributes { attributeName dimensions __typename } dimensionLabel productType __typename } __typename } "
        "keyProductFeatures @skip(if: $skipKPF) { keyProductFeaturesItems { features { name refinementId refinementUrl value __typename } __typename } __typename } "
        "seoDescription "
        "badges(storeId: $storeId) { color creativeImageUrl endDate label message name timerDuration timer { timeBombThreshold daysLeftThreshold dateDisplayThreshold message __typename } __typename } "
        "installServices { scheduleAMeasure __typename } "
        "subscription { defaultfrequency discountPercentage subscriptionEnabled __typename } "
        "dataSource __typename }}"
    )

    graphql_variables = {
        'skipSpecificationGroup': False,
        'skipKPF': False,
        'itemId': item_id,
        'storeId': store_id,
    }
    payload = {
        'operationName': 'productClientOnlyProduct',
        'variables': graphql_variables,
        'query': graphql_query,
    }
    body = json.dumps(payload)

    return requests.Request('POST', url, headers=request_headers, data=body)
# Make request
# Keeps sending the product request until it returns HTTP 200. In proxy mode
# every attempt runs on a freshly created Zyte session.
successful_request = False
session = build_session(use_proxies=use_proxies)
request = build_request(item_id=item_id, store_id=store_id)

while not successful_request:
    zyte_session_id = None
    if use_proxies:
        zyte_session_id = create_zyte_session()
        session.headers['X-Crawlera-Session'] = zyte_session_id
        get_zyte_sessions()  # for logging purposes

    print('Sending the request...')
    # Session.prepare_request() merges session-level state (including the
    # X-Crawlera-Session header set above) into the outgoing request. A bare
    # Request.prepare() ignores the session, so the header was never sent.
    response = session.send(session.prepare_request(request))
    print(response.status_code, response.text)

    if response.status_code == 200:
        successful_request = True
        print("Request Succeeded!")
    else:
        print("Request failed. Retrying...")
        if zyte_session_id is not None:
            # Release the session that just failed so repeated retries do not
            # pile up unused sessions on the proxy account.
            delete_zyte_session(zyte_session_id)
        time.sleep(2)  # wait before trying again
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment