Skip to content

Instantly share code, notes, and snippets.

@dnewber
Last active July 13, 2021 21:13
Show Gist options
  • Select an option

  • Save dnewber/6bea2150df6cace60d260bbd72ea687a to your computer and use it in GitHub Desktop.

Select an option

Save dnewber/6bea2150df6cace60d260bbd72ea687a to your computer and use it in GitHub Desktop.
import random
import yaml
import json
import time
import requests
# Config Variables
# The API key is embedded as the user part of the proxy URL (empty password).
ZYTE_KEY = "ADD API KEY HERE"

# Both URL schemes are routed through the same Zyte proxy endpoint.
proxies = {
    scheme: f'http://{ZYTE_KEY}:@proxy.zyte.com:8011/'
    for scheme in ('http', 'https')
}

# Product and store identifiers passed to build_request() by the script below.
item_id = '310654540'
store_id = '117'

# When True, traffic is sent through the proxy and a Zyte session is created.
use_proxies = True
# Helper Functions
# Pool of browser User-Agent strings. Built once at import time instead of
# being re-created on every call to random_user_agent().
_USER_AGENTS = (
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36 OPR/43.0.2442.991",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.63 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586",
    "Mozilla/5.0 (Windows NT 6.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.10240",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36 Edge/15.15063",
)


def random_user_agent():
    """Return one User-Agent string chosen at random from the fixed pool."""
    return random.choice(_USER_AGENTS)
def get_zyte_sessions():
    """Fetch the session listing from the Zyte endpoint, print it, and return it.

    Issues a GET to the ``sessions`` path under the configured proxy URL and
    decodes the response body as JSON. Each entry is printed as an ID/IP pair
    (presumably session id -> outgoing IP; confirm against the Zyte API).

    Returns:
        The decoded JSON object.
    """
    listing_url = proxies["http"] + "sessions"
    active_sessions = requests.get(listing_url).json()
    for session_key in active_sessions:
        print(f"ID:{session_key} IP: {active_sessions[session_key]}")
    return active_sessions
def create_zyte_session():
    """Create a new Zyte session and return its identifier.

    Issues a POST to the ``sessions`` path under the configured proxy URL and
    reads the new session id from the ``X-Crawlera-Session`` response header.

    Returns:
        The session id string taken from the response header.

    Raises:
        requests.HTTPError: if the endpoint answers with a 4xx/5xx status
            (for example when the API key is rejected).
        KeyError: if a successful response lacks the session header.
    """
    response = requests.post(proxies["http"] + "sessions")
    # Surface HTTP errors directly; without this, a rejected request shows up
    # only as an opaque KeyError on the missing header below.
    response.raise_for_status()
    return response.headers["X-Crawlera-Session"]
def delete_zyte_session(session_id):
    """Send a DELETE for the given session id and return the raw response.

    Args:
        session_id: Identifier appended to the ``sessions/`` path under the
            configured proxy URL.

    Returns:
        The ``requests.Response`` from the DELETE call; its status is not checked.
    """
    target_url = proxies["http"] + f"sessions/{session_id}"
    return requests.delete(target_url)
def build_session(use_proxies=True):
    """Create a ``requests.Session``, optionally routed through the proxy.

    Args:
        use_proxies: When true, the session uses the module-level ``proxies``
            mapping and TLS certificate verification is turned off.

    Returns:
        The configured ``requests.Session``.
    """
    http_session = requests.Session()
    if not use_proxies:
        return http_session

    # NOTE(review): the original indentation was lost; verify=False is assumed
    # to apply only on the proxy path -- confirm against the upstream gist.
    http_session.proxies = proxies
    http_session.verify = False
    return http_session
def build_request(item_id:str, store_id:str):
    """Build the unprepared POST request for the product GraphQL endpoint.

    Args:
        item_id: Value sent as the ``itemId`` GraphQL variable.
        store_id: Value sent as the ``storeId`` GraphQL variable.

    Returns:
        A ``requests.Request`` (not yet prepared or sent) whose body is the
        JSON-encoded operation name, variables and query.
    """
    # The query is one long string; it is assembled from adjacent literals,
    # each ending in the single space that separates it from the next chunk.
    graphql_query = (
        "query productClientOnlyProduct($storeId: String, $zipCode: String, $itemId: String!, $dataSource: String, $loyaltyMembershipInput: LoyaltyMembershipInput, $skipSpecificationGroup: Boolean = false, $skipKPF: Boolean = false) { "
        "product(itemId: $itemId, dataSource: $dataSource, loyaltyMembershipInput: $loyaltyMembershipInput) { "
        "fulfillment(storeId: $storeId, zipCode: $zipCode) { backordered fulfillmentOptions { type fulfillable services { type locations { isAnchor inventory { isLimitedQuantity isOutOfStock isInStock quantity isUnavailable maxAllowedBopisQty minAllowedBopisQty __typename } type storeName locationId curbsidePickupFlag isBuyInStoreCheckNearBy distance state storePhone __typename } deliveryTimeline deliveryDates { startDate endDate __typename } deliveryCharge dynamicEta { hours minutes __typename } hasFreeShipping freeDeliveryThreshold totalCharge __typename } __typename } anchorStoreStatus anchorStoreStatusType backorderedShipDate bossExcludedShipStates excludedShipStates seasonStatusEligible onlineStoreStatus onlineStoreStatusType inStoreAssemblyEligible __typename } "
        "itemId dataSources "
        "identifiers { canonicalUrl brandName itemId modelNumber productLabel storeSkuNumber upcGtin13 specialOrderSku toolRentalSkuNumber rentalCategory rentalSubCategory upc isSuperSku parentId productType sampleId __typename } "
        "availabilityType { discontinued status type buyable __typename } "
        "details { description collection { url collectionId __typename } highlights __typename } "
        "media { images { url sizes type subType __typename } video { shortDescription thumbnail url videoStill link { text url __typename } title type videoId longDescription __typename } threeSixty { id url __typename } augmentedRealityLink { usdz image __typename } __typename } "
        "pricing(storeId: $storeId) { promotion { dates { end start __typename } type description { shortDesc longDesc __typename } dollarOff percentageOff savingsCenter savingsCenterPromos specialBuySavings specialBuyDollarOff "
        "specialBuyPercentageOff experienceTag subExperienceTag anchorItemList itemList reward { tiers { minPurchaseAmount minPurchaseQuantity rewardPercent rewardAmountPerOrder rewardAmountPerItem rewardFixedPrice __typename } __typename } __typename } value alternatePriceDisplay alternate { bulk { pricePerUnit thresholdQuantity value __typename } unit { caseUnitOfMeasure unitsOriginalPrice unitsPerCase value __typename } __typename } original mapAboveOriginalPrice message preferredPriceFlag specialBuy unitOfMeasure __typename } "
        "reviews { ratingsReviews { averageRating totalReviews __typename } __typename } "
        "seo { seoKeywords seoDescription __typename } "
        "specificationGroup @skip(if: $skipSpecificationGroup) { specifications { specName specValue __typename } specTitle __typename } "
        "taxonomy { breadCrumbs { label url browseUrl creativeIconUrl deselectUrl dimensionName refinementKey __typename } brandLinkUrl __typename } "
        "favoriteDetail { count __typename } "
        "info { hidePrice ecoRebate quantityLimit sskMin sskMax unitOfMeasureCoverage wasMaxPriceRange wasMinPriceRange fiscalYear productDepartment classNumber forProfessionalUseOnly globalCustomConfigurator { customButtonText customDescription customExperience customExperienceUrl customTitle __typename } movingCalculatorEligible label recommendationFlags { visualNavigation reqItems __typename } replacementOMSID hasSubscription minimumOrderQuantity projectCalculatorEligible subClassNumber calculatorType isLiveGoodsProduct protectionPlanSku hasServiceAddOns consultationType __typename } "
        "sizeAndFitDetail { attributeGroups { attributes { attributeName dimensions __typename } dimensionLabel productType __typename } __typename } "
        "keyProductFeatures @skip(if: $skipKPF) { keyProductFeaturesItems { features { name refinementId refinementUrl value __typename } __typename } __typename } "
        "seoDescription "
        "badges(storeId: $storeId) { color creativeImageUrl endDate label message name timerDuration timer { timeBombThreshold daysLeftThreshold "
        "dateDisplayThreshold message __typename } __typename } "
        "installServices { scheduleAMeasure __typename } "
        "subscription { defaultfrequency discountPercentage subscriptionEnabled __typename } "
        "dataSource __typename }}"
    )

    # Both "skip" flags are sent as False, so the specification group and
    # key-product-features sections of the query are not skipped.
    payload = {
        'operationName': 'productClientOnlyProduct',
        'variables': {
            'skipSpecificationGroup': False,
            'skipKPF': False,
            'itemId': item_id,
            'storeId': store_id,
        },
        'query': graphql_query,
    }

    # Browser-like headers plus the X-Crawlera-* proxy control headers.
    request_headers = {
        'User-Agent': random_user_agent(),
        'X-Crawlera-Cookies': 'disable',
        'X-Crawlera-Profile': 'desktop',
        'X-Crawlera-Profile-Pass': 'User-Agent',
        'Accept-Encoding': 'gzip, deflate',
        'Accept': '*/*',
        'Connection': 'keep-alive',
        'Accept-Language': 'en-US,en;q=0.5',
        'Referer': 'https://www.homedepot.com/',
        'content-type': 'application/json',
        'X-Experience-Name': 'general-merchandise',
        'Origin': 'https://www.homedepot.com',
    }

    target = "https://www.homedepot.com/product-information/model?opname=productClientOnlyProduct"
    body = json.dumps(payload)
    return requests.Request('POST', target, headers=request_headers, data=body)
# Make request
# Retry loop: send the product request until the endpoint answers 200. When
# proxies are enabled, every attempt runs under a freshly created Zyte session.
successful_request = False
session = build_session(use_proxies=use_proxies)
request = build_request(item_id=item_id, store_id=store_id)
while not successful_request:
    zyte_session_id = None
    if use_proxies:
        zyte_session_id = create_zyte_session()
        session.headers['X-Crawlera-Session'] = zyte_session_id
        get_zyte_sessions()  # for logging purposes
    print('Sending the request...')
    # Session.prepare_request() merges session-level state (including the
    # X-Crawlera-Session header set above) into the outgoing request.
    # Request.prepare() on its own ignores the session, so the header would
    # never be sent.
    response = session.send(session.prepare_request(request))
    print(response.status_code, response.text)
    if response.status_code == 200:
        successful_request = True
        print("Request Succeeded!")
    else:
        print("Request failed. Retrying...")
        if zyte_session_id is not None:
            # The next attempt creates a new session, so release this one
            # instead of leaving it allocated on the Zyte side.
            delete_zyte_session(zyte_session_id)
        time.sleep(2)  # wait before trying again
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment