Hello everyone, I cannot scrape the pages because a pop-up window is displayed immediately.
I need help or tips from you on how I can confirm (dismiss) the pop-up in code; I have sketched a rough attempt after the code below.
My Python code:
import requests
from bs4 import BeautifulSoup
import csv
import pandas as pd
from time import sleep
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
def connect():
    # Check whether an internet connection is available (Python 3.x).
    try:
        requests.get('http://google.com')
        return True
    except requests.exceptions.RequestException:
        return False
def generate_file():
    # Create Results.csv with only the header row (one column per returned field).
    headers_text = ['Index', 'Title', 'Amount', 'Weight', 'Price',
                    'Category', 'Link', 'Image Url', 'Article Number']
    df = pd.DataFrame([], columns=headers_text)
    df.to_csv('Results.csv', index=False, encoding='utf-8-sig')
def hasNumbers(inputString):
    # True if the string contains at least one digit.
    return any(char.isdigit() for char in inputString)
# url='https://www.docmorris.de/optifast-drink-kaffee/10267833'
# url='https://www.docmorris.de/orlistat-hexal-60-mg/08982497'
def profileScraper(url, index):
    # Block until an internet connection is available.
    while not connect():
        print('no internet')
        sleep(5)
    # Fetch the page twice: once with requests for parsing, once with Selenium
    # for the image URL (this is where the pop-up gets in the way).
    res = requests.get(url)
    driver = webdriver.Chrome(ChromeDriverManager().install())
    driver.get(url)
    soup = BeautifulSoup(res.content, features='html.parser')
    title = soup.find('h1', class_='element-name')
    if title is not None:
        title = title.text.strip()
    else:
        return 0
    print(title)
    Weight = soup.find('dd', class_='col-8')
    if Weight is not None:
        Weight = Weight.text.strip()
    else:
        Weight = 'N/A'
    print(Weight)
    Price = soup.find('span', class_='price')
    if Price is not None:
        Price = Price.text.strip()
    else:
        Price = 'N/A'
    print(Price)
    # find_all returns a list (possibly empty), never None, so join the texts.
    category_tags = soup.find_all('dd', class_='col-8')
    if category_tags:
        Categories = ', '.join(tag.text.strip() for tag in category_tags)
    else:
        Categories = 'N/A'
    print(Categories)
    # Try to extract the pack amount from the number before 'Stück' in the title.
    amount = title.split('Stück')[0]
    amountSplit = title.split('Stück')[0].split(' ')
    for split in amountSplit:
        if hasNumbers(split):
            amount = split
    if not hasNumbers(amount):
        # Fall back to the quantity shown on the order button.
        amount = soup.find('button', class_='btn-sm')
        if amount is not None:
            amount = amount.text.split(':')[1].split('x')[0].strip()
        else:
            amount = '1 Pack'
    print(amount)
    sleep(2)
    img_url_text = 'N/D'
    try:
        # The src attribute sits on the <img> inside the figure, not on <figure> itself.
        img_url_text = driver.find_element_by_xpath(
            "//figure[@class='gallery-slider__item']//img").get_attribute('src')
    except Exception:
        pass
    finally:
        # Quit even when the lookup fails, so no Chrome instance is left behind.
        driver.quit()
        print(img_url_text)
    # 'article_no_tags' was undefined here; I assume the article number comes
    # from the parsed page like the other fields.
    art_tags = soup.find('dd', class_='col-8')
    if art_tags is not None:
        article_no = art_tags.text.strip()
    else:
        article_no = 'N/A'
    print(article_no)
    return [index, title, '`' + amount, Weight, Price, Categories, url, img_url_text, article_no]
with open('test.csv', 'r', encoding='utf-8') as readFile:
    reader = csv.reader(readFile)
    file_lines = list(reader)
# print(file_lines[1][6])
generate_file()
for index, row in enumerate(file_lines[1:]):
    print(index)
    # print(row[6])
    record = profileScraper(row[6], index + 1)
    df = pd.DataFrame([record])
    df.to_csv('Results.csv', index=False, mode='a', encoding='utf-8-sig', header=False)
    # break
    print()
    print(record)
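For the pop-up itself, this is the direction I have been thinking in: wait with Selenium until the pop-up's confirm button is clickable, then click it before scraping anything. This is only a sketch; the XPath with 'Akzeptieren' is my guess at a consent/confirm button, since I don't know the banner's real markup:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def dismiss_popup(driver, timeout=10):
    # Wait until the pop-up's confirm button is clickable, then click it.
    # The XPath below is a placeholder; the real button text/selector is unknown to me.
    try:
        button = WebDriverWait(driver, timeout).until(
            EC.element_to_be_clickable(
                (By.XPATH, "//button[contains(text(), 'Akzeptieren')]")))
        button.click()
    except Exception:
        # No pop-up appeared within the timeout, or the selector did not match.
        pass

I would call dismiss_popup(driver) directly after driver.get(url) in profileScraper, but I am not sure this is the right way to do it. Is there a more robust approach?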
Thanks in advance