I'm new to web scraping and new to Python. I want to scrape the title of each posting in the forum at the URL below, so that when a new post is created with one of the titles listed below, I receive an e-mail with the link to that post.
By searching for the div with class structItem-title I get the 23 postings that are on one page. But when I try to print the text of each posting, I only get <class 'str'> for print(type(first_result.text)) and <class 'bs4.element.Tag'> for print(type(first_result)).
Titles to search for
# Jeti_DS_16 = soup.find_all(text="Jeti DS 16") # Jeti_DS_16_v2 = soup.find_all(text="Jeti DS 16 2") # Jeti_DC_16 = soup.find_all(text="Jeti DC 16") # Jeti_DC_16_v2 = soup.find_all(text="Jeti DC 16 2") Code
import re
import smtplib
import time

import lxml  # not referenced directly; BeautifulSoup is asked for the "lxml" parser below
from bs4 import BeautifulSoup
from requests import RequestException, get

URL = 'https://www.rc-network.de/forums/biete-rc-elektronik-zubeh%C3%B6r.135/'

headers = {
    "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36'}

# Seconds to wait for the server before giving up on one request.
REQUEST_TIMEOUT_SECONDS = 30

# Seconds between two polls of the forum page (use 86400 for once a day).
POLL_INTERVAL_SECONDS = 600

# Thread titles that should eventually trigger a notification mail:
#   "Jeti DS 16", "Jeti DS 16 2", "Jeti DC 16", "Jeti DC 16 2"


def checkForSearchItem():
    """Download the forum page and print the title of every thread on it.

    Each thread title lives in a ``<div class="structItem-title">`` element.
    ``Tag.text`` / ``Tag.get_text()`` already is the title string, so the
    text itself is printed rather than ``type(...)`` of it.

    Returns:
        list[str]: The text of every ``div.structItem-title`` element in
        page order; an empty list when the page contains none.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Send the browser-like User-Agent and bound the wait so that a stalled
    # connection cannot block the polling loop forever.
    response = get(URL, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
    soup = BeautifulSoup(response.content, "lxml")

    search_for_class = soup.find_all('div', class_='structItem-title')
    print(len(search_for_class))

    # A single space between text fragments keeps nested elements inside the
    # title div from running together; strip=True drops surrounding whitespace.
    titles = [item.get_text(" ", strip=True) for item in search_for_class]
    for title in titles:
        print(title)
    return titles


# Draft of the mail notification, not enabled yet.
# TODO: fill in real credentials and the link of the matching post.
#
# def send_mail():
#     server_ssl = smtplib.SMTP_SSL('smtp.gmail.com', 465)
#     server_ssl.ehlo()
#     server_ssl.login('[email protected]', 'SecurePassword')
#     subject = ('Es gibt ein neuer Post im RC-Network auf deine gespeicherte Anfragen. '
#                'Sieh in dir an{Link to Post}')
#     body = 'Sieh es dir an Link: https://www.rc-network.de/forums/biete-rc-elektronik-zubeh%C3%B6r.135/'
#     msg = f"Subject: {subject}\n\n{body}"
#     emails = ["[email protected]"]
#     server_ssl.sendmail('[email protected]', emails, msg)
#     print('e-Mail wurde versendet!')
#     server_ssl.quit()  # the draft had `server_ssl.quit` without the call parentheses


if __name__ == "__main__":
    while True:
        try:
            checkForSearchItem()
        except RequestException as error:
            # A transient network problem should not end the poller;
            # report it and try again on the next cycle.
            print(f'Request failed, retrying on the next cycle: {error}')
        time.sleep(POLL_INTERVAL_SECONDS)