Оглавление:
Карта сайта:
Оглавление:
Карта сайта:
"""Scrape testimonial entries from the paginated catertrax.com "traxers" listing.

For every <article> found on a listing page, the text of its
``testimonial-author`` and ``traxer-since`` paragraphs is appended as one row
to ``websites.csv`` in the current working directory.
"""
import csv

import requests
from bs4 import BeautifulSoup


def get_html(url, timeout=30):
    """Fetch *url* with a GET request and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``;
            without one a stalled connection would block indefinitely.

    Returns:
        The decoded response body (``Response.text``). The HTTP status code
        is not inspected, so error pages are returned as text too.
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0',
        'From': 'youremail@domain.com',
    }
    r = requests.get(url, headers=header, timeout=timeout)
    return r.text


def get_articles(html):
    """Return the <article> tags inside the ``testimonial-container`` div.

    Args:
        html: Markup of one listing page.

    Returns:
        The result of ``find_all('article')`` on the container, or an empty
        list when the page has no ``div.testimonial-container`` at all.
    """
    soup = BeautifulSoup(html, 'lxml')
    container = soup.find('div', class_='testimonial-container')
    if container is None:
        # find() yields None when nothing matches; report "no articles"
        # instead of failing on None.find_all.
        return []
    return container.find_all('article')


def _paragraph_text(item, css_class):
    """Return the stripped text of the first <p class=css_class> in *item*, or ''."""
    tag = item.find('p', class_=css_class)
    if tag is None:
        return ''
    return tag.text.strip()


def get_data(ts):
    """Extract author/since from each article and append it to the CSV file.

    Args:
        ts: Iterable of article tags, as returned by ``get_articles``.

    A field whose paragraph is missing from an article is written as an
    empty string.
    """
    for item in ts:
        data = {
            'author': _paragraph_text(item, 'testimonial-author'),
            'since': _paragraph_text(item, 'traxer-since'),
        }
        write_csv(data)


def write_csv(data):
    """Append one row to ``websites.csv`` with the columns author, since.

    Args:
        data: Mapping with the keys ``'author'`` and ``'since'``.

    No header row is written.
    """
    # newline='' lets the csv module control line endings itself; otherwise
    # platforms that translate '\n' end up with blank lines between rows.
    with open('websites.csv', 'a', encoding='utf-8', newline='') as f:
        order = ['author', 'since']
        writer = csv.DictWriter(f, fieldnames=order)
        writer.writerow(data)


def main():
    """Walk the listing pages from page 1 until a page yields no articles."""
    i = 1
    while True:
        url = 'https://catertrax.com/why-catertrax/traxers/page/{}/'.format(str(i))
        articles = get_articles(get_html(url))
        if not articles:
            # An empty page marks the end of the listing; without this exit
            # the loop would never terminate.
            break
        get_data(articles)
        i += 1
        # Printed after the increment, so this is the number of the page
        # that will be requested next.
        print('page ' + str(i))


if __name__ == '__main__':
    main()