Инструменты пользователя

Инструменты сайта


python:parsers:user-agent

User-Agent

import requests
from bs4 import BeautifulSoup
import csv
 
def get_html(url, timeout=10):
    """Download a page and return its body as text.

    The request carries a browser-like ``User-Agent`` header and a ``From``
    contact header instead of the defaults sent by ``requests``.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            an explicit timeout ``requests`` may block indefinitely on a
            stalled connection.

    Returns:
        The response body decoded to ``str`` (``Response.text``). The HTTP
        status code is not checked, so error pages are returned as well.

    Raises:
        requests.exceptions.RequestException: On connection failures or
            when the timeout expires.
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0',
        'From': 'youremail@domain.com'
    }
    r = requests.get(url, headers=header, timeout=timeout)
    return r.text
 
def get_articles(html):
    """Return all <article> tags inside the testimonial container.

    The markup is parsed with the ``lxml`` parser, the first ``<div>`` with
    class ``testimonial-container`` is located, and every ``<article>`` tag
    beneath it is collected.

    Raises:
        AttributeError: If the page has no such ``<div>`` (``find`` then
            returns ``None``, which has no ``find_all``).
    """
    document = BeautifulSoup(html, 'lxml')
    container = document.find('div', class_='testimonial-container')
    return container.find_all('article')
 
def _find_text(item, tag, css_class):
    """Return the stripped text of the first matching child tag, or ''."""
    try:
        return item.find(tag, class_=css_class).text.strip()
    except AttributeError:
        # find() returned None: this article has no such tag. Only that
        # case is tolerated; anything else (e.g. Ctrl-C) must propagate.
        return ''


def get_data(ts):
    """Extract author and membership date from each article and store them.

    For every element of *ts* the text of ``<p class="testimonial-author">``
    and ``<p class="traxer-since">`` is read (an empty string is used when a
    tag is missing) and the pair is passed to ``write_csv`` as one row.

    Args:
        ts: Iterable of parsed article tags.
    """
    for item in ts:
        data = {
            'author': _find_text(item, 'p', 'testimonial-author'),
            'since': _find_text(item, 'p', 'traxer-since'),
        }
        write_csv(data)
 
 
def write_csv(data):
    """Append one row to ``websites.csv`` in the current directory.

    Args:
        data: Mapping with the keys ``'author'`` and ``'since'``; the values
            are written in that column order. No header row is written.

    Raises:
        ValueError: If *data* contains keys other than the two columns
            (raised by ``csv.DictWriter``).
    """
    order = ['author', 'since']
    # newline='' hands line-ending control to the csv module. Without it,
    # platforms that translate '\n' to '\r\n' turn the writer's '\r\n' into
    # '\r\r\n', which shows up as a blank line after every row.
    with open('websites.csv', 'a', encoding='utf-8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=order)
        writer.writerow(data)
 
 
def main():
    """Scrape the paginated testimonial listing page by page.

    Pages are requested starting from 1. Each page's articles are handed to
    ``get_data``; the loop ends at the first page that yields no articles.
    """
    i = 1
    while True:
        url = 'https://catertrax.com/why-catertrax/traxers/page/{}/'.format(i)
        html = get_html(url)
        try:
            articles = get_articles(html)
        except AttributeError:
            # The page has no testimonial container at all (e.g. an error
            # page past the last listing page); handle it like an empty page.
            articles = []
        if not articles:
            # Nothing left to scrape: stop instead of requesting ever
            # higher page numbers forever.
            break
        get_data(articles)
        # Report the page that was just processed, then advance.
        print('page ' + str(i))
        i += 1
 
 
# Start the scraper only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
python/parsers/user-agent.txt · Последние изменения: 2023/01/12 12:18 (внешнее изменение)