14. “BeautifulSoup tries to make sense of the nonsensical;
it helps format and organize the messy web by fixing
bad HTML and presenting us with easily traversable
Python objects representing XML structures.”
15. BEAUTIFUL SOUP
from urllib.request import urlopen
from bs4 import BeautifulSoup
# Download the page at http://sampleshop.pl, parse it with the built-in
# "html.parser" backend, and print the first <h1> element found (prints
# None when the document has no <h1>).
#
# The response is opened in a `with` block so the underlying connection is
# closed as soon as the body has been read, instead of being left open.
with urlopen("http://sampleshop.pl") as html:
    bsObj = BeautifulSoup(html.read(), "html.parser")
print(bsObj.h1)
16. REQUESTS
import requests
from bs4 import BeautifulSoup
# Send a GET request carrying browser-like headers and print the text of the
# first <table class="table-striped"> element in the response.
session = requests.Session()

# Each header value has to be one single string. Adjacent string literals
# inside parentheses are concatenated by Python, which keeps the source lines
# short without putting a line break inside a literal.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/53.0.2785.116 Safari/537.36'
    ),
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,'
        '*/*;q=0.8'
    ),
}

# The URL is likewise split across literals only for readability; the pieces
# join without any separator.
url = (
    "https://www.whatismybrowser.com/developers/"
    "what-http-headers-is-my-browser-sending"
)
req = session.get(url, headers=headers)
bsObj = BeautifulSoup(req.text, "html.parser")

# get_text is a method and must be called; without the parentheses the
# bound-method object itself would be printed instead of the table's text.
print(bsObj.find("table", {"class": "table-striped"}).get_text())
17. REQUESTS
import requests
from bs4 import BeautifulSoup
# Request the shop page with browser-like headers and print the text of the
# first <p class="site-description"> element in the response.
session = requests.Session()

# Each header value has to be one single string. Adjacent string literals
# inside parentheses are concatenated by Python, which keeps the source lines
# short without putting a line break inside a literal.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/53.0.2785.116 Safari/537.36'
    ),
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,'
        '*/*;q=0.8'
    ),
}

url = "http://sampleshop.pl/shop"
req = session.get(url, headers=headers)
bsObj = BeautifulSoup(req.text, "html.parser")
print(bsObj.find("p", {"class": "site-description"}).get_text())