In a terminal, run:

python3 -m ensurepip
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
python3 -m pip install requests
python3 -m pip install html5lib
python3 -m pip install bs4
python3 -m pip install texttable

BeautifulSoup is a Python package useful for parsing HTML and XML documents.
Pandas is a Python package useful for data manipulation and analysis; a short pandas sketch appears in Example 6 at the end.

import os
import requests
from bs4 import BeautifulSoup
from pathlib import Path
import numpy

os.chdir("/Users/rickpaikschoenberg/Desktop")

## EXAMPLE 1. https://example.com

from bs4 import BeautifulSoup
import requests

url = "https://example.com"
response = requests.get(url)
data = response.text
soup = BeautifulSoup(data, 'html.parser')

# Print the text of every h1 heading on the page
titles = soup.find_all('h1')
for title in titles:
    print(title.text)

## EXAMPLE 2, from https://www.scrapingbee.com/blog/python-web-scraping-beautiful-soup

import requests
from bs4 import BeautifulSoup

response = requests.get("https://news.ycombinator.com/")
html_content = response.content
soup = BeautifulSoup(html_content, "html.parser")

print(soup.title)
print(soup.title.string)

# Count all links in the page
nb_links = len(soup.find_all("a"))
print(f"There are {nb_links} links in this page")

print(soup.get_text())

## EXAMPLE 3.

import requests
from bs4 import BeautifulSoup

# Fetch the content from the URL
response = requests.get("https://news.ycombinator.com")
html_content = response.content

# Use Beautiful Soup to parse the HTML
soup = BeautifulSoup(html_content, "html.parser")
articles = soup.find_all(class_="athing")

# Check if articles were found
if articles:
    # Loop through the selected elements
    for article in articles:
        # Print each article's text content to the console
        print(article.text)

## EXAMPLE 4.

import requests
from bs4 import BeautifulSoup

# Fetch the content from the URL
response = requests.get("https://news.ycombinator.com")
html_content = response.content

# Use Beautiful Soup to parse the HTML
soup = BeautifulSoup(html_content, "html.parser")
articles = soup.find_all(class_="athing")

scraped_data = []

# Check if articles were found (find_all returns an empty list when nothing matches)
if articles:
    for article in articles:
        data = {
            "URL": article.find(class_="titleline").find("a").get("href"),
            "title": article.find(class_="titleline").getText(),
            "rank": article.find(class_="rank").getText().replace(".", ""),
        }
        scraped_data.append(data)

# Print the output list
print(scraped_data)

## EXAMPLE 5.

import requests
from bs4 import BeautifulSoup
from pathlib import Path
import numpy

url = 'https://en.wikipedia.org/wiki/List_of_best-selling_books'

# Get the URL's html
page = requests.get(url)
soup = BeautifulSoup(page.content, 'html.parser')

# soup.find_all('td') returns every table cell on the page;
# data_iterator steps through those cells one at a time
data_iterator = iter(soup.find_all('td'))

b = []
# Pull the first 500 cells (50 rows of 10 cells each) from the iterator
for i in range(50):
    for j in range(10):
        b.append(str(next(data_iterator).text))

content = str(numpy.array(b))

# Write the first 60 cells, one per line
out = open("new.txt", "w")
for i in range(60):
    out.write(b[i] + "\n")
out.close()

# Write the condensed string form of the whole array
out = open("condensed.txt", "w")
out.write(content)
out.close()
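
## EXAMPLE 6. Pandas is described above but not used in Examples 1-5. This is a rough sketch
## (not from the scrapingbee post) of one way pandas could read the same Wikipedia page as
## Example 5. It assumes pandas is installed (python3 -m pip install pandas); pandas.read_html
## also needs an HTML parser such as lxml or the html5lib installed above.

import pandas

url = 'https://en.wikipedia.org/wiki/List_of_best-selling_books'

# read_html parses every <table> on the page and returns a list of DataFrames
tables = pandas.read_html(url)
print(len(tables))

# Inspect the first table; which index holds which book list depends on the page layout,
# so check a few before settling on one
df = tables[0]
print(df.head())

# Save the chosen table to a csv file for later analysis
df.to_csv("books.csv", index=False)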