# Setup — run these in a terminal first:
#   python3 -m ensurepip
#   curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
#   pip install requests
#   pip install html5lib
#   pip install bs4
#   pip install texttable

# ---------------------------------------------------------------------------
# Writing to files.
# ---------------------------------------------------------------------------
from pathlib import Path

p = Path("new.txt")
p.write_text("test it out now")
p.read_text()

# Better to use open(). A `with` block guarantees the handle is closed even if
# an exception occurs (the original leaked every handle it opened for reading).
with open("new.txt") as newfile:
    newfile.read()
    # NOTE: the handle is now at EOF, so this returns [] — kept only to mirror
    # the original demo, which called read() and then readlines().
    newfile.readlines()  # one string for each new line

# "a" = append mode; "w" = write mode (overwrites the file).
with open("new.txt", "a") as newfile:
    newfile.write("\nthis is the 2nd line.\n")

with open("new.txt") as newfile:
    print(newfile.readlines())

# ---------------------------------------------------------------------------
# BeautifulSoup is a Python package useful to parse HTML and XML documents.
# Pandas is a Python package useful for data manipulation and analysis.
# ---------------------------------------------------------------------------
import requests
from bs4 import BeautifulSoup
import numpy

url = 'https://www.worldometers.info/coronavirus/countries-where-coronavirus-has-spread/'
page = requests.get(url)
soup = BeautifulSoup(page.text, 'html.parser')

data = []
# soup.find_all('td') will scrape every element in the url's table;
# data_iterator is the iterator over those cells.
data_iterator = iter(soup.find_all('td'))

# This loop keeps repeating until there is no data left in the iterator.
while True:
    try:
        country = next(data_iterator).text
        confirmed = next(data_iterator).text
        deaths = next(data_iterator).text
        continent = next(data_iterator).text
        # Remove the thousands-separator commas and convert to int.
        data.append((
            country,
            int(confirmed.replace(',', '')),
            int(deaths.replace(',', '')),
            continent,
        ))
    # StopIteration is raised when there are no more elements to iterate.
    except StopIteration:
        break

# Sort the data by the number of confirmed cases, largest first.
data.sort(key=lambda row: row[1], reverse=True)

# Create a texttable object. The leading all-None row is consumed by
# add_rows() as a placeholder header row, which table.header() then replaces.
import texttable as tt

table = tt.Texttable()
table.add_rows([(None, None, None, None)] + data)
# 'l' denotes left, 'c' denotes center, 'r' denotes right.
table.set_cols_align(('c', 'c', 'c', 'c'))
table.header((' Country ', ' Number of cases ', ' Deaths ', ' Continent '))

# Print it in python.
print(table.draw())

# Save to a file called new.txt.
content = str(numpy.array(data))
with open("new.txt", "w") as out:
    out.write(content)

# ---------------------------------------------------------------------------
# Second example: scrape the Wikipedia list of best-selling books.
# ---------------------------------------------------------------------------
url = 'https://en.wikipedia.org/wiki/List_of_best-selling_books'

# get URL html
page = requests.get(url)
soup = BeautifulSoup(page.text, 'html.parser')

data = []
# soup.find_all('td') will scrape every element in the url's table.
data_iterator = iter(soup.find_all('td'))

# Collect the first 50 rows x 10 columns = 500 table cells as plain strings.
b = []
for i in range(50):
    for j in range(10):
        b.append(str(next(data_iterator).text))

content = str(numpy.array(b))

# Write the first 60 cells, one per line (as in the original demo — the
# remaining 440 cells are deliberately not written here).
with open("new.txt", "w") as out:
    for i in range(60):
        out.write(b[i] + "\n")

# Full cell list, as one numpy-array repr string.
with open("condensed.txt", "w") as out:
    out.write(content)