"""This script reads a HTML page and extracts variable names and values.
The names and values are encoded as table cells with class attributes
like this:
    <tr><td class="name">Voltage</td><td class="value">13</td></tr>
The number of variables in the file is not limited, but only one
variable definition per table row is allowed.
V2.00 19-NOV-2019 Te
"""
import sys
from urllib.request import urlopen
from bs4 import BeautifulSoup
def processpage(url):
    """Fetch one URL and print every name/value pair found in its tables.

    The page is parsed with BeautifulSoup using the "lxml" parser.  For
    each ``<td class="name">`` cell, the first ``<td class="value">`` cell
    inside the same parent element (normally the table row) is looked up.
    When one exists, a line of the form ``name = value`` is printed; name
    cells whose parent contains no value cell are skipped silently.

    Args:
        url: Address of the page to read; passed unchanged to ``urlopen``.

    Returns:
        None.  Results and errors are both reported with ``print``.

    Any exception raised while fetching, parsing or printing is caught and
    reported as ``Error: <url> <exception>`` instead of propagating, so the
    caller can carry on with further URLs.
    """
    try:
        # The context manager closes the response even when reading or
        # parsing fails; the original left it open.
        with urlopen(url) as response:
            soup = BeautifulSoup(response.read(), features="lxml")

        for name_cell in soup.find_all("td", class_="name"):
            # Search the enclosing element so that a name is only paired
            # with a value cell from its own row.
            value_cell = name_cell.parent.find("td", class_="value")
            if value_cell is not None:
                print(f"{name_cell.string} = {value_cell.string}")
    except Exception as exception:
        # Deliberately broad: this is the script's per-URL error boundary.
        print(f"Error: {url} {exception}")
if __name__ == "__main__":
    # Every command-line argument after the script name is treated as a
    # URL and handled in the order given.
    requested_urls = sys.argv[1:]
    for requested_url in requested_urls:
        processpage(requested_url)