"""This script reads a HTML page and extracts variable names and values.

   The names and values are encoded as table cells with class-attributes
   like this:

   <tr>
    <td class="name">Voltage</td><td class="value">13</td>
   </tr>

   The number of variables in the file is not limited, but only one
   variable definition per table row is allowed.

   Limitations:
     Entities (e.g. &amp;) in names or values are ignored, i.e. they
     are left out of the extracted text.

   V0.01  16-FEB-2014 Te
"""

import sys
import httplib
from HTMLParser import HTMLParser
from urlparse import urlparse

class HTMLHandler( HTMLParser ):
    """Parses HTML and collects the variables found in table rows.

    A variable is defined by a table row that contains a cell with
    class="name" and, optionally, a cell with class="value":

        <tr><td class="name">Voltage</td><td class="value">13</td></tr>

    Every completed row that has a non-empty name is appended to the
    public list ``variables`` as a ``(name, value)`` tuple.  The list is
    kept across several calls of ``feed()``, so one handler can collect
    the variables of more than one page.
    """

    def __init__( self ):
        """Initializes the class members."""
        HTMLParser.__init__( self )

        self.variables     = []
        self.__texthandler = self.__ignore
        self.__name        = ""
        self.__value       = ""

    def __ignore( self, text ):
        """Ignores the text (used outside of name and value cells)."""
        pass

    def __addtoname( self, text ):
        """Adds the text to the name of the current row."""
        self.__name += text

    def __addtovalue( self, text ):
        """Adds the text to the value of the current row."""
        self.__value += text

    def handle_starttag( self, tag, attrs ):
        """Handles the start tag of td.

        A cell with class "name" or "value" starts a fresh name or value
        and routes the following text to it.  All other tags are ignored.
        """
        if tag != "td":
            return

        for key, value in attrs:
            if key != "class":
                continue

            if value == "name":
                self.__name = ""
                self.__texthandler = self.__addtoname

            elif value == "value":
                self.__value = ""
                self.__texthandler = self.__addtovalue

    def handle_endtag( self, tag ):
        """Handles the end tag of td and tr.

        The end of a cell stops the text collection.  The end of a row
        stores the collected variable, if the row defined a name.
        """
        if tag == "td":
            self.__texthandler = self.__ignore

        elif tag == "tr":
            if self.__name != "":
                self.variables.append( (self.__name, self.__value) )

            # Start the next row with a clean state.  Without this reset a
            # following row without a name cell would store the previous
            # variable once more, and a row without a value cell would
            # inherit the value of the previous row.
            self.__name        = ""
            self.__value       = ""
            # A cell that was left open must not collect text behind the row.
            self.__texthandler = self.__ignore

    def handle_data( self, data ):
        """Forwards the text to the current texthandler."""
        self.__texthandler( data )

def processpage( url, parser ):
    """Reads one URL with a HTTP GET request and feeds the body to the parser.

    url    -- the address of the page; "http" and "https" are supported.
    parser -- an object with a feed() method that receives the page body.

    The body is only parsed if the server answers with status 200.  Any
    other status and every exception is reported with an "Error: ..."
    line on standard output; nothing is raised to the caller.
    """
    connection = None
    try:
        components = urlparse( url )

        # Build the complete request target.  An empty path (as in
        # "http://host") is not a valid request line, and parameters and
        # query string belong to the resource that is requested.
        target = components.path or "/"
        if components.params:
            target += ";" + components.params
        if components.query:
            target += "?" + components.query

        if components.scheme == "https":
            connection = httplib.HTTPSConnection( components.netloc )
        else:
            connection = httplib.HTTPConnection( components.netloc )

        connection.putrequest( "GET", target )
        connection.putheader( "Accept", "text/html" )
        connection.endheaders()

        reply = connection.getresponse()
        if reply.status != 200:
            print( "Error: {0} {1} {2}".format( url, reply.status, reply.reason ) )
        else:
            parser.feed( reply.read() )
    except Exception as exception:
        print( "Error: {0} {1}".format( url, exception ) )
    finally:
        # Release the socket on every path; closing the connection also
        # closes a response that is still attached to it.
        if connection is not None:
            connection.close()
   
def main( argv ):
    """Processes all URLs of the command line and prints the variables.

    argv -- the argument list; argv[0] is the script name and is skipped,
            every further entry is read as an URL.
    """
    handler = HTMLHandler()

    # All pages are fed into the same handler, so its list of variables
    # holds the results of all URLs in command line order.
    addresses = argv[1:]
    for address in addresses:
        processpage( address, handler )

    for name, value in handler.variables:
        print( "{0} = {1}".format( name, value ) )

# Run main() only if the file is executed as a script, not if it is imported.
if __name__ == "__main__":
    main( sys.argv )

