#
# Classes supporting web server frontend
#

import BaseHTTPServer
import threading
import logging
import time
import socket
import SocketServer
import cgi # for parse_qsl

import rave.plugins.names as rnames
import rave.log as rlog
import rave.exceptions as rexcept

# Logger factory for this module. Elsewhere in this file it is called either
# with no argument or with a sub-name such as 'request', and the returned
# object is used like a standard logger (debug/info/warn/error/exception).
get_log = rlog.log_factory("org.cert.rave.web")

#
# Constant names for headers et al.
#

# Wait for the request to finish before returning.
# RequestHandler.do_GET reads this header: an integer value is a timeout in
# seconds, the literal string "forever" blocks until the job is done, and a
# missing header means the request is handled asynchronously.
HDR_WAIT= "X-Rave-Wait"
# Generate a new version and refresh the cache, even if the
# cache hasn't expired yet.
# Only the presence of this header is checked; its value is ignored.
HDR_REFRESH = "X-Rave-Force-Refresh"
# Generate a new version of this request, _and_ if the operation
# uses cached results to generate its results, regenerate them, too.
# Only the presence of this header is checked; its value is ignored.
HDR_DEEP_REFRESH = "X-Rave-Force-Deep-Refresh"

# Identifiers for the ticket serialization formats understood by
# Ticket.output() and Ticket.output_type().
OUTPUT_XML=1
OUTPUT_JSON=2

# Additional module-global variables
# Serialization format used for every ticket this module emits; must be one
# of the OUTPUT_* identifiers above.
output_format = OUTPUT_JSON

class BadRequest(Exception):
    """
    Client made a bad request.

    Raised by query() when the requested action is not of the form
    "namespace/analysis"; the request handler reports it as HTTP 400.
    """

def parse_action(action):
    """
    Split an action string at its first slash.

    Parameters:
        action: str
            A string of the form "namespace/analysis".
    Returns: (str, str)
        The text before the first '/' and everything after it. Any
        further slashes stay in the second element.
    Raises:
        ValueError if action contains no '/' at all.
    """
    slash_at = action.index('/')
    namespace = action[:slash_at]
    remainder = action[slash_at + 1:]
    return namespace, remainder

def cgi_demunge(cgi_dict):
    """
    Flatten a CGI-style dictionary (each key mapped to a sequence of
    values) into a plain dictionary mapping each key to a single value.

    Only the first value of each sequence is kept; any others are
    dropped. An empty sequence raises IndexError.
    """
    flattened = {}
    for key, values in cgi_dict.items():
        flattened[key] = values[0]
    return flattened
        

class Result(object):
    """
    Pairs the file name of an analysis product with its MIME type.

    Attributes:
        filename
            Name of the file holding the product, as returned by the
            operation that produced it.
        mime_type
            MIME type of that product.
    """
    def __init__(self, filename, mime_type):
        self.mime_type = mime_type
        self.filename = filename

    def __str__(self):
        template = "<rave.web.result: filename: %s; mime_type: %s>"
        return template % (self.filename, self.mime_type)

def query(action, params, scheduler,
          timeout=0, refresh=False, deep_refresh=False):
    """
    Obtain the results of the requested action.

    Parameters:
        action: str
            A string of the form "namespace/analysis" specifying an
            analysis to perform.
        params: dict
            Parameters to the analysis. Keys are strings, values are
            a list of all listed values for that parameter (usually
            one, so it's usually a one-element array). Only the first
            value of each list is passed on to the analysis.
        scheduler: rave.threads.Scheduler
            Work scheduler used to run analysis, if not cached
        timeout: int | None
            Controls whether to run the analysis synchronously or
            asynchronously, and how long to wait for the analysis to finish.
                0 == asynchronous
                any integer == wait that many seconds
                None == wait forever
        refresh: Boolean
            Whether to unconditionally refresh the cache by rerunning
            the operation
        deep_refresh: Boolean
            Whether to unconditionally refresh this operation's cached data
            and any cached data it might use.
    Returns: Result
        The file name reported by the operation together with the
        operation's MIME type. The outcome of the wait is not inspected,
        so the Result is returned whether or not the job finished in time.
    Raises:
        BadRequest if action contains no '/'.
        Anything raised while looking up or invoking the operation
        propagates unchanged.
    """
    try:
        ns, opname = parse_action(action)
    except ValueError:
        raise BadRequest()

    fileop = rnames.op_from_export(ns, opname)
    get_log().debug("fileop is %s", fileop)

    get_log().debug("timeout is %s", timeout)

#   A completion event is only needed when the caller wants to block.
    if timeout == 0:
        get_log().debug("Won't wait for file operation to finish.")
        job_done = None
    else:
        if timeout is not None:
            get_log().debug(
                "Waiting up to %d seconds for file operation to finish.",
                timeout)
        else:
            get_log().debug(
                "Waiting forever for file operation to finish.")
        job_done = threading.Event()

#   Flatten the parameters once and use the same dictionary for both the
#   log line and the call.
    demunged = cgi_demunge(params)
    call_args = {
        'rave_params': demunged,
        'rave_refresh': refresh,
        'rave_deep_refresh': deep_refresh,
        'rave_relative_path': True,
        'rave_scheduler': scheduler,
    }
    if job_done is not None:
    #   Only synchronous callers hand the operation an event to signal.
        call_args['rave_on_done'] = job_done

    get_log().debug("Calling: %s(rave_params=%s...)",
                    fileop.core_name(), demunged)
    filename = fileop(**call_args)

    if job_done is not None:
    #   timeout of None blocks until the event is set.
        job_done.wait(timeout)

    return Result(filename, fileop.mime_type)


class Ticket(object):
    """
    Identifies a job either in queue with the analysis engine or completed.

    A ticket can serialize itself as XML or JSON; which one output() emits
    is decided by the module-level output_format setting.
    """
    def __init__(self, url_base, result):
        """
        Constructor.
        Parameters:
        url_base
            prefix which, combined with filename, forms URL reference
            for completed job product
        result
            object exposing filename and mime_type attributes, such as
            the Result returned by query()
        """
        self.url_base  = url_base
        self.fname     = result.filename
        self.mime_type = result.mime_type
        self.version   = "1.1"

    def name(self):
        """Return the file name of the job product."""
        return self.fname

    def xml(self):
        """
        Return the ticket as an XML document (str).

        NOTE(review): values are interpolated verbatim, without XML
        escaping.
        """
    #   Written line by line so the exact whitespace, including the
    #   trailing indentation after the closing tag, is visible.
        template = (
            '<?xml version="1.0"?>\n'
            '<rave:ticket xmlns:rave="http://www.cert.org/schema/rave">\n'
            '    <rave:version>%s</rave:version>\n'
            '    <rave:uri>%s/%s</rave:uri>\n'
            '    <rave:content-type>%s</rave:content-type>\n'
            '</rave:ticket>\n'
            '        '
        )
        return template % (
            self.version, self.url_base, self.fname, self.mime_type)

    def json(self):
        """
        Return the ticket as a JSON object (str).

        NOTE(review): values are interpolated verbatim, without JSON
        escaping.
        """
        template = (
            '{\n'
            '"version": "%s", \n'
            '"uri": "%s/%s",\n'
            '"content-type": "%s"\n'
            ' }'
        )
        return template % (
            self.version, self.url_base, self.fname, self.mime_type)

    def output(self):
        """
        Return the ticket serialized in the module's current output_format.

        Raises an exception if output_format is neither OUTPUT_XML nor
        OUTPUT_JSON.
        """
        if output_format == OUTPUT_XML:
            return self.xml()
        if output_format == OUTPUT_JSON:
            return self.json()
    #   The exception class must be reached through the rexcept module
    #   alias; the bare name RaveException is not defined in this file.
    #   NOTE(review): assumes rave.exceptions defines RaveException - confirm.
        raise rexcept.RaveException("Unrecognized output format: %s" %
                                    output_format)

    def output_type(self):
        """
        Return the Content-Type string matching what output() produces.

        Raises an exception if output_format is neither OUTPUT_XML nor
        OUTPUT_JSON.
        """
        if output_format == OUTPUT_XML:
            return "text/xml"
        if output_format == OUTPUT_JSON:
            return "text/json"
    #   NOTE(review): assumes rave.exceptions defines RaveException - confirm.
        raise rexcept.RaveException("Unrecognized output format: %s" %
                                    output_format)

class RequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """
    Handles individual requests from rave.app.WebServer.

    The implementation is HTTP, but this is not a full-featured web server.
    In particular, sending query parameters on the URL via a POST is not
    supported.
    """
    def normalize_path(self):
        """
        Split the request path into an action path and query parameters.

        Leading slashes are removed from self.path itself; trailing
        slashes are removed from the returned path.

        Returns: (str, dict) | (None, None)
            The action path and the parsed query string (each key mapped
            to a list of values; empty if there is no query string).
            (None, None) if the request contains more than one '?'.
        """
    #   Strip leading slash(es)
        self.path = self.path.lstrip("/")

        if "?" not in self.path:
            path = self.path
            params = {}
        else:
            pieces = self.path.split("?")
            if len(pieces) != 2:
            #   Malformed query string -- two or more ?'s
                return None, None
            path, params_str = pieces
            params = cgi.parse_qs(params_str)

    #   Strip trailing slash(es)
        return path.rstrip("/"), params

    def _timeout_from_header(self, timeout_hdr):
        """
        Translate the value of the HDR_WAIT header into a query() timeout.

        For the header:
            None      == unsupplied (default is async)
            0         == asynchronous
            n         == synchronous ( n == int timeout in seconds)
            "forever" == synchronous (wait forever)

        This is mapped onto the semantics for a timeout in query(), which
        adhere more closely to the semantics for threading timeouts:
            0    == async, non-blocking (wait() returns immediately)
            None == sync, block forever
            n    == sync, block n seconds

        Raises:
            BadRequest if the header is neither "forever" nor an integer.
        """
        if timeout_hdr is None:
            return 0       # Async
        if timeout_hdr == "forever":
            return None    # Wait forever
        try:
            return int(timeout_hdr)
        except ValueError:
        #   A garbage header is the client's mistake, not a server error.
            raise BadRequest(
                "Invalid %s header: %r" % (HDR_WAIT, timeout_hdr))

    def do_GET(self):
        """
        Serve all requests.

        Runs the requested action through query() and answers with a
        ticket describing where the product can be picked up. Responds
        400 for a malformed request, 404 for an unknown operation and
        500 for any other failure.
        """
        get_log().debug("RequestHandler.do_GET() called")
        try:
            path, params = self.normalize_path()
            if not path:
                self.send_error(400, "Bad request")
                return

            timeout = self._timeout_from_header(self.headers.get(HDR_WAIT))
            get_log().debug("timeout is %s", timeout)

            result = query(
                  path
                , params
                , self.server.scheduler
                , timeout
                , HDR_REFRESH in self.headers
                , HDR_DEEP_REFRESH in self.headers
            )
            get_log().debug("result is %s", result)
            ticket = Ticket(self.server.url_base, result)
            data = ticket.output()
        #   A timeout of 0 is the asynchronous case: the job has only been
        #   handed to the scheduler. (None means we blocked until it was
        #   done, so that case is a plain "OK".)
            if timeout == 0:
                self.send_response(200, "Job submitted")
            else:
                self.send_response(200, "OK")
            self.send_header("Content-Type", ticket.output_type())
            self.send_header("Content-Length", len(data))
            self.end_headers()
        #   A HEAD response carries headers only.
            if self.command != "HEAD":
                self.wfile.write(data)
        except socket.error:
            get_log().error("Lost connection")
        except rexcept.NoSuchOperation:
            self.send_error(
                404, "%s not found" % path
            )
        except BadRequest:
            self.send_error(400, "Bad request")
        except Exception:
        #   Last-resort boundary: log the traceback and tell the client.
            get_log().exception("Error processing request")
            self.send_error(500, "Server Error")

    def log_message(self, format, *args):
        """
        Send the server's per-request log lines to the 'request' logger
        instead of the default destination.
        """
        get_log('request').info("%s - - [%s] %s",
                                self.address_string(),
                                self.log_date_time_string(),
                                format%args)

#   Assign do_GET as handler for POST (TODO: fix POST)
    do_HEAD = do_POST = do_GET



class WebServer(SocketServer.ThreadingMixIn, BaseHTTPServer.HTTPServer):
    """
    The web server used to file tickets with the RAVE analysis
    engines.

    This webserver is thread-safe; in particular, a call to serve() will
    block until another thread calls shutdown().
    """

#   Change defaults in SocketServer
    allow_reuse_address = True

    def __init__(
          self
        , scheduler
        , url_base
        , listen_port=None
        , listen_addr="localhost"
     ):
        """
        Constructor.
        Parameters:
        scheduler
            Work scheduler object used to schedule analysis queries
        url_base
            Root URL to report to clients to pick up visualizations
        listen_port (required)
            Port on which to listen for connections.
        listen_addr
            Address on which to listen for connections. Defaults to
            localhost. The special value 'ALL' is stored as the empty
            string before binding.
        Raises:
            rave.exceptions.ParameterError if listen_port is missing
            (or otherwise false, e.g. 0).
        """
        if not listen_port:
            raise rexcept.ParameterError(
                "Listening port is required parameter")
        self.listen_port = listen_port
        if listen_addr == 'ALL':
            self.listen_addr = ''
        else:
            self.listen_addr = listen_addr
        self.scheduler = scheduler
        self.url_base = url_base

        self.should_run   = True
        self.evt_shutdown = threading.Event()
        self.evt_shutdown.set() # Start off in "shut down" state

    #   Doing __init__ to superclass last, so RequestHandler has access
    #   to variables on this class
        BaseHTTPServer.HTTPServer.__init__(
              self
            , (self.listen_addr, self.listen_port)
            , RequestHandler
        )
        get_log().info("Server bound to %s:%s",
                       self.listen_addr, self.listen_port)

    def serve(self):
        """
        Serve until told to shutdown.

        serve() is a middle ground between serve_forever() (serve forever)
        and handle_request() (serve one request). The caller must use
        serve() to service connections if shutdown() is expected to work.
        """
        get_log().debug("Server starting")
        self.evt_shutdown.clear()
        try:
            while self.should_run:
                self.handle_request()
        finally:
        #   Release the socket and signal shutdown() even if
        #   handle_request() raised; otherwise a caller blocked in
        #   shutdown() would never be woken.
            try:
                self.server_close()
            finally:
                get_log().debug("Server stopped")
                self.evt_shutdown.set()

    def shutdown(self, timeout=None):
        """
        Stop serving connections and free up listening socket.
        @param timeout number of seconds to block until server has shut
               down. By default, wait forever.
        """
        self.should_run = False
    #   The thread that ran serve() is probably blocking in handle_request()
    #   right now, waiting for a connection. Give it what it wants.
    #
    #   When listening on all interfaces listen_addr is '', which is not a
    #   portable address to connect to, so use the loopback name instead.
        wake_addr = self.listen_addr or "localhost"
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.connect((wake_addr, self.listen_port))
        except socket.error:
        #   The connection will occasionally shut down while the connect()
        #   is in the middle of connecting (usu. because a connection came
        #   between setting should_run to False and making the connect()
        #   call). In this case, a "connection reset by peer" will ensue.
        #
        #   This is harmless, and usually just means that things are working
        #   right without any help from connect().
            pass
        finally:
        #   Always release the wake-up socket, even when connect() failed.
            sock.close()
        self.evt_shutdown.wait(timeout)

    def is_shutdown(self):
        """
        Has the webserver stopped accepting connections?

        After calling shutdown() with a timeout, call this method to be sure
        that the server actually shut down and didn't just time out.

        @return True if the listening server has been shut down (or has
                not started serving yet).
        """
        return self.evt_shutdown.isSet()



class WebServerThread(threading.Thread):
    """
    Thread that runs a WebServer's serve() loop and offers a bounded
    shutdown() to stop it again.
    """
    def __init__(self, www):
        """
        Parameters:
        www
            The server to run; must provide serve(), shutdown(timeout)
            and is_shutdown().
        """
        super(WebServerThread, self).__init__()
        self.www = www

    def run(self):
        """Thread body: serve until the server is shut down."""
        self.www.serve()

    def shutdown(self):
        """
        Ask the server to stop, waiting at most 5 seconds, and log a
        warning if it has not stopped by then.
        """
        server = self.www
    #   Block no more than 5 seconds waiting for shutdown
        server.shutdown(5)
        stopped = server.is_shutdown()
        if not stopped:
            get_log().warn("Webserver did not shut down normally. Expect errors.")

    #   Give the server socket time to actually shut down, as we'll
    #   most likely be using it again in just a millisecond.
        time.sleep(.1)

