Canonical Voices

Posts tagged with 'ubuntuone'

mandel

So yet again I have been confronted with broken tests in Ubuntu One. As I have mentioned before, I have spent a significant amount of time ensuring that the tests of Ubuntu One (which use Twisted a lot) are deterministic and that we do not leave a dirty reactor behind. To that end, a few weeks ago I wrote the following code to help the rest of the team write such tests:

import os
import shutil
import tempfile
 
from twisted.internet import defer, endpoints, protocol
from twisted.spread import pb
 
from ubuntuone.devtools.testcases import BaseTestCase
 
# no init method +  twisted common warnings
# pylint: disable=W0232, C0103, E1101
 
 
def server_protocol_factory(cls):
    """Factory to create tidy protocols."""
 
    if cls is None:
        cls = protocol.Protocol
 
    class ServerTidyProtocol(cls):
        """A tidy protocol."""
 
        def connectionLost(self, *args):
            """Lost the connection."""
            cls.connectionLost(self, *args)
            # let's tell everyone
            # pylint: disable=W0212
            if (self.factory._disconnecting
                    and self.factory.testserver_on_connection_lost is not None
                    and not self.factory.testserver_on_connection_lost.called):
                self.factory.testserver_on_connection_lost.callback(self)
            # pylint: enable=W0212
 
    return ServerTidyProtocol
 
 
def client_protocol_factory(cls):
    """Factory to create tidy protocols."""
 
    if cls is None:
        cls = protocol.Protocol
 
    class ClientTidyProtocol(cls):
        """A tidy protocol."""
 
        def connectionLost(self, *a):
            """Connection list."""
            # pylint: disable=W0212
            if (self.factory._disconnecting
                    and self.factory.testserver_on_connection_lost is not None
                    and not self.factory.testserver_on_connection_lost.called):
                self.factory.testserver_on_connection_lost.callback(self)
            # pylint: enable=W0212
            cls.connectionLost(self, *a)
 
    return ClientTidyProtocol
 
 
class TidySocketServer(object):
    """Ensure that twisted servers are correctly managed in tests.
 
    Closing a twisted server is a complicated matter. In order to do so you
    have to ensure that three different deferreds are fired:
 
        1. The server must stop listening.
        2. The client connection must disconnect.
        3. The server connection must disconnect.
 
    This class allows you to create a server and a client that will ensure that
    the reactor is left clean by following the pattern described at
    http://mumak.net/stuff/twisted-disconnect.html
    """
    def __init__(self):
        """Create a new instance."""
        self.listener = None
        self.server_factory = None
 
        self.connector = None
        self.client_factory = None
 
    def get_server_endpoint(self):
        """Return the server endpoint description."""
        raise NotImplementedError('To be implemented by child classes.')
 
    def get_client_endpoint(self):
        """Return the client endpoint description."""
        raise NotImplementedError('To be implemented by child classes.')
 
    @defer.inlineCallbacks
    def listen_server(self, server_class, *args, **kwargs):
        """Start a server in a random port."""
        from twisted.internet import reactor
        self.server_factory = server_class(*args, **kwargs)
        self.server_factory._disconnecting = False
        self.server_factory.testserver_on_connection_lost = defer.Deferred()
        self.server_factory.protocol = server_protocol_factory(
                                                 self.server_factory.protocol)
        endpoint = endpoints.serverFromString(reactor,
                                              self.get_server_endpoint())
        self.listener = yield endpoint.listen(self.server_factory)
        defer.returnValue(self.server_factory)
 
    @defer.inlineCallbacks
    def connect_client(self, client_class, *args, **kwargs):
        """Conect a client to a given server."""
        from twisted.internet import reactor
 
        if self.server_factory is None:
            raise ValueError('Server Factory was not provided.')
        if self.listener is None:
            raise ValueError('%s has not started listening.' %
                             self.server_factory)
 
        self.client_factory = client_class(*args, **kwargs)
        self.client_factory._disconnecting = False
        self.client_factory.protocol = client_protocol_factory(
                                                 self.client_factory.protocol)
        self.client_factory.testserver_on_connection_lost = defer.Deferred()
        endpoint = endpoints.clientFromString(reactor,
                                                    self.get_client_endpoint())
        self.connector = yield endpoint.connect(self.client_factory)
        defer.returnValue(self.client_factory)
 
    def clean_up(self):
        """Action to be performed for clean up."""
        if self.server_factory is None or self.listener is None:
            # nothing to clean
            return defer.succeed(None)
 
        if self.listener and self.connector:
            # clean client and server
            self.server_factory._disconnecting = True
            self.client_factory._disconnecting = True
            self.connector.transport.loseConnection()
            d = defer.maybeDeferred(self.listener.stopListening)
            return defer.gatherResults([d,
                self.client_factory.testserver_on_connection_lost,
                self.server_factory.testserver_on_connection_lost])
        if self.listener:
            # just clean the server since there is no client
            self.server_factory._disconnecting = True
            return defer.maybeDeferred(self.listener.stopListening)
 
 
class TidyTCPServer(TidySocketServer):
    """A tidy tcp domain sockets server."""
 
    client_endpoint_pattern = 'tcp:host=127.0.0.1:port=%s'
    server_endpoint_pattern = 'tcp:0:interface=127.0.0.1'
 
    def get_server_endpoint(self):
        """Return the server endpoint description."""
        return self.server_endpoint_pattern
 
    def get_client_endpoint(self):
        """Return the client endpoint description."""
        if self.server_factory is None:
            raise ValueError('Server Factory was not provided.')
        if self.listener is None:
            raise ValueError('%s has not started listening.' %
                             self.server_factory)
        return self.client_endpoint_pattern % self.listener.getHost().port
 
 
class TidyUnixServer(TidySocketServer):
    """A tidy unix domain sockets server."""
 
    client_endpoint_pattern = 'unix:path=%s'
    server_endpoint_pattern = 'unix:%s'
 
    def __init__(self):
        """Create a new instance."""
        super(TidyUnixServer, self).__init__()
        self.temp_dir = tempfile.mkdtemp()
        self.path = os.path.join(self.temp_dir, 'tidy_unix_server')
 
    def get_server_endpoint(self):
        """Return the server endpoint description."""
        return self.server_endpoint_pattern % self.path
 
    def get_client_endpoint(self):
        """Return the client endpoint description."""
        return self.client_endpoint_pattern % self.path
 
    def clean_up(self):
        """Action to be performed for clean up."""
        result = super(TidyUnixServer, self).clean_up()
        # remove the dir once we are disconnected
        result.addCallback(lambda _: shutil.rmtree(self.temp_dir))
        return result
 
 
class ServerTestCase(BaseTestCase):
    """Base test case for tidy servers."""
 
    @defer.inlineCallbacks
    def setUp(self):
        """Set the diff tests."""
        yield super(ServerTestCase, self).setUp()
 
        try:
            self.server_runner = self.get_server()
        except NotImplementedError:
            self.server_runner = None
 
        self.server_factory = None
        self.client_factory = None
        self.server_disconnected = None
        self.client_connected = None
        self.client_disconnected = None
        self.listener = None
        self.connector = None
        self.addCleanup(self.tear_down_server_client)
 
    def get_server(self):
        """Return the server to be used to run the tests."""
        raise NotImplementedError('To be implemented by child classes.')
 
    @defer.inlineCallbacks
    def listen_server(self, server_class, *args, **kwargs):
        """Listen a server.
 
        The method takes the server class and the arguments that should be
        passed to the server constructor.
        """
        self.server_factory = yield self.server_runner.listen_server(
                                                server_class, *args, **kwargs)
        self.server_disconnected = (
                self.server_factory.testserver_on_connection_lost)
        self.listener = self.server_runner.listener
 
    @defer.inlineCallbacks
    def connect_client(self, client_class, *args, **kwargs):
        """Connect the client.
 
        The method takes the client factory  class and the arguments that
        should be passed to the client constructor.
        """
        self.client_factory = yield self.server_runner.connect_client(
                                                client_class, *args, **kwargs)
        self.client_disconnected = (
                self.client_factory.testserver_on_connection_lost)
        self.connector = self.server_runner.connector
 
    def tear_down_server_client(self):
        """Clean the server and client."""
        if self.server_runner:
            return self.server_runner.clean_up()
 
 
class TCPServerTestCase(ServerTestCase):
    """Test that uses a single twisted server."""
 
    def get_server(self):
        """Return the server to be used to run the tests."""
        return TidyTCPServer()
 
 
class UnixServerTestCase(ServerTestCase):
    """Test that uses a single twisted server."""
 
    def get_server(self):
        """Return the server to be used to run the tests."""
        return TidyUnixServer()
 
 
class PbServerTestCase(ServerTestCase):
    """Test a pb server."""
 
    def get_server(self):
        """Return the server to be used to run the tests."""
        raise NotImplementedError('To be implemented by child classes.')
 
    @defer.inlineCallbacks
    def listen_server(self, *args, **kwargs):
        """Listen a pb server."""
        yield super(PbServerTestCase, self).listen_server(pb.PBServerFactory,
                                                              *args, **kwargs)
 
    @defer.inlineCallbacks
    def connect_client(self, *args, **kwargs):
        """Connect a pb client."""
        yield super(PbServerTestCase, self).connect_client(pb.PBClientFactory,
                                                              *args, **kwargs)
 
 
class TCPPbServerTestCase(PbServerTestCase):
    """Test a pb server over TCP."""
 
    def get_server(self):
        """Return the server to be used to run the tests."""
        return TidyTCPServer()
 
 
class UnixPbServerTestCase(PbServerTestCase):
    """Test a pb server over Unix domain sockets."""
 
    def get_server(self):
        """Return the server to be used to run the tests."""
        return TidyUnixServer()

The idea behind the code is that developers do not need to worry about how to stop listening ports or how to disconnect clients in their tests, and can just write tests like the following:

class TCPMultipleServersTestCase(TestCase):
    """Ensure that several servers can be ran."""
 
    timeout = 2
 
    @defer.inlineCallbacks
    def setUp(self):
        """Set the diff tests."""
        yield super(TCPMultipleServersTestCase, self).setUp()
        self.first_tcp_server = self.get_server()
        self.second_tcp_server = self.get_server()
        self.adder = Adder()
        self.calculator = Calculator(self.adder)
        self.echoer = Echoer()
 
    def get_server(self):
        """Return the server to be used to run the tests."""
        return TidyTCPServer()
 
    @defer.inlineCallbacks
    def test_single_server(self):
        """Test setting a single server."""
        first_number = 1
        second_number = 2
        yield self.first_tcp_server.listen_server(pb.PBServerFactory,
                                                              self.calculator)
        self.addCleanup(self.first_tcp_server.clean_up)
        calculator_c = yield self.first_tcp_server.connect_client(
                                                           pb.PBClientFactory)
        calculator = yield calculator_c.getRootObject()
        adder = yield calculator.callRemote('get_adder')
        result = yield adder.callRemote('add', first_number, second_number)
        self.assertEqual(first_number + second_number, result)
 
    @defer.inlineCallbacks
    def test_multiple_server(self):
        """Test setting multiple server."""
        first_number = 1
        second_number = 2
        # first server
        yield self.first_tcp_server.listen_server(pb.PBServerFactory,
                                                              self.calculator)
        self.addCleanup(self.first_tcp_server.clean_up)
 
        # second server
        yield self.second_tcp_server.listen_server(pb.PBServerFactory,
                                                   self.echoer)
        self.addCleanup(self.second_tcp_server.clean_up)
 
        # connect the diff clients
        calculator_c = yield self.first_tcp_server.connect_client(
                                                           pb.PBClientFactory)
        echoer_c = yield self.second_tcp_server.connect_client(
                                                           pb.PBClientFactory)
 
        calculator = yield calculator_c.getRootObject()
        adder = yield calculator.callRemote('get_adder')
        result = yield adder.callRemote('add', first_number, second_number)
        self.assertEqual(first_number + second_number, result)
        echoer = yield echoer_c.getRootObject()
        echo = yield echoer.callRemote('say', 'hello')
        self.assertEqual(self.echoer.remote_say('hello'), echo)

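For reference, the Adder, Calculator and Echoer helpers used above are not defined in the post; the following is a minimal sketch of what they could look like (my reconstruction, not the original code):

from twisted.spread import pb


class Adder(pb.Referenceable):
    """A referenceable that adds two numbers."""

    def remote_add(self, first, second):
        """Add the two given numbers."""
        return first + second


class Calculator(pb.Root):
    """A root object that hands out its adder."""

    def __init__(self, adder):
        self.adder = adder

    def remote_get_adder(self):
        """Return a reference to the adder."""
        return self.adder


class Echoer(pb.Root):
    """A root object that echoes what it is told."""

    def remote_say(self, sentence):
        """Echo the given sentence."""
        return 'Echoer: %s' % sentence
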
As you can see, those tests do not give a rat's ass about ensuring that the clients lose their connections or that we stop listening on ports… Or so I thought, because the following code made that approach break on Mac OS X (although I suspect it was also broken on Linux and Windows, we just never experienced it):

class NullProtocol(protocol.Protocol):
    """A protocol that drops the connection."""
 
    def connectionMade(self):
        """Just drop the connection."""
        self.transport.loseConnection()
 
 
class PortDetectFactory(protocol.ClientFactory):
    """Will detect if something is listening in a given port."""
 
    protocol = NullProtocol
 
    def __init__(self):
        """Initialize this instance."""
        self.d = defer.Deferred()
 
    def is_listening(self):
        """A deferred that will become True if something is listening."""
        return self.d
 
    def buildProtocol(self, addr):
        """Connected."""
        p = protocol.ClientFactory.buildProtocol(self, addr)
        if not self.d.called:
            self.d.callback(True)
        return p
 
    def clientConnectionLost(self, connector, reason):
        """The connection was lost."""
        if not self.d.called:
            self.d.callback(False)
 
    def clientConnectionFailed(self, connector, reason):
        """The connection failed."""
        if not self.d.called:
            self.d.callback(False)

The code used to test the above was written as:

    @defer.inlineCallbacks
    def test_is_already_running(self):
        """The is_already_running method returns True if already started."""
        server = self.get_server()
        self.addCleanup(server.clean_up)
 
        class TestConnect(object):
 
            @defer.inlineCallbacks
            def connect(my_self, factory):
                connected_factory = yield server.connect_client(PortDetectFactory)
                self.patch(factory, 'is_listening', lambda:
                        connected_factory.is_listening())
                defer.returnValue(connected_factory)
 
        self.patch(tcpactivation, 'clientFromString', lambda *args: TestConnect())
 
        yield server.listen_server(protocol.ServerFactory)
 
        # pylint: disable=E1101
        ad = ActivationDetector(self.config)
        result = yield ad.is_already_running()
        self.assertTrue(result, "It should be already running.")

While on all the other platforms the tests passed with no problems, on Mac OS X the tests would block in the clean_up method from the server because the deferred that was called in the connectionLost of the ServerTidyProtocol was never fired… Interesting. After digging in the code I realized that the approach taken by the clean_up code was wrong. The problem lies in the way the NullProtocol works. As you can see in the code, the protocol loses its connection as soon as it is made. This results in two possible scenarios:

  1. The server learns that a client has connected and calls buildProtocol.
  2. The connection is lost so fast that buildProtocol on the ServerFactory never gets called.

When running the tests on Windows and Linux we were always facing the first scenario: buildProtocol was called, which meant that connectionLost in the server protocol would be called. On Mac OS X, on the other hand, about 1 out of 10 runs of the tests would block in the clean up because we were in the second scenario; that is, no protocol was ever built by the ServerFactory, which means connectionLost was never called because it was not needed. The workaround is quite simple once you understand what is going on: the ServerFactory has to be modified to create the deferred when buildProtocol is called, and not before, and the clean up has to check whether the deferred is None, waiting for it to fire only when it is not. The fixed version of the helper code is the following:

import os
import shutil
import tempfile
 
from twisted.internet import defer, endpoints, protocol
from twisted.spread import pb
 
from ubuntuone.devtools.testcases import BaseTestCase
 
# no init method + twisted common warnings
# pylint: disable=W0232, C0103, E1101
 
 
def server_protocol_factory(cls):
    """Factory to create tidy protocols."""
 
    if cls is None:
        cls = protocol.Protocol
 
    class ServerTidyProtocol(cls):
        """A tidy protocol."""
 
        def connectionLost(self, *args):
            """Lost the connection."""
            cls.connectionLost(self, *args)
            # let's tell everyone
            # pylint: disable=W0212
            if (self.factory._disconnecting
                    and self.factory.testserver_on_connection_lost is not None
                    and not self.factory.testserver_on_connection_lost.called):
                self.factory.testserver_on_connection_lost.callback(self)
            # pylint: enable=W0212
 
    return ServerTidyProtocol
 
 
def server_factory_factory(cls):
    """Factory that creates special types of factories for tests."""
 
    if cls is None:
        cls = protocol.ServerFactory
 
    class TidyServerFactory(cls):
        """A tidy factory."""
 
        testserver_on_connection_lost = None
 
        def buildProtocol(self, addr):
            prot = cls.buildProtocol(self, addr)
            self.testserver_on_connection_lost = defer.Deferred()
            return prot
 
    return TidyServerFactory
 
 
def client_protocol_factory(cls):
    """Factory to create tidy protocols."""
 
    if cls is None:
        cls = protocol.Protocol
 
    class ClientTidyProtocol(cls):
        """A tidy protocol."""
 
        def connectionLost(self, *a):
            """Connection list."""
            cls.connectionLost(self, *a)
            # pylint: disable=W0212
            if (self.factory._disconnecting
                    and self.factory.testserver_on_connection_lost is not None
                    and not self.factory.testserver_on_connection_lost.called):
                self.factory.testserver_on_connection_lost.callback(self)
            # pylint: enable=W0212
 
    return ClientTidyProtocol
 
 
class TidySocketServer(object):
    """Ensure that twisted servers are correctly managed in tests.
 
    Closing a twisted server is a complicated matter. In order to do so you
    have to ensure that three different deferreds are fired:
 
        1. The server must stop listening.
        2. The client connection must disconnect.
        3. The server connection must disconnect.
 
    This class allows you to create a server and a client that will ensure that
    the reactor is left clean by following the pattern described at
    http://mumak.net/stuff/twisted-disconnect.html
    """
    def __init__(self):
        """Create a new instance."""
        self.listener = None
        self.server_factory = None
 
        self.connector = None
        self.client_factory = None
 
    def get_server_endpoint(self):
        """Return the server endpoint description."""
        raise NotImplementedError('To be implemented by child classes.')
 
    def get_client_endpoint(self):
        """Return the client endpoint description."""
        raise NotImplementedError('To be implemented by child classes.')
 
    @defer.inlineCallbacks
    def listen_server(self, server_class, *args, **kwargs):
        """Start a server in a random port."""
        from twisted.internet import reactor
        tidy_class = server_factory_factory(server_class)
        self.server_factory = tidy_class(*args, **kwargs)
        self.server_factory._disconnecting = False
        self.server_factory.protocol = server_protocol_factory(
                                                 self.server_factory.protocol)
        endpoint = endpoints.serverFromString(reactor,
                                              self.get_server_endpoint())
        self.listener = yield endpoint.listen(self.server_factory)
        defer.returnValue(self.server_factory)
 
    @defer.inlineCallbacks
    def connect_client(self, client_class, *args, **kwargs):
        """Conect a client to a given server."""
        from twisted.internet import reactor
 
        if self.server_factory is None:
            raise ValueError('Server Factory was not provided.')
        if self.listener is None:
            raise ValueError('%s has not started listening.' %
                             self.server_factory)
 
        self.client_factory = client_class(*args, **kwargs)
        self.client_factory._disconnecting = False
        self.client_factory.protocol = client_protocol_factory(
                                                 self.client_factory.protocol)
        self.client_factory.testserver_on_connection_lost = defer.Deferred()
        endpoint = endpoints.clientFromString(reactor,
                                                    self.get_client_endpoint())
        self.connector = yield endpoint.connect(self.client_factory)
        defer.returnValue(self.client_factory)
 
    def clean_up(self):
        """Action to be performed for clean up."""
        if self.server_factory is None or self.listener is None:
            # nothing to clean
            return defer.succeed(None)
 
        if self.listener and self.connector:
            # clean client and server
            self.server_factory._disconnecting = True
            self.client_factory._disconnecting = True
            d = defer.maybeDeferred(self.listener.stopListening)
            self.connector.transport.loseConnection()
            if self.server_factory.testserver_on_connection_lost:
                return defer.gatherResults([d,
                    self.client_factory.testserver_on_connection_lost,
                    self.server_factory.testserver_on_connection_lost])
            else:
                return defer.gatherResults([d,
                    self.client_factory.testserver_on_connection_lost])
        if self.listener:
            # just clean the server since there is no client
            # pylint: disable=W0201
            self.server_factory._disconnecting = True
            return defer.maybeDeferred(self.listener.stopListening)
            # pylint: enable=W0201
 
 
class TidyTCPServer(TidySocketServer):
    """A tidy tcp domain sockets server."""
 
    client_endpoint_pattern = 'tcp:host=127.0.0.1:port=%s'
    server_endpoint_pattern = 'tcp:0:interface=127.0.0.1'
 
    def get_server_endpoint(self):
        """Return the server endpoint description."""
        return self.server_endpoint_pattern
 
    def get_client_endpoint(self):
        """Return the client endpoint description."""
        if self.server_factory is None:
            raise ValueError('Server Factory was not provided.')
        if self.listener is None:
            raise ValueError('%s has not started listening.' %
                             self.server_factory)
        return self.client_endpoint_pattern % self.listener.getHost().port
 
 
class TidyUnixServer(TidySocketServer):
    """A tidy unix domain sockets server."""
 
    client_endpoint_pattern = 'unix:path=%s'
    server_endpoint_pattern = 'unix:%s'
 
    def __init__(self):
        """Create a new instance."""
        super(TidyUnixServer, self).__init__()
        self.temp_dir = tempfile.mkdtemp()
        self.path = os.path.join(self.temp_dir, 'tidy_unix_server')
 
    def get_server_endpoint(self):
        """Return the server endpoint description."""
        return self.server_endpoint_pattern % self.path
 
    def get_client_endpoint(self):
        """Return the client endpoint description."""
        return self.client_endpoint_pattern % self.path
 
    def clean_up(self):
        """Action to be performed for clean up."""
        result = super(TidyUnixServer, self).clean_up()
        # remove the dir once we are disconnected
        result.addCallback(lambda _: shutil.rmtree(self.temp_dir))
        return result
 
 
class ServerTestCase(BaseTestCase):
    """Base test case for tidy servers."""
 
    @defer.inlineCallbacks
    def setUp(self):
        """Set the diff tests."""
        yield super(ServerTestCase, self).setUp()
 
        try:
            self.server_runner = self.get_server()
        except NotImplementedError:
            self.server_runner = None
 
        self.server_factory = None
        self.client_factory = None
        self.server_disconnected = None
        self.client_connected = None
        self.client_disconnected = None
        self.listener = None
        self.connector = None
        self.addCleanup(self.tear_down_server_client)
 
    def get_server(self):
        """Return the server to be used to run the tests."""
        raise NotImplementedError('To be implemented by child classes.')
 
    @defer.inlineCallbacks
    def listen_server(self, server_class, *args, **kwargs):
        """Listen a server.
 
        The method takes the server class and the arguments that should be
        passed to the server constructor.
        """
        self.server_factory = yield self.server_runner.listen_server(
                                                server_class, *args, **kwargs)
        self.server_disconnected = (
                self.server_factory.testserver_on_connection_lost)
        self.listener = self.server_runner.listener
 
    @defer.inlineCallbacks
    def connect_client(self, client_class, *args, **kwargs):
        """Connect the client.
 
        The method takes the client factory  class and the arguments that
        should be passed to the client constructor.
        """
        self.client_factory = yield self.server_runner.connect_client(
                                                client_class, *args, **kwargs)
        self.client_disconnected = (
                self.client_factory.testserver_on_connection_lost)
        self.connector = self.server_runner.connector
 
    def tear_down_server_client(self):
        """Clean the server and client."""
        if self.server_runner:
            return self.server_runner.clean_up()
 
 
class TCPServerTestCase(ServerTestCase):
    """Test that uses a single twisted server."""
 
    def get_server(self):
        """Return the server to be used to run the tests."""
        return TidyTCPServer()
 
 
class UnixServerTestCase(ServerTestCase):
    """Test that uses a single twisted server."""
 
    def get_server(self):
        """Return the server to be used to run the tests."""
        return TidyUnixServer()
 
 
class PbServerTestCase(ServerTestCase):
    """Test a pb server."""
 
    def get_server(self):
        """Return the server to be used to run the tests."""
        raise NotImplementedError('To be implemented by child classes.')
 
    @defer.inlineCallbacks
    def listen_server(self, *args, **kwargs):
        """Listen a pb server."""
        yield super(PbServerTestCase, self).listen_server(pb.PBServerFactory,
                                                              *args, **kwargs)
 
    @defer.inlineCallbacks
    def connect_client(self, *args, **kwargs):
        """Connect a pb client."""
        yield super(PbServerTestCase, self).connect_client(pb.PBClientFactory,
                                                              *args, **kwargs)
 
 
class TCPPbServerTestCase(PbServerTestCase):
    """Test a pb server over TCP."""
 
    def get_server(self):
        """Return the server to be used to run the tests."""
        return TidyTCPServer()
 
 
class UnixPbServerTestCase(PbServerTestCase):
    """Test a pb server over Unix domain sockets."""
 
    def get_server(self):
        """Return the server to be used to run the tests."""
        return TidyUnixServer()

I wonder if at some point I should share this code with the people out there… any opinions?

mandel

In the past days I have been working on implementing a Python TestCase that can be used to perform integration tests against the future implementation of proxy support that will be landing in Ubuntu One. The idea of the TestCase is the following:

  • Start a proxy so that connections go through it. The proxy has to listen on two different ports, one on which auth is not required and a second one on which auth is required. At the moment the only supported proxy is Squid using basic auth.
  • The test case should provide a way to access the proxy details for subclasses to use.
  • The test case should integrate with the ubuntuone-dev-tools.

Initially, one of the major problems I had was to start squid on two different ports so that:

  • Port A accepts non-auth requests.
  • Port A rejects auth requests.
  • Port B accepts auth requests.

The idea is simple: if you use port A you should never authenticate, while on port B you must. An example configuration of the ACLs and ports is the following:

auth_param basic casesensitive on
# Use a default basic auth using ncsa and the generated password file.
auth_param basic program ${auth_process} ${auth_file}
#Recommended minimum configuration:
acl all src all
acl manager proto cache_object
acl localhost src 127.0.0.1/32
acl to_localhost dst 127.0.0.0/8 0.0.0.0/32
#
# Example rule allowing access from your local networks.
# Adapt to list your (internal) IP networks from where browsing
# should be allowed
acl localnet src 10.0.0.0/8	# RFC1918 possible internal network
acl localnet src 172.16.0.0/12	# RFC1918 possible internal network
acl localnet src 192.168.0.0/16	# RFC1918 possible internal network
#
acl SSL_ports port 443		# https
acl SSL_ports port 563		# snews
acl SSL_ports port 873		# rsync
acl Safe_ports port 80		# http
acl Safe_ports port 21		# ftp
acl Safe_ports port 443		# https
acl Safe_ports port 70		# gopher
acl Safe_ports port 210		# wais
acl Safe_ports port 1025-65535	# unregistered ports
acl Safe_ports port 280		# http-mgmt
acl Safe_ports port 488		# gss-http
acl Safe_ports port 591		# filemaker
acl Safe_ports port 777		# multiling http
acl Safe_ports port 631		# cups
acl Safe_ports port 873		# rsync
acl Safe_ports port 901		# SWAT
acl purge method PURGE
acl CONNECT method CONNECT

# make an acl for users that have auth
acl password proxy_auth REQUIRED myportname ${auth_port_number}
acl auth_port_connected myportname ${auth_port_number}
acl nonauth_port_connected myportname ${noauth_port_number}

# Settings used for the tests:
# Allow users connected to the nonauth port
# Allow users authenticated AND connected to the auth port
http_access allow nonauth_port_connected
http_access allow password

#Recommended minimum configuration:
#
# Only allow cachemgr access from localhost
http_access allow manager localhost
http_access deny manager
# Only allow purge requests from localhost
http_access allow purge localhost
http_access deny purge
# Deny requests to unknown ports
http_access deny !Safe_ports
# Deny CONNECT to other than SSL ports
http_access deny CONNECT !SSL_ports
# Example rule allowing access from your local networks.
# Adapt localnet in the ACL section to list your (internal) IP networks
# from where browsing should be allowed
#http_access allow localnet
http_access allow localhost

# And finally deny all other access to this proxy
http_access deny all

#Allow ICP queries from local networks only
icp_access allow localnet
icp_access deny all

# Squid normally listens to port 3128 but we are going to listen to two
# different ports, one for auth one for nonauth.
http_port ${noauth_port_number}
http_port ${auth_port_number}

#We recommend you to use at least the following line.
hierarchy_stoplist cgi-bin ?

# Default cache settings.
cache_dir ufs ${spool_temp} 100 16 256

# access log settings
access_log ${squid_temp}/access.log squid

#Default cache store log
cache_store_log ${squid_temp}/store.log

#Default pid file name
pid_filename ${squid_temp}/squid.pid

#Default netdb file name:
netdb_filename ${spool_temp}/logs/netdb.state

#Suggested default:
refresh_pattern ^ftp:		1440	20%	10080
refresh_pattern ^gopher:	1440	0%	1440
refresh_pattern -i (/cgi-bin/|\?) 0	0%	0
refresh_pattern (Release|Packages(.gz)*)$	0	20%	2880
# example line deb packages
#refresh_pattern (\.deb|\.udeb)$   129600 100% 129600
refresh_pattern .		0	20%	4320

# Don't upgrade ShoutCast responses to HTTP
acl shoutcast rep_header X-HTTP09-First-Line ^ICY.[0-9]
upgrade_http0.9 deny shoutcast

# Apache mod_gzip and mod_deflate known to be broken so don't trust
# Apache to signal ETag correctly on such responses
acl apache rep_header Server ^Apache
broken_vary_encoding allow apache

extension_methods REPORT MERGE MKACTIVITY CHECKOUT

hosts_file /etc/hosts

# Leave coredumps in the first cache dir
coredump_dir ${spool_temp}

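The ${...} placeholders in the configuration above are not Squid syntax; they have to be substituted with real values before the daemon is started. A minimal sketch of how such a template could be rendered using Python's string.Template (the helper name, paths and port numbers are illustrative, not the actual ubuntuone-dev-tools code):

import os
import tempfile
from string import Template


def render_squid_conf(template_path, auth_file):
    """Fill the squid.conf template with test-specific values."""
    spool_temp = tempfile.mkdtemp(prefix='squid-spool-')
    squid_temp = tempfile.mkdtemp(prefix='squid-temp-')
    with open(template_path) as template_file:
        template = Template(template_file.read())
    conf = template.substitute(
        auth_process='/usr/lib/squid/ncsa_auth',  # basic auth helper
        auth_file=auth_file,  # htpasswd-style file with the test credentials
        noauth_port_number=3128,  # port A: no auth required
        auth_port_number=3129,  # port B: auth required
        spool_temp=spool_temp,
        squid_temp=squid_temp)
    conf_path = os.path.join(squid_temp, 'squid.conf')
    with open(conf_path, 'w') as conf_file:
        conf_file.write(conf)
    return conf_path

In the real test case the ports would of course be picked dynamically, and squid would be started and stopped around the test run.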
Once the above was achieved, the code of the test case was quite simple on Ubuntu O. Unfortunately, it was not that simple on Ubuntu P, because there we have squid3, which supports HTTP 1.1 and keeps the connection alive. The fact that the connection is kept alive means that the reactor is left with a selectable running because the proxy keeps it there. In order to solve the issue I wrote the code so that the server could declare that the connection timed out. Here is the code that does it:

# -*- coding: utf-8 -*-
#
# Copyright 2011 Canonical Ltd.
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 3, as published
# by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranties of
# MERCHANTABILITY, SATISFACTORY QUALITY, or FITNESS FOR A PARTICULAR
# PURPOSE.  See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program.  If not, see <http://www.gnu.org/licenses/>.
"""Test the squid test case."""
import base64
 
from twisted.application import internet, service
from twisted.internet import defer, reactor
from twisted.web import client, error, http, resource, server
 
from ubuntuone.devtools.testcases.squid import SquidTestCase
 
 
SAMPLE_RESOURCE = "<p>Hello World!</p>"
SIMPLERESOURCE = "simpleresource"
THROWERROR = "throwerror"
UNAUTHORIZED = "unauthorized"
 
# ignore common twisted lint errors
# pylint: disable=C0103, W0212
 
 
class ProxyClientFactory(client.HTTPClientFactory):
    """Factory that supports proxy."""
 
    def __init__(self, proxy_url, proxy_port, url, headers=None):
        # we set the proxy details before the init because the parent __init__
        # calls setURL
        self.proxy_url = proxy_url
        self.proxy_port = proxy_port
        self.disconnected_d = defer.Deferred()
        client.HTTPClientFactory.__init__(self, url, headers=headers)
 
    def setURL(self, url):
        self.host = self.proxy_url
        self.port = self.proxy_port
        self.url = url
        self.path = url
 
    def clientConnectionLost(self, connector, reason, reconnecting=0):
        """Connection lost."""
        self.disconnected_d.callback(self)
 
 
class ProxyWebClient(object):
    """Provide useful web methods with proxy."""
 
    def __init__(self, proxy_url=None, proxy_port=None, username=None,
            password=None):
        """Create a new instance with the proxy settings."""
        self.proxy_url = proxy_url
        self.proxy_port = proxy_port
        self.username = username
        self.password = password
        self.factory = None
        self.connectors = []
 
    def _connect(self, url, contextFactory):
        """Perform the connection."""
        scheme, _, _, _ = client._parse(url)
        # pylint: disable=E1101
        if scheme == 'https':
            from twisted.internet import ssl
            if contextFactory is None:
                contextFactory = ssl.ClientContextFactory()
            self.connectors.append(reactor.connectSSL(self.proxy_url,
                                                      self.proxy_port,
                                                      self.factory,
                                                      contextFactory))
        else:
            self.connectors.append(reactor.connectTCP(self.proxy_url,
                                                      self.proxy_port,
                                                      self.factory))
            # pylint: enable=E1101
 
    def _process_auth_error(self, failure, url, contextFactory):
        """Process an auth failure."""
        failure.trap(error.Error)
        if failure.value.status == str(http.PROXY_AUTH_REQUIRED):
            # we try to get the page using the basic auth
            auth = base64.b64encode('%s:%s' % (self.username, self.password))
            auth_header = 'Basic ' + auth.strip()
            self.factory = ProxyClientFactory(self.proxy_url, self.proxy_port,
                            url, headers={'Proxy-Authorization': auth_header})
            self._connect(url, contextFactory)
            return self.factory.deferred
        else:
            return failure
 
    def get_page(self, url, contextFactory=None, *args, **kwargs):
        """Download a webpage as a string.
 
        This method relies on twisted.web.client.getPage but adds an extra
        step. If there is an auth error the method will perform a second try
        so that the username and password are used.
        """
        self.factory = ProxyClientFactory(self.proxy_url, self.proxy_port, url,
                                          headers={'Connection': 'close'})
        self._connect(url, contextFactory)
        self.factory.deferred.addErrback(self._process_auth_error, url,
                                    contextFactory)
        return self.factory.deferred
 
    @defer.inlineCallbacks
    def shutdown(self):
        """Clean all connectors."""
        for connector in self.connectors:
            yield connector.disconnect()
        defer.returnValue(True)
 
 
class SimpleResource(resource.Resource):
    """A simple web resource."""
 
    def render_GET(self, request):
        """Make a bit of html out of these resource's
        content."""
        return SAMPLE_RESOURCE
 
 
class SaveHTTPChannel(http.HTTPChannel):
    """A save protocol to be used in tests."""
 
    protocolInstance = None
 
    def connectionMade(self):
        """Keep track of the given protocol."""
        SaveHTTPChannel.protocolInstance = self
        http.HTTPChannel.connectionMade(self)
 
 
class SaveSite(server.Site):
    """A site that let us know when it closed."""
 
    protocol = SaveHTTPChannel
 
    def __init__(self, *args, **kwargs):
        """Create a new instance."""
        server.Site.__init__(self, *args, **kwargs)
        # we disable the timeout in the tests, we will deal with it manually.
        self.timeOut = None
 
 
class MockWebServer(object):
    """A mock webserver for testing"""
 
    def __init__(self):
        """Start up this instance."""
        root = resource.Resource()
        root.putChild(SIMPLERESOURCE, SimpleResource())
 
        root.putChild(THROWERROR, resource.NoResource())
 
        unauthorized_resource = resource.ErrorPage(resource.http.UNAUTHORIZED,
                                                "Unauthorized", "Unauthorized")
        root.putChild(UNAUTHORIZED, unauthorized_resource)
 
        self.site = SaveSite(root)
        application = service.Application('web')
        self.service_collection = service.IServiceCollection(application)
        #pylint: disable=E1101
        self.tcpserver = internet.TCPServer(0, self.site)
        self.tcpserver.setServiceParent(self.service_collection)
        self.service_collection.startService()
 
    def get_url(self):
        """Build the url for this mock server."""
        #pylint: disable=W0212
        port_num = self.tcpserver._port.getHost().port
        return "http://localhost:%d/" % port_num
 
    @defer.inlineCallbacks
    def stop(self):
        """Shut it down."""
        #pylint: disable=E1101
        # make the connection time out so that it works with squid3 when
        # the connection is kept alive.
        if self.site.protocol.protocolInstance:
            self.site.protocol.protocolInstance.timeoutConnection()
        yield self.service_collection.stopService()
 
 
class ProxyTestCase(SquidTestCase):
    """A squid test with no auth proxy."""
 
    @defer.inlineCallbacks
    def setUp(self):
        """Set the tests."""
        yield super(ProxyTestCase, self).setUp()
        self.ws = MockWebServer()
        self.proxy_client = None
        self.addCleanup(self.teardown_client_server)
        self.url = self.ws.get_url() + SIMPLERESOURCE
 
    def teardown_client_server(self):
        """Clean resources."""
        if self.proxy_client is not None:
            return defer.gatherResults([self.ws.stop(),
                               self.proxy_client.shutdown(),
                               self.proxy_client.factory.disconnected_d])
        else:
            return self.ws.stop()
 
    def access_noauth_url(self, address, port):
        """Access a url throught the proxy."""
        self.proxy_client = ProxyWebClient(proxy_url=address, proxy_port=port)
        return self.proxy_client.get_page(self.url)
 
    def access_auth_url(self, address, port, username, password):
        """Access a url throught the proxy."""
        self.proxy_client = ProxyWebClient(proxy_url=address, proxy_port=port,
                                         username=username, password=password)
        return self.proxy_client.get_page(self.url)
 
    @defer.inlineCallbacks
    def test_noauth_url_access(self):
        """Test accessing to the url."""
        settings = self.get_nonauth_proxy_settings()
        # if there is an exception we fail.
        data = yield self.access_noauth_url(settings['host'],
                                            settings['port'])
        self.assertEqual(SAMPLE_RESOURCE, data)
 
    @defer.inlineCallbacks
    def test_auth_url_access(self):
        """Test accessing to the url."""
        settings = self.get_auth_proxy_settings()
        # if there is an exception we fail.
        data = yield self.access_auth_url(settings['host'],
                                          settings['port'],
                                          settings['username'],
                                          settings['password'])
        self.assertEqual(SAMPLE_RESOURCE, data)
 
    def test_auth_url_401(self):
        """Test failing accessing the url."""
        settings = self.get_auth_proxy_settings()
        # swap password for username to fail
        d = self.failUnlessFailure(self.access_auth_url(settings['host'],
                                        settings['port'], settings['password'],
                                        settings['username']), error.Error)
        return d
 
    def test_auth_url_407(self):
        """Test failing accessing the url."""
        settings = self.get_auth_proxy_settings()
        d = self.failUnlessFailure(self.access_noauth_url(settings['host'],
                                   settings['port']), error.Error)
        return d

The above code contains the tests for the test case, and the important bits are:

class SaveHTTPChannel(http.HTTPChannel):
    """A save protocol to be used in tests."""
 
    protocolInstance = None
 
    def connectionMade(self):
        """Keep track of the given protocol."""
        SaveHTTPChannel.protocolInstance = self
        http.HTTPChannel.connectionMade(self)
 
 
class SaveSite(server.Site):
    """A site that let us know when it closed."""
 
    protocol = SaveHTTPChannel
 
    def __init__(self, *args, **kwargs):
        """Create a new instance."""
        server.Site.__init__(self, *args, **kwargs)
        # we disable the timeout in the tests, we will deal with it manually.
        self.timeOut = None

The above defines a protocol that keeps track of the instance that was used, so that we can trigger the time out from a clean up function.

        if self.site.protocol.protocolInstance:
            self.site.protocol.protocolInstance.timeoutConnection()

This tells the server to time out the kept-alive connection, removing the selectable that squid3 would otherwise leave in the reactor.

    def teardown_client_server(self):
        """Clean resources."""
        if self.proxy_client is not None:
            return defer.gatherResults([self.ws.stop(),
                               self.proxy_client.shutdown(),
                               self.proxy_client.factory.disconnected_d])
        else:
            return self.ws.stop()

And the clean up function. That is all; now I guess I'll move on to adding proxy support, or to ensuring that the test case works on Windows, which is certainly going to be a different issue.

mandel

At the moment we are working on providing proxy support for Ubuntu One. In order to test this correctly I have been setting up a LAN in my office so that I can test as many scenarios as possible. One of those scenarios is the one in which the proxy authenticates against Active Directory.

Because I use bind9 on one of my boxes for the DNS, I had to dig out how to configure it to work with AD. In order to do that I did the following:

  1. Edited named.conf.local to add a subdomain for the AD machine:

    zone "ad.example.com" {
            type master;
            file "/etc/bind/db.ad.example.com";
            allow-update { 192.168.1.103; };
    };
    
  2. Configured the subzone to work with AD.

    ; BIND data file for local loopback interface
    ;
    $TTL    604800
    @       IN      SOA     ad.example.com. root.ad.example.com. (
                                  2         ; Serial
                             604800         ; Refresh
                              86400         ; Retry
                            2419200         ; Expire
                             604800 )       ; Negative Cache TTL
    ;
    @       IN      NS      ad.marvel.
    @       IN      A       127.0.0.1
    @       IN      AAAA    ::1
    ;
    ; AD horrible domains
    ;
    dc1.ad.example.com.    A       192.168.1.103
    _ldap._tcp.ad.example.com.     SRV     0 0 389  dc1.ad.example.com.
    _kerberos._tcp.ad.example.com.    SRV     0 0 88   dc1.ad.example.com.
    _ldap._tcp.dc._msdcs.ad.example.com.   SRV     0 0 389  dc1.ad.example.com.
    _kerberos._tcp.dc._msdcs.ad.example.com.    SRV     0 0 88   dc1.ad.example.com.
    gc._msdcs.ad.example.com.      SRV     0 0 3268 dc1.ad.example.com.
    

    Note: It is important to remember that the computer name of the server that has the AD role is dc1; if we used a different name we would have to change the configuration accordingly.

  3. Restart the bind9 service (a quick verification example follows this list):

    sudo /etc/init.d/bind9 restart
    
  4. Install the AD server and specify that you DO NOT want to set that server as a DNS server too.
  5. Set the AD server to use your Ubuntu with your bind9 as the DNS server.

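Before installing AD it is worth sanity-checking that the records resolve from the bind9 box. These dig queries are just an illustration of what can be run; they are not part of the original setup:

    dig @127.0.0.1 dc1.ad.example.com A +short
    dig @127.0.0.1 _ldap._tcp.ad.example.com SRV +short
    dig @127.0.0.1 _kerberos._tcp.ad.example.com SRV +short

Each query should return the dc1 address or the matching SRV record; if not, check the zone file and reload bind9.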
There are lots of things missing if you wanted to use this as a set up for a corporate network, but it does the trick in my LAN since I do not have AD replication or other fancy things. Maybe it is useful for your home network, who knows…

mandel

At Ubuntu One we needed to be able to use named pipes on Windows for IPC. This is a very normal pattern in multi-process applications like the one we are going to provide, but in our case we had a twist: we are using Twisted. As some of you may know, there is no default reactor that would allow you to write a protocol in Twisted and use named pipes as the transport of the protocol. Well, this was true until very recently.

Txnamedpipes (lp:txnamedpipes) is a project that provides an IOCP-based reactor that allows you to use named pipes as the transport of your protocol. At the moment we are confident that the implementation allows you to use spread.pb or a custom protocol on Twisted 10 and later on Windows 7 (we have been able to find a number of issues on Windows XP). The following is a small example of a spread.pb service and client that use a named pipe for communication.

from txnamedpipes.reactor import install
install()
from twisted.spread import pb
from twisted.internet import reactor
 
class Echoer(pb.Root):
    def remote_echo(self, st):
        print 'echoing:', st
        return st
 
if __name__ == '__main__':
    reactor.listenPipe('\\\\.\\pipe\\test_pipe',
                               pb.PBServerFactory(Echoer()))
    reactor.run()

And the client:

from txnamedpipes.reactor import install
install()
from twisted.spread import pb
from twisted.internet import reactor
from twisted.python import util
 
factory = pb.PBClientFactory()
reactor.connectPipe('\\\\.\\pipe\\test_pipe', factory)
d = factory.getRootObject()
d.addCallback(lambda object: object.callRemote("echo", 
                      "hello network"))
d.addCallback(lambda echo: 'server echoed: '+echo)
d.addErrback(lambda reason: 'error: '+str(reason.value))
d.addCallback(util.println)
d.addCallback(lambda _: reactor.stop())
reactor.run()

The code has the MIT license and we hope that other people find it useful.

mandel

Pywin32 is a very cool project that allows you to access the Windows API without having to go through ctypes and deal with all the crazy parameters that COM is famous for. Unfortunately it sometimes has issues that you only face a few times in your life.

In this case I found a bug where GetFileSecurity does not use the GetFileSecurityW function but the W-less version. For those who do not have to deal with these terrible details, the W usually means that the function knows how to deal with wide (Unicode) strings (backward compatibility can be a problem sometimes). I have reported the bug, but for those who are in a hurry, here is the patch:

diff -r 7dce71d174a9 win32/src/win32security.i
--- a/win32/src/win32security.i	Sat Jun 18 10:16:06 2011 -0400
+++ b/win32/src/win32security.i	Mon Jun 20 14:15:27 2011 +0200
@@ -2108,7 +2108,7 @@
  if (!PyWinObject_AsTCHAR(obFname, &fname))
   goto done;
 
-	if (GetFileSecurity(fname, info, psd, dwSize, &dwSize)) {
+	if (GetFileSecurityW(fname, info, psd, dwSize, &dwSize)) {
   PyErr_SetString(PyExc_RuntimeError, "Can't query for SECURITY_DESCRIPTOR size info?");
   goto done;
  }
@@ -2117,7 +2117,7 @@
   PyErr_SetString(PyExc_MemoryError, "allocating SECURITY_DESCRIPTOR");
   goto done;
  }
- if (!GetFileSecurity(fname, info, psd, dwSize, &dwSize)) {
+ if (!GetFileSecurityW(fname, info, psd, dwSize, &dwSize)) {
   PyWin_SetAPIError("GetFileSecurity");
   goto done;
  }
@@ -2153,7 +2153,7 @@
  PSECURITY_DESCRIPTOR psd;
  if (!PyWinObject_AsSECURITY_DESCRIPTOR(obsd, &psd))
   goto done;
-	if (!SetFileSecurity(fname, info, psd)) {
+	if (!SetFileSecurityW(fname, info, psd)) {
   PyWin_SetAPIError("SetFileSecurity");
   goto done;
  }

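For context, this is roughly the kind of call that goes through the patched code path; a minimal sketch (the path is made up for illustration):

import win32security

# A path with non-ASCII characters is what trips the W-less API.
path = u'C:\\Users\\mañana\\some_file.txt'

# Ask for the owner information of the file's security descriptor.
sd = win32security.GetFileSecurity(
    path, win32security.OWNER_SECURITY_INFORMATION)
owner_sid = sd.GetSecurityDescriptorOwner()
name, domain, _account_type = win32security.LookupAccountSid(None, owner_sid)
print '%s\\%s owns %s' % (domain, name, path)

With the unpatched build this kind of call could fail for such paths; the patch makes win32security use the wide-character API explicitly.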
mandel

Before I introduce the code, let me say that this is not a 100% exact implementation of the interfaces that can be found in pyinotify, but an implementation of a subset that matches my needs. The main idea of this post is to give an example of how such a library can be implemented for Windows while reusing the code that can be found in pyinotify.

Now that I have excused myself, let's get into the code. First of all, there are a number of classes from pyinotify that we can use in our code. That subset of classes is the code below, which I grabbed from pyinotify's git:

#!/usr/bin/env python
 
# pyinotify.py - python interface to inotify
# Copyright (c) 2010 Sebastien Martini <seb@dbzteam.org>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
"""Platform agnostic code grabed from pyinotify."""
import logging
import os
 
COMPATIBILITY_MODE = False
 
 
class RawOutputFormat:
    """
    Format string representations.
    """
    def __init__(self, format=None):
        self.format = format or {}
 
    def simple(self, s, attribute):
        if not isinstance(s, str):
            s = str(s)
        return (self.format.get(attribute, '') + s +
                self.format.get('normal', ''))
 
    def punctuation(self, s):
        """Punctuation color."""
        return self.simple(s, 'normal')
 
    def field_value(self, s):
        """Field value color."""
        return self.simple(s, 'purple')
 
    def field_name(self, s):
        """Field name color."""
        return self.simple(s, 'blue')
 
    def class_name(self, s):
        """Class name color."""
        return self.format.get('red', '') + self.simple(s, 'bold')
 
output_format = RawOutputFormat()
 
 
class EventsCodes:
    """
    Set of codes corresponding to each kind of events.
    Some of these flags are used to communicate with inotify, whereas
    the others are sent to userspace by inotify notifying some events.
 
    @cvar IN_ACCESS: File was accessed.
    @type IN_ACCESS: int
    @cvar IN_MODIFY: File was modified.
    @type IN_MODIFY: int
    @cvar IN_ATTRIB: Metadata changed.
    @type IN_ATTRIB: int
    @cvar IN_CLOSE_WRITE: Writable file was closed.
    @type IN_CLOSE_WRITE: int
    @cvar IN_CLOSE_NOWRITE: Unwritable file closed.
    @type IN_CLOSE_NOWRITE: int
    @cvar IN_OPEN: File was opened.
    @type IN_OPEN: int
    @cvar IN_MOVED_FROM: File was moved from X.
    @type IN_MOVED_FROM: int
    @cvar IN_MOVED_TO: File was moved to Y.
    @type IN_MOVED_TO: int
    @cvar IN_CREATE: Subfile was created.
    @type IN_CREATE: int
    @cvar IN_DELETE: Subfile was deleted.
    @type IN_DELETE: int
    @cvar IN_DELETE_SELF: Self (watched item itself) was deleted.
    @type IN_DELETE_SELF: int
    @cvar IN_MOVE_SELF: Self (watched item itself) was moved.
    @type IN_MOVE_SELF: int
    @cvar IN_UNMOUNT: Backing fs was unmounted.
    @type IN_UNMOUNT: int
    @cvar IN_Q_OVERFLOW: Event queue overflowed.
    @type IN_Q_OVERFLOW: int
    @cvar IN_IGNORED: File was ignored.
    @type IN_IGNORED: int
    @cvar IN_ONLYDIR: only watch the path if it is a directory (new
                      in kernel 2.6.15).
    @type IN_ONLYDIR: int
    @cvar IN_DONT_FOLLOW: don't follow a symlink (new in kernel 2.6.15).
                          With this flag we can make sure that we don't
                          watch the target of symlinks.
    @type IN_DONT_FOLLOW: int
    @cvar IN_MASK_ADD: add to the mask of an already existing watch (new
                       in kernel 2.6.14).
    @type IN_MASK_ADD: int
    @cvar IN_ISDIR: Event occurred against dir.
    @type IN_ISDIR: int
    @cvar IN_ONESHOT: Only send event once.
    @type IN_ONESHOT: int
    @cvar ALL_EVENTS: Alias for considering all of the events.
    @type ALL_EVENTS: int
    """
 
    # The idea here is 'configuration-as-code' - this way, we get
    # our nice class constants, but we also get nice human-friendly text
    # mappings to do lookups against as well, for free:
    FLAG_COLLECTIONS = {'OP_FLAGS': {
        'IN_ACCESS'        : 0x00000001,  # File was accessed
        'IN_MODIFY'        : 0x00000002,  # File was modified
        'IN_ATTRIB'        : 0x00000004,  # Metadata changed
        'IN_CLOSE_WRITE'   : 0x00000008,  # Writable file was closed
        'IN_CLOSE_NOWRITE' : 0x00000010,  # Unwritable file closed
        'IN_OPEN'          : 0x00000020,  # File was opened
        'IN_MOVED_FROM'    : 0x00000040,  # File was moved from X
        'IN_MOVED_TO'      : 0x00000080,  # File was moved to Y
        'IN_CREATE'        : 0x00000100,  # Subfile was created
        'IN_DELETE'        : 0x00000200,  # Subfile was deleted
        'IN_DELETE_SELF'   : 0x00000400,  # Self (watched item itself)
                                          # was deleted
        'IN_MOVE_SELF'     : 0x00000800,  # Self (watched item itself) was moved
        },
                        'EVENT_FLAGS': {
        'IN_UNMOUNT'       : 0x00002000,  # Backing fs was unmounted
        'IN_Q_OVERFLOW'    : 0x00004000,  # Event queue overflowed
        'IN_IGNORED'       : 0x00008000,  # File was ignored
        },
                        'SPECIAL_FLAGS': {
        'IN_ONLYDIR'       : 0x01000000,  # only watch the path if it is a
                                          # directory
        'IN_DONT_FOLLOW'   : 0x02000000,  # don't follow a symlink
        'IN_MASK_ADD'      : 0x20000000,  # add to the mask of an already
                                          # existing watch
        'IN_ISDIR'         : 0x40000000,  # event occurred against dir
        'IN_ONESHOT'       : 0x80000000,  # only send event once
        },
                        }
 
    def maskname(mask):
        """
        Returns the event name associated to mask. IN_ISDIR is appended to
        the result when appropriate. Note: only one event is returned, because
        only one event can be raised at a given time.
 
        @param mask: mask.
        @type mask: int
        @return: event name.
        @rtype: str
        """
        ms = mask
        name = '%s'
        if mask & IN_ISDIR:
            ms = mask - IN_ISDIR
            name = '%s|IN_ISDIR'
        return name % EventsCodes.ALL_VALUES[ms]
 
    maskname = staticmethod(maskname)
 
 
# So let's now turn the configuration into code
EventsCodes.ALL_FLAGS = {}
EventsCodes.ALL_VALUES = {}
for flagc, valc in EventsCodes.FLAG_COLLECTIONS.items():
    # Make the collections' members directly accessible through the
    # class dictionary
    setattr(EventsCodes, flagc, valc)
 
    # Collect all the flags under a common umbrella
    EventsCodes.ALL_FLAGS.update(valc)
 
    # Make the individual masks accessible as 'constants' at globals() scope
    # and masknames accessible by values.
    for name, val in valc.items():
        globals()[name] = val
        EventsCodes.ALL_VALUES[val] = name
 
 
# all 'normal' events
ALL_EVENTS = reduce(lambda x, y: x | y, EventsCodes.OP_FLAGS.values())
EventsCodes.ALL_FLAGS['ALL_EVENTS'] = ALL_EVENTS
EventsCodes.ALL_VALUES[ALL_EVENTS] = 'ALL_EVENTS'
 
 
class _Event:
    """
    Event structure, represent events raised by the system. This
    is the base class and should be subclassed.
 
    """
    def __init__(self, dict_):
        """
        Attach attributes (contained in dict_) to self.
 
        @param dict_: Set of attributes.
        @type dict_: dictionary
        """
        for tpl in dict_.items():
            setattr(self, *tpl)
 
    def __repr__(self):
        """
        @return: Generic event string representation.
        @rtype: str
        """
        s = ''
        for attr, value in sorted(self.__dict__.items(), key=lambda x: x[0]):
            if attr.startswith('_'):
                continue
            if attr == 'mask':
                value = hex(getattr(self, attr))
            elif isinstance(value, basestring) and not value:
                value = "''"
            s += ' %s%s%s' % (output_format.field_name(attr),
                              output_format.punctuation('='),
                              output_format.field_value(value))
 
        s = '%s%s%s %s' % (output_format.punctuation('<'),
                           output_format.class_name(self.__class__.__name__),
                           s,
                           output_format.punctuation('>'))
        return s
 
    def __str__(self):
        return repr(self)
 
 
class _RawEvent(_Event):
    """
    Raw event; it contains only the information provided by the system.
    It doesn't infer anything.
    """
    def __init__(self, wd, mask, cookie, name):
        """
        @param wd: Watch Descriptor.
        @type wd: int
        @param mask: Bitmask of events.
        @type mask: int
        @param cookie: Cookie.
        @type cookie: int
        @param name: Basename of the file or directory against which the
                     event was raised in case where the watched directory
                     is the parent directory. None if the event was raised
                     on the watched item itself.
        @type name: string or None
        """
        # Use this variable to cache the result of str(self), this object
        # is immutable.
        self._str = None
        # name: remove trailing '\0'
        d = {'wd': wd,
             'mask': mask,
             'cookie': cookie,
             'name': name.rstrip('\0')}
        _Event.__init__(self, d)
        logging.debug(str(self))
 
    def __str__(self):
        if self._str is None:
            self._str = _Event.__str__(self)
        return self._str
 
 
class Event(_Event):
    """
    This class contains all the useful information about the observed
    event. However, the presence of each field is not guaranteed and
    depends on the type of event. In effect, some fields are irrelevant
    for some kinds of events (for example 'cookie' is meaningless for
    IN_CREATE whereas it is mandatory for IN_MOVED_TO).
 
    The possible fields are:
      - wd (int): Watch Descriptor.
      - mask (int): Mask.
      - maskname (str): Readable event name.
      - path (str): path of the file or directory being watched.
      - name (str): Basename of the file or directory against which the
              event was raised in case where the watched directory
              is the parent directory. None if the event was raised
              on the watched item itself. This field is always provided
              even if the string is ''.
      - pathname (str): Concatenation of 'path' and 'name'.
      - src_pathname (str): Only present for IN_MOVED_TO events and only in
              the case where IN_MOVED_FROM events are watched too. Holds the
              source pathname from where pathname was moved from.
      - cookie (int): Cookie.
      - dir (bool): True if the event was raised against a directory.
 
    """
    def __init__(self, raw):
        """
        Concretely, this is the raw event plus inferred infos.
        """
        _Event.__init__(self, raw)
        self.maskname = EventsCodes.maskname(self.mask)
        if COMPATIBILITY_MODE:
            self.event_name = self.maskname
        try:
            if self.name:
                self.pathname = os.path.abspath(os.path.join(self.path,
                                                             self.name))
            else:
                self.pathname = os.path.abspath(self.path)
        except AttributeError, err:
            # Usually this is not an error: some events are perfectly valid
            # despite the lack of these attributes.
            logging.debug(err)
 
 
class _ProcessEvent:
    """
    Abstract processing event class.
    """
    def __call__(self, event):
        """
        To behave like a functor the object must be callable.
        This method is a dispatch method. Its lookup order is:
          1. process_MASKNAME method
          2. process_FAMILY_NAME method
          3. otherwise calls process_default
 
        @param event: Event to be processed.
        @type event: Event object
        @return: By convention when used from the ProcessEvent class:
                 - Returning False or None (default value) means keep on
                 executing next chained functors (see chain.py example).
                 - Returning True instead means do not execute next
                   processing functions.
        @rtype: bool
        @raise ProcessEventError: Event object undispatchable,
                                  unknown event.
        """
        stripped_mask = event.mask - (event.mask & IN_ISDIR)
        maskname = EventsCodes.ALL_VALUES.get(stripped_mask)
        if maskname is None:
            raise ProcessEventError("Unknown mask 0x%08x" % stripped_mask)
 
        # 1- look for process_MASKNAME
        meth = getattr(self, 'process_' + maskname, None)
        if meth is not None:
            return meth(event)
        # 2- look for process_FAMILY_NAME
        meth = getattr(self, 'process_IN_' + maskname.split('_')[1], None)
        if meth is not None:
            return meth(event)
        # 3- default call method process_default
        return self.process_default(event)
 
    def __repr__(self):
        return '<%s>' % self.__class__.__name__
 
 
class ProcessEvent(_ProcessEvent):
    """
    Process events objects, can be specialized via subclassing, thus its
    behavior can be overridden:

    Note: you should not override __init__ in your subclass; instead define
    a my_init() method, this method will be called automatically from the
    constructor of this class with its optional parameters.
 
      1. Provide specialized individual methods, e.g. process_IN_DELETE for
         processing a precise type of event (e.g. IN_DELETE in this case).
      2. Or/and provide methods for processing events by 'family', e.g.
         process_IN_CLOSE method will process both IN_CLOSE_WRITE and
         IN_CLOSE_NOWRITE events (if process_IN_CLOSE_WRITE and
         process_IN_CLOSE_NOWRITE aren't defined though).
      3. Or/and override process_default for catching and processing all
         the remaining types of events.
    """
    pevent = None
 
    def __init__(self, pevent=None, **kargs):
        """
        Enable chaining of ProcessEvent instances.
 
        @param pevent: Optional callable object, will be called on event
                       processing (before self).
        @type pevent: callable
        @param kargs: This constructor is implemented as a template method
                      delegating its optional keyword arguments to the
                      method my_init().
        @type kargs: dict
        """
        self.pevent = pevent
        self.my_init(**kargs)
 
    def my_init(self, **kargs):
        """
        This method is called from ProcessEvent.__init__(). This method is
        empty here and must be redefined to be useful. In effect, if you
        need to specifically initialize your subclass' instance then you
        just have to override this method in your subclass. Then all the
        keyword arguments passed to ProcessEvent.__init__() will be
        transmitted as parameters to this method. Beware you MUST pass
        keyword arguments though.
 
        @param kargs: optional delegated arguments from __init__().
        @type kargs: dict
        """
        pass
 
    def __call__(self, event):
        stop_chaining = False
        if self.pevent is not None:
            # By default methods return None so we set as guideline
            # that methods asking for stop chaining must explicitly
            # return non None or non False values, otherwise the default
            # behavior will be to accept chain call to the corresponding
            # local method.
            stop_chaining = self.pevent(event)
        if not stop_chaining:
            return _ProcessEvent.__call__(self, event)
 
    def nested_pevent(self):
        return self.pevent
 
    def process_IN_Q_OVERFLOW(self, event):
        """
        By default this method only reports warning messages; you can
        override it by subclassing ProcessEvent and implementing your own
        process_IN_Q_OVERFLOW method. The actions you can take on receiving
        this event are either to update the variable max_queued_events in
        order to handle more simultaneous events, or to modify your code to
        achieve better filtering and diminish the number of raised events.
        Because this method is defined, IN_Q_OVERFLOW will never get
        transmitted as arguments to process_default calls.
 
        @param event: IN_Q_OVERFLOW event.
        @type event: dict
        """
        logging.warning('Event queue overflowed.')
 
    def process_default(self, event):
        """
        Default processing event method. By default does nothing. Subclass
        ProcessEvent and redefine this method in order to modify its behavior.
 
        @param event: Event to be processed. Can be of any type of events but
                      IN_Q_OVERFLOW events (see method process_IN_Q_OVERFLOW).
        @type event: Event instance
        """
        pass
 
 
class PrintAllEvents(ProcessEvent):
    """
    Dummy class used to print events' string representations. For instance,
    this class is used from the command line to print all received events
    to stdout.
    """
    def my_init(self, out=None):
        """
        @param out: Where events will be written.
        @type out: Object providing a valid file object interface.
        """
        if out is None:
            out = sys.stdout
        self._out = out
 
    def process_default(self, event):
        """
        Writes event string representation to file object provided to
        my_init().
 
        @param event: Event to be processed. Can be of any type of events but
                      IN_Q_OVERFLOW events (see method process_IN_Q_OVERFLOW).
        @type event: Event instance
        """
        self._out.write(str(event))
        self._out.write('\n')
        self._out.flush()
 
 
class WatchManagerError(Exception):
    """
    WatchManager Exception. Raised on error encountered on watches
    operations.
 
    """
    def __init__(self, msg, wmd):
        """
        @param msg: Exception string's description.
        @type msg: string
        @param wmd: This dictionary contains the wd assigned to paths of the
                    same call for which watches were successfully added.
        @type wmd: dict
        """
        self.wmd = wmd
        Exception.__init__(self, msg)
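
To make the dispatch logic of ProcessEvent concrete, here is a minimal sketch of a subclass; the class name and its print statements are illustrative, not part of the port:

class DemoProcessor(ProcessEvent):
    """Illustrative processor that exercises the three lookup steps."""

    def process_IN_CREATE(self, event):
        # step 1: an exact process_MASKNAME match
        print 'created:', event.pathname

    def process_IN_CLOSE(self, event):
        # step 2: a 'family' method, called for both IN_CLOSE_WRITE and
        # IN_CLOSE_NOWRITE since no exact match is defined for them
        print 'closed:', event.pathname

    def process_default(self, event):
        # step 3: fallback for every other event type
        print 'event:', event.maskname

The Windows code below feeds its translated events to this kind of processor through each watch's proc_fun.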

Unfortunately, we still need to implement the code that talks to the Win32 API in order to retrieve the events from the file system. In my design this is done by the Watch class, which looks like this:

# Author: Manuel de la Pena <manuel@canonical.com>
#
# Copyright 2011 Canonical Ltd.
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 3, as published
# by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranties of
# MERCHANTABILITY, SATISFACTORY QUALITY, or FITNESS FOR A PARTICULAR
# PURPOSE.  See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program.  If not, see <http://www.gnu.org/licenses/>.
"""File notifications on windows."""
 
import logging
import os
import re
 
import winerror
 
from Queue import Queue, Empty
from threading import Thread
from uuid import uuid4
from twisted.internet import task, reactor
from win32con import (
    FILE_SHARE_READ,
    FILE_SHARE_WRITE,
    FILE_FLAG_BACKUP_SEMANTICS,
    FILE_NOTIFY_CHANGE_FILE_NAME,
    FILE_NOTIFY_CHANGE_DIR_NAME,
    FILE_NOTIFY_CHANGE_ATTRIBUTES,
    FILE_NOTIFY_CHANGE_SIZE,
    FILE_NOTIFY_CHANGE_LAST_WRITE,
    FILE_NOTIFY_CHANGE_SECURITY,
    OPEN_EXISTING
)
from win32file import CreateFile, ReadDirectoryChangesW
from ubuntuone.platform.windows.pyinotify import (
    Event,
    WatchManagerError,
    ProcessEvent,
    PrintAllEvents,
    IN_OPEN,
    IN_CLOSE_NOWRITE,
    IN_CLOSE_WRITE,
    IN_CREATE,
    IN_ISDIR,
    IN_DELETE,
    IN_MOVED_FROM,
    IN_MOVED_TO,
    IN_MODIFY,
    IN_IGNORED
)
from ubuntuone.syncdaemon.filesystem_notifications import (
    GeneralINotifyProcessor
)
from ubuntuone.platform.windows.os_helper import (
    LONG_PATH_PREFIX,
    abspath,
    listdir
)
 
# constant found in the msdn documentation:
# http://msdn.microsoft.com/en-us/library/ff538834(v=vs.85).aspx
FILE_LIST_DIRECTORY = 0x0001
FILE_NOTIFY_CHANGE_LAST_ACCESS = 0x00000020
FILE_NOTIFY_CHANGE_CREATION = 0x00000040
 
# a map between the few events that we have on windows and those
# found in pyinotify
WINDOWS_ACTIONS = {
  1: IN_CREATE,
  2: IN_DELETE,
  3: IN_MODIFY,
  4: IN_MOVED_FROM,
  5: IN_MOVED_TO
}
 
# quickly translate the event and its is_dir state to our standard events
NAME_TRANSLATIONS = {
    IN_OPEN: 'FS_FILE_OPEN',
    IN_CLOSE_NOWRITE: 'FS_FILE_CLOSE_NOWRITE',
    IN_CLOSE_WRITE: 'FS_FILE_CLOSE_WRITE',
    IN_CREATE: 'FS_FILE_CREATE',
    IN_CREATE | IN_ISDIR: 'FS_DIR_CREATE',
    IN_DELETE: 'FS_FILE_DELETE',
    IN_DELETE | IN_ISDIR: 'FS_DIR_DELETE',
    IN_MOVED_FROM: 'FS_FILE_DELETE',
    IN_MOVED_FROM | IN_ISDIR: 'FS_DIR_DELETE',
    IN_MOVED_TO: 'FS_FILE_CREATE',
    IN_MOVED_TO | IN_ISDIR: 'FS_DIR_CREATE',
}
 
# the default mask to be used in the watches added by the FilesystemMonitor
# class
FILESYSTEM_MONITOR_MASK = FILE_NOTIFY_CHANGE_FILE_NAME | \
    FILE_NOTIFY_CHANGE_DIR_NAME | \
    FILE_NOTIFY_CHANGE_ATTRIBUTES | \
    FILE_NOTIFY_CHANGE_SIZE | \
    FILE_NOTIFY_CHANGE_LAST_WRITE | \
    FILE_NOTIFY_CHANGE_SECURITY | \
    FILE_NOTIFY_CHANGE_LAST_ACCESS
 
 
# The implementation of the code that is provided as the pyinotify
# substitute
class Watch(object):
    """Implement the same functions as pyinotify.Watch."""
 
    def __init__(self, watch_descriptor, path, mask, auto_add,
        events_queue=None, exclude_filter=None, proc_fun=None):
        super(Watch, self).__init__()
        self.log = logging.getLogger('ubuntuone.platform.windows.' +
            'filesystem_notifications.Watch')
        self._watching = False
        self._descriptor = watch_descriptor
        self._auto_add = auto_add
        self.exclude_filter = None
        self._proc_fun = proc_fun
        self._cookie = None
        self._source_pathname = None
        # remember the subdirs we have so that when we have a delete we can
        # check if it was a remove
        self._subdirs = []
        # ensure that we work with an abspath and that we can deal with
        # long paths over 260 chars.
        self._path = os.path.abspath(path)
        if not self._path.startswith(LONG_PATH_PREFIX):
            self._path = LONG_PATH_PREFIX + self._path
        self._mask = mask
        # let's make the queue as big as possible
        self._raw_events_queue = Queue()
        if not events_queue:
            events_queue = Queue()
        self.events_queue = events_queue
 
    def _path_is_dir(self, path):
        """Check if the path is a dir and update the local subdir list."""
        self.log.debug('Testing if path "%s" is a dir', path)
        is_dir = False
        if os.path.exists(path):
            is_dir = os.path.isdir(path)
        else:
            self.log.debug('Path "%s" was deleted; subdirs are %s.',
                path, self._subdirs)
            # we removed the path, we look in the internal list
            if path in self._subdirs:
                is_dir = True
                self._subdirs.remove(path)
        if is_dir:
            self.log.debug('Adding %s to subdirs %s', path, self._subdirs)
            self._subdirs.append(path)
        return is_dir
 
    def _process_events(self):
        """Process the events from the queue."""
        # we transform the events to be the same as the ones in pyinotify
        # and then use the proc_fun
        while self._watching or not self._raw_events_queue.empty():
            file_name, action = self._raw_events_queue.get()
            # map the windows events to the pyinotify ones; this is dirty but
            # makes the multiplatform story better, linux was first :P
            # note: _path_is_dir already updates the cached subdirs list, so
            # there is no need to repeat that bookkeeping here
            is_dir = self._path_is_dir(file_name)
            mask = WINDOWS_ACTIONS[action]
            head, tail = os.path.split(file_name)
            if is_dir:
                mask |= IN_ISDIR
            event_raw_data = {
                'wd': self._descriptor,
                'dir': is_dir,
                'mask': mask,
                'name': tail,
                'path': head.replace(self.path, '.')
            }
            # because of the way the win api fires the events we know for
            # sure that no move events will be added in the wrong order; this
            # is kind of hacky and I don't like it too much
            if WINDOWS_ACTIONS[action] == IN_MOVED_FROM:
                self._cookie = str(uuid4())
                self._source_pathname = tail
                event_raw_data['cookie'] = self._cookie
            if WINDOWS_ACTIONS[action] == IN_MOVED_TO:
                event_raw_data['src_pathname'] = self._source_pathname
                event_raw_data['cookie'] = self._cookie
            event = Event(event_raw_data)
            # FIXME: event deduces the pathname wrong and we need to
            # manually set it
            event.pathname = file_name
            # add the event only if we do not have an exclude filter or
            # the exclude filter returns False, that is, the event will not
            # be excluded
            if not self.exclude_filter or not self.exclude_filter(event):
                self.log.debug('Adding event %s to queue.', event)
                self.events_queue.put(event)
 
    def _watch(self):
        """Watch a path that is a directory."""
        # we are going to be using ReadDirectoryChangesW, which requires
        # a directory handle and the mask to be used.
        handle = CreateFile(
            self._path,
            FILE_LIST_DIRECTORY,
            FILE_SHARE_READ | FILE_SHARE_WRITE,
            None,
            OPEN_EXISTING,
            FILE_FLAG_BACKUP_SEMANTICS,
            None
        )
        self.log.debug('Watching path %s.', self._path)
        while self._watching:
            # important information to know about the parameters:
            # param 1: the handle to the dir
            # param 2: the size of the buffer used in the kernel to store
            # events that might arrive while the call is being performed.
            # This is complicated to fine tune: if you create lots of
            # watchers you might use too much memory and make your OS BSOD
            results = ReadDirectoryChangesW(
                handle,
                1024,
                self._auto_add,
                self._mask,
                None,
                None
            )
            # add the different events to the queue so that they can be
            # processed no matter the speed.
            for action, file in results:
                full_filename = os.path.join(self._path, file)
                self._raw_events_queue.put((full_filename, action))
                self.log.debug('Added %s to raw events queue.',
                    (full_filename, action))
 
    def start_watching(self):
        """Tell the watch to start processing events."""
        # remember the current subdirs of the path
        for current_child in listdir(self._path):
            full_child_path = os.path.join(self._path, current_child)
            if os.path.isdir(full_child_path):
                self._subdirs.append(full_child_path)
        # start two different threads: one to watch the path, the other to
        # process the events.
        self.log.debug('Start watching path.')
        self._watching = True
        watch_thread = Thread(target=self._watch,
            name='Watch(%s)' % self._path)
        process_thread = Thread(target=self._process_events,
            name='Process(%s)' % self._path)
        process_thread.start()
        watch_thread.start()
 
    def stop_watching(self):
        """Tell the watch to stop processing events."""
        self._watching = False
        self._subdirs = []
 
    def update(self, mask, proc_fun=None, auto_add=False):
        """Update the info used by the watcher."""
        self.log.debug('update(%s, %s, %s)', mask, proc_fun, auto_add)
        self._mask = mask
        self._proc_fun = proc_fun
        self._auto_add = auto_add
 
    @property
    def path(self):
        """Return the path watched."""
        return self._path
 
    @property
    def auto_add(self):
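        """Return the auto_add flag used for this watch."""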
        return self._auto_add
 
    @property
    def proc_fun(self):
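        """Return the processing function of this watch."""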
        return self._proc_fun
 
 
class WatchManager(object):
    """Implement the same functions as pyinotify.WatchManager."""
 
    def __init__(self, exclude_filter=lambda path: False):
        """Init the manager to keep track of the different watches."""
        super(WatchManager, self).__init__()
        self.log = logging.getLogger('ubuntuone.platform.windows.'
            + 'filesystem_notifications.WatchManager')
        self._wdm = {}
        self._wd_count = 0
        self._exclude_filter = exclude_filter
        self._events_queue = Queue()
        self._ignored_paths = []
 
    def stop(self):
        """Close the manager and stop all watches."""
        self.log.debug('Stopping watches.')
        for current_wd in self._wdm:
            self._wdm[current_wd].stop_watching()
            self.log.debug('Watch for %s stopped.', self._wdm[current_wd].path)
 
    def get_watch(self, wd):
        """Return the watch with the given descriptor."""
        return self._wdm[wd]
 
    def del_watch(self, wd):
        """Delete the watch with the given descriptor."""
        try:
            watch = self._wdm[wd]
            watch.stop_watching()
            del self._wdm[wd]
            self.log.debug('Watch %s removed.', wd)
        except KeyError, e:
            logging.error(str(e))
 
    def _add_single_watch(self, path, mask, proc_fun=None, auto_add=False,
        quiet=True, exclude_filter=None):
        self.log.debug('add_single_watch(%s, %s, %s, %s, %s, %s)', path, mask,
            proc_fun, auto_add, quiet, exclude_filter)
        self._wdm[self._wd_count] = Watch(self._wd_count, path, mask,
            auto_add, events_queue=self._events_queue,
            exclude_filter=exclude_filter, proc_fun=proc_fun)
        self._wdm[self._wd_count].start_watching()
        self._wd_count += 1
        self.log.debug('Watch count increased to %s', self._wd_count)
 
    def add_watch(self, path, mask, proc_fun=None, auto_add=False,
        quiet=True, exclude_filter=None):
        if hasattr(path, '__iter__'):
            self.log.debug('Added collection of watches.')
            # we are dealing with a collection of paths
            for current_path in path:
                if not self.get_wd(current_path):
                    self._add_single_watch(current_path, mask, proc_fun,
                        auto_add, quiet, exclude_filter)
        elif not self.get_wd(path):
            self.log.debug('Adding single watch.')
            self._add_single_watch(path, mask, proc_fun, auto_add,
                quiet, exclude_filter)
 
    def update_watch(self, wd, mask=None, proc_fun=None, rec=False,
                     auto_add=False, quiet=True):
        try:
            watch = self._wdm[wd]
            watch.stop_watching()
            self.log.debug('Stopped watch on %s for update.', watch.path)
            # update the data and restart watching
            auto_add = auto_add or rec
            watch.update(mask, proc_fun=proc_fun, auto_add=auto_add)
            # only start the watcher again if the mask was given, otherwise
            # we are not watching and therefore do not care
            if mask:
                watch.start_watching()
        except KeyError, e:
            self.log.error(str(e))
            if not quiet:
                raise WatchManagerError('Watch %s was not found' % wd, {})
 
    def get_wd(self, path):
        """Return the watcher that is used to watch the given path."""
        for current_wd in self._wdm:
            if self._wdm[current_wd].path in path and \
                self._wdm[current_wd].auto_add:
                return current_wd
 
    def get_path(self, wd):
        """Return the path watched by the watch with the given wd."""
        watch = self._wdm.get(wd)
        if watch:
            return watch.path
 
    def rm_watch(self, wd, rec=False, quiet=True):
        """Remove the watch with the given wd."""
        try:
            watch = self._wdm[wd]
            watch.stop_watching()
            del self._wdm[wd]
        except KeyError, err:
            self.log.error(str(err))
            if not quiet:
                raise WatchManagerError('Watch %s was not found' % wd, {})
 
    def rm_path(self, path):
        """Remove the watch for the given path."""
        # it would be very tricky to remove a subpath from a watcher that is
        # looking at changes in its children. To make it simpler and less
        # error prone (and even more performant, since we use fewer threads)
        # we will add a filter to the events in the watcher so that the
        # events from that child are not received :)
        def ignore_path(event):
            """Ignore an event if it is under one of the ignored paths."""
            for ignored_path in self._ignored_paths:
                if ignored_path in event.pathname:
                    return True
            return False
 
        wd = self.get_wd(path)
        if wd:
            if self._wdm[wd].path == path:
                self.log.debug('Removing watch for path "%s"', path)
                self.rm_watch(wd)
            else:
                self.log.debug('Adding exclude filter for "%s"', path)
                # we have a watch that contains the path as a child path
                if not path in self._ignored_paths:
                    self._ignored_paths.append(path)
                # FIXME: this assumes that no other exclude filter was set,
                # which in our use case is correct, but what if we ever move
                # this to other projects? Maybe using the manager's
                # exclude_filter is better
                if not self._wdm[wd].exclude_filter:
                    self._wdm[wd].exclude_filter = ignore_path
 
    @property
    def watches(self):
        """Return a reference to the dictionary that contains the watches."""
        return self._wdm
 
    @property
    def events_queue(self):
        """Return the queue with the events that the manager contains."""
        return self._events_queue
 
 
class Notifier(object):
    """
    Read notifications, process events. Inspired by pyinotify.Notifier.
    """
 
    def __init__(self, watch_manager, default_proc_fun=None, read_freq=0,
                 threshold=10, timeout=-1):
        """Init to process event according to the given timeout & threshold."""
        super(Notifier, self).__init__()
        self.log = logging.getLogger('ubuntuone.platform.windows.'
            + 'filesystem_notifications.Notifier')
        # Watch Manager instance
        self._watch_manager = watch_manager
        # Default processing method
        self._default_proc_fun = default_proc_fun
        if default_proc_fun is None:
            self._default_proc_fun = PrintAllEvents()
        # Loop parameters
        self._read_freq = read_freq
        self._threshold = threshold
        self._timeout = timeout
 
    def proc_fun(self):
        return self._default_proc_fun
 
    def process_events(self):
        """
        Process the event given the threshold and the timeout.
        """
        self.log.debug('Processing events with threshold: %s and timeout: %s',
            self._threshold, self._timeout)
        # we will process an amount of events equal to the threshold of
        # the notifier and will block for the amount given by the timeout
        processed_events = 0
        while processed_events < self._threshold:
            try:
                raw_event = None
                if not self._timeout or self._timeout < 0:
                    raw_event = self._watch_manager.events_queue.get(
                        block=False)
                else:
                    raw_event = self._watch_manager.events_queue.get(
                        timeout=self._timeout)
                watch = self._watch_manager.get_watch(raw_event.wd)
                if watch is None:
                    # Not really sure how we ended up here, nor how we should
                    # handle these types of events and whether it is
                    # appropriate to completely skip them (like we do here).
                    self.log.warning('Unable to retrieve Watch object '
                        + 'associated to %s', raw_event)
                    processed_events += 1
                    continue
                if watch and watch.proc_fun:
                    self.log.debug('Executing proc_fun from watch.')
                    watch.proc_fun(raw_event)  # user processings
                else:
                    self.log.debug('Executing default_proc_fun')
                    self._default_proc_fun(raw_event)
                processed_events += 1
            except Empty:
                # increase the number of processed events, and continue
                processed_events += 1
                continue
 
    def stop(self):
        """Stop processing events and the watch manager."""
        self._watch_manager.stop()

While one of the threads retrieves the events from the file system, the second one processes them so that they will be exposed as pyinotify events. I have done this because I did not want to deal with OVERLAPPED structures for async operations in Win32, and because I wanted to use pyinotify events so that anyone with pyinotify experience can easily understand the output. I really like this approach because it allowed me to reuse a fair amount of the logic we had in the Ubuntu client and to approach the port in a very TDD way, since the tests I've used are the same ones found on Ubuntu :)
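
As a closing illustration, this is roughly how the pieces above fit together; the watched path and the polling loop are assumptions made for the example, not code from the client:

import time

# hypothetical wiring of WatchManager + Notifier; the path is illustrative
manager = WatchManager()
notifier = Notifier(manager, default_proc_fun=PrintAllEvents())
manager.add_watch(u'C:\\Users\\mandel\\Documents',
    FILESYSTEM_MONITOR_MASK, auto_add=True)
try:
    while True:
        # drain up to `threshold` events per call, then idle briefly
        notifier.process_events()
        time.sleep(1)
finally:
    # ask the watch threads to finish
    notifier.stop()

Every watch added this way runs the two threads described above, which is why stopping the notifier (and with it the manager) matters before the process exits.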

Read more