on Sep 11th, 2008Prototype based programming in python

Revision #2:
I updated my code with some more bells and whistles, also removing the double-underscore-method/attributes and operator overloading as some people commented so “nicely” on.

prototype.py

class Object(object):
    """Prototype object whose methods are resolved through a chain of parents.

    Methods are stored in ``_methods`` (name -> entry). When a name is not
    found on the object itself, the lookup continues on ``_parent``, then on
    that object's parent, and so on. Ordinary attributes (set with
    ``obj.attr = value``) live on the instance as usual and are not
    inherited through the chain.
    """

    def __init__(self):
        # Prototype this object was cloned from; None for a root object.
        self._parent = None
        # Methods registered on, or re-bound and cached for, this object.
        self._methods = {}

    def clone(self):
        """Return a new Object that uses this object as its prototype."""
        o = Object()
        o._parent = self
        return o

    def _getmethod(self, name):
        """Return the entry stored under *name* on this object or its nearest ancestor.

        Raises:
            AttributeError: if no object in the parent chain defines *name*.
        """
        node = self
        while node is not None:
            try:
                return node._methods[name]
            except KeyError:
                node = node._parent

        # Name the attribute that was actually requested instead of leaking
        # "'NoneType' object has no attribute '_getmethod'" from the chain end.
        raise AttributeError(
            "%r object has no attribute %r" % (type(self).__name__, name))

    def __getattr__(self, name):
        """Resolve *name* through the prototype chain (only called when normal lookup fails).

        A Method found on an ancestor is re-bound to this object and cached
        in ``_methods`` so that ``self`` inside the method is the object the
        call was made on.

        Raises:
            AttributeError: if *name* is not defined anywhere in the chain.
        """
        # If the bookkeeping attributes themselves are missing (instance
        # created without running __init__), looking them up here would
        # re-enter __getattr__ forever.
        if name in ("_parent", "_methods"):
            raise AttributeError(name)

        method = self._getmethod(name)

        if isinstance(method, Method) and method.object is not self:
            method = self._methods[name] = Method(method.method, self)

        return method

class Method(object):
    """Callable that pairs a plain function with the object it is bound to."""

    def __init__(self, method, object = None):
        # `object` shadows the builtin, but it is part of the public
        # signature (callers may pass it by keyword), so the name stays.
        self.method, self.object = method, object

    def __call__(self, *args, **kw):
        """Call the wrapped function with the bound object as its first argument."""
        func = self.method
        receiver = self.object
        return func(receiver, *args, **kw)

def method(obj):
    """Decorator factory that registers the decorated function as a method of *obj*.

    The function is wrapped in a Method bound to *obj*, stored in
    ``obj._methods`` under the function's ``__name__``, and the wrapper is
    returned in place of the function.
    """
    def decorator(f):
        bound = Method(f, obj)
        obj._methods[f.__name__] = bound
        return bound
    return decorator

def siblings(a, b):
    """Return True when *a* and *b* have the very same object as ``_parent``."""
    parent_of_a = a._parent
    parent_of_b = b._parent
    return parent_of_a is parent_of_b

def mixin(into, mixin):
    """Copy every method registered directly on *mixin* into *into*.

    Entries already present on *into* under the same name are overwritten.
    Methods that *mixin* merely inherits from its own parents are not
    copied. If *mixin* is not an Object, nothing happens.
    """
    if isinstance(mixin, Object):
        # dict.update replaces the Python-2-only iteritems() loop and behaves
        # the same on every Python version. The copied wrappers stay bound to
        # *mixin*; Object.__getattr__ re-binds them on first access.
        into._methods.update(mixin._methods)

def child_of(child, parent):
    """Return True if *parent* is *child* itself or one of its ancestors.

    Follows the ``_parent`` links starting at *child* until either *parent*
    is reached (True) or the chain ends with None (False).
    """
    node = child

    while True:
        if node is None:
            return False
        if node is parent:
            return True
        node = node._parent

def sibling(obj):
    """Return a fresh clone of *obj*'s parent, i.e. a new sibling of *obj*."""
    parent = obj._parent
    return parent.clone()

def is_prototype(obj):
    """Return True if *obj* is a prototype object, i.e. an instance of Object."""
    # Python has no `typeof`; calling it raised NameError on every call.
    # isinstance() is the idiomatic type test.
    return isinstance(obj, Object)

def clone():
    """Create and return a brand-new root prototype (an Object without a parent)."""
    root = Object()
    return root

and, demo.py

import prototype as p
from prototype import method

# Basic usage example
#
# Builds a small prototype hierarchy (animal -> cat -> nermal/garfield ->
# kittens) and shows that a method registered on `cat` is usable from every
# clone below it.

animal = p.clone()
animal.name = None

cat = animal.clone()
# name/color are ordinary instance attributes: Object.__getattr__ only
# searches registered methods, so each object sets its own values.
cat.name = "Cat"
cat.color = None

# Registers meow() in cat._methods; clones of cat find it through the chain.
@method(cat)
def meow(self, times = 1):
   # Python 2 print statement: the formatted string is repeated `times` times.
   print ("%s says meow, and is of the color %s" % (self.name, self.color))*times

nermal = cat.clone()
nermal.name = "Nermal"
nermal.color = "grey"

garfield = p.sibling(nermal) # same as cat.clone() or nermal._parent.clone()
garfield.name = "Garfield"
garfield.color = "orange"

print cat.name
print garfield.name
print nermal.name

# Each call runs the same function, but with `self` bound to the object the
# call was made on.
cat.meow()
garfield.meow()
nermal.meow()

kitten_1 = garfield.clone()
# NOTE(review): "Kiten" looks like a typo for "Kitten"; the sample output
# listing reproduces it, so confirm before changing either.
kitten_1.name = "Kiten #1"
kitten_1.color = "brown"

kitten_2 = p.sibling(kitten_1)
kitten_2.name = "Kitten #2"
kitten_2.color = "White"
# Changing attributes on a parent does not affect objects cloned from it.
garfield.name = "Garfield ..."
garfield.color = "Orange ..."
cat.name = "Original cat"
cat.color = "Still none"

cat.meow()
garfield.meow()
nermal.meow()
kitten_1.meow()
kitten_2.meow()

# Mixin example
#
# A mixin is just another prototype whose methods get copied into a target.

mixin = p.clone()

@method(mixin)
def say_name_two_times(self):
    print self.name*2

@method(mixin)
def say_name_three_times(self):
    # NOTE(review): prints the name five times although the function is
    # called say_name_three_times; the sample output also shows five copies.
    # Confirm which one is intended.
    print self.name*5

# Copies both methods into garfield._methods.
p.mixin(garfield, mixin)
garfield.say_name_two_times()
garfield.say_name_three_times()

print p.child_of(garfield, cat)
print p.child_of(nermal, garfield)
print p.siblings(nermal, garfield)

, will print something like this:

Cat
Garfield
Nermal
Cat says meow, and is of the color None
Garfield says meow, and is of the color orange
Nermal says meow, and is of the color grey
Original cat says meow, and is of the color Still none
Garfield ... says meow, and is of the color Orange ...
Nermal says meow, and is of the color grey
Kiten #1 says meow, and is of the color brown
Kitten #2 says meow, and is of the color White
Garfield ...Garfield ...
Garfield ...Garfield ...Garfield ...Garfield ...Garfield ...
True
False
True


I love python.

Original version of the code
Revision #1:

class Object(object):
        """Prototype object whose methods are resolved through a chain of parents.

        Methods live in ``__methods__`` (name -> entry); a name missing on the
        object itself is looked up on ``__parent__`` and further up the chain.
        The unary plus operator (``+obj``) is a shorthand for ``obj.clone()``.
        """

        def __init__(self):
                # Prototype this object was cloned from; None for a root object.
                self.__parent__ = None
                # Methods registered on, or re-bound and cached for, this object.
                self.__methods__ = {}

        def clone(self):
                """Return a new Object that uses this object as its prototype."""
                o = Object()
                o.__parent__ = self
                return o

        def __pos__(self):
                """Implement ``+obj`` as ``obj.clone()``."""
                return self.clone()

        def __getmethod__(self, name):
                """Return the entry stored under *name* on this object or its nearest ancestor.

                Raises:
                        AttributeError: if no object in the chain defines *name*.
                """
                node = self
                while node is not None:
                        try:
                                return node.__methods__[name]
                        except KeyError:
                                node = node.__parent__

                # Report the attribute that was actually requested rather than
                # "'NoneType' object has no attribute '__getmethod__'".
                raise AttributeError(
                        "%r object has no attribute %r" % (type(self).__name__, name))

        def __getattr__(self, name):
                """Resolve *name* through the prototype chain (only called when normal lookup fails).

                A Method found on an ancestor is re-bound to this object and
                cached in ``__methods__``.

                Raises:
                        AttributeError: if *name* is not defined anywhere in the chain.
                """
                # Looking up the bookkeeping attributes here would re-enter
                # __getattr__ forever if __init__ never ran.
                if name in ("__parent__", "__methods__"):
                        raise AttributeError(name)

                method = self.__getmethod__(name)

                if isinstance(method, Method) and method.object is not self:
                        method = self.__methods__[name] = Method(method.method, self)

                return method

class Method(object):
        """Callable that pairs a plain function with the object it is bound to."""

        def __init__(self, method, object = None):
                # `object` shadows the builtin but is part of the public signature.
                self.method = method
                self.object = object

        def __call__(self, *args, **kw):
                """Call the wrapped function with the bound object as its first argument."""
                return self.method(self.object, *args, **kw)

        def __isbound__(self):
                """Return True if this Method is bound to an object."""
                # Must test the instance attribute: a bare `object` here is the
                # builtin type, which made the check True for every Method.
                return self.object is not None

def method(obj):
        """Decorator factory that registers the decorated function as a method of *obj*.

        The function is wrapped in a Method bound to *obj* and stored in
        ``obj.__methods__`` under its ``__name__``; the wrapper is returned.
        """
        def decorator(f):
                obj.__methods__[f.__name__] = bound = Method(f, obj)
                return bound

        return decorator

# Demo: a method registered on `cat` is shared by every clone, while each
# clone keeps its own `name` attribute.
if __name__ == "__main__":
        animal = Object()
        # Unary plus is Object.__pos__, i.e. a shorthand for animal.clone().
        cat = +animal
        cat.name = None

        @method(cat)
        def meow(self, times=1):
                # Python 2 print statement: the formatted string is repeated `times` times.
                print ("%s says meow " % self.name)*times

        cat.meow()

        fluffy = +cat
        fluffy.name = "fluffy"
        # meow is found on cat and re-bound so that self is fluffy.
        fluffy.meow()

        # Renaming the parent does not change the clone's own name.
        cat.name = "original cat"
        cat.meow()
        fluffy.meow()

        puffy = +fluffy
        puffy.name = "puffy"
        puffy.meow()
        fluffy.meow()
        cat.meow()

The above prints:

None says meow
fluffy says meow
original cat says meow
fluffy says meow
puffy says meow
fluffy says meow
original cat says meow

A very simple example on how to work with prototype-based programming in Python.

on Sep 7th, 2008Present

31478_1220186639.jpg
30th August 2008.

on Sep 6th, 2008Outputting html from python programmatically

After being stuck with the drudgery of string interpolation when outputting HTML from python I figured there had to be some better way to do it, yes? The combination of a single decorator and a couple of functions proved to be the solution, here’s the code:

Wordpress decided to f*ck up my syntax highlighting in this snippet… grr…

def dec(f):
    """Turn the stub function *f* into an HTML tag builder named after it.

    The returned function renders ``<name attr="value" ...>``, a newline, all
    positional arguments concatenated, then ``</name>`` and a newline, where
    ``name`` is ``f.__name__``. Positional arguments must be strings
    (typically the output of other tag builders); keyword arguments become
    attributes. Neither is HTML-escaped, so only pass trusted text.
    """
    from functools import wraps

    tag = f.__name__

    @wraps(f)  # keep the tag name as the builder's __name__
    def inner(*args, **kw):
        attrs = ''.join(' %s="%s"' % (a, v) for a, v in kw.items())
        # The closing tag needs its own %s placeholder; a bare "\n" % name
        # raises "not all arguments converted during string formatting".
        return "<%s%s>\n%s</%s>\n" % (tag, attrs, ''.join(args), tag)
    return inner

# One stub per supported tag. The bodies are never used for anything:
# dec() only reads each stub's __name__ to know which tag to render.

@dec
def html():
    pass


@dec
def head():
    pass


@dec
def title():
    pass


@dec
def body():
    pass


@dec
def div():
    pass


@dec
def p():
    pass


@dec
def h1():
    pass


@dec
def h2():
    pass

I haven’t included all tags for readability here, but as you can see they’re not very hard to add yourself. Here’s some basic usage:

# Toggle for the optional <h2> sub-headers below.
enable_subheaders = True
# Empty string rendered in place of a tag that is switched off.
nothing = ''

# Tags nest by passing the already-rendered inner strings as positional
# arguments; keyword arguments (e.g. style=...) become tag attributes.
print html(
	head(
		title('Hey this is a test')
	),
	body(
		div(
			h1('Some header', style='font-weight:bold;'),
			h2('A subheader ;D yay!') if enable_subheaders else nothing,
			p('Some text', 'some other text...'),

			h2('Another subheader') if enable_subheaders else nothing,
			p('Some more text...')
		)
	)
)

Which will print something like this:

<html>
<head>
<title>
Hey this is a test</title>
</head>
<body>
<div>
<h1 style="font-weight:bold;">
Some header</h1>
<h2>
A subheader ;D yay!</h2>
<p>
Some textsome other text...</p>
<h2>
Another subheader</h2>
<p>
Some more text...</p>
</div>
</body>
</html>

Simple, but effective.

Those of you that are waiting for the fourth, and last, part of my generator article series - I’ve been knee deep in work this week, but I’m hoping I will be able to complete it by tomorrow.

on Sep 3rd, 2008How super() should be used when calling a parent’s method

Edit: You learn something new every day, it turns out I was the mistaken one, the correct way to call super is actually this:

class Demo(object):
        """Base class of the example; announces itself when initialised."""

        def __init__(self):
                print("From Demo")

class Test(Demo):
        """Subclass showing the correct way to call the parent initialiser."""

        def __init__(self):
                # Name the class explicitly so the lookup starts after Test in
                # the MRO, even when Test itself is subclassed.
                super(Test, self).__init__()
                print("From Test")

# Prints "From Demo" followed by "From Test".
o = Test()

Because if you do this:

# Counter-example: this is the form that should NOT be used.
class Demo(object):
        def __init__(self):
                print "From Demo"

class Test(Demo):
        def __init__(self):
                # Deliberately wrong: self.__class__ is the class of the
                # instance, not the class this method is defined in. For an
                # instance of a subclass of Test it resolves to Test.__init__
                # again, which recurses forever.
                super(self.__class__, self).__init__()
                print "From Test"

# With a single level of inheritance, as here, the call still works.
o = Test()

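More than one level of inheritance will make it go into an infinite loop, because self.__class__ is always the class of the instance and not the class the method was defined in. Thanks to Manuel and Malcom who pointed it out to me.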

on Sep 1st, 2008Managing asynchronous operations with python generators (part3)


Part 1: Introduction to generators in Python
Part 2: Indepth generator usage in Python (part2)
Part 3: Managing asynchronous operations with python generators (part3)

You can download all the code for this part here: http://totmacher.eu/upload/generators.tar.gz

I’m sorry, I lied to you guys - this part will not contain a real world example because I felt there were too many new concepts introduced. In this part we’ll go through something called asynchronous programming and how to create a scheduler to keep track of all the tasks we’re performing.

We’ll start off with the Task base class, it wraps a generator exposing its own .next()-method and a .suspended()-method that is used by our scheduler to decide if we should run the task or not. A task can basically be anything, but more often than not it’s some type of operation that involves a delay where we can’t control when it completes - such as a network call, disk access or something similar.

The directory structure for all of these examples is the following:

/
	/demo
		__init__.py
		tasks.py
		scheduler.py
	example_N.py

demo/tasks.py:

class Task(object):
    """Thin wrapper around a generator so a scheduler can drive it."""

    def __init__(self, generator = None):
        # The wrapped generator; every call to next() is forwarded to it.
        self.generator = generator

    def suspended(self):
        """Report whether the task should be skipped this round; a plain Task never is."""
        return False

    def next(self):
        """Advance the wrapped generator one step and return what it yields."""
        wrapped = self.generator
        return wrapped.next()

No doubt a fairly simple class, three methods spanning just one line each. Either download the entire code for these examples or write it yourself and put it in the demo/tasks.py file.

Let’s put together our scheduler, the scheduler isn’t a class - it’s just a module with one global variable and two functions, it too is fairly simple and spans just about twenty five lines:

demo/scheduler.py:

# Tasks waiting for their next turn. run() swaps this list out once per
# round, so tasks added while a round is in progress run in the next round.
queue = []

def add(task):
    """Append *task* to the end of the scheduler queue."""
    # Mutating the list needs no `global` declaration; only rebinding does.
    queue.append(task)

def run():
    """Run all queued tasks round-robin until every one of them has finished.

    Each round gives every task one turn: a task that is not suspended() is
    advanced with next(). A task that raises StopIteration is dropped; every
    other task is queued again for the following round. Returns when no
    tasks are left, so with never-ending tasks it never returns.
    """
    global queue

    stack = queue
    queue = []

    while len(stack) > 0:
        task = stack.pop(0)

        try:
            if not task.suspended():
                task.next()
        except StopIteration:
            # Finished task: simply don't queue it again.
            pass
        else:
            queue.append(task)

        # This must run after every task, including one that just finished.
        # Skipping it when the last task of a round raised StopIteration made
        # run() return while other tasks were still waiting in the queue.
        if len(stack) == 0 and len(queue) > 0:
            stack = queue
            queue = []

One global list named queue, and two functions: add() that just appends an element on the queue and run() that runs through the queue until there are no more generators left in it. Let’s go through the run() function line by line:

  • stack = queue - We store the queue in a temporary local variable
  • queue = [] - Set queue to an empty list
  • task = stack.pop(0) - Pop the first element of the stack
  • if not task.suspended(): - If the task isn’t suspended continue
  • task.next() - Call .next() on the task which in turn forwards the call to the generator the task wraps
  • except StopIteration: - If we get a StopIteration exception from the executing task/generator we should just continue with the next one (this will push the task that was responsible for the exception out of the queue)
  • queue.append(task) - Add the task in question to the queue again
  • if len(stack) == 0 and len(queue) > 0: - If the stack is empty and the queue isn’t
  • stack = queue - Put the queue in the stack
  • queue = [] - And clear the queue

Let’s put this together in a simple example so we can see that our scheduler works as it should:

example_1.py

from demo import tasks, scheduler

# Never-ending generator: prints `word`, then hands control back to the
# scheduler at the yield, and repeats on the next turn.
def echo(word):
	while True:
		print word
		yield

# Two tasks are queued, so the scheduler alternates between them.
scheduler.add(
	tasks.Task(
		echo("Hello")))

scheduler.add(
	tasks.Task(
		echo("World!")))

# Never returns here, because neither generator ever finishes.
scheduler.run()

If we run this from the terminal with python example_1.py we will get Hello and World! looped over our screen for eternity:

...
Hello
World!
Hello
World!
Hello
World!
Hello
World!
Hello
World!
...

Maybe not so exciting, but if you look closely you will see that Hello and World! alternate between each other, meaning that when the generator that prints Hello yields the first time the scheduler will take control passing execution to the generator printing World!, when that has printed out its message the stack is empty so the scheduler fills the stack with the queue again and begins from the top, and it all begins again.

Let’s add a new function called sleeper() to the mix, here you have example_2.py:

from time import sleep
from demo import tasks, scheduler

def echo(word):
	while True:
		print word
		yield

def sleeper(seconds):
	while True:
		sleep(seconds)
		yield

scheduler.add(
	tasks.Task(
		echo("Hello")))

scheduler.add(
	tasks.Task(
		sleeper(1)))

scheduler.add(
	tasks.Task(
		echo("World!")))

scheduler.add(
	tasks.Task(
		sleeper(1)))

scheduler.run()

You should be able to figure out what happens when we run this example from the terminal, Hello will print - and then yield and the scheduler passes execution to the first sleeper, pausing for one second and then passing it to World! printing that which yields and then execution gets passed to our second sleeper generator that pauses execution for one second, this is mainly so you can see that they actually take turns instead of some mindless spamming from an infinitely repeating loop without pauses.

Let’s make use of that .suspended()-method on the Task class shall we? Let’s create a generator that gets called every second time the scheduler asks for it instead of every iteration, here’s the code - add it to the end of demo/tasks.py:

class EvenTask(Task):
    """Task that lets the scheduler run it only on every second poll."""

    def __init__(self, *args, **kwargs):
        # Name the class explicitly: super(self.__class__, self) resolves to
        # this very __init__ again for any subclass of EvenTask and recurses
        # forever.
        super(EvenTask, self).__init__(*args, **kwargs)
        # Number of times suspended() has been called so far.
        self.counter = 0

    def suspended(self):
        """Count this poll; return True (skip) on odd polls and False on even ones."""
        self.counter += 1
        return self.counter % 2 != 0

If you don’t understand the super(...).__init__(*args, **kwargs)-line it’s how you call the parent class’s .__init__()-method in Python, .suspended() increases the counter by one each time it’s called but only returns False when we’re at an even number, allowing the task to be executed every other time.

Here’s the next example code, example_3.py

from time import sleep
from demo import tasks, scheduler

def echo(word):
	while True:
		print word
		yield

def sleeper(seconds):
	while True:
		sleep(seconds)
		yield

scheduler.add(
	tasks.Task(
		echo("Hello")))

scheduler.add(
	tasks.Task(
		sleeper(1)))

scheduler.add(
	tasks.EvenTask(
		echo("World!")))

scheduler.add(
	tasks.Task(
		sleeper(1)))

scheduler.run()

It’s identical to example_2.py except that the echo(”World!”)-task now is of the type EvenTask instead of Task, if you run this code you will get Hello Hello World! printed, because the World!-task will skip every other time.

...
Hello
Hello
World!
Hello
Hello
World!
Hello
Hello
World!
...

So, I’ll take one last example before calling it for the day - if you remember the line “except StopIteration:” from the scheduler.run()-function we can use that to make a generator drop out of the scheduler’s run()-loop, i present to you the marvelous example_4.py:

from time import sleep
from demo import tasks, scheduler

def echo(word):
	while True:
		print word
		yield

def sleeper(seconds):
	while True:
		sleep(seconds)
		yield

def echo_once(word):
    """Print *word* a single time, yield once, and then finish.

    The first .next() prints the word and pauses at the yield. The second
    .next() runs off the end of the function, which makes the generator
    raise StopIteration to its caller - the signal the scheduler uses to
    drop the task.
    """
    print(word)
    yield
    # Nothing more to do: ending the function is the supported way to finish
    # a generator. An explicit `raise StopIteration` inside a generator body
    # is turned into a RuntimeError on modern Python (PEP 479).

scheduler.add(
	tasks.Task(
		echo("Hello")))

scheduler.add(
	tasks.Task(
		echo_once("World!")))

scheduler.add(
	tasks.Task(
		sleeper(1)))

scheduler.run()

Running this from the terminal will yield (no pun intended) you something like this, only printing “World!” once:

Hello
World!
Hello
Hello
Hello
...

In the next part, number four I will show you a real world example with asynchronous network i/o, i just wanted to introduce the concept of a scheduler before jumping into a more advanced example.

on Aug 31st, 2008Three great python lectures

Maybe these are old news for a lot of people in the community, but I just found these three great lectures about Python over at google code university:

These are some of the best programming language lectures I’ve ever had the privilege to listen to, i warmly recommend them to anyone with even the slightest interest in Python.

on Aug 30th, 2008Indepth generator usage in Python (part2)


Part 1: Introduction to generators in Python
Part 2: Indepth generator usage in Python (part2)
Part 3: Managing asynchronous operations with python generators (part3)

After my last post on generators in Python I realized I forgot to go through one thing that I wanted to mention in the first part, namely how the return keyword interfaces with yield and generators, take this example function and its usage:

def count_to_3or4():
	counter = 0

	while counter < 3:
		counter += 1
		yield counter

	return counter+1

c = count_to_3or4()
print c.next() # 1
print c.next() # 2
print c.next() # 3
print c.next() # 4, from return - or ?

If you’ve read the previous post, or have a basic understanding of generators you would probably guess that 1, 2, 3 will print from the first three .next()-calls - but you would be wrong. If you try to run the above code you will get this thrown back in your face:

  File "generators.py", line 15
    return counter+1
SyntaxError: 'return' with argument inside generator

So you can’t use return in generators (functions with yield) - well, yes you can - you just can’t use return with a value attached to it. If you call return within a generator function it will exit and any further calls to .next() will throw a StopIteration exception, take this example code:

def count_to_2or3():
    """Yield 1 and 2, then stop early with a bare ``return``.

    The loop could count to 3, but the return that runs when the generator
    is resumed after yielding 2 ends it first.
    """
    counter = 0

    while counter < 3:
        counter += 1
        yield counter

        # Compare by value: `is` tests object identity and only appears to
        # work for small ints because CPython happens to cache them.
        if counter == 2:
            return

c = count_to_2or3()
print c.next() # 1
print c.next() # 2
print c.next() # 3, or ?

It will print 1 and 2, when you call .next() a third time it will hit return (since counter == 2, the if-clause evaluates to true) and throw a StopIteration exception. Basically “return” inside a generator-function does what “break” does inside a loop.

Performance

When you’re using generators as a type of iterator together with for (or manually, for that matter) working with large datasets you will see a substantial performance increase over list-generating functions. These two functions will generate the exact same output, but one will be significantly faster and use less memory:

def count_to_list(stop):
    """Return the list [1, 2, ..., stop], built completely before returning."""
    numbers = []

    # The list length doubles as the counter: it is always the last number added.
    while len(numbers) < stop:
        numbers.append(len(numbers) + 1)

    return numbers

def count_to_generator(stop):
    """Yield 1, 2, ..., stop one value at a time, without building a list."""
    current = 1

    # current - 1 is the number of values produced so far.
    while current - 1 < stop:
        yield current
        current += 1

The first function will generate a list of numbers (which takes quite some time and memory) and then return that while the second function, our generator will produce one number each time .next() is called on it and only consume as much memory as one integer takes up while also being a fair bit faster, running this on my VPS yields (again, no pun intended) these results:

fredrik@holmstrom:~/python/generators$ time python list.py

real    0m0.611s
user    0m0.540s
sys     0m0.040s
fredrik@holmstrom:~/python/generators$ time python generator.py

real    0m0.385s
user    0m0.380s
sys     0m0.000s

I didn’t measure memory usage here, but trust me - generator.py will consume a lot less memory, this technique is also called “lazy evaluation” in proper CS terms - there’s a lot more information on this topic alone, but this will do for now.

Advanced usage

As I mentioned in the history introduction in my previous post about generators, Python 2.5 gave generators a substantial usability boost allowing us to pass information back into the function through the yield statement and the .send() and .throw() methods on the generator-object. send() works exactly like next() except that you can pass a value back into the function as its first argument, but there are a few caveats you should look out for - take this snippet of code:

def echo():
	while True:
		print yield

Will give you the following SyntaxError:

  File "explained.py", line 3
    print yield
              ^

SyntaxError: invalid syntax

Changing the print yield to this:

def echo():
	while True:
		val = yield
		print val

Will make the code execute properly, however this seems pretty non-pythonic having to store the result in a temporary variable - so instead we can do this:

def echo():
	while True:
		print (yield)

Wrapping the yield in parentheses will allow you to use the result of it directly instead of storing it in a temporary variable, so let’s put our echo() generator to use:

def echo():
	while True:
		print (yield)

e = echo()
e.send("Hello")
e.send("World!")

But, running this code will show you the second caveat of trying to pass values back into the generator function, this TypeError will be thrown in your face if you run this code:

Traceback (most recent call last):
  File "generator.py", line 6, in <module>
    e.send("Hello")
TypeError: can't send non-None value to a just-started generator

Remember how I said that yield paused the execution of the generator function and that when you call the generator function (in this case e = echo()) no code is yet to be executed until you call .next() on your generator-object? So if .send() can be used to pass data back into a yield statement while the generator is paused, we can’t call .send() when no code has been executed and no yield statement has paused the generator, right?

What this means in practice is that you either have to call .next() or .send(None) the first time you call a generator, and when the generator reaches its first yield statement it will pause execution waiting for another call to .send() (or .next() if you don’t want to pass any data back) that will pass data back into it at the yield statement, confusing? So changing the code above to this:

def echo():
	while True:
		print (yield)

e = echo()
e.send(None) # or e.next()
e.send("Hello")
e.send("World!")

Will make it run, printing:

Hello
World!

To illustrate exactly what’s happening here, I’ll take another example - slightly more advanced but still achieving the same result as above:

def echo():
	# Number of values yielded so far.
	counter = 0

	while True:
		counter += 1
		# Yields counter to the caller and pauses; when resumed, prints
		# whatever value was sent back in.
		print (yield counter)

e = echo()
print "Yeild nr %s" % e.send(None) 	# Sending nothing in (since we haven't paused
					# anything with yield yet) and yielding nr 1
					# back to the print statement

print "Yeild nr %s" % e.send("Hello")	# the pause from nr 1 gets resumed, passing "Hello"
					# back in and printing it, then doing another loop
					# and yielding nr 2 back and pausing execution

print "Yeild nr %s" % e.send("World!")	# the pause from nr 2 gets resumed, passing "World!"
					# back in and printing it, then doing another loop
					# and yielding nr 3 back and pausing execution

					# If we called e.send("Blah"), etc. again
					# here we could go on forever, since the yield
					# statement sits inside a "while True"-loop

Make sure to read the comments in the above code since I figured it would be a lot easier to explain if the comments were attached to the correct line, running the above code will yield (again, no pun intended ;p) the following results:

Yeild nr 1
Hello
Yeild nr 2
World!
Yeild nr 3

Quite simple, and yet so powerful. There is one last thing I want to demonstrate in this, second part, of the tutorial - the method .throw(). Those of you familiar with languages other than Python might recognize the word throw and figure it would have something to do with exceptions, and you’d be correct - it does.

As I’ve demonstrated, .send() sends in data to the paused yield statement, and .throw() does something similar: it sends in an exception that gets thrown at the paused yield statement’s line, let’s demonstrate:

def exceptional():
	while True:
		yield

e = exceptional()
e.next()
e.throw(Exception)

Will give you this output:

Traceback (most recent call last):
  File "generator.py", line 7, in <module>
    e.throw(Exception)
  File "generator.py", line 3, in exceptional
    yield
Exception

Which is correct, because you sent an Exception in. It is possible to call .throw() as the first method on a new generator object, before any call to .next() or .send(), however that will throw an exception before any code is executed in the method and you will not have a chance to handle it.

In the stack trace above you also see that the exception is actually thrown at the “yield” line when it’s resumed after being paused by .next() the first time.

Let’s do a more advanced example, with a custom exception class:

def exceptional():
	counter = 0
	while True:
		try:
			counter += 1
			yield counter
		except DemoException, exc:
			print "Caught exception with message: %s" % exc

class DemoException(Exception):
	pass

e = exceptional()
print e.next()
print e.throw(DemoException("Hello World"))

The above code will print this:

1
Caught exception with message: Hello World
2

And here’s the magic - if you handle the exception that gets thrown in at the line yield was called at (by wrapping it in a try/except/finally-block) the code will continue executing like it should and .throw() will return the result of the next invocation of yield. All in all .send() and .throw() work exactly the same way except that .throw() raises whatever you feed it with as an exception.

The ability to pass errors (exceptions) *into* generators allows you to do some really neat error handling that doesn’t require your wrapping code to have any information about the generator resulting in a very clean and loosely coupled code.

In the next, and last, part I will go through a real world example using asynchronous i/o and network calls utilizing all the techniques explained in these two posts.

on Aug 29th, 2008Introduction to generators in Python


Part 1: Introduction to generators in Python
Part 2: Indepth generator usage in Python (part2)
Part 3: Managing asynchronous operations with python generators (part3)

First, some history…

Generators are a concept that was introduced in Python at version 2.2, back then they were unidirectional and only allowed information to be passed out of the generator and not back into it, which limited their use to simple iterators and not much else. This was changed / enhanced in Python 2.5 when both data and exceptions could be passed back into the generator. The changes made in 2.5 allowed for generators to be used as coroutines enabling them to function in complex event-driven programming such as asynchronous I/O, games, etc.

So how does one define a generator in Python? It’s actually very simple, you just define a normal subroutine (or function, if you will) that has the keyword yield somewhere inside of its body, here’s a quick example:

def foo():
    yield

What does yield do to a subroutine then? When a subroutine encounters the yield expression it suspends execution so that it can be resumed at a later time, as chosen by the programmer. You basically tell the routine “I don’t want to continue executing you now, but at a later stage I might want to and you should resume from the point where the yield statement was and not start over”, it’s also important to note that when yield is called the subroutine’s state (variable values, etc.) are all saved, so when you continue executing it everything will be the way you left it.

When you call a generator-function (a subroutine/function with the yield-keyword in its body) you don’t get a result back, instead you get a generator-object back that is used to control the execution of the subroutine, take the foo()-routine we defined above, if we do this:

gen = foo()
print gen

This is what python will print about the “result” of foo(): <generator object at 0x2b8cbb061098>, so when we call a generator function we get a generator object back, not the result of the function call. Note that none of the code inside foo() has yet been executed, as I’ve said the generators execution is controlled through the generator-object, primarily by it’s next()-method which will start/resume execution until a yield statement is found, and then return. So if we do this instead:

gen = foo()
print gen.next()

We get back this: None, not very useful at all, if we try calling gen.next() again we will get something like this:

Traceback (most recent call last):
  File "generators.py", line 7, in <module>
    print gen.next()
StopIteration

Because the yield statement only gets executed once in our foo-generator and the generator then reaches its end, we can only “resume” execution with next() once. So what if we add two yields to foo() instead, making the code look like this:

def foo():
	yield
	yield

gen = foo()
print gen.next()
print gen.next()

This works, giving us back:

None
None

calling gen.next() a third time will, again, raise a StopIteration-exception. We’re still only getting back a lot of nothing (None) from our generators, how about passing something back out from our yield statements, modifying foo again making it look like this:

def foo():
	yield "Hello"
	yield "World!"

gen = foo()
print gen.next()
print gen.next()

Will yield (no pun intended) this result:

Hello
World!

Kind of what you were expecting, huh? So let’s do something a bit more interesting, or well - something that shows what generators are useful for:

def counter(count_to):
    """Yield the integers 1, 2, ..., count_to, one per call to next()."""
    current = 0

    while current < count_to:
        current += 1
        yield current

As you see this generator named counter takes one argument, an integer which decides how far we should count, remember when we call counter(3) to count to three no code inside the generator gets executed until we call the generator-objects next() method, it then executes normally until it hits a yield statement and then suspends returning (through next()) whatever we fed to yield, let’s see it in action:

c = counter(3)
print c.next()
print c.next()
print c.next()

This will, maybe not to our surprise now, print:

1
2
3

When the three gets “yielded” to us, we can not call next() again without raising an StopIteration-exception because the while-loops condition would return false skipping the yield statement within it and counter() would end, without yielding anything back to us through next().

What happens if we call counter() several times? We will get several generator-objects each representing one invocation of counter() with its own internal state, you can almost think of it like creating two object instances of a class:

# Two independent generator objects created from the same generator function;
# advancing one does not affect the other.
c1 = counter(3)
c2 = counter(4)
print c1.next()
print c2.next()
print c1.next()
print c2.next()
print c1.next()
print c2.next()

print c2.next() # c2 can be advanced once more than c1, since it counts to four and c1 only to three

The above code will print,

1
1
2
2
3
3
4

, demonstrating that each invocation of a generator function creates its own generator-object and scope. Generators are used everywhere in python, in most cases they are used as iterators together with the for statement but they have other uses too. Using a generator together with a for statement is very straightforward, take the above counter()-function, we can use it the same way range() is used in python:

for i in counter(5):
	print i

The for-language construct in python has a built in way of handling generators, when it gets fed a generator-object (the result of calling counter(5) in this case) it will call .next() on it putting the value returned in the iteration-variable, i in this case. When for gets a StopIteration exception thrown from the generator for calling .next() one too many times it will silently kill the exception and stop the loop, neat huh?

Lets write a, again useless, generator-function that iterates through every letter in a word and call it with for:

def letters(word):
    """Yield the characters of *word* one at a time, in order."""
    # Iterate the string directly instead of indexing through range(len(...)).
    for letter in word:
        yield letter

for letter in letters("Hello World"):
	print letter

I think you can guess what this will print, yeah. While the above function is practically useless in python - it’s a good example on how generators and the for statement work together. I hope this run-through gave you a quick look into what generators are, if you’re interested in learning more about them make sure to check out part two of this article series.

on May 23rd, 2008New anti-spam commenting solution

From now on your comments should appear right away when you post them, but you will be required to have javascript enabled to post. If you can’t post a comment for some reason (other than not having javascript enabled), please email me.

on May 23rd, 2008Exceptions, you’re doing it wrong

Edit: After a couple of comments and some personal pondering I decided to remove a small part of the article, if you read it before and think something is missing, then you know why.

Once again a post aimed at the PHP community, not so much of a rant but more of something I’ve seen done horribly wrong in a lot of PHP code recently, first let me take a few examples from a couple of well known PHP frameworks and libraries:

symfony, file: lib/database/sfMySQLDatabase.class.php

$error = 'Failed to create a MySQLDatabase connection';
throw new sfDatabaseException($error);

propel, file: runtime/classes/propel/util/BasePeer.php

throw new PropelException("Expecting to delete 1 record, but criteria match multiple.");

doctrine, file: lib/Doctrine/Connection.php

throw new Doctrine_Connection_Exception('First argument should be an instance of
PDO or implement Doctrine_Adapter_Interface');

I could go on and on and list a couple of hundred of these from each of the most popular PHP libraries, and they all make the same assumption: An exception is a fatal error. And by making this assumption and using one monolithic “DatabaseException”-class, it becomes impossible to handle the exceptions in any other manner than as a fatal error.

For example the first snippet, coming from the symfony framework is thrown when you can’t connect to a database. Since every other error that can happen to any database connection done in symfony also throws a “sfDatabaseException” how is the user of the library supposed to try a backup database or supply a custom “database is down”-error page? By regex:ing the message of the thrown exception?

Exceptions are not, and I repeat not, a fatal error mechanism (they can be, sure - but it’s not their only or primary use). Taking the above symfony code again, it should look something like this:

throw new DatabaseConnectionFailedException('MySQL');

Or something along those lines, making it possible to somehow distinguish between different type of exceptions allows us to do something like this (again assuming the symfony code):

try {
         // Try to connect to a database

} catch (DatabaseConnectionFailedException $e)  {
         // Try backup database

} catch (DatabaseException $e) {
         // Generic database error
}

So, to sum it up:

Exceptions, you’re doing it wrong!