Comprehensible Comprehensions

Trey Hunner / Python & Django Trainer / @treyhunner

Python Morsels
Truthful Technology

Assumptions

  • You know how for loops work in Python
  • You're not already a fan of list comprehensions

What are comprehensions?

List comprehensions are a special syntax for looping over one list and building a new list from it

What are comprehensions?

List comprehensions are a special syntax for looping over any iterable and building up a new list from it, modifying and/or filtering elements along the way.

Looping in Python

for loops


numbers = [2, 1, 3, 4, 7, 11]

# Print out square of odd numbers
for n in numbers:
    if n % 2 == 1:
        print(n**2)
          

Turning lists into lists


numbers = [2, 1, 3, 4, 7, 11]

# Populate new list with square of odd numbers
squared_odds = []
for n in numbers:
    if n % 2 == 1:
        squared_odds.append(n**2)
          

Python Encourages Making New Lists


numbers = [2, 1, 3, 4, 7, 11]

# Modifying numbers list in place to square each odd number
for i, n in enumerate(numbers):
    if n % 2 == 1:
        numbers[i] = n**2



        
          

Python Encourages Making New Lists


numbers = [2, 1, 3, 4, 7, 11]

# Modifying numbers list in place: square the odd numbers, remove the even ones
i = 0
while i < len(numbers):
    if numbers[i] % 2 == 1:
        numbers[i] = numbers[i]**2
        i += 1
    else:
        # No i += 1 here: pop(i) shifts the next item into index i
        numbers.pop(i)
          

Variables Are References


>>> old = [1, 2, 3]
>>> new = old
>>> new.pop()
3
>>> new
[1, 2]
>>> old
[1, 2]
          

>>> old = [1, 2, 3]
>>> new = old[:-1]
>>> new
[1, 2]
>>> old
[1, 2, 3]
          

Turning Old Lists Into New Lists


squared_odds = []
for n in numbers:
    if n % 2 == 1:
        squared_odds.append(n**2)
          

squared_odds = [n**2 for n in numbers if n % 2 == 1]
          

squared_odds = []
for n in numbers:
    if n % 2 == 1:
        squared_odds.append(n**2)
          

squared_odds = [n**2 for n in numbers if n % 2 == 1]
          

Breaking it down

squared_odds = []
for n in numbers:
    if n % 2 == 1:
        squared_odds.append(n**2)

squared_odds = [n**2 for n in numbers if n % 2 == 1]

Copy-pasting into a comprehension

You can always copy-paste

new_list = []
for x in old_list:
    if condition(x):
        new_list.append(operation(x))

new_list = [operation(x) for x in old_list if condition(x)]

Readability

But they're not readable


for n in numbers:
    if n % 2 == 1:
        squared_odds.append(n**2)
          

squared_odds = [n**2 for n in numbers if n % 2 == 1]
          

Breaking up the components

squared_odds = [n**2 for n in numbers if n % 2 == 1]
squared_odds = [
    n**2
    for n in numbers
    if n % 2 == 1
]

Breaking up the components


squared_odds = [n**2 for n in numbers if n % 2 == 1]
          

squared_odds = [
    n**2
    for n in numbers
    if n % 2 == 1
]
          

They can be more readable


squared_odds = []
for n in numbers:
    if n % 2 == 1:
        squared_odds.append(n**2)
          

squared_odds = [
    n**2
    for n in numbers
    if n % 2 == 1
]
          

They can be more readable


squared_odds = []
for n in squared_odds:
    if n % 2 == 1:
        squared_odds.append(n**2)
          

squared_odds = [
    n**2
    for n in numbers
    if n % 2 == 1
]
          

But wait... there's more!

With Filtering

squared_odds = []
for n in numbers:
    if n % 2 == 1:
        squared_odds.append(n**2)

squared_odds = [
    n**2
    for n in numbers
    if n % 2 == 1
]

Without Filtering

squared_numbers = []
for n in numbers:
    squared_numbers.append(n**2)

squared_numbers = [
    n**2
    for n in numbers
]

With Multiple Loops

flattened = []
for row in matrix:
    for item in row:
        flattened.append(item)
flattened = [
    item
    for row in matrix
    for item in row
]

Set comprehensions


numbers = {2, 1, 3, 4, 7}

squared_odds = set()
for n in numbers:
    if n % 2 == 1:
        squared_odds.add(n**2)
          

numbers = {2, 1, 3, 4, 7}

squared_odds = {
    n**2
    for n in numbers
    if n % 2 == 1
}
          

Dictionary comprehensions


from string import ascii_lowercase

letter_positions = {}
for n, letter in enumerate(ascii_lowercase, start=1):
    letter_positions[letter] = n
          

from string import ascii_lowercase

letter_positions = {
    letter: n
    for n, letter in enumerate(ascii_lowercase, start=1)
}
          

Why not map and filter?


> numbers = [2, 1, 3, 4, 7, 11]
> numbers.filter(n => n % 2 == 1).map(n => n**2)
[ 1, 9, 49, 121 ]
          

>> numbers = [2, 1, 3, 4, 7, 11]
>> numbers.select{ |n| n % 2 == 1 }.map{ |n| n**2 }
=> [1, 9, 49, 121]
          

>>> numbers = [2, 1, 3, 4, 7, 11]
>>> numbers.filter(lambda n: n % 2 == 1).map(lambda n: n**2)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
AttributeError: 'list' object has no attribute 'filter'
          


>>> numbers = [2, 1, 3, 4, 7, 11]
>>> [*map(lambda n: n**2, filter(lambda n: n % 2 == 1, numbers))]
[1, 9, 49, 121]
          

>>> numbers = [2, 1, 3, 4, 7, 11]
>>> [n**2 for n in numbers if n % 2 == 1]
[1, 9, 49, 121]
          

Examples

Modifying each item in a list


lowercased_words = [word.lower() for word in words]
          

Filtering down a list of strings


def get_anagrams(target_word, words):
    """Return a list of the given words that are anagrams of target_word."""
    return [
        candidate_word
        for candidate_word in words
        if is_anagram(candidate_word, target_word)
    ]


def is_anagram(word1, word2):
    """Return True if words contain same letters (order ignored, counts matter)."""
    return sorted(word1) == sorted(word2)
          

Modifying a list of lists


negative_matrix = []
for row in matrix:
    new_row = []
    for n in row:
        new_row.append(-n)
    negative_matrix.append(new_row)
          

negative_matrix = []
for row in matrix:
    negative_matrix.append([-n for n in row])
          

negative_matrix = [
    [-n for n in row]
    for row in matrix
]
          

Generator Comprehensions

Generator Expressions

Generator Expressions

  • Generators are lazy single-use iterables
  • Generator expressions are to generators as list comprehensions are to lists
  • If you're making a list to loop over it exactly once, use a generator expression instead of a list comprehension

Generator Expression Syntax


>>> numbers = range(1_000_000)
>>> squared_numbers = (n**2 for n in numbers)
>>> squared_numbers
<generator object <genexpr> at 0x7f129b187780>
          

Generators are weird


>>> numbers = [2, 1, 3, 4, 7]
>>> squared_numbers = (n**2 for n in numbers)
>>> len(squared_numbers)
TypeError: object of type 'generator' has no len()
>>> squared_numbers[0]
TypeError: 'generator' object is not subscriptable
>>> list(squared_numbers)
[4, 1, 9, 16, 49]
>>> list(squared_numbers)
[]

          

Generators are lazy


>>> from itertools import islice
>>> numbers = range(1_000_000_000)
>>> squared_numbers = (n**2 for n in numbers)
>>> next(squared_numbers)
0
>>> next(squared_numbers)
1
>>> next(squared_numbers)
4
>>> next(squared_numbers)
9
>>> next(squared_numbers)
16
          

Generators are single use iterables


>>> numbers = [2, 1, 3, 4, 7]
>>> squared_numbers = (n**2 for n in numbers)
>>> sum(squared_numbers)
79
>>> sum(squared_numbers)
0
>>> cubed_numbers = (n**3 for n in numbers)
>>> list(cubed_numbers)
[8, 1, 27, 64, 343]
>>> list(cubed_numbers)
[]

Loop over them right away


>>> numbers = range(1_000_000)
>>> sum((n**2 for n in numbers))
333332833333500000
>>> sum(n**2 for n in numbers)
333332833333500000

Generator Expressions


>>> numbers = [2, 1, 3, 4, 7]
>>> sum_of_squares = sum([n**2 for n in numbers])
>>> sum_of_squares = sum(n**2 for n in numbers)
          

If you're making a list to loop over it exactly once, use a generator expression instead of a list comprehension

Generator Examples

str.join


def translate(sentence):
    """Return sentence with each whitespace-separated word replaced by
    its DICTIONARY entry, rejoined with single spaces."""
    translation = []
    for w in sentence.split():
        translation.append(DICTIONARY[w])
    return " ".join(translation)
          

def translate(sentence):
    """Return sentence with each whitespace-separated word replaced by
    its DICTIONARY entry, rejoined with single spaces."""
    translation = [
        DICTIONARY[w]
        for w in sentence.split()
    ]
    return " ".join(translation)
          

def translate(sentence):
    """Return sentence with each whitespace-separated word replaced by
    its DICTIONARY entry, rejoined with single spaces."""
    # The generator expression is consumed directly by str.join
    return " ".join(
        DICTIONARY[w]
        for w in sentence.split()
    )
          

sum


sum_of_squares = 0
for n in numbers:
    sum_of_squares += n**2
          

squares = []
for n in numbers:
    squares.append(n**2)
sum_of_squares = sum(squares)
          

sum_of_squares = sum(n**2 for n in numbers)
          

When not to use comprehensions

Don't overdo it

  • Do not call functions with side effects in a comprehension
  • Don't use comprehensions except for making lists
  • List comprehensions should only be used for turning one list (or iterable) into another list

Don't abuse comprehensions


>>> [print(n**2) for n in range(10) if n % 2 == 1]
1
9
25
49
81
[None, None, None, None, None]
          


color_ratios = {}
for color, ratio in zip(colors, ratios):
    color_ratios[color] = ratio
          

color_ratios = {
    color: ratio
    for (color, ratio) in zip(colors, ratios)
}
          

color_ratios = dict(zip(colors, ratios))
          


with open(filename) as my_file:
    lines = [line for line in my_file]
          

with open(filename) as my_file:
    lines = list(my_file)
          

with open(filename) as my_file:
    lines = my_file.readlines()
          

Cheat Sheet

Building up a new list

new_list = []
for item in old_list:
    new_list.append(operation(item))

new_list = [
    operation(item)
    for item in old_list
]

Building up a list while filtering

new_list = []
for item in old_list:
    if condition(item):
        new_list.append(operation(item))

new_list = [
    operation(item)
    for item in old_list
    if condition(item)
]

Building up a new dictionary

new_dict = {}
for key, value in old_dict.items():
    if condition(key, value):
        new_dict[change1(key)] = change2(value)

new_dict = {
    change1(key): change2(value)
    for key, value in old_dict.items()
    if condition(key, value)
}

Looping Deeply

new_list = []
for inner_list in outer_list:
    for item in inner_list:
        new_list.append(item)
new_list = [
    item
    for inner_list in outer_list
    for item in inner_list
]

Not filtering or changing anything?

# Element-by-element copy of old_list: nothing is filtered or changed
new_list = [
    item
    for item in old_list
]

new_list = list(old_list)

Making a list and immediately looping

new_list = []
for item in old_list:
    new_list.append(operation(item))
some_result = some_operation(new_list)

some_result = some_operation(
    operation(item)
    for item in old_list
)

Remember

  • Use comprehensions for turning one iterable into another
  • Copy-paste your way from a for loop to a comprehension
  • Use generators when you'll be looping over your new iterable only one time
  • Break comprehensions and generator expressions over multiple lines to improve readability

Recommended resources at

trey.io/comprehensions

Trey Hunner
Python Team Trainer