Python 生成器和迭代器
生成器和迭代器是 Python 中处理大数据和惰性计算的重要工具。
迭代器
什么是迭代器?
迭代器是一个可以记住遍历位置的对象,它实现了 __iter__() 和 __next__() 方法,提供统一的遍历接口。
可迭代对象和迭代器
# Iterable: any object that a for-loop can traverse.
my_list = [1, 2, 3]
for item in my_list:
    print(item)

# iter() returns an iterator over the list; each next() advances it one step.
my_iter = iter(my_list)
print(next(my_iter))  # 1
print(next(my_iter))  # 2
print(next(my_iter))  # 3
# One more next(my_iter) here would raise StopIteration.
自定义迭代器
class Range:
    """Single-pass iterator that counts from ``start`` up to, but excluding, ``end``.

    The instance is its own iterator (``__iter__`` returns ``self``), so it
    can be traversed only once.
    """

    def __init__(self, start, end):
        """Remember the next value to hand out and the exclusive upper bound."""
        self.current = start
        self.end = end

    def __iter__(self):
        """Return the iterator object itself."""
        return self

    def __next__(self):
        """Return the current value and advance by one.

        Raises:
            StopIteration: once the current value has reached ``end``.
        """
        if self.current >= self.end:
            raise StopIteration
        value = self.current
        self.current += 1
        return value


# Usage
for i in Range(1, 5):
    print(i)  # 1, 2, 3, 4
StopIteration 异常
my_list = [1, 2, 3]
my_iter = iter(my_list)
# Drive the iterator by hand: next() signals exhaustion by raising
# StopIteration, which is what ends the otherwise endless loop.
try:
    while True:
        item = next(my_iter)
        print(item)
except StopIteration:
    print("迭代结束")
生成器
什么是生成器?
生成器是一种特殊的迭代器,使用函数和 yield 关键字创建。
创建生成器
def my_generator():
    """Yield the values 1, 2 and 3, one per ``next()`` call."""
    yield 1
    yield 2
    yield 3


gen = my_generator()
print(next(gen))  # 1
print(next(gen))  # 2
print(next(gen))  # 3
# One more next(gen) here would raise StopIteration.
生成器函数 vs 普通函数
# Regular function - builds and returns every value at once.
def func():
    """Return the list [0, 1, 2], fully built before returning."""
    # The explicit loop is kept so it can be compared line by line with the
    # generator version of the same logic.
    result = []
    for i in range(3):
        result.append(i)
    return result


print(func())  # [0, 1, 2]
# Generator function - lazy evaluation, values are produced on demand.
def gen():
    """Yield 0, 1 and 2, computing each value only when it is requested."""
    for i in range(3):
        yield i


g = gen()
print(next(g))  # 0
print(next(g))  # 1
print(next(g))  # 2
生成器的优势
- 节省内存:不需要一次性加载所有数据
- 惰性计算:按需计算,提高性能
- 简洁代码:用简单的语法创建迭代器
# Squares of 0..n-1 produced one at a time, so even n = 10_000_000 does not
# require holding all results in memory.
def squares(n):
    """Yield ``i * i`` for every ``i`` in ``range(n)``."""
    for i in range(n):
        yield i * i


# By contrast, the equivalent list comprehension materializes every element:
#     [i * i for i in range(10000000)]
生成器表达式
# Same syntax as a list comprehension, but parentheses produce a generator.
gen = (x * x for x in range(5))
print(list(gen))  # [0, 1, 4, 9, 16]

# A generator expression can be consumed directly by a for-loop.
for i in (x * x for x in range(5)):
    print(i)
带条件的生成器表达式
# Squares of the even numbers below 10, filtered inside the expression.
evens_squares = (n ** 2 for n in range(10) if not n % 2)
print(list(evens_squares))  # [0, 4, 16, 36, 64]
yield 关键字
基本用法
def count_up_to(n):
    """Yield 1, 2, ... up to and including ``n``; yields nothing if ``n < 1``."""
    i = 1
    while i <= n:
        yield i
        i += 1


counter = count_up_to(5)
print(list(counter))  # [1, 2, 3, 4, 5]
yield from
用于把迭代委托给另一个可迭代对象(可以是生成器,也可以是列表等任意可迭代对象):
def chain(*iterables):
    """Yield every item of each given iterable, in argument order."""
    for iterable in iterables:
        # Delegate to the inner iterable until it is exhausted.
        yield from iterable


result = chain([1, 2], [3, 4], [5])
print(list(result))  # [1, 2, 3, 4, 5]
yield 的值
def my_gen():
    """Generator that shows how a value is received through ``send()``."""
    # The yield expression evaluates to whatever the caller passes to send().
    received = yield "开始"
    print(f"接收到: {received}")
    yield "结束"


gen = my_gen()
print(next(gen))  # "开始" - runs the generator up to its first yield
print(gen.send("Hello"))  # prints "接收到: Hello", then "结束"
生成器的状态
import inspect


def my_gen():
    """Yield 0, 1, 2, ... without end."""
    i = 0
    while True:
        yield i
        i += 1


gen = my_gen()
print(inspect.getgeneratorstate(gen))  # GEN_CREATED (not started yet)
next(gen)
print(inspect.getgeneratorstate(gen))  # GEN_SUSPENDED (paused at a yield)
gen.close()
print(inspect.getgeneratorstate(gen))  # GEN_CLOSED (finished)
生成器的应用
1. 处理大数据
def read_file_lines(file_path, encoding="utf-8"):
    """Yield the lines of a text file one at a time, stripped of whitespace.

    The file is iterated lazily, so only the current line is held by this
    generator regardless of the file's size.

    Args:
        file_path: Path of the file to read.
        encoding: Text encoding used to decode the file. It is passed
            explicitly so the result does not depend on the platform's
            default encoding.

    Yields:
        Each line with leading and trailing whitespace (including the
        newline) removed.
    """
    with open(file_path, 'r', encoding=encoding) as f:
        for line in f:
            yield line.strip()
# Usage: each line is handed to process() as soon as it has been read.
# NOTE(review): process() is not defined in this document - supply your own handler.
for line in read_file_lines("large_file.txt"):
    process(line)
2. 无限序列
def fibonacci():
    """Yield the Fibonacci sequence 0, 1, 1, 2, 3, ... without end."""
    a, b = 0, 1
    while True:
        yield a
        a, b = b, a + b


fib = fibonacci()
# The generator never finishes by itself, so the caller decides how many
# values to pull.
for _ in range(10):
    print(next(fib))  # 0, 1, 1, 2, 3, 5, 8, 13, 21, 34
3. 管道处理
def numbers():
    """Yield the integers 0 through 9 (the source stage of the pipeline)."""
    yield from range(10)
def even_filter(numbers):
    """Yield only the even values of ``numbers``, preserving their order."""
    for n in numbers:
        if n % 2 == 0:
            yield n
def square(numbers):
    """Yield the square of every value in ``numbers``."""
    for n in numbers:
        yield n * n
def take(n, sequence):
    """Yield at most the first ``n`` items of ``sequence``.

    The limit is checked before another item is requested, so no more than
    ``n`` items are ever pulled from ``sequence``. This matters when
    ``sequence`` is a one-shot iterator that the caller keeps using
    afterwards: checking the limit only after fetching would silently
    discard one extra element.

    Args:
        n: Maximum number of items to yield; values <= 0 yield nothing.
        sequence: Any iterable.
    """
    if n <= 0:
        return
    for count, item in enumerate(sequence, start=1):
        yield item
        if count >= n:
            # Stop here instead of asking the source for item n + 1.
            return
# Chain the stages: numbers -> even_filter -> square -> take.
result = take(5, square(even_filter(numbers())))
print(list(result)) # [0, 4, 16, 36, 64]
4. 组合生成器
def permutations(items):
    """Yield every ordering of ``items`` as a list.

    Each element in turn is chosen as the head, and the permutations of the
    remaining elements are generated recursively and appended to it.

    Args:
        items: A sequence supporting slicing and concatenation (for
            example a list).

    Yields:
        One list per permutation; an empty input yields a single ``[]``.
    """
    if not items:
        yield []
        return
    for i, head in enumerate(items):
        # Everything except the chosen head element.
        rest = items[:i] + items[i + 1:]
        for tail in permutations(rest):
            yield [head] + tail


for p in permutations(['A', 'B', 'C']):
    print(p)
itertools 模块
Python 的 itertools 模块提供了丰富的迭代器工具:
1. 无限迭代器
import itertools

# count - endless arithmetic progression; the loop must break on its own.
for i in itertools.count(10, 2):  # start at 10, step by 2
    if i > 20:
        break
    print(i)  # 10, 12, 14, 16, 18, 20

# cycle - repeat the elements of an iterable forever.
counter = 0
for item in itertools.cycle(['A', 'B', 'C']):
    print(item)
    counter += 1
    if counter > 5:
        break  # printed so far: A, B, C, A, B, C

# repeat - yield the same object a fixed number of times.
for item in itertools.repeat('X', 3):
    print(item)  # X, X, X
2. 有限迭代器
import itertools
# accumulate - running totals
from itertools import accumulate
print(list(accumulate([1, 2, 3, 4]))) # [1, 3, 6, 10]
# chain - concatenate several iterables
print(list(itertools.chain([1, 2], [3, 4], [5]))) # [1, 2, 3, 4, 5]
# compress - keep the items whose selector is truthy
print(list(itertools.compress([1, 2, 3, 4], [1, 0, 1, 1]))) # [1, 3, 4]
# dropwhile - skip items while the predicate is true, then yield all the rest
print(list(itertools.dropwhile(lambda x: x < 3, [1, 2, 3, 4, 5]))) # [3, 4, 5]
# takewhile - yield items while the predicate is true, then stop
print(list(itertools.takewhile(lambda x: x < 3, [1, 2, 3, 4, 5]))) # [1, 2]
# filterfalse - keep the items for which the predicate is false
print(list(itertools.filterfalse(lambda x: x % 2 == 0, [1, 2, 3, 4]))) # [1, 3]
# islice - lazy slicing: (stop), (start, stop) or (start, stop, step)
print(list(itertools.islice([1, 2, 3, 4, 5], 2))) # [1, 2]
print(list(itertools.islice([1, 2, 3, 4, 5], 1, 4))) # [2, 3, 4]
print(list(itertools.islice([1, 2, 3, 4, 5], 0, 5, 2))) # [1, 3, 5]
3. 组合生成器
import itertools
# product - Cartesian product
print(list(itertools.product([1, 2], ['a', 'b'])))
# [(1, 'a'), (1, 'b'), (2, 'a'), (2, 'b')]
# permutations - ordered arrangements of the given length
print(list(itertools.permutations([1, 2, 3], 2)))
# [(1, 2), (1, 3), (2, 1), (2, 3), (3, 1), (3, 2)]
# combinations - unordered selections without repetition
print(list(itertools.combinations([1, 2, 3], 2)))
# [(1, 2), (1, 3), (2, 3)]
# combinations_with_replacement - unordered selections, repetition allowed
print(list(itertools.combinations_with_replacement([1, 2], 2)))
# [(1, 1), (1, 2), (2, 2)]
小结
本章我们学习了:
- 迭代器和可迭代对象的概念
- 自定义迭代器
- 生成器的基本用法
- yield 关键字
- 生成器表达式
- 生成器的应用场景
- itertools 模块的使用
练习
- 实现一个无限质数生成器
- 实现一个生成杨辉三角的生成器
- 使用 itertools 实现全排列
- 实现一个管道式数据处理系统