collections и functools — мощные инструменты стандартной библиотеки
Модуль collections
Модуль collections расширяет стандартные типы данных (dict, list, tuple) специализированными контейнерами, которые решают частые задачи эффективнее.
defaultdict — словарь со значениями по умолчанию
Обычный dict выбрасывает KeyError при обращении к несуществующему ключу. defaultdict автоматически создаёт значение по умолчанию:
from collections import defaultdict
# Group items by category.
products = [
    ("фрукты", "яблоко"),
    ("овощи", "морковь"),
    ("фрукты", "банан"),
    ("овощи", "картофель"),
    ("фрукты", "вишня"),
]

# Without defaultdict: every new key has to be initialised by hand
# before the first append.
groups_old: dict[str, list[str]] = {}
for category, product in products:
    if category not in groups_old:
        groups_old[category] = []
    groups_old[category].append(product)

# With defaultdict — cleaner: a missing key is created on first access
# by calling the factory (list() -> []).
groups: defaultdict[str, list[str]] = defaultdict(list)
for category, product in products:
    groups[category].append(product)
print(dict(groups))
# {'фрукты': ['яблоко', 'банан', 'вишня'], 'овощи': ['морковь', 'картофель']}

# Count occurrences: int() returns 0, so "+= 1" works for unseen words.
word_count: defaultdict[str, int] = defaultdict(int)
for word in "мама мыла раму мама мыла".split():
    word_count[word] += 1
print(dict(word_count))  # {'мама': 2, 'мыла': 2, 'раму': 1}

# Nested defaultdict: the outer factory builds an inner defaultdict(list),
# so two levels of keys can be used without any initialisation.
tree: defaultdict[str, defaultdict[str, list[str]]] = defaultdict(
    lambda: defaultdict(list)
)
tree["Россия"]["Москва"].append("Кремль")
tree["Россия"]["Москва"].append("Большой театр")
tree["Россия"]["Санкт-Петербург"].append("Эрмитаж")
Counter — подсчёт элементов
from collections import Counter
# Count elements in iterable
text = "абракадабра"
letter_count = Counter(text)
print(letter_count)
# Counter({'а': 5, 'б': 2, 'р': 2, 'к': 1, 'д': 1})
# Most common elements
print(letter_count.most_common(3))
# [('а', 5), ('б', 2), ('р', 2)]
# Counter from list
votes = ["Python", "Go", "Python", "Rust", "Python", "Go"]
results = Counter(votes)
print(results.most_common(1)) # [('Python', 3)]
# Arithmetic operations
inventory_a = Counter(яблоки=5, бананы=3, вишни=2)
inventory_b = Counter(яблоки=2, бананы=5, груши=4)
# Addition
print(inventory_a + inventory_b)
# Counter({'бананы': 8, 'яблоки': 7, 'груши': 4, 'вишни': 2})
# Subtraction (removes zero and negative)
print(inventory_a - inventory_b)
# Counter({'вишни': 2, 'яблоки': 3})
# Total count
print(inventory_a.total()) # 10
deque — двусторонняя очередь
deque (double-ended queue) — контейнер, оптимизированный для добавления и удаления элементов с обоих концов за O(1):
from collections import deque
# Basic deque: appends and pops are available at both ends.
letters: deque[str] = deque(["b", "c", "d"])
letters.appendleft("a")  # Add to left: deque(['a', 'b', 'c', 'd'])
letters.append("e")  # Add to right: deque(['a', 'b', 'c', 'd', 'e'])
letters.popleft()  # Remove from left: 'a'
letters.pop()  # Remove from right: 'e'

# Rotate: a positive step shifts items to the right, a negative one to the
# left. A separate name is used because this deque holds ints, not strings.
d: deque[int] = deque([1, 2, 3, 4, 5])
d.rotate(2)  # Rotate right: deque([4, 5, 1, 2, 3])
d.rotate(-1)  # Rotate left: deque([5, 1, 2, 3, 4])

# Fixed-size deque (automatically drops oldest): once maxlen items are
# stored, each append discards the item at the opposite end.
recent_logs: deque[str] = deque(maxlen=5)
for i in range(10):
    recent_logs.append(f"log_{i}")
print(list(recent_logs))  # ['log_5', 'log_6', 'log_7', 'log_8', 'log_9']
# Use as sliding window
def moving_average(data: list[float], window: int) -> list[float]:
    """Calculate the simple moving average of ``data`` using a deque.

    Args:
        data: Values in the order they should be averaged.
        window: Number of consecutive values per average; must be >= 1.

    Returns:
        One average for every full window, in input order. The list is
        empty when ``data`` holds fewer than ``window`` values.

    Raises:
        ValueError: If ``window`` is less than 1.
    """
    # window == 0 would otherwise divide by zero below, and a negative
    # window is rejected by deque itself with a less helpful message.
    if window < 1:
        raise ValueError(f"window must be a positive integer, got {window!r}")

    result: list[float] = []
    # maxlen makes the deque drop the oldest value on every append once full.
    window_data: deque[float] = deque(maxlen=window)
    for value in data:
        window_data.append(value)
        # Emit an average only once the window is completely filled.
        if len(window_data) == window:
            result.append(sum(window_data) / window)
    return result
# Seven sample prices; with a window of 3 the call yields five averages,
# the first two being 11.0 and 11.5.
prices = [10.0, 11.0, 12.0, 11.5, 13.0, 14.0, 13.5]
print(moving_average(prices, 3))
namedtuple и ChainMap
from collections import namedtuple, ChainMap
# namedtuple — lightweight immutable record with named fields
Point = namedtuple("Point", ["x", "y"])
p = Point(3, 4)
print(p.x, p.y)  # 3 4
print(p[0], p[1])  # 3 4 — tuple indexing works

# With defaults (the ``defaults`` parameter was added in Python 3.7).
# Defaults are applied to the rightmost fields, here only ``alpha``.
Color = namedtuple("Color", ["r", "g", "b", "alpha"], defaults=[255])
red = Color(255, 0, 0)  # alpha=255 by default

# Convert to dict
print(p._asdict())  # {'x': 3, 'y': 4}

# ChainMap — search through multiple dicts; the mappings are consulted in
# the order given and the first one that contains the key wins.
defaults = {"theme": "light", "lang": "ru", "font_size": 14}
user_prefs = {"theme": "dark"}
session = {"font_size": 18}
config = ChainMap(session, user_prefs, defaults)
print(config["theme"])  # 'dark' — from user_prefs
print(config["lang"])  # 'ru' — from defaults
print(config["font_size"])  # 18 — from session (first found wins)
Модуль functools
functools содержит утилиты для работы с функциями: кэширование, частичное применение, редукция.
lru_cache — мемоизация
from functools import lru_cache
@lru_cache(maxsize=128)
def fibonacci(n: int) -> int:
    """Calculate the n-th Fibonacci number with memoization.

    Args:
        n: Zero-based index in the sequence (fibonacci(0) == 0,
            fibonacci(1) == 1); must be non-negative.

    Returns:
        The n-th Fibonacci number.

    Raises:
        ValueError: If ``n`` is negative.
    """
    # Without this guard a negative n would fall into the base case below
    # and be returned unchanged, which is not a Fibonacci number.
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n!r}")
    if n < 2:
        return n
    # Both recursive calls go through the cached wrapper, so each value
    # of n is computed only once.
    return fibonacci(n - 1) + fibonacci(n - 2)
# Without cache: exponential time (the same sub-results are recomputed).
# With cache: linear time (each n is evaluated once and then reused).
print(fibonacci(100)) # 354224848179261915075
# Cache statistics: hit/miss counters, configured maxsize and current size.
print(fibonacci.cache_info())
# CacheInfo(hits=98, misses=101, maxsize=128, currsize=101)
# Clear cache: discard all stored results.
fibonacci.cache_clear()
# Python 3.9+: cache without size limit
from functools import cache
@cache
def factorial(n: int) -> int:
    """Calculate n! recursively with an unbounded cache.

    Args:
        n: Non-negative integer.

    Returns:
        The factorial of ``n`` (1 for n == 0 and n == 1).

    Raises:
        ValueError: If ``n`` is negative.
    """
    # The base case below would otherwise return 1 for any negative n.
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n!r}")
    if n <= 1:
        return 1
    # The recursive call hits the cache for every n seen before.
    return n * factorial(n - 1)
partial — частичное применение аргументов
from functools import partial
def power(base: int, exponent: int) -> int:
    """Raise ``base`` to ``exponent``.

    Args:
        base: Number to be raised.
        exponent: Power to raise it to.

    Returns:
        ``base ** exponent``. Note that for a negative integer exponent
        Python's ``**`` produces a float, not an int.
    """
    return base ** exponent
# Create specialized functions: partial() fixes the ``exponent`` keyword
# argument, so the resulting callables only need the base.
square = partial(power, exponent=2)
cube = partial(power, exponent=3)
print(square(5)) # 25
print(cube(3)) # 27
# Practical example: logging with prefix
def log_message(level: str, prefix: str, message: str) -> None:
    """Print ``message`` to stdout as ``[level] prefix: message``.

    Args:
        level: Severity label placed inside the square brackets.
        prefix: Source tag printed before the colon.
        message: Text of the log entry.
    """
    print(f"[{level}] {prefix}: {message}")
# Pre-configured loggers: the first two positional arguments (level and
# prefix) are bound in advance, so callers pass only the message.
auth_info = partial(log_message, "INFO", "AUTH")
db_error = partial(log_message, "ERROR", "DB")
auth_info("Пользователь вошёл") # [INFO] AUTH: Пользователь вошёл
db_error("Потеряно соединение") # [ERROR] DB: Потеряно соединение
reduce — свёртка последовательности
from functools import reduce
from operator import mul
numbers = [1, 2, 3, 4, 5]

# Sum: fold the list left to right, carrying the running total.
total = reduce(lambda running, item: running + item, numbers)
print(total)  # 15

# Maximum: carry the larger of the current best and the next candidate.
maximum = reduce(
    lambda best, candidate: candidate if best <= candidate else best,
    numbers,
)
print(maximum)  # 5

# Product: operator.mul replaces a hand-written multiplication lambda.
product = reduce(mul, range(1, 6))
print(product)  # 120

# Flatten one level of nesting; the empty list is the initial accumulator.
nested = [[1, 2], [3, 4], [5, 6]]
flat = reduce(lambda merged, chunk: [*merged, *chunk], nested, [])
print(flat)  # [1, 2, 3, 4, 5, 6]
singledispatch — перегрузка по типу первого аргумента
from functools import singledispatch
@singledispatch
def format_value(value) -> str:
    """Format ``value`` for display, dispatching on the argument's type.

    This is the fallback used for every type without a registered
    handler; it simply returns ``str(value)``.
    """
    return str(value)


@format_value.register(bool)
def _(value: bool) -> str:
    """Format booleans as ``True`` / ``False``."""
    # bool is a subclass of int, so without this handler True would be
    # routed to the int formatter and rendered as "1".
    return str(value)


@format_value.register(int)
def _(value: int) -> str:
    """Format integers with spaces as thousands separators."""
    return f"{value:,}".replace(",", " ")


@format_value.register(float)
def _(value: float) -> str:
    """Format floats with exactly two decimal places."""
    return f"{value:.2f}"


@format_value.register(list)
def _(value: list) -> str:
    """Format lists as their items followed by the element count."""
    return f"[{', '.join(str(v) for v in value)}] ({len(value)} шт.)"
# Each call is routed by the runtime type of its argument.
print(format_value(1000000)) # 1 000 000 — int handler: thousands separated by spaces
print(format_value(3.14159)) # 3.14 — float handler: two decimal places
print(format_value([1, 2, 3])) # [1, 2, 3] (3 шт.) — list handler: items plus element count
print(format_value("hello")) # hello (default) — no str handler is registered
Практический пример: анализ текста
from collections import Counter, defaultdict
from functools import lru_cache, reduce
import re
class TextAnalyzer:
    """Analyze text using collections and functools.

    The constructor stores the raw ``text`` and a list of lower-cased
    ``words`` extracted from it; every other method works on ``words``.
    """

    def __init__(self, text: str) -> None:
        """Tokenize ``text`` into lower-cased words.

        Args:
            text: Text to analyze; may be empty.
        """
        self.text = text
        # \w matches Unicode word characters for str patterns, so
        # non-Latin words (e.g. Cyrillic) are captured as well.
        self.words = re.findall(r'\b\w+\b', text.lower())
        # Per-instance cache for word_frequencies(). An lru_cache on the
        # method would be shared by all instances (evicting each other with
        # maxsize=1) and would keep analyzed instances alive.
        self._frequencies: Counter[str] | None = None

    def word_frequencies(self) -> Counter:
        """Count word frequencies (computed once per instance, then reused)."""
        if self._frequencies is None:
            self._frequencies = Counter(self.words)
        return self._frequencies

    def top_words(self, n: int = 10) -> list[tuple[str, int]]:
        """Get the ``n`` most common words as ``(word, count)`` pairs."""
        return self.word_frequencies().most_common(n)

    def words_by_length(self) -> dict[int, list[str]]:
        """Group the distinct words by their length.

        Returns:
            A dict ordered by ascending length; each value lists the
            distinct words of that length in sorted order.
        """
        groups: defaultdict[int, list[str]] = defaultdict(list)
        # Sorting the set gives a reproducible order inside each group;
        # iterating a set of strings directly can differ between runs.
        for word in sorted(set(self.words)):
            groups[len(word)].append(word)
        return dict(sorted(groups.items()))

    def longest_word(self) -> str:
        """Find the longest word using reduce.

        Returns:
            The first word of maximal length, or an empty string when the
            text contains no words.
        """
        return reduce(
            # ">=" keeps the earlier word when two words tie on length.
            lambda a, b: a if len(a) >= len(b) else b,
            self.words,
            "",  # initial value: avoids TypeError on an empty word list
        )

    def stats(self) -> dict:
        """Get text statistics.

        Returns:
            A dict with ``total_words``, ``unique_words``,
            ``longest_word`` and ``top_3`` (three most common words).
        """
        freq = self.word_frequencies()
        return {
            "total_words": len(self.words),
            "unique_words": len(freq),
            "longest_word": self.longest_word(),
            "top_3": self.top_words(3),
        }
# Usage: build an analyzer for a short sample paragraph and print the
# summary dict returned by stats().
text = """
Python — мощный и универсальный язык программирования.
Python используется для веб-разработки, анализа данных,
машинного обучения и автоматизации. Python прост в изучении
и имеет огромное сообщество разработчиков.
"""
analyzer = TextAnalyzer(text)
print(analyzer.stats())