Easy · 📖 Теория · 5 min

Словари и множества

Методы словарей, dict comprehension, defaultdict, Counter, set и frozenset

Словари и множества

Словари (dict) и множества (set) -- структуры данных Python, построенные на хеш-таблицах. Словари хранят пары ключ-значение, множества -- уникальные элементы. Оба обеспечивают поиск, вставку и удаление элементов в среднем за O(1).

Словари (dict)

# Creating dictionaries
empty = {}
user = {"name": "Иван", "age": 25, "city": "Москва"}

# dict() constructor - keyword arguments become string keys
config = dict(host="localhost", port=8080, debug=True)

# From pairs - any iterable of (key, value) tuples works
pairs = [("a", 1), ("b", 2), ("c", 3)]
d = dict(pairs)  # {'a': 1, 'b': 2, 'c': 3}

# dict.fromkeys() - create with default values
# NOTE: every key maps to the SAME value object, so avoid a mutable
# default such as [] here.
keys = ["name", "age", "city"]
defaults = dict.fromkeys(keys, None)
print(defaults)  # {'name': None, 'age': None, 'city': None}

# Merge dictionaries (Python 3.9+)
# `defaults` is rebound to a fresh dict for this example.
defaults = {"theme": "dark", "lang": "ru", "font_size": 14}
user_prefs = {"theme": "light", "font_size": 16}

# `|` builds a new dict; on duplicate keys the right operand wins,
# while key order follows the left operand.
merged = defaults | user_prefs  # user_prefs overrides defaults
print(merged)  # {'theme': 'light', 'lang': 'ru', 'font_size': 16}

# In-place merge - mutates `defaults` instead of creating a new dict
defaults |= user_prefs  # defaults == {'theme': 'light', 'lang': 'ru', 'font_size': 16}

Доступ к элементам

user = {"name": "Иван", "age": 25, "city": "Москва"}

# Direct access (raises KeyError if key missing)
print(user["name"])   # 'Иван'
# print(user["email"])  # KeyError: 'email'

# .get() - safe access with default
print(user.get("name"))           # 'Иван'
print(user.get("email"))          # None
print(user.get("email", "N/A"))   # 'N/A'

# Check for key
print("name" in user)     # True
print("email" in user)    # False

# setdefault() - get or set if missing
# The two calls use different addresses on purpose: the second call finds
# the key already present and leaves the first value untouched.
user.setdefault("email", "ivan@example.com")
print(user["email"])  # 'ivan@example.com'
user.setdefault("email", "other@example.com")  # won't overwrite
print(user["email"])  # 'ivan@example.com'

Модификация и удаление

user = {"name": "Иван", "age": 25}

# Add or update
user["email"] = "ivan@example.com"   # Add new key
user["age"] = 26                     # Update existing

# update() - merge/update from another dict or kwargs
user.update({"city": "Москва", "age": 27})
user.update(phone="+7999123456")
# Key order is now: name, age, email, city, phone
# (updating an existing key keeps its original position)

# Remove
age = user.pop("age")                # Remove and return value
print(age)                           # 27
missing = user.pop("salary", 0)      # Default if key missing
print(missing)                       # 0

last_item = user.popitem()           # Remove and return last item (LIFO)
print(last_item)                     # ('phone', '+7999123456')

del user["email"]                    # Delete key (KeyError if missing)
user.clear()                         # Remove all items

Итерация по словарю

config = {"host": "localhost", "port": 8080, "debug": True}

# Keys (default iteration) - iterating a dict directly yields its keys
for key in config:
    print(key)          # host, port, debug

# Values
for value in config.values():
    print(value)        # localhost, 8080, True

# Key-value pairs - each item is a (key, value) tuple, unpacked in the loop header
for key, value in config.items():
    print(f"{key}: {value}")

# Dictionary preserves insertion order (guaranteed since Python 3.7)
# Keys are deliberately inserted out of alphabetical order to show that
# the dict keeps insertion order rather than sorting.
d = {}
d["c"] = 3
d["a"] = 1
d["b"] = 2
print(list(d.keys()))  # ['c', 'a', 'b'] (insertion order); list(d) is equivalent

Dict Comprehension

# Basic dict comprehension: {key_expr: value_expr for item in iterable}
squares = {x: x**2 for x in range(6)}
print(squares)  # {0: 0, 1: 1, 2: 4, 3: 9, 4: 16, 5: 25}

# With filtering - the trailing `if` keeps only matching items
even_squares = {x: x**2 for x in range(10) if x % 2 == 0}
print(even_squares)  # {0: 0, 2: 4, 4: 16, 6: 36, 8: 64}

# Swap keys and values
# Works cleanly here because the values are unique and hashable; with
# duplicate values the later key would overwrite the earlier one.
original = {"a": 1, "b": 2, "c": 3}
swapped = {v: k for k, v in original.items()}
print(swapped)  # {1: 'a', 2: 'b', 3: 'c'}

# From two lists
# Not a comprehension: dict(zip(...)) pairs items positionally and is the
# usual shortcut; zip stops at the shorter of the two inputs.
keys = ["name", "age", "city"]
values = ["Иван", 25, "Москва"]
user = dict(zip(keys, values))
print(user)  # {'name': 'Иван', 'age': 25, 'city': 'Москва'}

# Transform values - keys are kept, each value is recomputed
# (multiplying by a float turns the int prices into floats)
prices = {"apple": 100, "banana": 80, "cherry": 200}
discounted = {item: price * 0.8 for item, price in prices.items()}
print(discounted)  # {'apple': 80.0, 'banana': 64.0, 'cherry': 160.0}

collections: defaultdict, Counter, OrderedDict

from collections import defaultdict, Counter, OrderedDict

# defaultdict - auto-creates missing values
# The factory (int) is called with no arguments on first access to a
# missing key, so every new word starts at 0.
word_count = defaultdict(int)
for word in "the cat sat on the mat the cat".split():
    word_count[word] += 1
print(dict(word_count))  # {'the': 3, 'cat': 2, 'sat': 1, 'on': 1, 'mat': 1}

# defaultdict with list - each new key starts with its own empty list
groups = defaultdict(list)
students = [("А", "Иван"), ("Б", "Мария"), ("А", "Пётр"), ("Б", "Анна")]
for group, name in students:
    groups[group].append(name)
print(dict(groups))  # {'А': ['Иван', 'Пётр'], 'Б': ['Мария', 'Анна']}

# Counter - counting elements
text = "абракадабра"
counter = Counter(text)
print(counter)                     # Counter({'а': 5, 'б': 2, 'р': 2, 'к': 1, 'д': 1})
print(counter.most_common(3))      # [('а', 5), ('б', 2), ('р', 2)]
print(counter['а'])                # 5
print(counter['я'])                # 0 (missing keys return 0)

# Counter arithmetic
# `+` and `-` drop results that are zero or negative, which is why 'b'
# (1 - 2 = -1) disappears from the difference.
c1 = Counter(a=3, b=1)
c2 = Counter(a=1, b=2)
print(c1 + c2)   # Counter({'a': 4, 'b': 3})
print(c1 - c2)   # Counter({'a': 2})
print(c1 & c2)   # Counter({'a': 1, 'b': 1}) (min)
print(c1 | c2)   # Counter({'a': 3, 'b': 2}) (max)

# OrderedDict - a dict with extra methods for reordering entries
# A plain dict also keeps insertion order, but only OrderedDict offers
# move_to_end() and popitem(last=False).
od = OrderedDict(a=1, b=2, c=3)
od.move_to_end("a")                # Move key to the end
print(list(od))                    # ['b', 'c', 'a']
od.move_to_end("c", last=False)    # Move key to the front
print(list(od))                    # ['c', 'b', 'a']
print(od.popitem(last=False))      # ('c', 3) - remove from the front (FIFO)

Множества (set)

# Creating sets
empty = set()           # NOT {} (that's an empty dict!)
numbers = {1, 2, 3, 4, 5}
from_list = set([1, 2, 2, 3, 3])  # {1, 2, 3} (duplicates removed)
from_string = set("hello")        # {'h', 'e', 'l', 'o'} (display order may differ)

# Sets are unordered and contain only unique hashable elements
# The sorted-looking result below is what CPython typically prints for
# small ints; it is not a guarantee, so never rely on set ordering.
print({3, 1, 4, 1, 5, 9, 2, 6})  # {1, 2, 3, 4, 5, 6, 9}

# Adding and removing
s = {1, 2, 3}
s.add(4)              # {1, 2, 3, 4}
s.update([5, 6])      # {1, 2, 3, 4, 5, 6} - adds every item of the iterable
s.remove(6)           # Remove (KeyError if missing)
s.discard(99)         # Remove (no error if missing)
item = s.pop()        # Remove and return arbitrary element (KeyError if the set is empty)
s.clear()             # Remove all

Операции над множествами

a = {1, 2, 3, 4, 5}
b = {4, 5, 6, 7, 8}

# Every operation below returns a new set; `a` and `b` are not modified.
# The operator forms need a set on both sides, while the method forms
# accept any iterable as the argument.

# Union - elements present in either set
print(a | b)              # {1, 2, 3, 4, 5, 6, 7, 8}
print(a.union(b))         # same

# Intersection - elements present in both sets
print(a & b)              # {4, 5}
print(a.intersection(b))  # same

# Difference - elements of `a` that are not in `b`
print(a - b)              # {1, 2, 3}
print(a.difference(b))    # same

# Symmetric difference - elements present in exactly one of the sets
print(a ^ b)                        # {1, 2, 3, 6, 7, 8}
print(a.symmetric_difference(b))    # same

# Subset and superset
# `<=` / `>=` also hold for equal sets; use `<` / `>` for a proper subset/superset.
print({1, 2} <= {1, 2, 3})    # True (subset)
print({1, 2, 3} >= {1, 2})    # True (superset)
print({1, 2}.issubset({1, 2, 3}))  # True
print({1, 4}.isdisjoint({2, 3}))   # True (no common elements)

frozenset

# frozenset - immutable set (can be used as dict key or set member)
fs = frozenset([1, 2, 3])
# fs.add(4)  # AttributeError!

# Useful as dict keys
permissions = {
    frozenset(["read"]): "Гость",
    frozenset(["read", "write"]): "Пользователь",
    frozenset(["read", "write", "admin"]): "Администратор",
}

user_perms = frozenset(["read", "write"])
print(permissions[user_perms])  # Пользователь

# Set of sets (only with frozenset)
groups = {frozenset({1, 2}), frozenset({3, 4})}

Практические применения

Удаление дубликатов с сохранением порядка

def unique_ordered(items: list) -> list:
    """Return the distinct elements of ``items`` in first-occurrence order.

    Elements must be hashable, because a set tracks what was already seen.
    The input list is not modified.
    """
    already_seen = set()
    kept = []
    for element in items:
        # Guard clause: skip anything encountered earlier.
        if element in already_seen:
            continue
        already_seen.add(element)
        kept.append(element)
    return kept

data = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3]
print(unique_ordered(data))  # [3, 1, 4, 5, 9, 2, 6]

# Python 3.7+ dict preserves order (shorter but less clear)
# dict.fromkeys() keeps the first occurrence of each key, so listing the
# keys gives the same result as unique_ordered().
print(list(dict.fromkeys(data)))  # [3, 1, 4, 5, 9, 2, 6]

Группировка данных

from collections import defaultdict

# Group by first letter
# defaultdict(list) supplies an empty list the first time a letter is
# seen, so no "is the key present?" check is needed before append().
words = ["apple", "banana", "avocado", "blueberry", "cherry", "apricot"]
by_letter = defaultdict(list)
for word in words:
    by_letter[word[0]].append(word)  # word[0] raises IndexError on an empty string
print(dict(by_letter))
# {'a': ['apple', 'avocado', 'apricot'], 'b': ['banana', 'blueberry'], 'c': ['cherry']}

# Invert a many-to-many mapping: language -> tags becomes tag -> languages
tags = {
    "Python": ["programming", "scripting"],
    "JavaScript": ["programming", "web"],
    "HTML": ["web", "markup"],
}

by_tag = defaultdict(list)
for lang, lang_tags in tags.items():
    for tag in lang_tags:
        by_tag[tag].append(lang)
print(dict(by_tag))
# {'programming': ['Python', 'JavaScript'], 'scripting': ['Python'],
#  'web': ['JavaScript', 'HTML'], 'markup': ['HTML']}

Подсчёт и анализ

from collections import Counter

# Find common elements between two lists
list1 = [1, 2, 3, 4, 5, 5, 6]
list2 = [4, 5, 5, 6, 7, 8, 9]

# Converting to sets drops duplicates, so the repeated 5 appears once.
common = set(list1) & set(list2)
print(common)  # {4, 5, 6}

# Most common words in text
text = """
Python это мощный язык программирования Python используется
для веб разработки Python также популярен в науке о данных
"""
# lower() makes the count case-insensitive; split() breaks on whitespace
# only, so any punctuation would stay attached to the words.
words = text.lower().split()
word_freq = Counter(words)
# Words with equal counts are listed in the order they were first seen.
print(word_freq.most_common(5))
# [('python', 3), ('это', 1), ('мощный', 1), ('язык', 1), ('программирования', 1)]

Итоги

  • dict -- доступ по ключу в среднем за O(1), сохраняет порядок вставки (Python 3.7+)
  • .get() безопаснее [] для необязательных ключей
  • Оператор | (Python 3.9+) для объединения словарей
  • defaultdict -- автоматическое создание значений для новых ключей
  • Counter -- подсчёт элементов с арифметикой над счётчиками
  • set -- уникальные элементы, проверка принадлежности в среднем за O(1)
  • Операции множеств: | объединение, & пересечение, - разность, ^ симметрическая разность
  • frozenset -- неизменяемое множество, можно использовать как ключ словаря

Проверь себя

🧪

Чем dict.get(key) отличается от dict[key]?

🧪

Как объединить два словаря в Python 3.9+?

🧪

Какой результат операции {1,2,3,4} & {3,4,5,6}?

🧪

Как создать пустое множество в Python?