第9课:常用内置模块进阶
课程目标
- 深入学习Python常用内置模块
- 掌握collections、itertools、functools等模块
- 学习json、csv、pickle等数据处理模块
1. collections模块
1.1 Counter类
from collections import Counter

text = "hello world python programming"

# Tally how often each character (spaces included) occurs in the sentence.
char_count = Counter()
char_count.update(text)
print("字符统计:", char_count)

# Tally how often each whitespace-separated word occurs.
words = text.split()
word_count = Counter(words)
print("单词统计:", word_count)

# Report the most frequent elements.
top_three = char_count.most_common(3)
print("最常见的3个字符:", top_three)
1.2 defaultdict类
from collections import defaultdict

# A defaultdict builds the default value (an empty list here) the first
# time a missing key is accessed, so no explicit initialisation is needed.
d = defaultdict(list)
d['a'].extend([1, 2])
d['b'].append(3)
print("defaultdict:", dict(d))

# Group words by their first letter.
words = ['apple', 'banana', 'apricot', 'blueberry']
letter_words = defaultdict(list)
for word in words:
    bucket = letter_words[word[0]]
    bucket.append(word)
print("按首字母分组:", dict(letter_words))
1.3 namedtuple类
from collections import namedtuple

# A namedtuple is a tuple whose fields can also be read by attribute name.
Point = namedtuple('Point', ['x', 'y'])
p1 = Point(3, 4)
p2 = Point(6, 8)

print(f"点1: ({p1.x}, {p1.y})")
print(f"点2: ({p2.x}, {p2.y})")

# Build a list of student records.
Student = namedtuple('Student', ['name', 'age', 'grade'])
students = [
    Student('张三', 20, 85),
    Student('李四', 19, 92),
    Student('王五', 21, 78),
]

for student in students:
    print(f"{student.name}: {student.age}岁,成绩{student.grade}")
2. itertools模块
2.1 无限迭代器
import itertools

# Endless counter: starts at 1 and advances by 2, so it yields odd numbers.
counter = itertools.count(1, 2)
print("前5个奇数:")
# islice() takes a finite prefix of the infinite iterator, which replaces
# a manual enumerate()/break loop.
first_odds = list(itertools.islice(counter, 5))
for num in first_odds:
    print(num, end=" ")
print()

# Endless cycle: repeats the given items in order forever.
cycle_items = itertools.cycle(['A', 'B', 'C'])
print("循环前10次:")
first_cycle = list(itertools.islice(cycle_items, 10))
for item in first_cycle:
    print(item, end=" ")
print()
2.2 组合迭代器
import itertools

items = ['a', 'b', 'c']

# Permutations: ordered selections of 2 elements.
permutations = [*itertools.permutations(items, 2)]
print("2个元素的排列:", permutations)

# Combinations: unordered selections of 2 elements.
combinations = [*itertools.combinations(items, 2)]
print("2个元素的组合:", combinations)

# Cartesian product: every (colour, size) pairing.
colors = ['红', '蓝']
sizes = ['S', 'M', 'L']
products = [*itertools.product(colors, sizes)]
print("颜色和尺寸的笛卡尔积:", products)
3. functools模块
3.1 partial函数
from functools import partial


def greet(greeting, name):
    """Return the text "<greeting>, <name>!" built from the two arguments."""
    return f"{greeting}, {name}!"


# partial() pre-fills the first positional argument (greeting), leaving a
# callable that only needs the name.
hello = partial(greet, "Hello")
goodbye = partial(greet, "Goodbye")

print(hello("张三"))
print(goodbye("李四"))
3.2 reduce函数
from functools import reduce

# Sum of the list elements.
numbers = [1, 2, 3, 4, 5]
total = reduce(lambda x, y: x + y, numbers)
print(f"总和: {total}")

# Largest element of the list.
max_num = reduce(lambda x, y: x if x > y else y, numbers)
print(f"最大值: {max_num}")


def factorial(n):
    """Return n! computed with reduce().

    Args:
        n: A non-negative integer.

    Returns:
        The product 1 * 2 * ... * n; 1 when n is 0.

    Raises:
        ValueError: If n is negative.
    """
    if n < 0:
        raise ValueError("factorial() is not defined for negative values")
    # The initial value 1 makes the empty product (n == 0) well defined;
    # without it reduce() raises TypeError on an empty range.
    return reduce(lambda x, y: x * y, range(1, n + 1), 1)


print(f"5的阶乘: {factorial(5)}")
4. json模块
4.1 基本操作
import json

# Serialise a Python object to a JSON string.
data = {
    'name': '张三',
    'age': 25,
    'city': '北京',
    'skills': ['Python', 'Java', 'SQL'],
    'married': False,
    'salary': None,
}
# ensure_ascii=False keeps the Chinese text readable instead of \u escapes.
json_string = json.dumps(data, ensure_ascii=False, indent=2)
print("JSON字符串:")
print(json_string)

# Parse the JSON string back into Python objects.
parsed_data = json.loads(json_string)
print("\n解析后的数据:")
print(f"姓名: {parsed_data['name']}")
print(f"技能: {', '.join(parsed_data['skills'])}")

# Write the same object to a JSON file.
with open('data.json', 'w', encoding='utf-8') as f:
    json.dump(data, f, ensure_ascii=False, indent=2)
5. csv模块
5.1 读写CSV文件
import csv

# Write a CSV file: the first row is the header, the rest are records.
students = [
    ['姓名', '年龄', '成绩', '城市'],
    ['张三', 20, 85, '北京'],
    ['李四', 19, 92, '上海'],
    ['王五', 21, 78, '广州'],
]
# newline='' lets the csv module control line endings itself.
with open('students.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerows(students)
print("CSV文件写入完成")

# Read the CSV file back; every cell comes back as a string.
# newline='' is also needed when reading so that newlines embedded in
# quoted fields are interpreted correctly.
with open('students.csv', 'r', newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)
6. 练习项目
项目:数据分析工具
from collections import Counter, defaultdict
import json
import csv
import math


class DataAnalyzer:
    """Load tabular records from a CSV file and compute simple statistics."""

    def __init__(self):
        # One dict per row, keyed by column name; filled by load_from_csv().
        self.data = []

    @staticmethod
    def _to_float(raw):
        """Return raw as a finite float, or None when it is not a usable number."""
        try:
            number = float(raw)
        except (TypeError, ValueError):
            return None
        # NaN and infinities would poison sum/min/max, so treat them as
        # non-numeric.
        return number if math.isfinite(number) else None

    def load_from_csv(self, filename):
        """Load rows from a CSV file into self.data.

        Args:
            filename: Path of a UTF-8 CSV file whose first row is the header.
        """
        # newline='' lets the csv module handle line endings inside fields.
        with open(filename, 'r', newline='', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            self.data = list(reader)
        print(f"从{filename}加载了{len(self.data)}条数据")

    def analyze_numeric_field(self, field_name):
        """Compute count, sum, average, min and max of a numeric column.

        Values that cannot be parsed as a finite number (empty strings,
        text, malformed numbers such as "1.2.3") are skipped; negative
        numbers are accepted.

        Args:
            field_name: Column name to analyse.

        Returns:
            A dict with keys 'count', 'sum', 'average', 'min' and 'max',
            or None when the column holds no numeric value.
        """
        values = []
        for item in self.data:
            number = self._to_float(item[field_name])
            if number is not None:
                values.append(number)
        if not values:
            return None
        total = sum(values)
        analysis = {
            'count': len(values),
            'sum': total,
            'average': total / len(values),
            'min': min(values),
            'max': max(values),
        }
        return analysis

    def analyze_categorical_field(self, field_name):
        """Summarise the distinct values of a categorical column.

        Args:
            field_name: Column name to analyse; falsy values are ignored.

        Returns:
            A dict with 'unique_count', 'most_common' (up to 5
            (value, count) pairs) and 'total_count'.
        """
        values = [item[field_name] for item in self.data if item[field_name]]
        counter = Counter(values)
        analysis = {
            'unique_count': len(counter),
            'most_common': counter.most_common(5),
            'total_count': len(values),
        }
        return analysis

    def group_by_field(self, group_field, value_field):
        """Group rows by one column and compute statistics of another.

        Rows whose group or value is falsy, or whose value is not a finite
        number, are skipped.

        Args:
            group_field: Column whose values define the groups.
            value_field: Numeric column to aggregate within each group.

        Returns:
            A dict mapping each group to a dict with 'count', 'average',
            'min' and 'max'.
        """
        grouped_data = defaultdict(list)
        for item in self.data:
            if item[group_field] and item[value_field]:
                value = self._to_float(item[value_field])
                if value is not None:
                    grouped_data[item[group_field]].append(value)

        # Compute the statistics of every group.
        group_stats = {}
        for group, values in grouped_data.items():
            group_stats[group] = {
                'count': len(values),
                'average': sum(values) / len(values),
                'min': min(values),
                'max': max(values),
            }
        return group_stats


def main():
    """Write a sample CSV file, load it and print several analyses."""
    print("=== 数据分析工具 ===")
    analyzer = DataAnalyzer()

    # Build the sample records.
    sample_data = [
        {'姓名': '张三', '年龄': 25, '工资': 8000, '部门': '技术部', '城市': '北京'},
        {'姓名': '李四', '年龄': 30, '工资': 12000, '部门': '技术部', '城市': '上海'},
        {'姓名': '王五', '年龄': 28, '工资': 10000, '部门': '销售部', '城市': '广州'},
        {'姓名': '赵六', '年龄': 35, '工资': 15000, '部门': '管理部', '城市': '北京'},
    ]

    # Save the sample records to CSV.
    with open('sample_data.csv', 'w', newline='', encoding='utf-8') as f:
        fieldnames = ['姓名', '年龄', '工资', '部门', '城市']
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(sample_data)
    print("示例数据已保存到sample_data.csv")

    # Load the data back.
    analyzer.load_from_csv('sample_data.csv')

    # Numeric columns.
    print("\n=== 年龄分析 ===")
    age_analysis = analyzer.analyze_numeric_field('年龄')
    if age_analysis:
        print(f"年龄统计: {age_analysis}")

    print("\n=== 工资分析 ===")
    salary_analysis = analyzer.analyze_numeric_field('工资')
    if salary_analysis:
        print(f"工资统计: {salary_analysis}")

    # Categorical column.
    print("\n=== 部门分析 ===")
    dept_analysis = analyzer.analyze_categorical_field('部门')
    if dept_analysis:
        print(f"部门统计: {dept_analysis}")

    # Grouped analysis.
    print("\n=== 按部门分组分析工资 ===")
    dept_salary = analyzer.group_by_field('部门', '工资')
    for dept, stats in dept_salary.items():
        print(f"{dept}: {stats}")


if __name__ == "__main__":
    main()
7. 总结
本节课我们学习了:
- collections模块的高级数据结构
- itertools模块的迭代器工具
- functools模块的函数工具
- json、csv等数据处理模块
- 编写了数据分析工具
8. 下节课预告
下节课我们将学习:
- 第三方库的安装和使用
- 项目实战练习
- Python编程最佳实践