Matplotlib 数据可视化

matplotlib 数据可视化

使用 Python 的数学绘图库工具 matplotlib 生成图表。

安装 matplotlib
1
# Install the matplotlib package using pip3.
pip3 install matplotlib
绘制简单的折线图
1
2
3
4
5
6
from matplotlib import pyplot


# Draw the first five perfect squares as a plain line chart.
squares = [n ** 2 for n in range(1, 6)]
pyplot.plot(squares)
pyplot.show()

pyplot

修改标签文字和线条粗细
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
from matplotlib import pyplot


# Line chart of the first five squares, drawn with a thicker line.
squares = [n ** 2 for n in range(1, 6)]
pyplot.plot(squares, linewidth=3)

# Chart title and axis labels; both axis labels share one font size.
axis_label_style = {'fontsize': 10}
pyplot.title('Square Numbers', fontsize=17)
pyplot.xlabel('Value', **axis_label_style)
pyplot.ylabel('Square of Value', **axis_label_style)

# Font size of the tick labels on both axes.
pyplot.tick_params(axis='both', labelsize=13)

pyplot.show()

pyplot-2

校正图形

向 plot() 只提供一个数字序列时,它会假设第一个数据点对应的 x 值为 0,但我们的第一个点对应的 x 值应该是 1。为了纠正这一点,需要同时向 plot() 提供输入值和输出值。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
from matplotlib import pyplot


# Supply explicit x values so the first square is plotted at x=1, not x=0.
input_values = list(range(1, 6))
squares = [value ** 2 for value in input_values]
pyplot.plot(input_values, squares, linewidth=3)

# Chart title and axis labels; both axis labels share one font size.
axis_label_style = {'fontsize': 10}
pyplot.title('Square Numbers', fontsize=17)
pyplot.xlabel('Value', **axis_label_style)
pyplot.ylabel('Square of Value', **axis_label_style)

# Font size of the tick labels on both axes.
pyplot.tick_params(axis='both', labelsize=13)

pyplot.show()

pyplot-3

使用 scatter() 绘制散点图并设置其样式
1
2
3
4
5
6
7
8
9
10
11
12
13
14
from matplotlib import pyplot


# Plot a single point at (2, 4) with marker size 30.
point_x, point_y = 2, 4
pyplot.scatter(point_x, point_y, s=30)

# Chart title and axis labels; both axis labels share one font size.
axis_label_style = {'fontsize': 10}
pyplot.title('Square Numbers', fontsize=17)
pyplot.xlabel('Value', **axis_label_style)
pyplot.ylabel('Square of Value', **axis_label_style)

# Font size of the major tick labels on both axes.
pyplot.tick_params(axis='both', which='major', labelsize=13)

pyplot.show()
绘制一系列点
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
from matplotlib import pyplot


# Scatter plot of the first five integers against their squares.
x_values = list(range(1, 6))
y_values = [x ** 2 for x in x_values]
pyplot.scatter(x_values, y_values, s=30)

# Chart title and axis labels; both axis labels share one font size.
axis_label_style = {'fontsize': 10}
pyplot.title('Square Numbers', fontsize=17)
pyplot.xlabel('Value', **axis_label_style)
pyplot.ylabel('Square of Value', **axis_label_style)

# Font size of the major tick labels on both axes.
pyplot.tick_params(axis='both', which='major', labelsize=13)

pyplot.show()

scatter

自动计算数据
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
from matplotlib import pyplot


# Compute 1000 points instead of typing them in: x = 1..1000, y = x squared.
x_values = [*range(1, 1001)]
y_values = [x * x for x in x_values]
pyplot.scatter(x_values, y_values)

# Chart title and axis labels; both axis labels share one font size.
axis_label_style = {'fontsize': 10}
pyplot.title('Square Numbers', fontsize=17)
pyplot.xlabel('Value', **axis_label_style)
pyplot.ylabel('Square of Value', **axis_label_style)

# Font size of the major tick labels on both axes.
pyplot.tick_params(axis='both', which='major', labelsize=13)

# Axis limits, in the order [x_min, x_max, y_min, y_max].
x_min, x_max = 0, 1100
y_min, y_max = 0, 1100000
pyplot.axis([x_min, x_max, y_min, y_max])

pyplot.show()

自动计算

删除数据点轮廓
1
# Draw small markers (size 7) without an outline around each point.
marker_style = {'edgecolors': 'none', 's': 7}
pyplot.scatter(x_values, y_values, **marker_style)
自定义颜色

向 scatter() 传递参数 c,并将其设置为要使用的颜色名称。

1
# Colour every point with the named colour 'red'; markers have no outline.
marker_style = {'edgecolors': 'none', 's': 7}
pyplot.scatter(x_values, y_values, c='red', **marker_style)

也可以使用 RGBA 元组(红、绿、蓝和透明度四个分量,取值均在 0~1 之间)来指定颜色:

1
# Colour every point with one RGBA tuple instead of a colour name.
point_color = (0.8, 0.2, 0.2, 0.4)
pyplot.scatter(x_values, y_values, c=point_color, edgecolors='none', s=7)
使用颜色映射

示例通过 y 的值设置颜色:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
from matplotlib import pyplot


# 1000 points (x = 1..1000, y = x squared), coloured by their y value
# through the Blues colormap.
x_values = [*range(1, 1001)]
y_values = [x * x for x in x_values]
pyplot.scatter(
    x_values,
    y_values,
    c=y_values,
    cmap=pyplot.cm.Blues,
    edgecolors='none',
    s=7,
)

# Chart title and axis labels; both axis labels share one font size.
axis_label_style = {'fontsize': 10}
pyplot.title('Square Numbers', fontsize=17)
pyplot.xlabel('Value', **axis_label_style)
pyplot.ylabel('Square of Value', **axis_label_style)

# Font size of the major tick labels on both axes.
pyplot.tick_params(axis='both', which='major', labelsize=13)

# Axis limits, in the order [x_min, x_max, y_min, y_max].
x_min, x_max = 0, 1100
y_min, y_max = 0, 1100000
pyplot.axis([x_min, x_max, y_min, y_max])

pyplot.show()

color

自动保存图表

要让程序自动将图表保存到文件中,可以将 pyplot.show() 替换为 pyplot.savefig()

1
# Save the current figure to a PNG file using a tight bounding box.
output_file = 'squares_plot.png'
pyplot.savefig(output_file, bbox_inches='tight')
随机漫步

随机漫步每次行走都完全是随机的,没有明确的方向,结果是由一系列随机决策决定的。

创建 RandomWalk()

为模拟随机漫步,我们将创建一个 RandomWalk 类,它随机地选择前进方向。这个类需要三个属性:一个是存储随机漫步次数的变量,另外两个是列表,分别存储随机漫步经过的每个点的 x 坐标和 y 坐标。

1
2
3
4
5
6
7
8
9
from random import choice

class RandomWalk():
    """Hold the data for a random walk: its length and its visited points."""

    def __init__(self, num_points=5000):
        """Record the walk length and start the walk at the origin.

        Args:
            num_points: Number of points the walk is meant to contain.
        """
        self.num_points = num_points

        # Every walk starts at (0, 0).
        self.x_values = [0]
        self.y_values = [0]
选择方向
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from random import choice

class RandomWalk():
    """Generate the points of a two-dimensional random walk."""

    def __init__(self, num_points=5000):
        """Record the walk length and start the walk at the origin.

        Args:
            num_points: Number of points the finished walk contains,
                including the starting point.
        """
        self.num_points = num_points

        # Every walk starts at (0, 0).
        self.x_values = [0]
        self.y_values = [0]

    @staticmethod
    def _random_step():
        """Return a signed step: a random direction times a distance of 0-4."""
        direction = choice([1, -1])
        distance = choice([0, 1, 2, 3, 4])
        return direction * distance

    def fill_walk(self):
        """Append random points until the walk holds ``num_points`` points.

        Each new point is the previous point moved by an independent random
        step along x and along y.  A step that would leave the walk where it
        already is gets discarded and drawn again.
        """
        while len(self.x_values) < self.num_points:
            x_step = self._random_step()
            y_step = self._random_step()

            # Reject moves that go nowhere.
            if x_step == 0 and y_step == 0:
                continue

            # The next point is the last point shifted by the step.
            self.x_values.append(self.x_values[-1] + x_step)
            self.y_values.append(self.y_values[-1] + y_step)

rw_visual

1
2
3
4
5
6
7
8
from matplotlib import pyplot
from random_walk import RandomWalk


# Generate one random walk and show its points as a scatter plot.
walk = RandomWalk()
walk.fill_walk()
pyplot.scatter(walk.x_values, walk.y_values, s=7)
pyplot.show()

rw_visual

模拟多次
1
2
3
4
5
6
7
8
9
10
11
12
13
from matplotlib import pyplot
from random_walk import RandomWalk


# Keep generating and displaying walks until the user answers 'n'.
while True:
    # Build one random walk and plot its points.
    rw = RandomWalk()
    rw.fill_walk()
    pyplot.scatter(rw.x_values, rw.y_values, s=7)
    pyplot.show()

    keep_running = input("Make another walk? (y/n): ")
    if keep_running == 'n':
        break
给点着色
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# from matplotlib import pyplot
import matplotlib.pyplot as plt
from random_walk import RandomWalk


# Keep generating and displaying walks until the user answers 'n'.
while True:
    rw = RandomWalk()
    rw.fill_walk()

    # Colour each point by its position in the walk, so the colormap
    # shows the order in which the points were visited.
    point_numbers = list(range(rw.num_points))
    plt.scatter(
        rw.x_values,
        rw.y_values,
        c=point_numbers,
        cmap=plt.cm.Reds,
        edgecolors='none',
        s=7,
    )

    plt.show()

    keep_running = input("Make another walk? (y/n): ")
    if keep_running == 'n':
        break

着色

重新绘制起点和终点
1
2
# Redraw the start (origin, green) and the last point of the walk (red)
# with larger markers so they stand out.
endpoint_style = {'edgecolors': 'none', 's': 30}
plt.scatter(0, 0, c='green', **endpoint_style)
plt.scatter(rw.x_values[-1], rw.y_values[-1], c='red', **endpoint_style)

起点、终点

隐藏坐标轴
1
2
# Hide both axes of the plot that has already been drawn.
# plt.gca() returns the current Axes; plt.axes() with no arguments adds a
# new Axes on current Matplotlib releases, which would cover the plot.
current_axes = plt.gca()
current_axes.get_xaxis().set_visible(False)
current_axes.get_yaxis().set_visible(False)

隐藏

增加点数

# Create a walk with 50000 points instead of the default.
rw = RandomWalk(num_points=50000)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# from matplotlib import pyplot
import matplotlib.pyplot as plt
from random_walk import RandomWalk


# Keep generating and displaying walks until the user answers 'n'.
while True:
    rw = RandomWalk(50000)
    rw.fill_walk()

    # Colour each point by its position in the walk.
    point_numbers = list(range(rw.num_points))
    plt.scatter(
        rw.x_values,
        rw.y_values,
        c=point_numbers,
        cmap=plt.cm.Blues,
        edgecolors='none',
        s=7,
    )

    # Emphasize the start (green) and the end (red) of the walk.
    plt.scatter(0, 0, c='green', edgecolors='none', s=30)
    plt.scatter(rw.x_values[-1], rw.y_values[-1], c='red', edgecolors='none', s=30)

    # Hide both axes.  plt.gca() returns the Axes that holds the scatter
    # plot; plt.axes() with no arguments adds a new Axes on current
    # Matplotlib releases, which would cover the plot.
    current_axes = plt.gca()
    current_axes.get_xaxis().set_visible(False)
    current_axes.get_yaxis().set_visible(False)

    plt.show()

    keep_running = input("Make another walk? (y/n): ")
    if keep_running == 'n':
        break

增加点数

调整尺寸

# Create the figure with an explicit size; figsize is (width, height) in inches.
figure_size = (10, 6)
plt.figure(figsize=figure_size)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# from matplotlib import pyplot
import matplotlib.pyplot as plt
from random_walk import RandomWalk


# Keep generating and displaying walks until the user answers 'n'.
while True:
    rw = RandomWalk(50000)
    rw.fill_walk()

    # Create the figure with an explicit size before anything is drawn.
    plt.figure(figsize=(10, 6))

    # Colour each point by its position in the walk.
    point_numbers = list(range(rw.num_points))
    plt.scatter(
        rw.x_values,
        rw.y_values,
        c=point_numbers,
        cmap=plt.cm.Blues,
        edgecolors='none',
        s=7,
    )

    # Emphasize the start (green) and the end (red) of the walk.
    plt.scatter(0, 0, c='green', edgecolors='none', s=30)
    plt.scatter(rw.x_values[-1], rw.y_values[-1], c='red', edgecolors='none', s=30)

    # Hide both axes.  plt.gca() returns the Axes that holds the scatter
    # plot; plt.axes() with no arguments adds a new Axes on current
    # Matplotlib releases, which would cover the plot.
    current_axes = plt.gca()
    current_axes.get_xaxis().set_visible(False)
    current_axes.get_yaxis().set_visible(False)

    plt.show()

    keep_running = input("Make another walk? (y/n): ")
    if keep_running == 'n':
        break

调整尺寸

使用 Pygal 模拟掷骰子

Pygal 可以生成可缩放的矢量图形文件。

安装
1
# Install the pygal package using pip3.
pip3 install pygal
画廊

查看 pygal 文档:http://pygal.org/

创建 Die
1
2
3
4
5
6
7
8
9
from random import randint


class Die():
    """A single die with a configurable number of sides."""

    def __init__(self, num_sides=6):
        """Create a die.

        Args:
            num_sides: Number of faces on the die; defaults to six.
        """
        self.num_sides = num_sides

    def roll(self):
        """Return a random integer from 1 to ``num_sides``, both inclusive."""
        return randint(1, self.num_sides)
掷骰子

掷一个 6 面的骰子,将结果打印出来:

1
2
3
4
5
6
7
8
9
10
11
from die import Die


die = Die()

# Roll the die 100 times and keep every outcome.
results = [die.roll() for _ in range(100)]

print(results)
分析结果

计算每个点出现的次数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
from die import Die


die = Die()

# Roll the die 1000 times and keep every outcome.
results = [die.roll() for _ in range(1000)]

# Count how often each face (1 .. num_sides) came up.
frequencies = [results.count(value) for value in range(1, die.num_sides + 1)]

print(frequencies)
绘制直方图
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from die import Die
import pygal


die = Die()

# Roll the die 1000 times and keep every outcome.
results = [die.roll() for _ in range(1000)]

# Count how often each face (1 .. num_sides) came up.
possible_values = range(1, die.num_sides + 1)
frequencies = [results.count(value) for value in possible_values]

# Visualize the results as a bar chart.
hist = pygal.Bar()

hist.title = "Results of rolling one D6 1000 times."
# Labels are built from the same range as the counts so the two stay aligned.
hist.x_labels = [str(value) for value in possible_values]
hist.x_title = "Result"
hist.y_title = "Frequencies"

hist.add('D6', frequencies)
hist.render_to_file('die_visual.svg')

d6

同时掷两个骰子
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from die import Die
import pygal


die_1 = Die()
die_2 = Die()

# Roll both dice 1000 times and keep the sum of each throw.
results = [die_1.roll() + die_2.roll() for _ in range(1000)]

# The smallest possible total is 2 (both dice show 1), so counting starts
# at 2.  Starting at 1 would add an extra leading count and shift every
# bar one label to the right.
max_result = die_1.num_sides + die_2.num_sides
possible_totals = range(2, max_result + 1)
frequencies = [results.count(total) for total in possible_totals]

# Visualize the results as a bar chart.
hist = pygal.Bar()

hist.title = "Results of rolling two D6 1000 times."
# Labels are built from the same range as the counts so the two stay aligned.
hist.x_labels = [str(total) for total in possible_totals]
hist.x_title = "Result"
hist.y_title = "Frequencies"

hist.add('D6 +D6', frequencies)
hist.render_to_file('die_visual_2d6.svg')

2d6

同时掷两个不同面数的骰子
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from die import Die
import pygal


die_1 = Die()
die_2 = Die(10)

# Roll both dice 5000 times and keep the sum of each throw.
results = [die_1.roll() + die_2.roll() for _ in range(5000)]

# The smallest possible total is 2 (both dice show 1), so counting starts
# at 2.  Starting at 1 would add an extra leading count and shift every
# bar one label to the right.
max_result = die_1.num_sides + die_2.num_sides
possible_totals = range(2, max_result + 1)
frequencies = [results.count(total) for total in possible_totals]

# Visualize the results as a bar chart.
hist = pygal.Bar()

hist.title = "Results of rolling a D6 and D10 5000 times."
# Labels are built from the same range as the counts so the two stay aligned.
hist.x_labels = [str(total) for total in possible_totals]
hist.x_title = "Result"
hist.y_title = "Frequencies"

# The series name matches the dice actually rolled: a D6 and a D10.
hist.add('D6 + D10', frequencies)
hist.render_to_file('die_visual_d6_d10.svg')

d6d10