OpenCV 快速使用

1. 安装opencv

在windows系统中,打开cmd窗口,输入如下代码:

1
pip install opencv-contrib-python -i https://pypi.tuna.tsinghua.edu.cn/simple

使用contrib版本的功能要全面一些,后面的-i及其后是使用清华的源进行下载,会快很多。

检查是否安装成功

1
2
import cv2 as cv
print(cv.__version__) # prints the version string when the install works; '4.9.0' in the author's case
2. 基础绘制功能

cv.line()、cv.circle()、cv.rectangle()、cv.ellipse()、cv.putText()分别用来在图片上绘制直线、圆形、矩形、椭圆、添加文字。它们的参数非常类似,都包含如下几个参数:

  • img: 绘制图形的目标图片
  • color: 绘制什么颜色的图形
  • thickness: 线条的粗细
  • lineType: 线型,可选 cv.LINE_4、cv.LINE_8(默认)、cv.LINE_AA(抗锯齿);画细直线时几乎看不出区别,cv.LINE_AA 在斜线和曲线上边缘更平滑

举个例子

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import numpy as np
import cv2 as cv

# Start from an all-black 512x512 canvas with three 8-bit channels.
img = np.zeros(shape=(512, 512, 3), dtype=np.uint8)

# Blue diagonal line. OpenCV orders colour channels as BGR, so (255, 0, 0) is blue.
cv.line(img, pt1=(0, 0), pt2=(500, 500), color=(255, 0, 0), thickness=2)

# Rectangle given by two opposite corners.
cv.rectangle(img, pt1=(200, 200), pt2=(280, 300), color=(255, 255, 0), thickness=1)

# Circle centred at (300, 100) with radius 100 and a 2-pixel outline.
cv.circle(img, center=(300, 100), radius=100, color=(0, 255, 255), thickness=2)

# Ellipse: centre, axes (half-lengths of the two main axes), rotation angle,
# arc start angle, arc end angle, BGR colour, line thickness.
cv.ellipse(
    img,
    center=(100, 300),
    axes=(50, 30),
    angle=60,
    startAngle=0,
    endAngle=360,
    color=(255, 0, 255),
    thickness=4,
)

# Polyline vertices as an int32 array of shape (4, 1, 2).
pts = np.array(
    [[[10, 5]], [[20, 30]], [[70, 20]], [[50, 10]]],
    dtype=np.int32,
)

# isClosed=False leaves the polyline open; True would join the last vertex
# back to the first and produce a polygon.
cv.polylines(img, pts=[pts], isClosed=False, color=(255, 255, 255))

# Text; `org` is the bottom-left corner of the rendered string.
font = cv.FONT_HERSHEY_COMPLEX
cv.putText(
    img,
    text='hello world',
    org=(40, 460),
    fontFace=font,
    fontScale=1,
    color=(17, 170, 17),
    thickness=2,
)

# Show the result for three seconds (or until a key is pressed), then clean up.
cv.imshow('draw', img)
cv.waitKey(3000)
cv.destroyAllWindows()
3. 用鼠标绘制

鼠标点击左键,绘制圆形。其中使用到了cv.setMouseCallback来为窗口设置鼠标回调函数,回调函数draw_circle接收event以及事件发生时的xy坐标,函数内判断事件类型,进行处理。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import numpy as np
import cv2 as cv

ESC_KEY = 27  # key code compared against cv.waitKey's result to leave the loop


def draw_circle(event, x, y, flags, param):
    """Mouse callback: stamp a filled blue circle where the left button goes down.

    Args:
        event: OpenCV mouse event code; only cv.EVENT_LBUTTONDOWN is handled.
        x, y: Pointer position reported with the event, used as the centre.
        flags: Unused.
        param: Unused.

    Draws directly into the module-level ``img``.
    """
    if event == cv.EVENT_LBUTTONDOWN:
        # Thickness -1 asks OpenCV for a filled shape; radius is fixed at 100.
        cv.circle(img, (x, y), 100, (255, 0, 0), -1)


# Create a black image and a window, then bind the callback to that window.
img = np.zeros((512, 512, 3), np.uint8)
cv.namedWindow('image')
cv.setMouseCallback('image', draw_circle)

# Redraw every 20 ms until Esc is pressed.
while True:
    cv.imshow('image', img)
    # Only the low byte of the key code is compared.
    if cv.waitKey(20) & 0xFF == ESC_KEY:
        break
cv.destroyAllWindows()

更高级的例子,使用m键切换模式,鼠标点击后拖动绘制矩形和圆形。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import numpy as np
import cv2 as cv
import math

drawing = False  # True while the left mouse button is held down
mode = True  # True: draw rectangles; False: draw circles. Press 'm' to toggle.
ix, iy = -1, -1  # anchor point recorded when the button went down


def draw_circle(event, x, y, flags, param):
    """Mouse callback: drag with the left button to paint a rectangle or circle.

    Args:
        event: OpenCV mouse event code.
        x, y: Pointer position reported with the event.
        flags: Unused.
        param: Unused.

    Button-down stores the anchor point in the module-level ``ix, iy`` and
    starts a drag. Every move during the drag paints a filled shape into the
    module-level ``img``: a green rectangle from the anchor to the pointer when
    ``mode`` is True, otherwise a red circle centred on the anchor whose radius
    is the (truncated) distance to the pointer. Button-up ends the drag.
    """
    global ix, iy, drawing

    if event == cv.EVENT_LBUTTONDOWN:
        drawing = True
        ix, iy = x, y

    elif event == cv.EVENT_MOUSEMOVE:
        if not drawing:
            return
        if mode:
            cv.rectangle(img, (ix, iy), (x, y), (0, 255, 0), -1)
        else:
            # sqrt of a sum of squares is never negative, so no abs() is needed.
            radius = int(math.sqrt((x - ix) ** 2 + (y - iy) ** 2))
            cv.circle(img, (ix, iy), radius, (0, 0, 255), -1)

    elif event == cv.EVENT_LBUTTONUP:
        # The shape was already painted during the drag; just stop drawing.
        drawing = False


img = np.zeros((512, 512, 3), np.uint8)
cv.namedWindow('image')
cv.setMouseCallback('image', draw_circle)

while True:
    cv.imshow('image', img)
    k = cv.waitKey(1) & 0xFF
    if k == ord('m'):
        mode = not mode  # switch between rectangle and circle mode
    elif k == 27:  # Esc quits
        break

cv.destroyAllWindows()
4. TrackBar使用

使用cv.createTrackbar()创建控制条,输入参数为:控制条名称、窗口名称、初始值、最大值、回调函数
使用cv.getTrackbarPos()获取控制条当前位置,输入参数:控制条名称、窗口名称
OpenCV里面没有按钮,因此可以创建一个最大值为1的控制条,作为开关使用

举个例子,带有开关的调色板

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import numpy as np
import cv2 as cv

def nothing(x):
    """No-op trackbar callback; the main loop polls the positions instead."""


# Create a black image and a window.
img = np.zeros((300, 512, 3), np.uint8)
cv.namedWindow('image')

# One trackbar per colour channel, each 0-255; 'R' starts at 128.
cv.createTrackbar('R', 'image', 128, 255, nothing)
cv.createTrackbar('G', 'image', 0, 255, nothing)
cv.createTrackbar('B', 'image', 0, 255, nothing)

# A trackbar whose maximum is 1 serves as the ON/OFF switch.
switch = '0 : OFF \n1 : ON'
cv.createTrackbar(switch, 'image', 0, 1, nothing)

while True:
    cv.imshow('image', img)
    k = cv.waitKey(1) & 0xFF
    if k == 27:  # Esc quits
        break

    # Read the current positions of the four trackbars.
    r = cv.getTrackbarPos('R', 'image')
    g = cv.getTrackbarPos('G', 'image')
    b = cv.getTrackbarPos('B', 'image')
    s = cv.getTrackbarPos(switch, 'image')

    if s == 0:
        # Switch off: show black regardless of the colour sliders.
        img[:] = 0
    else:
        # Switch on: fill the image with the selected colour, in BGR order.
        img[:] = [b, g, r]

cv.destroyAllWindows()

更复杂一些的例子,使用控制条改变绘制的颜色和画笔尺寸

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import numpy as np
import cv2 as cv
import math

drawing = False  # True while the left mouse button is held down
mode = True  # True: draw rectangles; False: draw circles. Press 'm' to toggle.
ix, iy = -1, -1  # anchor point recorded when the button went down


def draw_circle(event, x, y, flags, param):
    """Mouse callback: paint with the colour and brush size set on the trackbars.

    Args:
        event: OpenCV mouse event code.
        x, y: Pointer position reported with the event.
        flags: Unused.
        param: Unused.

    The 'r', 'g', 'b' and 'brush_size' trackbars of window 'image' are read on
    every event. Button-down stores the anchor point in the module-level
    ``ix, iy``, starts a drag and, in circle mode, stamps a filled circle of
    radius ``brush_size`` at the pointer. Every move during the drag paints a
    filled shape into the module-level ``img``: a rectangle from the anchor to
    the pointer when ``mode`` is True, otherwise a circle centred on the anchor
    whose radius is the (truncated) distance to the pointer. Button-up ends the
    drag.
    """
    global ix, iy, drawing
    r = cv.getTrackbarPos('r', 'image')
    g = cv.getTrackbarPos('g', 'image')
    b = cv.getTrackbarPos('b', 'image')
    b_size = cv.getTrackbarPos('brush_size', 'image')

    if event == cv.EVENT_LBUTTONDOWN:
        drawing = True
        ix, iy = x, y
        if not mode:
            # brush_size only affects this initial dab.
            cv.circle(img, (x, y), b_size, (b, g, r), -1)

    elif event == cv.EVENT_MOUSEMOVE:
        if not drawing:
            return
        if mode:
            cv.rectangle(img, (ix, iy), (x, y), (b, g, r), -1)
        else:
            # sqrt of a sum of squares is never negative, so no abs() is needed.
            radius = int(math.sqrt((x - ix) ** 2 + (y - iy) ** 2))
            cv.circle(img, (ix, iy), radius, (b, g, r), -1)

    elif event == cv.EVENT_LBUTTONUP:
        # The shape was already painted during the drag; just stop drawing.
        drawing = False


def nothing(x):
    """No-op trackbar callback; positions are polled in draw_circle instead."""


img = np.zeros((512, 512, 3), np.uint8)
cv.namedWindow('image')
cv.setMouseCallback('image', draw_circle)

# Colour channels range 0-255, brush size 0-100; all start at 0.
cv.createTrackbar('r', 'image', 0, 255, nothing)
cv.createTrackbar('g', 'image', 0, 255, nothing)
cv.createTrackbar('b', 'image', 0, 255, nothing)
cv.createTrackbar('brush_size', 'image', 0, 100, nothing)

while True:
    cv.imshow('image', img)
    k = cv.waitKey(1) & 0xFF

    if k == ord('m'):
        mode = not mode  # switch between rectangle and circle mode
    elif k == 27:  # Esc quits
        break

cv.destroyAllWindows()

5. 像素/通道/边框操作
  • 单像素操作可以用array.item()和array.itemset()(注意:array.itemset()已在NumPy 2.0中移除,新版本请直接用下标赋值,例如img[y, x, c] = v)
  • 通道操作直接用numpy切片选择
  • roi不是复制,是view,因此roi的修改会改变原图数据
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import numpy as np
import cv2 as cv


img = cv.imread('bottle.png')

assert img is not None, "file could not be read, checks with os.path.exists()"

# Index one pixel: the result holds that pixel's channel values.
px = img[100,100]
print(px)

# Index only channel 0 (blue, since OpenCV stores BGR) of that pixel.
px_blue = img[100,100,0]
print(px_blue)

# Overwrite the pixel.
img[100,100] = [255,255,255]

# ndarray.item() reads a single element as a Python scalar. Its counterpart
# ndarray.itemset() was removed in NumPy 2.0, so the write uses plain index
# assignment, which works on every NumPy version.
print(img.item(50,50,0))
img[50,50,2] = 255

# Image shape as a tuple (height, width, channels); a missing channel entry
# indicates a single-channel image.
print(img.shape)

# Total number of elements, i.e. the product of the shape entries.
print(img.size)

# Element data type; many errors come from mismatched data types.
print(img.dtype)

# ROI selection: take a region, blank it, and write it into another region.
obj = img[280:340, 330:390]
obj[:,:,:] = 0 # obj is a NumPy view, not a copy, so this also changes img
img[273:333, 100:160] = obj

# Split the colour image into channels and merge them back.
b, g, r = cv.split(img)
img = cv.merge((b,g,r))

# cv.split is costly; NumPy slicing reaches a channel directly.
img[:,:,2] = 255

# Add a border around the image (common before convolution) with
# cv.copyMakeBorder(): source image, top, bottom, left, right widths,
# border type, and a colour value used by the constant type.
# Border types:
# cv.BORDER_CONSTANT     constant colour fill
# cv.BORDER_REFLECT      mirror fill, e.g. fedcba|abcdefgh|hgfedcb
# cv.BORDER_REFLECT_101  mirror fill, e.g. fedcb|abcdefgh|gfedcb
# cv.BORDER_REPLICATE    edge element repeated, e.g. aaaaaa|abcdefgh|hhhhhhh
# cv.BORDER_WRAP         wrap-around fill, e.g. cdefgh|abcdefgh|abcdefg
img = cv.copyMakeBorder(img,5,5,5,5,cv.BORDER_WRAP)


cv.namedWindow('image')
cv.imshow('image',img)

# Keep the window up for two seconds (or until a key is pressed).
cv.waitKey(2000)

cv.destroyAllWindows()
6. 图像相加与混合

cv.add()与numpy的普通相加不同:cv.add()是饱和运算,结果超出数据类型范围时会被设为该类型的最大值;而numpy相加是取模运算。

1
2
3
4
5
6
7
8
import numpy as np
import cv2 as cv

# Two uint8 operands whose true sum (260) does not fit in 8 bits.
x, y = (np.array(value, dtype=np.uint8) for value in (250, 10))

# OpenCV addition saturates: 260 is clipped to 255, printed as [[255]].
saturated = cv.add(x, y)
print(saturated)

# NumPy addition wraps around: 260 % 256 = 4.
wrapped = x + y
print(wrapped)

使用cv.addWeighted()将两个图片混合起来:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import numpy as np
import cv2 as cv

img1 = cv.imread('ml.png')
img2 = cv.imread('opencv.png')
assert img1 is not None, "file could not be read, check with os.path.exists()"
assert img2 is not None, "file could not be read, check with os.path.exists()"

# cv.addWeighted takes: image 1, weight 1, image 2, weight 2, gamma.
# The two weights used here sum to 1; gamma is a scalar added to every pixel.
weight1, weight2, gamma = 0.6, 0.4, 0
dst = cv.addWeighted(img1, weight1, img2, weight2, gamma)

# Show the blend until a key is pressed.
cv.imshow('dst', dst)
cv.waitKey(0)
cv.destroyAllWindows()

按位操作 Bitwise Operations
以下是实现将一个logo扣出来贴到另一个图片的例子

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import cv2 as cv
import numpy as np

# Load the background picture and the logo that will be pasted onto it.
img1 = cv.imread('bottle.png')
img2 = cv.imread('opencv.png')
assert img1 is not None, "file could not be read, check with os.path.exists()"
assert img2 is not None, "file could not be read, check with os.path.exists()"

# The logo goes in the top-left corner, so take a logo-sized ROI from there.
rows, cols, channels = img2.shape
roi = img1[:rows, :cols]

# Build a mask of the logo (grey level above 10) and its inverse for the
# non-logo area.
logo_gray = cv.cvtColor(img2, cv.COLOR_BGR2GRAY)
_, mask = cv.threshold(logo_gray, 10, 255, cv.THRESH_BINARY)
mask_inv = cv.bitwise_not(mask)

# Black out the logo-shaped area inside the ROI.
img1_bg = cv.bitwise_and(roi, roi, mask=mask_inv)
cv.imshow('img1_bg', img1_bg)

# Keep only the logo pixels from the logo image.
img2_fg = cv.bitwise_and(img2, img2, mask=mask)
cv.imshow('img2_fg', img2_fg)

# Add the extracted logo to the blanked background and write the result back
# into the original picture.
img1[:rows, :cols] = cv.add(img1_bg, img2_fg)

cv.imshow('res', img1)
cv.waitKey(0)
cv.destroyAllWindows()

7. 性能检查与优化

使用cv.getTickCount()获取时钟周期,在要判断耗时的程序前后各获取一次,求差
使用cv.getTickFrequency()获取时钟频率,耗时(秒) = 周期数量 / 频率

1
2
3
4
5
6
7
8
9
10
import cv2 as cv

# Tick count before the code being measured (the time module would work too).
tick_start = cv.getTickCount()

print('do something')

# Elapsed ticks divided by ticks-per-second gives the duration in seconds.
elapsed_ticks = cv.getTickCount() - tick_start
t = elapsed_ticks / cv.getTickFrequency()

print(t)

性能优化

1
2
3
4
5
6
7
8
9
10
11
import cv2 as cv
# Optimisation is on by default; it can be toggled by hand, e.g.
# cv.setUseOptimized(False)
img = cv.imread('ml.png')
# cv.imread returns None for an unreadable file; fail here with a clear message
# instead of inside cv.medianBlur.
assert img is not None, "file could not be read, check with os.path.exists()"
e1 = cv.getTickCount()

# Time median blurs of the same image with every odd kernel size from 5 to 47.
for i in range(5, 49, 2):
    img1 = cv.medianBlur(img, i)
e2 = cv.getTickCount()
# Elapsed ticks divided by ticks-per-second gives seconds.
t = (e2 - e1) / cv.getTickFrequency()
print(t)
# Report whether optimised code paths are currently enabled.
print(cv.useOptimized())

使用IPython时可以用它的命令%timeit非常方便地对每行代码用时进行分析
创建数组、单个或两个元素运算时,python直接运算以及opencv算法都比numpy快

性能优化方面的思路:首先以简单的方式实现算法,一旦算法开始工作,分析找到其瓶颈优化之

  1. 尽可能避免在Python中使用循环
  2. 最大可能地将算法向量化,因为numpy和opencv都针对向量运算进行了优化
  3. 非必要不复制array,只使用其views
    如果代码还是慢,考虑用Cython

附加资源