OpenCV+MediaPipe實現手部關鍵點識別
發布日期:2022-02-02 00:34 | 文章來源:源碼中國
可視化輔助函數
在下面的代碼的注釋內有大致的操作
基本操作與前面的人臉檢測的操作相似,增加了可視化的輔助函數
import matplotlib.pyplot as plt
# 使用ipython的魔法方法,將繪制出的圖像直接嵌入在notebook單元格中
import cv2
# Helper that displays an OpenCV image through matplotlib.
def look_img(img):
    """Show an OpenCV image with matplotlib.

    OpenCV reads images in BGR channel order while matplotlib displays RGB,
    so the image is converted from BGR to RGB before it is shown.

    Args:
        img: image passed to ``cv2.cvtColor`` as a BGR image.
    """
    rgb_view = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(rgb_view)
    plt.show()
# Take a single photo with the camera.
# `time` is imported here because this snippet calls time.sleep() before any
# other import of the module appears in the file.
import time

time.sleep(2)  # take the photo two seconds after the script starts
# Open the camera: 0 is the computer's default camera, 1 an external one
cap = cv2.VideoCapture(0)
# Grab one frame from the camera
success, image = cap.read()
# Release the camera
cap.release()
# Close any image windows
cv2.destroyAllWindows()
# cap.read() signals failure through `success`; only write a frame that was
# actually captured instead of handing an invalid image to imwrite.
if success:
    cv2.imwrite('photo.jpg', image)
else:
    print('Error: failed to capture a frame from the camera')
# 調用攝像頭拍視頻
import cv2
import time
# Per-frame processing hook: no processing is done here, so the captured
# frame is written to the video unchanged.
def process_frame(img):
    """Return the frame unchanged.

    Args:
        img: the frame captured from the camera.

    Returns:
        The same object that was passed in.
    """
    return img
# Record camera frames into a video file until 'q' or Esc is pressed.
output_name = 'record_video.mp4'

# Open the system default camera (index 0). The constructor already opens the
# device, so no additional cap.open(0) call is made.
cap = cv2.VideoCapture(0)

frame_size = (cap.get(cv2.CAP_PROP_FRAME_WIDTH), cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps or fps <= 0:
    # NOTE(review): some capture backends report 0 for CAP_PROP_FPS; fall back
    # to a nominal rate so the writer is not created with fps=0. Confirm that
    # 30 is a sensible default for the target camera.
    fps = 30.0

# The writer needs an integer (width, height) tuple
out = cv2.VideoWriter(output_name, fourcc, fps, (int(frame_size[0]), int(frame_size[1])))

try:
    # Loop until a break is triggered
    while cap.isOpened():
        # Grab a frame
        success, frame = cap.read()
        if not success:
            break

        # Apply the per-frame processing
        frame = process_frame(frame)

        # Append the frame to the video file
        out.write(frame)

        # Show the processed three-channel image
        cv2.imshow('press q to break', frame)

        # Quit on 'q' or Esc (with an English input method active)
        if cv2.waitKey(1) in [ord('q'), 27]:
            break
finally:
    # Always release the window, writer and camera, even if processing raised
    cv2.destroyAllWindows()
    out.release()
    cap.release()

print('視頻已保存', output_name)
單張圖片
import cv2 as cv
import mediapipe as mp
import tqdm
import time
import matplotlib.pyplot as plt
def look_img(img):
    """Show an OpenCV image with matplotlib.

    The image is converted from BGR to RGB channel order before display.

    Args:
        img: image passed to ``cv.cvtColor`` as a BGR image.
    """
    rgb_view = cv.cvtColor(img, cv.COLOR_BGR2RGB)
    plt.imshow(rgb_view)
    plt.show()
# Hand landmark detection on a single image.

# Hand landmark detection solution
mp_hand = mp.solutions.hands
# Build the model. static_image_mode=True because the input is one
# independent still image rather than consecutive video frames.
hands = mp_hand.Hands(
    static_image_mode=True,
    max_num_hands=5,
    min_detection_confidence=0.3,
    min_tracking_confidence=0.3,
)
# Drawing helpers
mpDraw = mp.solutions.drawing_utils

img = cv.imread('hand2.png')
if img is None:
    # cv.imread returns None rather than raising when the file cannot be read
    raise FileNotFoundError("could not read image 'hand2.png'")
# look_img(img)

# The model is fed an RGB image, OpenCV loads BGR
img_RGB = cv.cvtColor(img, cv.COLOR_BGR2RGB)
results = hands.process(img_RGB)

if results.multi_hand_landmarks:
    # Draw the landmarks and skeleton connections of every detected hand
    for hand_21 in results.multi_hand_landmarks:
        mpDraw.draw_landmarks(img, hand_21, mp_hand.HAND_CONNECTIONS)

look_img(img)
cv.imwrite('hands2.jpg', img)

if results.multi_hand_landmarks:
    # Visualise the hand with index 0 in a 3D coordinate system; guarded
    # because indexing [0] fails when no hand was detected.
    mpDraw.plot_landmarks(results.multi_hand_landmarks[0], mp_hand.HAND_CONNECTIONS)

攝像頭檢測
import cv2
# mediapipe人工智能工具包
import mediapipe as mp
# 進(jìn)度條庫
from tqdm import tqdm
# 時(shí)間庫
import time
# Model setup.
# Hand landmark solution module
mp_hands = mp.solutions.hands
# Build the hand landmark model
hands = mp_hands.Hands(
    static_image_mode=False,       # static images vs. consecutive video frames
    max_num_hands=2,               # maximum number of hands to detect
    min_detection_confidence=0.7,  # detection confidence threshold
    min_tracking_confidence=0.5,   # tracking confidence threshold
)
# Drawing helpers
mpDraw = mp.solutions.drawing_utils
# Single-frame processing function.
def process_frame(img):
    """Mirror a frame, run hand detection and draw the detected landmarks.

    Args:
        img: frame passed to ``cv2.flip`` and treated as a BGR image.

    Returns:
        The horizontally mirrored frame with the landmarks and skeleton
        connections of every detected hand drawn on it.
    """
    # Flip horizontally (flip code 1) so left/right hands in the picture match
    # the real ones; 0 would flip vertically, -1 both ways.
    mirrored = cv2.flip(img, 1)

    # The model is fed an RGB image
    results = hands.process(cv2.cvtColor(mirrored, cv2.COLOR_BGR2RGB))

    # multi_hand_landmarks is falsy when no hand was found; iterate over
    # nothing in that case.
    for hand_landmarks in results.multi_hand_landmarks or ():
        mpDraw.draw_landmarks(mirrored, hand_landmarks, mp_hands.HAND_CONNECTIONS)

    # A 3D plot of hand 0 could be produced with
    # mpDraw.plot_landmarks(results.multi_hand_landmarks[0], mp_hands.HAND_CONNECTIONS)
    return mirrored
# 導(dǎo)入opencv-python
import cv2
import time
# Open the system default camera (index 0). The constructor already opens the
# device, so it is not opened a second time with a different index.
cap = cv2.VideoCapture(0)

try:
    # Loop until a break is triggered
    while cap.isOpened():
        # Grab a frame
        success, frame = cap.read()
        if not success:
            print('Error')
            break

        # Run the per-frame processing function
        frame = process_frame(frame)

        # Show the processed three-channel image
        cv2.imshow('my_window', frame)

        # Quit on 'q' or Esc (with an English input method active)
        if cv2.waitKey(1) in [ord('q'), 27]:
            break
finally:
    # Always release the camera and close the window, even if processing raised
    cap.release()
    cv2.destroyAllWindows()
改變關鍵點數據特征
import cv2
# mediapipe人工智能工具包
import mediapipe as mp
# 進(jìn)度條庫
from tqdm import tqdm
# 時(shí)間庫
import time
# Hand landmark solution module
mp_hands = mp.solutions.hands
# Build the hand landmark model
hands = mp_hands.Hands(
    static_image_mode=False,       # static images vs. consecutive video frames
    max_num_hands=2,               # maximum number of hands to detect
    min_detection_confidence=0.7,  # detection confidence threshold
    min_tracking_confidence=0.5,   # tracking confidence threshold
)
# Drawing helpers
mpDraw = mp.solutions.drawing_utils
# Fill colour (BGR) for each of the 21 landmark indices, grouped by joint type.
_LANDMARK_COLORS = {
    0: (0, 0, 255),        # wrist
    8: (193, 182, 255),    # index fingertip
}
_LANDMARK_COLORS.update(dict.fromkeys((1, 5, 9, 13, 17), (16, 144, 247)))   # finger bases
_LANDMARK_COLORS.update(dict.fromkeys((2, 6, 10, 14, 18), (1, 240, 255)))   # first joints
_LANDMARK_COLORS.update(dict.fromkeys((3, 7, 11, 15, 19), (140, 47, 240)))  # second joints
_LANDMARK_COLORS.update(dict.fromkeys((4, 12, 16, 20), (223, 155, 60)))     # other fingertips


def process_frame(img):
    """Detect hands in a frame and draw depth-coded landmarks plus text overlays.

    The frame is mirrored horizontally and passed to the hand model. For each
    detected hand the skeleton is drawn and every landmark is overdrawn with a
    filled circle whose radius grows with its depth relative to the wrist.
    The handedness labels and the index-fingertip depths are written onto the
    frame, followed by the processing rate of this frame.

    Args:
        img: frame passed to ``cv2.flip`` and treated as a BGR image.

    Returns:
        The mirrored, annotated frame.
    """
    # Time at which processing of this frame starts
    start_time = time.time()
    # Defined up front so the FPS overlay also works on frames without hands
    scaler = 1

    # Image height and width
    h, w = img.shape[0], img.shape[1]

    # Flip horizontally (flip code 1) so left/right hands in the picture match
    # the real ones; 0 would flip vertically, -1 both ways.
    img = cv2.flip(img, 1)
    # The model is fed an RGB image
    img_RGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = hands.process(img_RGB)

    if results.multi_hand_landmarks:  # at least one hand was detected
        handness_str = ''
        index_finger_tip_str = ''
        detections = zip(results.multi_hand_landmarks, results.multi_handedness)
        for hand_idx, (hand_21, handedness) in enumerate(detections):
            # Draw the landmarks and the skeleton connections
            mpDraw.draw_landmarks(img, hand_21, mp_hands.HAND_CONNECTIONS)

            # Record the left/right label of this hand
            temp_handness = handedness.classification[0].label
            handness_str += '{}:{} '.format(hand_idx, temp_handness)

            # Depth coordinate of the wrist, used as the reference
            cz0 = hand_21.landmark[0].z

            for i in range(21):  # the 21 landmarks of this hand
                point = hand_21.landmark[i]
                # x / y are scaled by the frame width / height to get pixels
                cx = int(point.x * w)
                cy = int(point.y * h)
                depth_z = cz0 - point.z

                # The circle radius reflects the depth; never negative
                radius = max(int(6 * (1 + depth_z * 5)), 0)
                img = cv2.circle(img, (cx, cy), radius, _LANDMARK_COLORS[i], -1)

                if i == 8:
                    # Depth of the index fingertip relative to the wrist
                    index_finger_tip_str += '{}:{:.2f} '.format(hand_idx, depth_z)

        img = cv2.putText(img, handness_str, (25 * scaler, 100 * scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25 * scaler, (255, 0, 255), 2 * scaler)
        img = cv2.putText(img, index_finger_tip_str, (25 * scaler, 150 * scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25 * scaler, (255, 0, 255), 2 * scaler)

    # Time at which processing of this frame finished
    end_time = time.time()
    elapsed = end_time - start_time
    # Frames per second; guard against a zero interval from a coarse clock
    FPS = 1 / elapsed if elapsed > 0 else 0
    # putText arguments: image, text, origin, font, font scale, colour, thickness
    img = cv2.putText(img, 'FPS ' + str(int(FPS)), (25 * scaler, 50 * scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25 * scaler, (255, 0, 255), 2 * scaler)
    return img
# Open the system default camera (index 0). The constructor already opens the
# device, so no additional cap.open(0) call is made.
cap = cv2.VideoCapture(0)

try:
    # Loop until a break is triggered
    while cap.isOpened():
        # Grab a frame
        success, frame = cap.read()
        if not success:
            break

        frame = process_frame(frame)

        # Show the processed three-channel image
        cv2.imshow('my_window', frame)

        # Quit on 'q' or Esc (with an English input method active)
        if cv2.waitKey(1) in [ord('q'), 27]:
            break
finally:
    # Always release the camera and close the window, even if processing raised
    cap.release()
    cv2.destroyAllWindows()

以上就是OpenCV+MediaPipe實現手部關鍵點識別的詳細內容,更多關于OpenCV MediaPipe手部關鍵點識別的資料請關注本站其它相關文章!
版權聲明:本站文章來源標注為YINGSOO的內容版權均為本站所有,歡迎引用、轉載,請保持原文完整并注明來源及原文鏈接。禁止復制或仿造本網站,禁止在非maisonbaluchon.cn所屬的服務器上建立鏡像,否則將依法追究法律責任。本站部分內容來源于網友推薦、互聯網收集整理而來,僅供學習參考,不代表本站立場,如有內容涉嫌侵權,請聯系alex-e#qq.com處理。
相關文章
關注官方微信