Training
- train.py
#!/usr/bin/env python3
from ultralytics import YOLO
import sys
import os

# Make the package root importable so script.my_path can be found.
package_root = os.path.join(os.path.dirname(__file__), '..')
sys.path.append(package_root)
from script.my_path import get_parent_directory


def main():
    print('train')
    parent_directory = get_parent_directory()
    print(f'parent_directory: {parent_directory}')
    yolo_file = f'{parent_directory}/script/yolov8m.yaml'
    model = YOLO(yolo_file, task='detect')
    # Train the model
    data_file = f'{parent_directory}/script/nanwang.yaml'
    results = model.train(
        data=data_file,
        epochs=600,
        batch=50,
        imgsz=640,
        scale=0.5,  # scale augmentation gain
        workers=8,
        device='cpu',
        project=f'{parent_directory}/train',  # project/name together determine the save directory
        name='output',
        exist_ok=True,
        resume=False,  # resume=True needs an existing last.pt checkpoint; a fresh yaml-built model has none
    )
    print(results)
    # model.export(format="engine", half=True, device="0")
    model.export(format="onnx", device="cpu")  # half=True requires a GPU device, so it is dropped for CPU export


if __name__ == '__main__':
    main()
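- script/my_path.get_parent_directory is imported above but its source is not shown; a minimal sketch of what it likely looks like (an assumption, the real helper may differ):
# script/my_path.py - hypothetical sketch of the path helper used by train.py
import os

def get_parent_directory() -> str:
    """Return the package root, i.e. the directory that contains script/."""
    return os.path.dirname(os.path.dirname(os.path.abspath(__file__)))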
- yolov8m.yaml
# Ultralytics 🚀, AGPL-3.0 license
# Object detection model with P3-P5 outputs. For usage examples see https://docs.ultralytics.com/tasks/detect
# Note: despite the yolov8m.yaml filename, this config uses the YOLO12-turbo layout (C3k2 / A2C2f modules).
# Parameters
nc: 10 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call this file with scale 'n'
  # [depth, width, max_channels]
  n: [0.50, 0.25, 1024] # summary: 497 layers, 2,553,904 parameters, 2,553,888 gradients, 6.2 GFLOPs
  s: [0.50, 0.50, 1024] # summary: 497 layers, 9,127,424 parameters, 9,127,408 gradients, 19.7 GFLOPs
  m: [0.50, 1.00, 512] # summary: 533 layers, 19,670,784 parameters, 19,670,768 gradients, 60.4 GFLOPs
  l: [1.00, 1.00, 512] # summary: 895 layers, 26,506,496 parameters, 26,506,480 gradients, 83.3 GFLOPs
  x: [1.00, 1.50, 512] # summary: 895 layers, 59,414,176 parameters, 59,414,160 gradients, 185.9 GFLOPs
# YOLO12-turbo backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2, 1, 2]] # 1-P2/4
  - [-1, 2, C3k2, [256, False, 0.25]]
  - [-1, 1, Conv, [256, 3, 2, 1, 4]] # 3-P3/8
  - [-1, 2, C3k2, [512, False, 0.25]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 4, A2C2f, [512, True, 4]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 4, A2C2f, [1024, True, 1]] # 8
# YOLO12-turbo head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 2, A2C2f, [512, False, -1]] # 11
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 2, A2C2f, [256, False, -1]] # 14
  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 11], 1, Concat, [1]] # cat head P4
  - [-1, 2, A2C2f, [512, False, -1]] # 17
  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 8], 1, Concat, [1]] # cat head P5
  - [-1, 2, C3k2, [1024, True]] # 20 (P5/32-large)
  - [[14, 17, 20], 1, Detect, [nc]] # Detect(P3, P4, P5)
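- Ultralytics infers the compound scale from the trailing letter of the config filename (n/s/m/l/x), so loading yolov8m.yaml builds the m row of the scales table above. A quick way to confirm the selected scale:
# sketch: verify which scale Ultralytics picked from the filename
from ultralytics import YOLO

model = YOLO("script/yolov8m.yaml", task="detect")  # trailing 'm' selects the m scale
model.info()  # prints the layer/parameter summary for the chosen scale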
- nanwang.yaml
# Dataset configuration for Ultralytics YOLO training
path: /your/label_map # dataset root; the split paths below are resolved relative to it
train:
  - src/yolo_video/dataset/train
test:
  - src/yolo_video/dataset/test
val: # a val split is required by Ultralytics; the test split is reused here as a placeholder
  - src/yolo_video/dataset/test
nc: 10
names:
  0: zbgytg
  1: hwckskg
  2: yjsdlhgq
  3: djyjsdyhgq
  4: ljyjsdyhgq
  5: cqsdlhgq
  6: gsdlhgq
  7: djblq
  8: ljblq
  9: pass
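- The train/test/val entries are resolved relative to path, so a wrong root quietly produces an empty dataset. A minimal sanity check before launching a 600-epoch run (a sketch; assumes PyYAML is installed and the config sits at script/nanwang.yaml):
# check_dataset.py - hypothetical helper to verify dataset paths before training
import os
import yaml

with open("script/nanwang.yaml") as f:
    cfg = yaml.safe_load(f)

root = cfg["path"]
for split in ("train", "val", "test"):
    for rel in cfg.get(split) or []:
        full = os.path.join(root, rel)
        print(f"{split}: {full} -> {'OK' if os.path.isdir(full) else 'MISSING'}")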
- Dataset directory layout
dataset
├── README.md
├── test
│ ├── images
│ └── labels
└── train
├── images
└── labels
- images holds the image files: a.jpg, b.jpg, and so on
- labels holds the annotation files: a.txt, b.txt, and so on
- Annotation files use the YOLO format, one object per line: class_id x_center y_center width height, all normalized to [0, 1]. Example:
6 0.658594 0.351852 0.220313 0.700000
6 0.433594 0.596759 0.341667 0.805556
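- A quick way to sanity-check annotations is to convert a line back to pixel corners and draw the box. A minimal sketch (the a.jpg/a.txt file names are illustrative):
# check_label.py - hypothetical helper that draws one label file onto its image
import cv2

def yolo_to_pixel(line: str, img_w: int, img_h: int):
    """Convert 'class cx cy w h' (normalized) to (class_id, x1, y1, x2, y2) in pixels."""
    cls, cx, cy, w, h = line.split()
    cx, cy, w, h = float(cx) * img_w, float(cy) * img_h, float(w) * img_w, float(h) * img_h
    return int(cls), int(cx - w / 2), int(cy - h / 2), int(cx + w / 2), int(cy + h / 2)

img = cv2.imread("dataset/train/images/a.jpg")
ih, iw = img.shape[:2]
with open("dataset/train/labels/a.txt") as f:
    for line in f:
        cls_id, x1, y1, x2, y2 = yolo_to_pixel(line, iw, ih)
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.imwrite("check_a.jpg", img)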
Detection
- detect.py
# Fragment from the detector class: model setup plus per-frame inference.
self._yolo_model = YOLO(f"{onnx_path}/best.pt", task='detect')  # loads the .pt weights despite the variable name
left_image = cv2.imread(left_png)
right_image = cv2.imread(right_png)  # right view of the stereo pair (right_png assumed analogous to left_png)
results = self._yolo_model(left_png)
class_names = self._yolo_model.names
object_map = {}
for result in results:
    for det in result.boxes:
        x1, y1, x2, y2 = det.xyxy[0].tolist()  # bounding-box corners
        class_id = int(det.cls)  # class id
        class_name = class_names[class_id]  # class name
        confidence = det.conf.item()  # confidence score
        print(f"Class: {class_name}, Confidence: {confidence:.2f}, BBox: ({x1:.2f}, {y1:.2f}, {x2:.2f}, {y2:.2f})")
        xyz = get_xyz(left_image.copy(), right_image.copy(), class_name, (x1, y1), (x2, y2), self._camera_config)
        object_map[class_name] = [confidence, xyz]
def get_xyz(imgL, imgR, class_name, top_xy, bottom_xy, camera_param: CameraConfig):
    disparity, validPixROI1, _, Q = get_disparity(imgL, imgR, camera_param)
    threeD = cv2.reprojectImageTo3D(disparity, Q, handleMissingValues=True)  # back-project disparity to 3D

    # Use the bounding-box center as the reference point (x, y).
    x1, y1 = top_xy
    x2, y2 = bottom_xy
    x, y = (x1 + x2) / 2, (y1 + y2) / 2

    # Check the point lies inside the valid rectified ROI.
    x_roi, y_roi, w_roi, h_roi = validPixROI1
    if (x < x_roi) or (y < y_roi) or (x >= x_roi + w_roi) or (y >= y_roi + h_roi):
        print(f"Warning: Point ({x}, {y}) is outside valid ROI {validPixROI1}")
        return None

    # Shift into the rectified-image coordinate frame.
    x_rect = int(x - x_roi)
    y_rect = int(y - y_roi)

    # Bounds check against the 3D map.
    if (x_rect < 0) or (y_rect < 0) or (x_rect >= threeD.shape[1]) or (y_rect >= threeD.shape[0]):
        print(f"Error: Adjusted point ({x_rect}, {y_rect}) is outside threeD map")
        return None

    # Convert to meters (the calibration is assumed to be in millimeters).
    x = threeD[y_rect, x_rect, 0] / 1000
    y = threeD[y_rect, x_rect, 1] / 1000
    z = threeD[y_rect, x_rect, 2] / 1000
    local = (x, y, z)
    print("x, y, z (m):", local)

    cv2.rectangle(imgL, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
    cv2.putText(imgL, class_name, (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    cv2.imshow("left", imgL)
    key = cv2.waitKey(1)
    if key == ord("q"):
        return None  # pressing q discards this detection
    return local
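- get_disparity is called by get_xyz but not shown. A minimal sketch of a typical implementation with cv2.stereoRectify plus StereoSGBM; the CameraConfig fields used here (K1, D1, K2, D2, R, T) are assumptions and the real function may differ:
# sketch of get_disparity: rectify the stereo pair, then compute a disparity map
import cv2
import numpy as np

def get_disparity(imgL, imgR, cam):
    """Return (disparity, validPixROI1, validPixROI2, Q) to match the call site above."""
    h, w = imgL.shape[:2]
    R1, R2, P1, P2, Q, roi1, roi2 = cv2.stereoRectify(
        cam.K1, cam.D1, cam.K2, cam.D2, (w, h), cam.R, cam.T)
    map1x, map1y = cv2.initUndistortRectifyMap(cam.K1, cam.D1, R1, P1, (w, h), cv2.CV_32FC1)
    map2x, map2y = cv2.initUndistortRectifyMap(cam.K2, cam.D2, R2, P2, (w, h), cv2.CV_32FC1)
    rectL = cv2.remap(imgL, map1x, map1y, cv2.INTER_LINEAR)
    rectR = cv2.remap(imgR, map2x, map2y, cv2.INTER_LINEAR)
    grayL = cv2.cvtColor(rectL, cv2.COLOR_BGR2GRAY)
    grayR = cv2.cvtColor(rectR, cv2.COLOR_BGR2GRAY)
    sgbm = cv2.StereoSGBM_create(minDisparity=0, numDisparities=128, blockSize=5,
                                 P1=8 * 3 * 5 ** 2, P2=32 * 3 * 5 ** 2)
    disparity = sgbm.compute(grayL, grayR).astype(np.float32) / 16.0  # SGBM output is fixed-point
    return disparity, roi1, roi2, Q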