Training
- train.py
#!/usr/bin/env python3
from ultralytics import YOLO
import sys
import os

# Make the package root importable so script.my_path can be found.
package_root = os.path.join(os.path.dirname(__file__), '..')
sys.path.append(package_root)
from script.my_path import get_parent_directory


def main():
    print('train')
    parent_directory = get_parent_directory()
    print(f'parent_directory: {parent_directory}')
    yolo_file = f'{parent_directory}/script/yolov8m.yaml'
    model = YOLO(yolo_file, task='detect')
    # Train the model
    data_file = f'{parent_directory}/script/nanwang.yaml'
    results = model.train(
        data=data_file,
        epochs=600,
        batch=50,
        imgsz=640,
        scale=0.5,  # scale augmentation gain
        workers=8,
        device='cpu',
        project=f'{parent_directory}/train',  # project/name together determine the save directory
        name='output',
        exist_ok=True,
        resume=False,  # resume=True needs an existing last.pt checkpoint; a fresh yaml-built model has none
    )
    print(results)
    # model.export(format="engine", half=True, device="0")
    model.export(format="onnx", device="cpu")  # half=True requires a GPU device, so it is dropped for CPU export


if __name__ == '__main__':
    main()
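- script/my_path.get_parent_directory is imported above but its source is not shown; a minimal sketch of what it likely looks like (an assumption, the real helper may differ):
# script/my_path.py - hypothetical sketch of the path helper used by train.py
import os

def get_parent_directory() -> str:
    """Return the package root, i.e. the directory that contains script/."""
    return os.path.dirname(os.path.dirname(os.path.abspath(__file__)))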
- yolov8m.yaml
# Ultralytics 🚀, AGPL-3.0 license
# Object detection model with P3-P5 outputs. For usage examples see https://docs.ultralytics.com/tasks/detect
# Note: despite the yolov8m.yaml filename, this config uses the YOLO12-turbo layout (C3k2 / A2C2f modules).
# Parameters
nc: 10 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call this file with scale 'n'
  # [depth, width, max_channels]
  n: [0.50, 0.25, 1024] # summary: 497 layers, 2,553,904 parameters, 2,553,888 gradients, 6.2 GFLOPs
  s: [0.50, 0.50, 1024] # summary: 497 layers, 9,127,424 parameters, 9,127,408 gradients, 19.7 GFLOPs
  m: [0.50, 1.00, 512] # summary: 533 layers, 19,670,784 parameters, 19,670,768 gradients, 60.4 GFLOPs
  l: [1.00, 1.00, 512] # summary: 895 layers, 26,506,496 parameters, 26,506,480 gradients, 83.3 GFLOPs
  x: [1.00, 1.50, 512] # summary: 895 layers, 59,414,176 parameters, 59,414,160 gradients, 185.9 GFLOPs
# YOLO12-turbo backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2, 1, 2]] # 1-P2/4
  - [-1, 2, C3k2, [256, False, 0.25]]
  - [-1, 1, Conv, [256, 3, 2, 1, 4]] # 3-P3/8
  - [-1, 2, C3k2, [512, False, 0.25]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 4, A2C2f, [512, True, 4]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 4, A2C2f, [1024, True, 1]] # 8
# YOLO12-turbo head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 2, A2C2f, [512, False, -1]] # 11
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 2, A2C2f, [256, False, -1]] # 14
  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 11], 1, Concat, [1]] # cat head P4
  - [-1, 2, A2C2f, [512, False, -1]] # 17
  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 8], 1, Concat, [1]] # cat head P5
  - [-1, 2, C3k2, [1024, True]] # 20 (P5/32-large)
  - [[14, 17, 20], 1, Detect, [nc]] # Detect(P3, P4, P5)
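- Ultralytics infers the compound scale from the trailing letter of the config filename (n/s/m/l/x), so loading yolov8m.yaml builds the m row of the scales table above. A quick way to confirm the selected scale:
# sketch: verify which scale Ultralytics picked from the filename
from ultralytics import YOLO

model = YOLO("script/yolov8m.yaml", task="detect")  # trailing 'm' selects the m scale
model.info()  # prints the layer/parameter summary for the chosen scale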
- nanwang.yaml
# Dataset configuration for Ultralytics YOLO training
path: /your/label_map # dataset root; the split paths below are resolved relative to it
train:
  - src/yolo_video/dataset/train
test:
  - src/yolo_video/dataset/test
val: # a val split is required by Ultralytics; the test split is reused here as a placeholder
  - src/yolo_video/dataset/test
nc: 10
names:
  0: zbgytg
  1: hwckskg
  2: yjsdlhgq
  3: djyjsdyhgq
  4: ljyjsdyhgq
  5: cqsdlhgq
  6: gsdlhgq
  7: djblq
  8: ljblq
  9: pass
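- The train/test/val entries are resolved relative to path, so a wrong root quietly produces an empty dataset. A minimal sanity check before launching a 600-epoch run (a sketch; assumes PyYAML is installed and the config sits at script/nanwang.yaml):
# check_dataset.py - hypothetical helper to verify dataset paths before training
import os
import yaml

with open("script/nanwang.yaml") as f:
    cfg = yaml.safe_load(f)

root = cfg["path"]
for split in ("train", "val", "test"):
    for rel in cfg.get(split) or []:
        full = os.path.join(root, rel)
        print(f"{split}: {full} -> {'OK' if os.path.isdir(full) else 'MISSING'}")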
- Dataset directory layout
dataset
├── README.md
├── test
│ ├── images
│ └── labels
└── train
├── images
└── labels
- images holds the image files: a.jpg, b.jpg, and so on
- labels holds the annotation files: a.txt, b.txt, and so on
- Annotation files use the YOLO format, one object per line: class_id x_center y_center width height, all normalized to [0, 1]. Example:
6 0.658594 0.351852 0.220313 0.700000
6 0.433594 0.596759 0.341667 0.805556
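- A quick way to sanity-check annotations is to convert a line back to pixel corners and draw the box. A minimal sketch (the a.jpg/a.txt file names are illustrative):
# check_label.py - hypothetical helper that draws one label file onto its image
import cv2

def yolo_to_pixel(line: str, img_w: int, img_h: int):
    """Convert 'class cx cy w h' (normalized) to (class_id, x1, y1, x2, y2) in pixels."""
    cls, cx, cy, w, h = line.split()
    cx, cy, w, h = float(cx) * img_w, float(cy) * img_h, float(w) * img_w, float(h) * img_h
    return int(cls), int(cx - w / 2), int(cy - h / 2), int(cx + w / 2), int(cy + h / 2)

img = cv2.imread("dataset/train/images/a.jpg")
ih, iw = img.shape[:2]
with open("dataset/train/labels/a.txt") as f:
    for line in f:
        cls_id, x1, y1, x2, y2 = yolo_to_pixel(line, iw, ih)
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.imwrite("check_a.jpg", img)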
Detection
- detect.py
# Fragment from the detector class: model setup plus per-frame inference.
self._yolo_model = YOLO(f"{onnx_path}/best.pt", task='detect')  # loads the .pt weights despite the variable name
left_image = cv2.imread(left_png)
right_image = cv2.imread(right_png)  # right view of the stereo pair (right_png assumed analogous to left_png)
results = self._yolo_model(left_png)
class_names = self._yolo_model.names
object_map = {}
for result in results:
    for det in result.boxes:
        x1, y1, x2, y2 = det.xyxy[0].tolist()  # bounding-box corners
        class_id = int(det.cls)  # class id
        class_name = class_names[class_id]  # class name
        confidence = det.conf.item()  # confidence score
        print(f"Class: {class_name}, Confidence: {confidence:.2f}, BBox: ({x1:.2f}, {y1:.2f}, {x2:.2f}, {y2:.2f})")
        xyz = get_xyz(left_image.copy(), right_image.copy(), class_name, (x1, y1), (x2, y2), self._camera_config)
        object_map[class_name] = [confidence, xyz]
def get_xyz(imgL, imgR, class_name, top_xy, bottom_xy, camera_param: CameraConfig):
    disparity, validPixROI1, _, Q = get_disparity(imgL, imgR, camera_param)
    threeD = cv2.reprojectImageTo3D(disparity, Q, handleMissingValues=True)  # back-project disparity to 3D

    # Use the bounding-box center as the reference point (x, y).
    x1, y1 = top_xy
    x2, y2 = bottom_xy
    x, y = (x1 + x2) / 2, (y1 + y2) / 2

    # Check the point lies inside the valid rectified ROI.
    x_roi, y_roi, w_roi, h_roi = validPixROI1
    if (x < x_roi) or (y < y_roi) or (x >= x_roi + w_roi) or (y >= y_roi + h_roi):
        print(f"Warning: Point ({x}, {y}) is outside valid ROI {validPixROI1}")
        return None

    # Shift into the rectified-image coordinate frame.
    x_rect = int(x - x_roi)
    y_rect = int(y - y_roi)

    # Bounds check against the 3D map.
    if (x_rect < 0) or (y_rect < 0) or (x_rect >= threeD.shape[1]) or (y_rect >= threeD.shape[0]):
        print(f"Error: Adjusted point ({x_rect}, {y_rect}) is outside threeD map")
        return None

    # Convert to meters (the calibration is assumed to be in millimeters).
    x = threeD[y_rect, x_rect, 0] / 1000
    y = threeD[y_rect, x_rect, 1] / 1000
    z = threeD[y_rect, x_rect, 2] / 1000
    local = (x, y, z)
    print("x, y, z (m):", local)

    cv2.rectangle(imgL, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
    cv2.putText(imgL, class_name, (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    cv2.imshow("left", imgL)
    key = cv2.waitKey(1)
    if key == ord("q"):
        return None  # pressing q discards this detection
    return local
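- get_disparity is called by get_xyz but not shown. A minimal sketch of a typical implementation with cv2.stereoRectify plus StereoSGBM; the CameraConfig fields used here (K1, D1, K2, D2, R, T) are assumptions and the real function may differ:
# sketch of get_disparity: rectify the stereo pair, then compute a disparity map
import cv2
import numpy as np

def get_disparity(imgL, imgR, cam):
    """Return (disparity, validPixROI1, validPixROI2, Q) to match the call site above."""
    h, w = imgL.shape[:2]
    R1, R2, P1, P2, Q, roi1, roi2 = cv2.stereoRectify(
        cam.K1, cam.D1, cam.K2, cam.D2, (w, h), cam.R, cam.T)
    map1x, map1y = cv2.initUndistortRectifyMap(cam.K1, cam.D1, R1, P1, (w, h), cv2.CV_32FC1)
    map2x, map2y = cv2.initUndistortRectifyMap(cam.K2, cam.D2, R2, P2, (w, h), cv2.CV_32FC1)
    rectL = cv2.remap(imgL, map1x, map1y, cv2.INTER_LINEAR)
    rectR = cv2.remap(imgR, map2x, map2y, cv2.INTER_LINEAR)
    grayL = cv2.cvtColor(rectL, cv2.COLOR_BGR2GRAY)
    grayR = cv2.cvtColor(rectR, cv2.COLOR_BGR2GRAY)
    sgbm = cv2.StereoSGBM_create(minDisparity=0, numDisparities=128, blockSize=5,
                                 P1=8 * 3 * 5 ** 2, P2=32 * 3 * 5 ** 2)
    disparity = sgbm.compute(grayL, grayR).astype(np.float32) / 16.0  # SGBM output is fixed-point
    return disparity, roi1, roi2, Q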