内容主要是获取目标位姿,步骤如下:
1.获取相机的数据,然后通过相机内参将像素坐标转换为相机坐标;
2.yolov8的分割模型获取抓取物体的烟码;
3.把深度图的深度值和分割的彩图值结合构成3维数据;
4.foundationpose对3维数据找位姿。
1.环境搭建
项目用conda搭建环境。
系统:ubuntu22.04 ROS2 cuda12.4
相机环境搭建参考https://blog.csdn.net/weixin_71719718/article/details/160549145?spm=1011.2124.3001.6209。
一、基础环境说明
- 已安装:ROS2 Humble、Miniconda
- 显卡:RTX 4060 Laptop
- 系统 CUDA:11.8(nvcc)
- 核心原则:所有 CUDA 相关依赖必须统一为 11.8 版本
二、Conda 环境创建与基础配置
# 1. 创建conda环境(Python3.10) conda create -n ros2_tensorrt python=3.10 -y conda activate ros2_tensorrt # 2. 基础pip升级 pip install --upgrade pip setuptools wheel ninja三、核心 CUDA/PyTorch 安装(关键版本)
# 卸载冲突包 pip uninstall -y torch torchvision torchaudio triton # 安装指定版本(CUDA12.4 匹配系统) python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124 # 验证(必须输出 12.4) python -c "import torch; print(torch.version.cuda, torch.cuda.is_available())"四、NumPy 降级(修复兼容性问题)
pip install numpy==1.24.4五、一键安装 YOLO 分割环境
numpy==1.24.0 opencv-python open3d pyyaml scikit-learn==1.2.2 ultralytics==8.2.100 numpy==1.24.0分割代码如下:
import rclpy from rclpy.node import Node from sensor_msgs.msg import Image, CameraInfo import cv2 import numpy as np from ultralytics import YOLO def imgmsg_to_cv2(img_msg): # 🔥 修复这里:np.16 → np.uint16 if img_msg.encoding == "16UC1": return np.frombuffer(img_msg.data, dtype=np.uint16).reshape(img_msg.height, img_msg.width) img = np.frombuffer(img_msg.data, dtype=np.uint8).reshape(img_msg.height, img_msg.width, -1) return img[..., ::-1] def save_point_cloud_to_ply(points_3d, colors, filename="phone_only_cloud.ply"): with open(filename, 'w') as f: f.write("ply\n") f.write("format ascii 1.0\n") f.write(f"element vertex {len(points_3d)}\n") f.write("property float x\n") f.write("property float y\n") f.write("property float z\n") f.write("property uchar red\n") f.write("property uchar green\n") f.write("property uchar blue\n") f.write("end_header\n") for (x, y, z), (r, g, b) in zip(points_3d, colors): f.write(f"{x:.6f} {y:.6f} {z:.6f} {int(r)} {int(g)} {int(b)}\n") # ==================== 颜色分割1:中间均值法(最强去桌面) ==================== def color_segment_center_mean(points_3d, point_colors, crop_ratio=0.25, thresh=18): if len(points_3d) == 0: return np.array([]), np.array([]) pts = np.array(points_3d) cols = np.array(point_colors, dtype=np.float32) x_min, x_max = pts[:,0].min(), pts[:,0].max() y_min, y_max = pts[:,1].min(), pts[:,1].max() dx = (x_max - x_min) * crop_ratio dy = (y_max - y_min) * crop_ratio center_mask = (pts[:,0] > x_min+dx) & (pts[:,0] < x_max-dx) & \ (pts[:,1] > y_min+dy) & (pts[:,1] < y_max-dy) if np.sum(center_mask) < 10: return pts, cols center_rgb = np.mean(cols[center_mask], axis=0) diff = np.linalg.norm(cols - center_rgb, axis=1) keep = diff < thresh return pts[keep], cols[keep].astype(np.uint8) # ==================== 颜色分割2:边缘差异法 ==================== def color_segment_edge_diff(points_3d, point_colors, edge_ratio=0.2, thresh=12): if len(points_3d) == 0: return np.array([]), np.array([]) pts = np.array(points_3d) cols = np.array(point_colors, dtype=np.float32) x_min, x_max = pts[:,0].min(), pts[:,0].max() y_min, y_max = pts[:,1].min(), pts[:,1].max() dx = (x_max - x_min) * edge_ratio dy = (y_max - y_min) * edge_ratio edge_mask = (pts[:,0] < x_min+dx) | (pts[:,0] > x_max-dx) | \ (pts[:,1] < y_min+dy) | (pts[:,1] > y_max-dy) if np.sum(edge_mask) < 10: return pts, cols mean_rgb = np.mean(cols, axis=0) diff = np.linalg.norm(cols - mean_rgb, axis=1) keep = diff < thresh return pts[keep], cols[keep].astype(np.uint8) class CameraAlignmentNode(Node): def __init__(self): super().__init__('camera_phone_segment') self.color_sub = self.create_subscription(Image, '/camera/color/image_raw', self.color_callback, 10) self.depth_sub = self.create_subscription(Image, '/camera/depth/image_raw', self.depth_callback, 10) self.info_sub = self.create_subscription(CameraInfo, '/camera/color/camera_info', self.info_cb, 10) self.color = None self.depth = None self.K = None self.model = YOLO("best_v3.pt") def info_cb(self, msg): self.K = np.array(msg.k).reshape(3,3) self.destroy_subscription(self.info_sub) def color_callback(self, msg): self.color = imgmsg_to_cv2(msg) def depth_callback(self, msg): self.depth = imgmsg_to_cv2(msg) if self.color is not None and self.K is not None: self.process() def process(self): h, w = self.color.shape[:2] depth_aligned = cv2.resize(self.depth, (w, h), interpolation=cv2.INTER_NEAREST) draw_img = self.color.copy() # ---------------------- 1. YOLO 分割 ---------------------- res = self.model(self.color, conf=0.6, verbose=False) mask = np.zeros((h,w), dtype=np.uint8) if res[0].masks is not None: for m in res[0].masks.data: m = cv2.resize(m.cpu().numpy(), (w,h)) mask = np.clip(mask + m*255, 0,255).astype(np.uint8) # ---------------------- 2. 立刻补洞(实心无洞) ---------------------- kernel_fill = np.ones((7,7), np.uint8) mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel_fill, iterations=2) # ---------------------- 3. 轻微膨胀,不包含桌面 ---------------------- kernel = np.ones((2,2), np.uint8) mask = cv2.dilate(mask, kernel, iterations=1) # ---------------------- 4. 只保留最大连通域(只留手机) ---------------------- num_labels, labels_im = cv2.connectedComponents(mask) if num_labels > 1: max_component = 1 max_count = 0 for label in range(1, num_labels): count = np.sum(labels_im == label) if count > max_count: max_count = count max_component = label mask = (labels_im == max_component).astype(np.uint8) * 255 # ---------------------- 5. 深度过滤(彻底剔除桌面) ---------------------- final_mask = mask.copy() depth_mask = (depth_aligned > 100) & (depth_aligned < 3000) final_mask[~depth_mask] = 0 # ---------------------- 可视化 ---------------------- color_mask = np.zeros_like(draw_img) color_mask[final_mask>0] = [255,0,255] draw_img = cv2.addWeighted(draw_img, 1, color_mask, 0.4, 0) cv2.imshow("result", draw_img) cv2.imshow("mask", final_mask) # ---------------------- 生成点云 ---------------------- yy, xx = np.where(final_mask > 0) fx, fy, cx, cy = self.K[0,0], self.K[1,1], self.K[0,2], self.K[1,2] points_3d = [] point_colors = [] for v, u in zip(yy, xx): z = depth_aligned[v, u] x = (u - cx) * z / fx y = (v - cy) * z / fy r, g, b = self.color[v, u] points_3d.append([x,y,z]) point_colors.append([r,g,b]) if len(points_3d) < 50: cv2.waitKey(1) return # ---------------------- 二次颜色分割(纯净手机) ---------------------- pts, cols = color_segment_center_mean(points_3d, point_colors, thresh=18) # pts, cols = color_segment_edge_diff(points_3d, point_colors, thresh=12) # ---------------------- 保存 ---------------------- if len(pts) > 50: w = np.max(pts[:,0]) - np.min(pts[:,0]) h = np.max(pts[:,1]) - np.min(pts[:,1]) d = np.max(pts[:,2]) - np.min(pts[:,2]) print(f"手机尺寸:{w:.3f}m {h:.3f}m {d:.3f}m") save_point_cloud_to_ply(pts, cols) cv2.waitKey(1) def main(args=None): rclpy.init(args=args) node = CameraAlignmentNode() try: rclpy.spin(node) except KeyboardInterrupt: pass finally: cv2.destroyAllWindows() node.destroy_node() rclpy.shutdown() if __name__ == '__main__': main()2. foundationpose环境搭建
一、克隆官方仓库(用国内镜像克隆)
git clone https://gitcode.com/gh_mirrors/fo/FoundationPose.git mv FoundationPose ~/ros2_ws/二、安装FoundationPose 的依赖
cd ~/ros2_ws/FoundationPose pip install -r requirements.txt三、安装PyTorch3D
cd ~/ros2_ws git clone https://github.com/facebookresearch/pytorch3d.git cd pytorch3d # 环境变量(强制CUDA11.8 + 4线程编译) export CUDA_HOME=/usr/local/cuda-11.8 export FORCE_CUDA=1 export MAX_JOBS=4 # 编译安装 pip install . --no-build-isolation # 验证 python -c "import pytorch3d; print('✅ pytorch3d 安装成功')"四、安装nvdiffrast
打开这个链接,浏览器会自动开始下载 nvdiffrast 压缩包: 👉 https://github.com/NVlabs/nvdiffrast/archive/refs/heads/master.zip
回到终端
# 1. 把下载好的 zip 包挪到当前目录(你下载的文件一般在 ~/下载/) mv ~/下载/nvdiffrast-master.zip . # 2. 解压 unzip nvdiffrast-master.zip # 3. 把文件夹重命名成项目需要的名字 nvdiffrast mv nvdiffrast-master nvdiffrast切到nvdiffrast-main文件夹里
cd ~/ros2_ws/FoundationPose/nvdiffrast-main ls修改setup.py文件成下面
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. # # NVIDIA CORPORATION and its licensors retain all intellectual property # and proprietary rights in and to this software, related documentation # and any modifications thereto. Any use, reproduction, disclosure or # distribution of this software and related documentation without an express # license agreement from NVIDIA CORPORATION is strictly prohibited. import setuptools import os import torch from torch.utils.cpp_extension import BuildExtension, CUDAExtension # Print an error message if there's no PyTorch installed. #try: #from torch.utils.cpp_extension import BuildExtension, CUDAExtension #except ImportError: # This happens if the user runs 'pip install' with default build isolation # OR if they simply don't have torch installed at all. #print("\n\n" + "*" * 70) #print("ERROR! Cannot compile nvdiffrast CUDA extension. Please ensure that:\n") #print("1. You have PyTorch installed") #print("2. You run 'pip install' with --no-build-isolation flag") #print("*" * 70 + "\n\n") #exit(1) setuptools.setup( ext_modules=[ CUDAExtension( "_nvdiffrast_c", sources=[ "csrc/common/antialias.cu", "csrc/common/common.cpp", "csrc/common/cudaraster/impl/Buffer.cpp", "csrc/common/cudaraster/impl/CudaRaster.cpp", "csrc/common/cudaraster/impl/RasterImpl.cpp", "csrc/common/cudaraster/impl/RasterImpl_kernel.cu", "csrc/common/interpolate.cu", "csrc/common/rasterize.cu", "csrc/common/texture.cpp", "csrc/common/texture_kernel.cu", "csrc/torch/torch_antialias.cpp", "csrc/torch/torch_bindings.cpp", "csrc/torch/torch_interpolate.cpp", "csrc/torch/torch_rasterize.cpp", "csrc/torch/torch_texture.cpp", ], extra_compile_args={ "cxx": ["-DNVDR_TORCH"] # Disable warnings in torch headers. + (["/wd4067", "/wd4624", "/wd4996"] if os.name == "nt" else []), "nvcc": ["-DNVDR_TORCH", "-lineinfo"], }, ) ], cmdclass={"build_ext": BuildExtension}, )降级setuptools
conda activate ros2_tensorrt # 降级到 81.x(最后一个带 pkg_resources 的版本) pip install "setuptools<82" --force-reinstall 验证 pkg_resources python -c "import pkg_resources; print('✅ pkg_resources OK')"回去编译nvdiffrast
cd ~/ros2_ws/FoundationPose/nvdiffrast-main rm -rf build dist *.egg-info python setup.py build_ext --inplace pip install . --no-build-isolation #最后测试 python -c "import nvdiffrast; print('✅ nvdiffrast 安装成功')"最后在 ~/ros2_ws/FoundationPose 下验证
python -c "from estimater import FoundationPose; print('✅ FoundationPose 导入成功!可以跑6D位姿了!')"会看到
✅ FoundationPose 导入成功!可以跑6D位姿了!3.编译
编译如下:
cd ~/ros2_ws colcon build --packages-select camera_info_get source install/setup.bash ros2 run camera_info_get get_camera_info或者直接运行
conda activate ros2_tensorrt cd ~/ros2_ws source install/setup.bash python -m camera_info_get.get_camera_info