Vision) 3d 모델을 이용해 무한의 dataset을 생성할 수 있을까

카테고리 없음

Vision) 3d 모델을 이용해 무한의 dataset을 생성할 수 있을까

MightyTedKim 2025. 5. 26. 02:32

728x90

모든 AI 분야에서는 학습 데이터셋을 준비하는게 큰 과제입니다

Vision 분야에서도 데이터셋을 만드는 방법이 GAN 등 여러 가지가 있지만
3D 데이터를 이용해 2D 이미지 셋을 만드는 방법에 대해 고민해보았습니다

가설

3D 데이터로 생성한 Vision Dataset을 이용해, 현실 세계에서도 탐지가 되는지 확인한다.

조건

2GB 이내 3D 데이터 -> 개인 컴퓨터로 진행하기 때문에 용량이 너무 크면 인식을 못할 듯

1. 데이터 준비

* 3D 데이터셋

이름: 금제 관모
형식: FBX
크기: 1.4GB
출처: https://digital.khs.go.kr/heritage/heriTage.do

2D 데이터셋
- 비슷하게 생겼지만 약간 다른 관모, 이걸 구분할 수 있을지 테스트해보려고 합니다

2. 3D -> 2D 변환

금제 관모는 일단 홈페이지에서 다운받으니 1.39GB 입니다.

FBX 는 파일 1개 44MB인데
Texture가 1.4GB나 됩니다. 금색 질감을 표현하는 데 용량이 엄청 큰가 봅니다.

어쨌든 간단한 웹페이지를 만들어 봅니다. 대충 각도를 설정하고 여러 장 캡처해서 zip 파일을 만듭니다.

불빛 각도 등 세부 조정을 따로 백엔드에서 돌려야겠어요.
저장하기로 한 이미지 수 × 5장을 자동으로 돌려줍니다.
1이면 5장, 2면 10장, 100이면 500장 (알아서 각도 계산해서 쭉 돌려줍니다)

500장을 캡처했습니다.

- 이제 데이터셋을 만들어줍니다
classification을 하려면 제가 만든 캡처 이미지만으로도 충분하겠지만
저는 OBB(사각형)를 만들 거예요

SAM2를 이용해서 라벨을 생성해줍니다
bbox와 좌표를 이용하는 방법이 있는데요

저는 좌표를 선택하는 방식을 이용했습니다
검은색이 아닌 부분의 가장 중간 좌표를 구해서, 그걸 찍어서 테두리를 구했어요
저의 방식에서는 그게 맞으니까요

```

import numpy as np

import cv2

import matplotlib.pyplot as plt

import json

from pathlib import Path

from datetime import datetime

# === SETUP ===
# Folder holding the captured renders, and the COCO label file to produce.
IMAGE_DIR = Path("images/sample")
OUTPUT_JSON = Path("labels/sample/coco_format.json")

# Create the label directory up front so the final save cannot fail on it.
OUTPUT_JSON.parent.mkdir(parents=True, exist_ok=True)

# Every entry matching "*.*", in sorted order. Files that OpenCV cannot read
# are skipped later by the labeling loop.
image_paths = list(IMAGE_DIR.glob("*.*"))
image_paths.sort()

# === Assume predictor is already initialized ===
# The labeling loop expects a global `predictor`, e.g.:
# from segment_anything import SamPredictor, sam_model_registry
# sam = sam_model_registry["vit_h"](checkpoint="sam_vit_h.pth").to("cuda")
# predictor = SamPredictor(sam)

def mask_to_segmentation(mask):
    """Convert a binary mask into a single flat COCO polygon.

    Only the external contour with the largest area is kept; holes and
    smaller disconnected regions of the mask are ignored.

    Args:
        mask: Array that is non-zero on the object (bool or numeric).
            Assumed to be a single-channel 2-D mask, as required by
            ``cv2.findContours``.

    Returns:
        A flat ``[x1, y1, x2, y2, ...]`` list of floats, or ``[]`` when the
        mask has no contour or its largest contour has fewer than three
        points.
    """
    mask_uint8 = mask.astype(np.uint8)
    contours, _ = cv2.findContours(
        mask_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    if not contours:
        return []

    largest = max(contours, key=cv2.contourArea)
    # A polygon needs at least three vertices. A 1- or 2-point contour would
    # produce a 2- or 4-number list, which is not a valid COCO polygon (and a
    # 4-number list can be mistaken for a bbox by COCO tooling).
    if len(largest) < 3:
        return []
    return largest.flatten().astype(float).tolist()

# === COCO base structure ===
# Skeleton of the COCO document; "images" and "annotations" are filled in by
# the labeling loop. "info" must be closed before "licenses" so that the
# remaining keys sit at the top level of the document.
coco = {
    "info": {
        "description": "SAM2 Batch Dataset",
        "version": "1.0",
        "year": 2024,
        "contributor": "You",
        "date_created": datetime.today().strftime('%Y-%m-%d'),
    },
    "licenses": [{
        "id": 1,
        "name": "CC-BY-4.0",
        "url": "https://creativecommons.org/licenses/by/4.0/",
    }],
    "images": [],
    "annotations": [],
    # Single generic class; annotations reference it through category_id 0.
    "categories": [{
        "id": 0,
        "name": "object",
        "supercategory": "none",
    }],
}

# === ID counters ===
# COCO ids are assigned sequentially, only to images that get an annotation.
image_id = 1
annotation_id = 1

# The point prompt is the same for every image, so build it once.
input_point = np.array([[1055, 655]])  # fixed (x, y) click, in pixels
input_label = np.array([1])  # 1 = foreground

# === LOOP OVER IMAGES ===
# For each image: prompt the predictor with the fixed point, keep the
# best-scoring mask, preview it, and append one image + one annotation
# record to `coco`.
for image_path in image_paths:
    image = cv2.imread(str(image_path))
    if image is None:
        print(f"⚠️ Skipping unreadable: {image_path.name}")
        continue

    h, w = image.shape[:2]

    # A prompt outside the frame cannot select the object; skip instead of
    # writing a meaningless label.
    px, py = input_point[0]
    if not (0 <= px < w and 0 <= py < h):
        print(f"⚠️ Input point outside image, skipping: {image_path.name}")
        continue

    # cv2.imread returns BGR, while SAM predictors take RGB by default.
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    predictor.set_image(image_rgb)

    masks, scores, _ = predictor.predict(
        point_coords=input_point,
        point_labels=input_label,
        multimask_output=True,
    )

    # Keep the candidate mask with the highest predicted score.
    sorted_idx = np.argsort(scores)[::-1]
    best_mask = masks[sorted_idx[0]]
    best_score = scores[sorted_idx[0]]

    segmentation = mask_to_segmentation(best_mask)
    if not segmentation:
        print(f"❌ No valid mask for: {image_path.name}")
        continue

    # Derive bbox and area from the stored polygon so all three fields
    # describe the same region. Stray mask pixels outside the largest
    # contour no longer inflate the box, and "area" is the segment area
    # rather than the bbox area.
    polygon = np.array(segmentation).reshape(-1, 2).astype(np.int32)
    x, y, w_box, h_box = cv2.boundingRect(polygon)
    bbox = [x, y, w_box, h_box]
    area = float(cv2.contourArea(polygon))

    # === VISUALIZATION ===
    plt.figure(figsize=(8, 8))
    plt.imshow(image_rgb)
    plt.imshow(best_mask, alpha=0.5, cmap='Reds')
    plt.scatter(input_point[:, 0], input_point[:, 1],
                c='lime', s=200, marker='*', label='Input Point')
    plt.gca().add_patch(plt.Rectangle((x, y), w_box, h_box,
                                      edgecolor='yellow', linewidth=2, fill=False))
    plt.title(f"{image_path.name} | Score: {best_score:.3f}")
    plt.axis("off")
    plt.legend()
    plt.show()

    # === Store in COCO ===
    coco["images"].append({
        "id": image_id,
        "file_name": image_path.name,
        "width": w,
        "height": h,
        "license": 1,
        "date_captured": datetime.today().strftime('%Y-%m-%d'),
    })
    coco["annotations"].append({
        "id": annotation_id,
        "image_id": image_id,
        "category_id": 0,
        "bbox": bbox,
        "area": area,
        "segmentation": [segmentation],
        "iscrowd": 0,
        "score": float(best_score),
    })

    image_id += 1
    annotation_id += 1

# === SAVE COCO JSON ===
# Write the accumulated COCO document to disk and report where it went.
with OUTPUT_JSON.open("w") as out_file:
    json.dump(coco, out_file, indent=2)

print(f"✅ COCO format saved to: {OUTPUT_JSON.resolve()}")

```

이제 반복문을 돌려줍니다

import json

import cv2

import numpy as np

import matplotlib.pyplot as plt

from pathlib import Path

from collections import defaultdict

def draw_coco_annotations(image_folder: str, coco_json_path: str, show_all=False):
    """
    Draw COCO-format annotations (bbox + segmentation + label) on top of images.

    Each image listed in the COCO file is loaded from ``image_folder`` and
    shown with matplotlib. Every polygon of every annotation is drawn as a
    closed outline; non-polygon segmentations (e.g. RLE dicts) and malformed
    coordinate lists are skipped instead of raising.

    Args:
        image_folder: Directory containing the files named in ``file_name``.
        coco_json_path: Path to the COCO JSON file.
        show_all: If False, stop after the first image that could be read.

    Returns:
        None. Figures are displayed via ``plt.show()``.
    """
    image_dir = Path(image_folder)
    json_path = Path(coco_json_path)

    # JSON is UTF-8 by specification; do not rely on the platform default.
    with open(json_path, 'r', encoding='utf-8') as f:
        coco = json.load(f)

    image_id_map = {img["id"]: img for img in coco["images"]}

    # Group annotations by the image they belong to.
    image_to_anns = defaultdict(list)
    for ann in coco["annotations"]:
        image_to_anns[ann["image_id"]].append(ann)

    category_id_to_name = {cat["id"]: cat["name"] for cat in coco["categories"]}

    for image_id, image_info in image_id_map.items():
        file_name = image_info["file_name"]
        img_path = image_dir / file_name

        img = cv2.imread(str(img_path))
        if img is None:
            print(f"[SKIP] {file_name} not found.")
            continue

        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        plt.figure(figsize=(10, 10))
        plt.imshow(img_rgb)
        ax = plt.gca()

        for ann in image_to_anns.get(image_id, []):
            cat_id = ann["category_id"]
            cat_name = category_id_to_name.get(cat_id, str(cat_id))

            # Draw bbox
            x, y, w, h = ann["bbox"]
            rect = plt.Rectangle((x, y), w, h, linewidth=2,
                                 edgecolor='lime', facecolor='none')
            ax.add_patch(rect)

            # Draw label
            ax.text(x, y - 5, cat_name,
                    color='lime', fontsize=10,
                    bbox=dict(facecolor='black', alpha=0.5, pad=2))

            # Draw segmentation: polygon format is a list of flat
            # [x1, y1, x2, y2, ...] lists; anything else is not drawn.
            segmentation = ann.get("segmentation")
            if not isinstance(segmentation, list):
                continue
            for seg in segmentation:
                if len(seg) < 4 or len(seg) % 2:
                    continue
                # Repeat the first vertex so the outline is closed.
                closed = list(seg) + list(seg[:2])
                pts = np.array(closed).reshape(-1, 2)
                ax.plot(pts[:, 0], pts[:, 1], color='red', linewidth=2)

        plt.title(file_name)
        plt.axis("off")
        plt.show()

        if not show_all:
            break

# Preview the generated labels on every image (pass show_all=False to view
# just the first one).
draw_coco_annotations(
    "images/sample",
    "labels/sample/coco_format.json",
    show_all=True,
)

json 파일을 읽어서 라벨을 확인해 줍니다

이제 학습만 하면 되겠군요

728x90

저작자표시 (새창열림)

현재글Vision) 3d 모델을 이용해 무한의 dataset을 생성할 수 있을까

기록의 공간 :: mightytedkim

Vision) 3d 모델을 이용해 무한의 dataset을 생성할 수 있을까

1. 데이터 준비

'카테고리 없음'의 다른글

티스토리툴바