【免费】COCO格式数据集处理需要的各种脚本文件资源-CSDN文库

共15个文件

py：7个

xml：5个

gitignore：1个

数据集

需积分: 0 88 浏览量 2023-06-21 14:21:32 上传评论收藏 12KB ZIP 举报

资源推荐

资源详情

资源评论

收起资源包目录

COCO格式数据集处理.zip （15个子文件）

rename.py 1KB

repath.py 795B

png2jpg.py 655B

.idea

.name 10B

DataProcessing.iml 334B

workspace.xml 8KB

misc.xml 198B

inspectionProfiles

Project_Default.xml 3KB

profiles_settings.xml 174B

modules.xml 287B

.gitignore 50B

relabel.py 1KB

sumcoco.py 6KB

delabel.py 1KB

labelme2coco.py 9KB

# Command-line usage:
#   python labelme2coco.py --input_dir original --output_dir coco --labels label.txt
# The output directory must not already exist (main() exits if it does).
# -*- coding:utf-8 -*-

import argparse
import collections
import datetime
import glob
import json
import os
import os.path as osp
import sys
import uuid

import imgviz
import numpy as np
import labelme
from sklearn.model_selection import train_test_split

try:
    import pycocotools.mask
except ImportError:
    print("Please install pycocotools:\n\n    pip install pycocotools\n")
    sys.exit(1)


def to_coco(args, label_files, train):
    """Convert labelme JSON files into one COCO split (images + annotations).

    For every label file the embedded image is written to
    ``<output_dir>/train`` or ``<output_dir>/val`` and its shapes are turned
    into COCO annotations. The aggregated result is written to
    ``<output_dir>/annotations/instances_train.json`` or
    ``instances_val.json``.

    Args:
        args: Parsed command-line namespace; ``args.labels`` (path of the
            labels file) and ``args.output_dir`` are read here.
        label_files: Sequence of labelme ``.json`` paths. The position of a
            file in this sequence becomes its COCO image id.
        train: If true, write the training split; otherwise the validation
            split.

    Raises:
        AssertionError: If the first line of the labels file is not
            ``__ignore__``.
    """
    # Top-level COCO structure that collects everything for this split.
    now = datetime.datetime.now()
    data = dict(
        info=dict(
            description=None,
            url=None,
            version=None,
            year=now.year,
            contributor=None,
            date_created=now.strftime("%Y-%m-%d %H:%M:%S.%f"),
        ),
        licenses=[dict(url=None, id=0, name=None)],
        images=[
            # license, url, file_name, height, width, date_captured, id
        ],
        type="instances",
        annotations=[
            # segmentation, area, iscrowd, image_id, bbox, category_id, id
        ],
        categories=[
            # supercategory, id, name
        ],
    )

    # Build the {class name: id} mapping and register each class as a COCO
    # category. The file is opened in a ``with`` block so the handle is
    # closed deterministically.
    class_name_to_id = {}
    with open(args.labels) as labels_file:
        for i, line in enumerate(labels_file):
            class_id = i - 1  # starts with -1
            class_name = line.strip()  # drop surrounding whitespace/newline
            if class_id == -1:
                # The first line is a sentinel and gets no category entry.
                assert class_name == "__ignore__"  # background:0, class1:1, ,,
                continue
            class_name_to_id[class_name] = class_id
            data["categories"].append(
                dict(supercategory=None, id=class_id, name=class_name)
            )

    split_name = "train" if train else "val"
    out_ann_file = osp.join(
        args.output_dir, "annotations", "instances_" + split_name + ".json"
    )

    for image_id, filename in enumerate(label_files):
        label_file = labelme.LabelFile(filename=filename)

        # Read the original image extension from the label JSON. The file is
        # opened through ``filename`` itself (the path LabelFile just loaded)
        # rather than a path rebuilt from input_dir + basename, which fails
        # for label files that do not sit directly inside input_dir.
        with open(filename, "r", encoding="utf-8") as jf:
            info = json.load(jf)
        suffix = osp.splitext(info["imagePath"])[-1]
        base = osp.splitext(osp.basename(filename))[0]  # name without extension

        out_img_file = osp.join(args.output_dir, split_name, base + suffix)
        print("| ", out_img_file)

        # ---- image handling ------------------------------------------------
        # The label JSON embeds the image data; decode it and save it into
        # the folder of the current split (train/ or val/).
        img = labelme.utils.img_data_to_arr(label_file.imageData)
        imgviz.io.imsave(out_img_file, img)

        # ---- annotation handling -------------------------------------------
        data["images"].append(
            dict(
                license=0,
                url=None,
                file_name=osp.relpath(out_img_file, osp.dirname(out_ann_file)),
                height=img.shape[0],
                width=img.shape[1],
                date_captured=None,
                id=image_id,
            )
        )

        masks = {}  # for area
        segmentations = collections.defaultdict(list)  # for segmentation
        for shape in label_file.shapes:
            points = shape["points"]
            label = shape["label"]
            group_id = shape.get("group_id")
            shape_type = shape.get("shape_type", "polygon")
            mask = labelme.utils.shape_to_mask(img.shape[:2], points, shape_type)

            # Shapes without a group id get a unique one, so each of them
            # forms its own instance.
            if group_id is None:
                group_id = uuid.uuid1()

            instance = (label, group_id)

            # Shapes sharing (label, group_id) are merged into one mask.
            if instance in masks:
                masks[instance] = masks[instance] | mask
            else:
                masks[instance] = mask

            if shape_type == "rectangle":
                # Expand the two corner points into a four-corner polygon.
                (x1, y1), (x2, y2) = points
                x1, x2 = sorted([x1, x2])
                y1, y2 = sorted([y1, y2])
                points = [x1, y1, x2, y1, x2, y2, x1, y2]
            else:
                points = np.asarray(points).flatten().tolist()

            segmentations[instance].append(points)
        segmentations = dict(segmentations)

        for instance, mask in masks.items():
            cls_name, group_id = instance
            # Labels that are not listed in the labels file are skipped.
            if cls_name not in class_name_to_id:
                continue
            cls_id = class_name_to_id[cls_name]

            mask = np.asfortranarray(mask.astype(np.uint8))
            mask = pycocotools.mask.encode(mask)
            area = float(pycocotools.mask.area(mask))
            bbox = pycocotools.mask.toBbox(mask).flatten().tolist()

            data["annotations"].append(
                dict(
                    id=len(data["annotations"]),
                    image_id=image_id,
                    category_id=cls_id,
                    segmentation=segmentations[instance],
                    area=area,
                    bbox=bbox,
                    iscrowd=0,
                )
            )

        # NOTE: visualization output is disabled in this script. The former
        # implementation lived here inside an unused string literal and has
        # been dropped.

    # After all label files have been aggregated into ``data``, write the
    # combined annotation file for this split.
    with open(out_ann_file, "w") as f:
        json.dump(data, f)


# Program entry point.
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("--input_dir", help="input annotated directory")
    parser.add_argument("--output_dir", help="output dataset directory")
    parser.add_argument("--labels", help="labels file", required=True)
    parser.add_argument("--noviz", help="no visualization", action="store_true")
    args = parser.parse_args()

    if osp.exists(args.output_dir):
        print("Output directory already exists:", args.output_dir)
        sys.exit(1)
    os.makedirs(args.output_dir)
    print("| Creating dataset dir:", args.output_dir)
    # if not args.noviz:
    #     os.makedirs(osp.join(args.output_dir, "visualization"))

    # Create the output folders.
    if not os.path.exists(osp.join(args.output_dir, "annotations")):
        os.makedirs(osp.join(args.output_dir, "annotations"))
    # NOTE(review): the source preview is cut off at this point, in the middle
    # of the next statement (`if not os.pat`). The remainder of main() is not
    # available and has deliberately not been reconstructed.

评论收藏

内容反馈