import os
import shutil
import random
def numebr_of_files(folder1, folder2):
    """Return the entry count shared by two folders, or report a mismatch.

    Every name returned by ``os.listdir`` is counted, so sub-directories
    are included in the totals.

    Args:
        folder1: Path of the first directory.
        folder2: Path of the second directory.

    Returns:
        The number of entries when both folders hold the same amount.
        When the amounts differ, both counts are printed and ``None`` is
        returned.
    """
    count1 = len(os.listdir(folder1))
    count2 = len(os.listdir(folder2))

    # Guard clause: on a mismatch only report the two counts.
    if count1 != count2:
        print('folder1:', count1)
        print('folder2:', count2)
        return None

    return count2
def get_file_extensions(folder_path):
    """Report the file extension(s) used by the entries of a folder.

    The extension of every name returned by ``os.listdir`` is taken with
    ``os.path.splitext`` and de-duplicated.  Entries without an extension
    (sub-directories, dot-files, extensionless files) contribute the empty
    string ``''``.

    Args:
        folder_path: Directory whose entries are inspected.

    Returns:
        * A ``str`` such as ``'.jpg'`` when exactly one non-empty extension
          is present.  Extensionless entries are ignored for this decision,
          so a stray sub-directory no longer turns the result into a list
          that callers cannot concatenate to a file stem.
        * ``''`` when the folder is non-empty but no entry has an extension.
        * Otherwise a sorted ``list`` of the non-empty extensions, which is
          empty for an empty folder.
    """
    extensions = {
        os.path.splitext(entry)[1] for entry in os.listdir(folder_path)
    }
    named = extensions - {''}

    if len(named) == 1:
        return next(iter(named))
    if not named and extensions:
        # Only extensionless entries exist; keep reporting them as ''.
        return ''
    # Sorted so the result does not depend on set iteration order.
    return sorted(named)
def delete_different_files(folder1, folder2):
    """Delete from both folders every file whose base name has no counterpart.

    Two entries correspond when their names are equal after removing the
    final extension, so ``a.jpg`` in one folder matches ``a.txt`` in the
    other.  Any regular file whose base name does not occur in the other
    folder is removed; sub-directories are never deleted.

    Both folders are listed once up front and all decisions are taken from
    those listings.  Deleting files therefore cannot change which extension
    is looked for part-way through, and folders holding several extensions
    are handled as well.

    Args:
        folder1: Path of the first directory.
        folder2: Path of the second directory.
    """
    names1 = os.listdir(folder1)
    names2 = os.listdir(folder2)
    stems1 = {os.path.splitext(name)[0] for name in names1}
    stems2 = {os.path.splitext(name)[0] for name in names2}

    for folder, names, other_stems in (
        (folder1, names1, stems2),
        (folder2, names2, stems1),
    ):
        for name in names:
            if os.path.splitext(name)[0] in other_stems:
                continue  # a same-named entry exists on the other side
            path = os.path.join(folder, name)
            if os.path.isfile(path):
                os.remove(path)
def copy_img_and_label(img_list, img_dir, label_dir, img_save_dir, label_save_dir):
    """Copy each listed image together with its same-named label file.

    The label of an image is the file in ``label_dir`` whose name is the
    image name with its final extension replaced by the extension reported
    by ``get_file_extensions(label_dir)``.  An image is copied only when
    that label exists; images without a label are skipped.

    Args:
        img_list: Image file names (not paths) to process.
        img_dir: Directory containing the images.
        label_dir: Directory containing the label files.
        img_save_dir: Existing directory that receives the images.
        label_save_dir: Existing directory that receives the labels.

    Raises:
        TypeError: If ``label_dir`` holds more than one file extension, so
            the label name of an image cannot be derived.
    """
    if not img_list:
        return

    # Loop-invariant: the label extension is looked up once instead of
    # re-listing label_dir for every image.
    label_ext = get_file_extensions(label_dir)
    if isinstance(label_ext, list):
        if not label_ext:
            # No extension was found at all, so no label can match.
            return
        if len(label_ext) > 1:
            raise TypeError(
                'label_dir contains several file extensions: %r' % (label_ext,)
            )
        label_ext = label_ext[0]

    for img_name in img_list:
        label_name = os.path.splitext(img_name)[0] + label_ext
        label_path = os.path.join(label_dir, label_name)
        if not os.path.exists(label_path):
            continue
        shutil.copyfile(label_path, os.path.join(label_save_dir, label_name))
        shutil.copyfile(
            os.path.join(img_dir, img_name),
            os.path.join(img_save_dir, img_name),
        )
def delete_img_and_label_from_tar_dir(img_dir, label_dir, img_tar_dir, label_tar_dir):
    """Remove from the target folders the image/label pairs of the source folders.

    For every entry of ``img_dir`` the matching label name is the image
    name with its final extension replaced by the extension reported by
    ``get_file_extensions(label_dir)``.  The pair is deleted from
    ``img_tar_dir`` / ``label_tar_dir`` only when the label exists in
    ``label_dir`` and both target files exist.  Anything else is left
    untouched, so a missing file can no longer make ``os.remove`` fail.

    Args:
        img_dir: Directory listing the images to remove from the target.
        label_dir: Directory holding the labels that belong to ``img_dir``.
        img_tar_dir: Directory the images are deleted from.
        label_tar_dir: Directory the labels are deleted from.

    Raises:
        TypeError: If ``label_dir`` holds more than one file extension, so
            the label name of an image cannot be derived.
    """
    img_list = os.listdir(img_dir)
    if not img_list:
        return

    # Loop-invariant: query the label extension once, not once per image.
    label_ext = get_file_extensions(label_dir)
    if isinstance(label_ext, list):
        if not label_ext:
            # No extension was found at all, so no label can match.
            return
        if len(label_ext) > 1:
            raise TypeError(
                'label_dir contains several file extensions: %r' % (label_ext,)
            )
        label_ext = label_ext[0]

    for img_name in img_list:
        # Use the full stem, as copy_img_and_label does, so that
        # 'frame.001.jpg' maps to 'frame.001<ext>' and not to 'frame<ext>'.
        label_name = os.path.splitext(img_name)[0] + label_ext
        label_path = os.path.join(label_dir, label_name)
        img_tar_path = os.path.join(img_tar_dir, img_name)
        label_tar_path = os.path.join(label_tar_dir, label_name)

        if (
            os.path.exists(label_path)
            and os.path.isfile(label_tar_path)
            and os.path.isfile(img_tar_path)
        ):
            os.remove(label_tar_path)
            os.remove(img_tar_path)
def create_datasets(img_dir, label_dir, save_dir, test=False):
    """Split an image/label dataset into train/val(/test) folders.

    Creates ``save_dir/images/<split>`` and ``save_dir/labels/<split>`` for
    every split, shuffles the entries of ``img_dir`` with a fixed seed and
    copies each image with its label via ``copy_img_and_label`` (images
    without a label are skipped there).

    Split sizes:
        * ``test=False``: 70 % train, remaining 30 % val.
        * ``test=True``: 70 % train, 20 % val, remaining entries test.

    Args:
        img_dir: Directory containing the source images.
        label_dir: Directory containing the source labels.
        save_dir: Root directory of the generated dataset.
        test: Also create and fill a ``test`` split when true.
    """
    split_names = ['train', 'val', 'test'] if test else ['train', 'val']

    # Create the output tree: <save_dir>/{images,labels}/<split>.
    split_dirs = {}
    for split in split_names:
        img_split_dir = os.path.join(save_dir, 'images', split)
        label_split_dir = os.path.join(save_dir, 'labels', split)
        os.makedirs(img_split_dir, exist_ok=True)
        os.makedirs(label_split_dir, exist_ok=True)
        split_dirs[split] = (img_split_dir, label_split_dir)

    # os.listdir returns names in arbitrary order; sort first so the seeded
    # shuffle yields the same split on every machine and run.
    img_list = sorted(os.listdir(img_dir))
    print('The nuber of dataset is:', len(img_list))

    # A private generator keeps the fixed seed without reseeding the
    # process-wide random state.
    random.Random(0).shuffle(img_list)

    train_size = int(0.7 * len(img_list))
    split_files = {'train': img_list[:train_size]}
    if test:
        val_size = int(0.2 * len(img_list))
        val_end = train_size + val_size
        split_files['val'] = img_list[train_size:val_end]
        split_files['test'] = img_list[val_end:]  # whatever remains
    else:
        split_files['val'] = img_list[train_size:]

    for split in split_names:
        img_split_dir, label_split_dir = split_dirs[split]
        copy_img_and_label(
            split_files[split], img_dir, label_dir,
            img_split_dir, label_split_dir,
        )

    # numebr_of_files yields None (after printing both counts) when the
    # image and label folders of a split disagree.
    for split in split_names:
        img_split_dir, label_split_dir = split_dirs[split]
        print(
            'The number of ' + split + ' is:',
            numebr_of_files(img_split_dir, label_split_dir),
        )
if __name__ == '__main__':
# 指定两个文件夹的路径
# folder1 = '/home/ai/workspace_hxy/project/yolov5/datasets/person_detect3/images/val'
# folder2 = '/home/ai/workspace_hxy/project/yolov5/datasets/preson_iou/dataset/images/val'
# # # # # 计算两个文件夹中文件数量
# n = numebr_of_files(folder1, folder2)
# print(n)
# # 调用函数删除名称不同的文件
# delete_different_files(folder1, folder2)
# #将图片与对应(同名)标签复制到指定文件夹
# img_dir = r"/home/ai/workspace_hxy/project/yolov5/datasets/preson_iou/person60/images" # 原文件夹路径
# label_dir = r'/home/ai/workspace_hxy/project/yolov5/datasets/preson_iou/person60/labels' # txt 路径
img_dir = r"/home/ai/workspace_hxy/data/camus_single/images" # 原文件夹路径
label_dir = r'/home/ai/workspace_hxy/data/camus_single/labels' # txt 路径
save_dir =r'/home/ai/workspace_hxy/data/camus_datasets/camus_single' # 保存地址
create_datasets(img_dir,label_dir,save_dir,test=False)
# # create_datasets(img_dir,label_dir,save_dir,test=True)
# #删除相同的文件
# img_dir = r'/home/ai/workspace_hxy/project/yolov5/datasets/preson_iou/all/images'
# label_dir = r'/home/ai/workspace_hxy/project/yolov5/datasets/preson_iou/all/labels'
# img_tar_dir = r'/home/ai/workspace_hxy/project/yolov5/datasets/person_detect2/images/val'
# label_tar_dir = r'/home/ai/workspac