Modify_json_ImagePath
import json
import shutil
from pathlib import Path
from typing import Union


def modify_image_paths(
    base_dir: Union[str, Path],
    image_relative_path: str = "../images",
    backup: bool = True,
    verbose: bool = True,
) -> None:
    """Batch-rewrite the imagePath field in LabelMe JSON files.

    Args:
        base_dir: Root directory containing the JSON files to process.
        image_relative_path: New relative path to the images (relative to each
            JSON file), or the special value "@filename" to set imagePath to
            "<json stem>.jpg".
        backup: Create backup copies before modifying (default True).
        verbose: Print per-file progress (default True).
    """
    base_dir = Path(base_dir)
    if not base_dir.exists():
        raise FileNotFoundError(f"Directory does not exist: {base_dir}")
    backup_dir = base_dir.parent / f"{base_dir.name}_backup"
    if backup:
        backup_dir.mkdir(exist_ok=True)
    json_files = list(base_dir.glob("**/*.json"))
    total_files = len(json_files)
    for idx, json_path in enumerate(json_files, 1):
        try:
            if backup:
                backup_path = backup_dir / json_path.relative_to(base_dir)
                backup_path.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(json_path, backup_path)
            with open(json_path, 'r+', encoding='utf-8') as f:
                data = json.load(f)
                if image_relative_path == "@filename":
                    new_path = f"{json_path.stem}.jpg"
                else:
                    new_path = Path(image_relative_path) / Path(data["imagePath"]).name
                data["imagePath"] = str(new_path).replace("\\", "/")
                # Rewrite in place: rewind, dump, then truncate any leftover bytes.
                f.seek(0)
                json.dump(data, f, indent=4, ensure_ascii=False)
                f.truncate()
            if verbose:
                print(f"[{idx}/{total_files}] ✅ Updated: {json_path} → {data['imagePath']}")
        except KeyError:
            print(f"⚠️ File is missing the imagePath field: {json_path}")
        except json.JSONDecodeError as e:
            print(f"❌ JSON parsing failed: {json_path}\nDetails: {e}")
        except Exception as e:
            print(f"⛔ Unexpected error: {type(e).__name__} - {e}")


if __name__ == "__main__":
    INPUT_DIR = "/home/liweijia/Data/Mango_1000_Split/val/annotations"
    NEW_RELATIVE_PATH = "@filename"
    modify_image_paths(
        base_dir=INPUT_DIR,
        image_relative_path=NEW_RELATIVE_PATH,
        backup=True,
        verbose=True,
    )
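A minimal smoke test for the function above, assuming nothing about the real dataset: it creates a throwaway annotation in a temporary directory, runs the rewrite in "@filename" mode, and prints the result. The file name mango_001.json is illustrative only.

import tempfile

with tempfile.TemporaryDirectory() as tmp:
    ann_dir = Path(tmp) / "annotations"
    ann_dir.mkdir()
    sample = ann_dir / "mango_001.json"
    sample.write_text(json.dumps({"imagePath": "../old/mango_001.png"}))
    modify_image_paths(ann_dir, image_relative_path="@filename", backup=False)
    print(json.loads(sample.read_text())["imagePath"])  # -> mango_001.jpg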
Coco_format
import os
import json
import shutil
from PIL import Image

data_images_dir = '/home/liweijia/Data/Mango_1000_Split/test/images'
data_annotations_dir = '/home/liweijia/Data/Mango_1000_Split/test/annotations'
output_dir = '/home/liweijia/Data/Mango_1000_Aug_COCO'
output_images_dir = os.path.join(output_dir, 'test/images')
os.makedirs(output_images_dir, exist_ok=True)

coco_data = {
    "images": [],
    "annotations": [],
    "categories": []
}
category_map = {}
category_id = 1
annotation_id = 1
image_id = 1

for json_file in os.listdir(data_annotations_dir):
    if not json_file.endswith('.json'):
        continue
    json_path = os.path.join(data_annotations_dir, json_file)
    with open(json_path, 'r') as f:
        data = json.load(f)
    # imagePath may carry a relative prefix such as "../images/"; keep only the file name.
    image_filename = os.path.basename(data.get('imagePath', json_file.replace('.json', '.jpg')))
    image_path = os.path.join(data_images_dir, image_filename)
    if not os.path.exists(image_path):
        print(f"Skipping {image_filename}: image file does not exist")
        continue
    with Image.open(image_path) as img:
        actual_width, actual_height = img.size
    json_width = data.get('imageWidth', actual_width)
    json_height = data.get('imageHeight', actual_height)
    # If the annotation was made at a different resolution, rescale the polygon points.
    if (json_width, json_height) != (actual_width, actual_height):
        scale_x = actual_width / json_width
        scale_y = actual_height / json_height
        for shape in data['shapes']:
            shape['points'] = [[x * scale_x, y * scale_y] for (x, y) in shape['points']]
    coco_data['images'].append({
        "id": image_id,
        "file_name": image_filename,
        "width": actual_width,
        "height": actual_height
    })
    shutil.copy(image_path, os.path.join(output_images_dir, image_filename))
    for shape in data['shapes']:
        label = shape['label']
        if label not in category_map:
            category_map[label] = category_id
            category_id += 1
        cat_id = category_map[label]
        points = shape['points']
        x_coords = [p[0] for p in points]
        y_coords = [p[1] for p in points]
        x_min, y_min = min(x_coords), min(y_coords)
        width = max(x_coords) - x_min
        height = max(y_coords) - y_min
        annotation = {
            "id": annotation_id,
            "image_id": image_id,
            "category_id": cat_id,
            # COCO polygons are flat [x1, y1, x2, y2, ...] lists.
            "segmentation": [sum(points, [])],
            "bbox": [x_min, y_min, width, height],
            "area": width * height,
            "iscrowd": 0
        }
        coco_data['annotations'].append(annotation)
        annotation_id += 1
    image_id += 1

for label, cat_id in category_map.items():
    coco_data['categories'].append({
        "id": cat_id,
        "name": label,
        "supercategory": "none"
    })

os.makedirs(output_dir, exist_ok=True)
with open(os.path.join(output_dir, 'annotations.json'), 'w') as f:
    json.dump(coco_data, f, indent=2)
print(f"COCO conversion complete: {image_id - 1} images converted")
Data_Augmentation
"""
Mango Dataset Augmentation with Original Preservation
Created: 2025-03-03
Author: AI Assistant
"""
import os
import json
import cv2
import albumentations as A
import numpy as np
from tqdm import tqdm


class Config:
    image_dir = "/home/liweijia/Data/Mango_1000_Split/train/images"
    json_dir = "/home/liweijia/Data/Mango_1000_Split/train/annotations"
    dest_dir = "/home/liweijia/Data/Mango_1000_Aug/train"
    augmentation_times = 7   # augmented copies per image (the original is also kept)
    min_polygon_area = 30    # minimum polygon area in px²; smaller shapes are dropped
    epsilon = 1e-6
    transform = A.Compose([
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.4),
        A.RandomRotate90(p=0.3),
        A.ShiftScaleRotate(
            shift_limit=0.03,
            rotate_limit=15,
            interpolation=cv2.INTER_NEAREST,
            border_mode=cv2.BORDER_REPLICATE,
            p=0.4
        ),
        A.RandomBrightnessContrast(p=0.3),
        A.GaussianBlur(blur_limit=(2, 4), p=0.3),
        A.CLAHE(p=0.2),
        A.ColorJitter(
            brightness=0.1,
            contrast=0.1,
            saturation=0.05,
            hue=0.02,
            p=0.5
        ),
        A.ToGray(p=0.1),
        A.ChannelShuffle(p=0.05)
    ], keypoint_params=A.KeypointParams(
        format='xy',
        remove_invisible=False,
        angle_in_degrees=True
    ))
def validate_polygon(points: list, min_area: float) -> bool:
    """Check polygon validity using the shoelace formula for its area."""
    if len(points) < 3:
        return False
    x = np.array([p[0] for p in points])
    y = np.array([p[1] for p in points])
    area = 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1)))
    return area >= min_area
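# Quick sanity check of validate_polygon (added for illustration, not part of
# the original pipeline): a 10x10 axis-aligned square has shoelace area 100,
# which clears the default min_polygon_area of 30, while a two-point
# "polygon" is rejected before the area is even computed.
assert validate_polygon([[0, 0], [10, 0], [10, 10], [0, 10]], 30)
assert not validate_polygon([[0, 0], [10, 10]], 30)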
def safe_convert(value):
    """Convert numpy scalar types to native Python types (for JSON serialization)."""
    if isinstance(value, np.generic):
        return value.item()
    return value
def process_annotation(original_ann: dict, keypoints: list, new_size: tuple) -> dict:
    """Rebuild the annotation for a transformed image (with type conversion and precision handling)."""
    # Validate the structure once, before consuming any shapes.
    if "shapes" not in original_ann:
        print("⚠️ Invalid annotation structure: missing 'shapes' field")
        return None
    if not isinstance(original_ann["shapes"], list):
        print("⚠️ Invalid annotation structure: 'shapes' field is not a list")
        return None
    new_shapes = []
    idx = 0
    new_size = (int(new_size[0]), int(new_size[1]))
    for shape in original_ann["shapes"]:
        num_points = len(shape["points"])
        new_points = keypoints[idx:idx + num_points]
        idx += num_points
        valid_points = []
        for p in new_points:
            # Clamp transformed points back inside the image and snap near-zero values to 0.
            x = np.clip(p[0], 0, new_size[0] - 1)
            y = np.clip(p[1], 0, new_size[1] - 1)
            x = safe_convert(x)
            y = safe_convert(y)
            x = x if abs(x) > Config.epsilon else 0.0
            y = y if abs(y) > Config.epsilon else 0.0
            valid_points.append([x, y])
        if validate_polygon(valid_points, Config.min_polygon_area):
            new_shapes.append({
                "label": shape["label"],
                "points": valid_points,
                "group_id": safe_convert(shape.get("group_id")),
                "shape_type": "polygon",
                "flags": shape.get("flags", {})
            })
    return {
        "version": original_ann["version"],
        "flags": original_ann["flags"],
        "imagePath": original_ann["imagePath"],
        "imageData": None,
        "imageHeight": new_size[1],
        "imageWidth": new_size[0],
        "shapes": new_shapes
    }
def main():
    os.makedirs(os.path.join(Config.dest_dir, "images"), exist_ok=True)
    os.makedirs(os.path.join(Config.dest_dir, "labels", "json"), exist_ok=True)
    image_files = [f for f in os.listdir(Config.image_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
    for image_file in tqdm(image_files):
        image_path = os.path.join(Config.image_dir, image_file)
        json_base = os.path.splitext(image_file)[0]
        json_path = os.path.join(Config.json_dir, json_base + ".json")
        if not os.path.exists(json_path):
            print(f"⚠️ Missing annotation file: {json_path}, skipping this image")
            continue
        if os.path.getsize(json_path) == 0:
            print(f"⛔ Empty annotation file: {json_path}, skipping")
            continue
        # Check the imread result before converting: cv2.imread returns None on
        # failure, and cvtColor would raise on a None input.
        image = cv2.imread(image_path)
        if image is None:
            print(f"Could not read image file: {image_path}")
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        try:
            with open(json_path, 'r', encoding='utf-8-sig') as f:
                raw_content = f.read().strip()
                if not raw_content:
                    raise ValueError("File is empty")
                original_ann = json.loads(raw_content)
        except json.JSONDecodeError as e:
            print(f"❌ Malformed JSON: {json_path}\nDetails: {e}\nContext: {raw_content[:200]}...")
            continue
        except Exception as e:
            print(f"❌ Unexpected error: {json_path}\nType: {type(e).__name__}\nMessage: {e}")
            continue
        original_keypoints = []
        for shape in original_ann["shapes"]:
            original_keypoints.extend([[p[0], p[1]] for p in shape["points"]])
        # aug_idx 0 writes the untouched original; 1..augmentation_times write augmented copies.
        for aug_idx in range(Config.augmentation_times + 1):
            try:
                if aug_idx == 0:
                    transformed_image = image
                    transformed_keypoints = original_keypoints
                else:
                    augmented = Config.transform(image=image, keypoints=original_keypoints)
                    transformed_image = augmented["image"]
                    transformed_keypoints = augmented["keypoints"]
                new_ann = process_annotation(
                    original_ann=original_ann,
                    keypoints=transformed_keypoints,
                    new_size=(transformed_image.shape[1], transformed_image.shape[0])
                )
                if new_ann is None:
                    continue
                suffix = f"_{aug_idx}" if aug_idx > 0 else ""
                new_filename = os.path.splitext(image_file)[0] + suffix + ".jpg"
                cv2.imwrite(
                    os.path.join(Config.dest_dir, "images", new_filename),
                    cv2.cvtColor(transformed_image, cv2.COLOR_RGB2BGR)
                )
                new_ann["imagePath"] = new_filename
                with open(os.path.join(Config.dest_dir, "labels", "json", new_filename.replace(".jpg", ".json")), "w") as f:
                    json.dump(new_ann, f, indent=2)
            except Exception as e:
                print(f"Error processing {image_file} augmentation {aug_idx}: {e}")
                continue


if __name__ == "__main__":
    main()
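A minimal smoke test of the keypoint-aware pipeline, assuming an albumentations version that accepts the configuration above: push a dummy image with one keypoint through Config.transform and confirm the keypoint is mapped along with the pixels.

import numpy as np

dummy = np.zeros((64, 64, 3), dtype=np.uint8)
out = Config.transform(image=dummy, keypoints=[(10.0, 20.0)])
print(out["image"].shape, out["keypoints"])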
Data_split
import os
import random
import shutil
import time

data_images_dir = '/home/liweijia/Data/Mango_1000/images'
data_annotations_dir = '/home/liweijia/Data/Mango_1000/labels/json'
output_dir = '/home/liweijia/Data/Mango_1000_Split'
split_ratios = {'train': 0.7, 'val': 0.15, 'test': 0.15}

# Seed from the wall clock so every run produces a different shuffle.
random_seed = int(time.time() * 1000) % 2**32
random.seed(random_seed)

splits = ['train', 'val', 'test']
for split in splits:
    os.makedirs(os.path.join(output_dir, split, 'images'), exist_ok=True)
    os.makedirs(os.path.join(output_dir, split, 'annotations'), exist_ok=True)

all_json_files = [f for f in os.listdir(data_annotations_dir) if f.endswith('.json')]
random.shuffle(all_json_files)
total = len(all_json_files)
train_end = int(total * split_ratios['train'])
val_end = train_end + int(total * split_ratios['val'])
split_data = {
    'train': all_json_files[:train_end],
    'val': all_json_files[train_end:val_end],
    'test': all_json_files[val_end:]
}

for split, file_list in split_data.items():
    for json_file in file_list:
        base_name = os.path.splitext(json_file)[0]
        image_file = base_name + '.jpg'
        src_image_path = os.path.join(data_images_dir, image_file)
        src_json_path = os.path.join(data_annotations_dir, json_file)
        dst_image_path = os.path.join(output_dir, split, 'images', image_file)
        dst_json_path = os.path.join(output_dir, split, 'annotations', json_file)
        if not os.path.exists(src_image_path):
            print(f"Warning: image file {src_image_path} not found, skipping")
            continue
        if not os.path.exists(src_json_path):
            print(f"Warning: annotation file {src_json_path} not found, skipping")
            continue
        shutil.copy(src_image_path, dst_image_path)
        shutil.copy(src_json_path, dst_json_path)

print("Dataset split complete ✅")
Modify_Image_size
from PIL import Image
import os
import json


def resize_images(image_dir, json_file_path, expected_size):
    modified_images = []
    with open(json_file_path, 'r') as json_file:
        data = json.load(json_file)
    if "images" not in data:
        print("Error: JSON file does not contain 'images' key.")
        return
    for filename in os.listdir(image_dir):
        if filename.endswith('.jpg') or filename.endswith('.png'):
            image_path = os.path.join(image_dir, filename)
            image = Image.open(image_path)
            current_size = image.size
            print(f"Current size of {filename}: {current_size}")
            should_resize = False
            for annotation in data["images"]:
                if annotation['file_name'] == filename:
                    if (current_size[0] != expected_size[0] or current_size[1] != expected_size[1] or
                            annotation['width'] != expected_size[0] or annotation['height'] != expected_size[1]):
                        should_resize = True
                        break
            if should_resize:
                image = image.resize(expected_size, Image.BILINEAR)
                image.save(image_path)
                modified_images.append(filename)
                print(f"Resized image: {filename} from {current_size} to {expected_size}")
                for annotation in data["images"]:
                    if annotation['file_name'] == filename:
                        annotation['width'] = expected_size[0]
                        annotation['height'] = expected_size[1]
                        print(f"Updated annotation for {filename}: width={expected_size[0]}, height={expected_size[1]}")
    with open(json_file_path, 'w') as json_file:
        json.dump(data, json_file, indent=4)
    print(f"Updated JSON file: {json_file_path}")
    if not modified_images:
        print("No images were modified.")
    else:
        print(f"Total modified images: {len(modified_images)}")


image_directory = '/home/liweijia/SparseInst/MangoGroup_Augmented_Data_split/val/images'
json_file_path = '/home/liweijia/SparseInst/MangoGroup_Augmented_Data_split/val/annotations/instances_val.json'
expected_dimensions = (3072, 4096)
resize_images(image_directory, json_file_path, expected_dimensions)
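One caveat worth flagging: resize_images updates only the image-level width/height entries, so any bbox or segmentation coordinates in the same COCO file still refer to the old resolution. A hedged sketch of the extra pass that would keep them consistent, assuming standard COCO bbox [x, y, w, h] and polygon segmentations (rescale_annotations is a hypothetical helper, not part of the script above):

def rescale_annotations(data, filename, scale_x, scale_y):
    """Scale the bbox and polygon coordinates of one resized image (hypothetical helper)."""
    image_ids = {img['id'] for img in data['images'] if img['file_name'] == filename}
    for ann in data.get('annotations', []):
        if ann['image_id'] not in image_ids:
            continue
        x, y, w, h = ann['bbox']
        ann['bbox'] = [x * scale_x, y * scale_y, w * scale_x, h * scale_y]
        ann['area'] *= scale_x * scale_y
        # COCO polygons are flat [x1, y1, x2, y2, ...] lists.
        ann['segmentation'] = [
            [v * (scale_x if i % 2 == 0 else scale_y) for i, v in enumerate(poly)]
            for poly in ann['segmentation']
        ]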
RegisterDataSet
from detectron2.data import DatasetCatalog, MetadataCatalog


def register_coco_dataset(name, json_file, image_root):
    """
    Register a COCO-format dataset.

    Args:
        name (str): Dataset name.
        json_file (str): Path to the COCO-format JSON file.
        image_root (str): Path to the image directory.
    """
    from detectron2.data.datasets.coco import load_coco_json
    DatasetCatalog.register(name, lambda: load_coco_json(json_file, image_root, name))
    metadata = MetadataCatalog.get(name)
    metadata.set(thing_classes=["Branch", "Mango"])
    metadata.evaluator_type = "coco"


def register_datasets():
    """Register all dataset splits."""
    register_coco_dataset("my_dataset_train",
                          "/home/liweijia/Data/Mango_Group_Aug_V2_COCO/train/annotations.json",
                          "/home/liweijia/Data/Mango_Group_Aug_V2_COCO/train/images")
    register_coco_dataset("my_dataset_test",
                          "/home/liweijia/Data/Mango_Group_Aug_V2_COCO/test/annotations.json",
                          "/home/liweijia/Data/Mango_Group_Aug_V2_COCO/test/images")
    register_coco_dataset("my_dataset_val",
                          "/home/liweijia/Data/Mango_Group_Aug_V2_COCO/val/annotations.json",
                          "/home/liweijia/Data/Mango_Group_Aug_V2_COCO/val/images")
Calculate_Image
import os

image_dir = "/home/liweijia/Data/Mango_1000/images"
image_files = [f for f in os.listdir(image_dir) if f.endswith((".jpg", ".png", ".jpeg"))]
print(f"The folder contains {len(image_files)} images.")
Calculate_categories_of_image
import os
import json
from collections import defaultdict


def get_classes_for_image(image_id, annotations, categories):
    category_ids = {annotation['category_id'] for annotation in annotations if annotation['image_id'] == image_id}
    class_names = {cat['name'] for cat in categories if cat['id'] in category_ids}
    return class_names


def check_class_consistency(data_dir, annotations_file):
    with open(annotations_file, 'r') as f:
        annotations_data = json.load(f)
    categories = annotations_data['categories']
    annotations = annotations_data['annotations']
    class_counts = defaultdict(int)
    for image_info in annotations_data['images']:
        image_id = image_info['id']
        image_name = image_info['file_name']
        image_path = os.path.join(data_dir, image_name)
        classes = get_classes_for_image(image_id, annotations, categories)
        num_classes = len(classes)
        class_counts[num_classes] += 1
        print(f"Image: {image_path}, Number of classes: {num_classes}")
    print("\nClass counts:")
    for num_classes, count in class_counts.items():
        print(f"Number of classes: {num_classes}, Count: {count}")
    if len(class_counts) > 1:
        print("\nWarning: There are inconsistent class counts in the dataset.")
    else:
        print("\nAll images have the same number of classes.")


data_directory = '/home/liweijia/SparseInst/MangoGroup_Augmented_Data_split/val/images'
annotations_file = '/home/liweijia/SparseInst/MangoGroup_Augmented_Data_split/val/annotations/instances_val.json'
check_class_consistency(data_directory, annotations_file)
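Note that get_classes_for_image rescans every annotation for every image, so the check above is O(images × annotations). For large COCO files a single grouping pass makes it linear; a sketch of that variant (classes_per_image is a hypothetical replacement for the per-image lookup):

def classes_per_image(annotations_data):
    """Map image_id -> set of class names in one pass over the annotations."""
    id_to_name = {cat['id']: cat['name'] for cat in annotations_data['categories']}
    grouped = defaultdict(set)
    for ann in annotations_data['annotations']:
        grouped[ann['image_id']].add(id_to_name[ann['category_id']])
    return grouped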
Warmup_Cosine
import math

from torch.optim.lr_scheduler import _LRScheduler


class WarmupCosineAnnealingLR(_LRScheduler):
    def __init__(self, optimizer, max_iters, warmup_iters, warmup_factor, last_epoch=-1):
        self.max_iters = max_iters
        self.warmup_iters = warmup_iters
        self.warmup_factor = warmup_factor
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        if self.last_epoch < self.warmup_iters:
            # Linear warmup from warmup_factor * base_lr up to base_lr.
            alpha = self.last_epoch / self.warmup_iters
            return [base_lr * self.warmup_factor * (1 - alpha) + alpha * base_lr
                    for base_lr in self.base_lrs]
        else:
            # Cosine annealing from base_lr down to 0 over the remaining iterations.
            return [base_lr * (1 + math.cos(math.pi * (self.last_epoch - self.warmup_iters)
                                            / (self.max_iters - self.warmup_iters))) / 2
                    for base_lr in self.base_lrs]


def build_warmup_cosine_scheduler(cfg, optimizer):
    return WarmupCosineAnnealingLR(
        optimizer,
        max_iters=cfg.SOLVER.MAX_ITER,
        warmup_iters=cfg.SOLVER.WARMUP_ITERS,
        warmup_factor=cfg.SOLVER.WARMUP_FACTOR,
    )
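A standalone sketch of the schedule's shape, assuming only a dummy parameter so torch can build an optimizer: the learning rate should climb linearly for the first warmup_iters steps and then decay along the cosine curve.

import torch

param = torch.nn.Parameter(torch.zeros(1))  # dummy parameter, for illustration only
optimizer = torch.optim.SGD([param], lr=0.01)
scheduler = WarmupCosineAnnealingLR(optimizer, max_iters=100, warmup_iters=10, warmup_factor=0.1)
for it in range(100):
    optimizer.step()
    scheduler.step()
    if it % 10 == 0:
        print(it, scheduler.get_last_lr())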