import h5py
import re
import random


def update_suffix(original_string, increment):
    """Return *original_string* with its trailing run of digits increased.

    The digits at the very end of the string are parsed as an integer,
    *increment* is added, and the result is substituted back. A string that
    does not end in digits is returned unchanged.
    """
    updated_string = re.sub(
        r'(\d+)$',
        lambda x: str(int(x.group(1)) + increment),
        original_string,
    )
    return updated_string


def merge(output_file, input_files, total_size, truncate_len=-1):
    """Merge the ``data`` groups of several HDF5 files into one new file.

    Every entry found under ``data`` in each input file is copied into the
    ``data`` group of *output_file*. Sub-groups are renamed to ``demo_<n>``,
    where ``<n>`` is drawn without replacement from a random permutation of
    ``range(total_size)``. If the first input's ``data`` group carries an
    ``env_args`` attribute, it is copied to the output ``data`` group.

    Args:
        output_file: Path of the HDF5 file to create (opened with mode 'w').
        input_files: Sequence of paths of HDF5 files, each with a ``data``
            group.
        total_size: Size of the index pool used to build the new names; it
            must be at least the number of entries that get copied.
        truncate_len: When positive, stop after this many entries have been
            copied in total, across all input files. Non-positive or falsy
            values (including the default ``-1``) disable truncation.

    Raises:
        IndexError: If more than *total_size* entries would be copied.
    """
    numbers = list(range(total_size))
    random.shuffle(numbers)

    # Normalise the "no truncation" spellings (-1, 0, None) to None.
    limit = truncate_len if truncate_len and truncate_len > 0 else None

    with h5py.File(output_file, 'w') as h5out:
        h5out_data = h5out.create_group('data')
        i = 0
        for input_file in input_files:
            # The limit is global: once reached, skip the remaining files
            # instead of resuming the copy with the next one.
            if limit is not None and i >= limit:
                break
            with h5py.File(input_file, 'r') as f:
                d = f['data']
                for key in d:
                    if i >= len(numbers):
                        raise IndexError(
                            f"total_size={total_size} is smaller than the "
                            f"number of entries to merge"
                        )
                    entry = d[key]
                    new_key = f"demo_{numbers[i]}"
                    print(new_key)
                    if isinstance(entry, h5py.Group):
                        f.copy(entry, h5out_data, name=new_key)
                    elif isinstance(entry, h5py.Dataset):
                        # NOTE(review): datasets keep their source name, so
                        # the same name in two inputs collides -- confirm
                        # whether new_key was intended here.
                        h5out_data.create_dataset(key, data=entry[:])
                    i += 1
                    if limit is not None and i >= limit:
                        break
        print(len(h5out_data))

        # Carry the environment metadata over from the first input, if any.
        if input_files:
            with h5py.File(input_files[0], 'r') as f:
                d1 = f['data']
                if "env_args" in d1.attrs:
                    h5out_data.attrs["env_args"] = d1.attrs["env_args"]


def print_hdf5_structure(file_path):
    """Print the group/dataset tree of an HDF5 file.

    Groups are printed by name and datasets with their shape and dtype, each
    indented by nesting depth. If the file has a top-level ``data`` entry
    with an ``env_args`` attribute, that attribute is printed as well.
    """
    def recursively_print(group, indent=0):
        for key in group:
            item = group[key]
            if isinstance(item, h5py.Group):
                print(" " * indent + f"Group: {key}")
                recursively_print(item, indent + 1)
            elif isinstance(item, h5py.Dataset):
                print(" " * indent + f"Dataset: {key}, Shape: {item.shape}, Type: {item.dtype}")

    with h5py.File(file_path, 'r') as f:
        print(f"File: {file_path}")
        recursively_print(f)
        # Not every HDF5 file has a "data" entry; only report env_args
        # when it is there.
        if "data" in f:
            dataset = f["data"]
            if "env_args" in dataset.attrs:
                env_args = dataset.attrs["env_args"]
                print(f"env_args: {env_args}")


if __name__ == "__main__":
    # Example merge invocation (kept for reference):
    # input_files = ["/home/siweih/Project/EmbodiedBM/equidiff/data/robomimic/datasets/square_d2/square_d2.hdf5","/home/siweih/Project/EmbodiedBM/equidiff/mix_4000.hdf5"]
    # demo_num = 5000
    # output_file = f"mix_{demo_num}.hdf5"
    # merge(output_file, input_files, demo_num)
    print_hdf5_structure("/home/siweih/Project/EmbodiedBM/mimicgen/core_datasets/square/demo_src_square_task_D2/demo.hdf5")