""" |
|
Script to download datasets packaged with the repository. |
|
""" |
|
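
# Example invocation (a sketch -- the filename "download_datasets.py" is an assumption;
# adjust it to wherever this script lives in your checkout):
#
#   python download_datasets.py --dataset_type core --tasks square_d0 --dry_run
#
# Drop --dry_run to actually download the corresponding hdf5 files.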

import os
import argparse

import mimicgen_envs
import mimicgen_envs.utils.file_utils as FileUtils
from mimicgen_envs import DATASET_REGISTRY
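
# Note: based on how it is indexed below, DATASET_REGISTRY is assumed to be a
# nested dict mapping dataset_type -> task -> metadata (including a "url" entry);
# see the mimicgen_envs package for the actual registry contents.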


if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--download_dir",
        type=str,
        default=None,
        help="Base download directory. Created if it doesn't exist. Defaults to data/robomimic/datasets under the current working directory.",
    )

    parser.add_argument(
        "--dataset_type",
        type=str,
        default="core",
        choices=list(DATASET_REGISTRY.keys()),
        help="Dataset type to download datasets for (e.g. source, core, object, robot, large_interpolation). Defaults to core.",
    )

    parser.add_argument(
        "--tasks",
        type=str,
        nargs='+',
        default=["square_d0"],
        help="Tasks to download datasets for. Defaults to the square_d0 task. Pass 'all' to download all tasks "
             "for the provided dataset type, or directly specify the list of tasks.",
    )

    parser.add_argument(
        "--dry_run",
        action='store_true',
        help="set this flag to do a dry run that only prints which datasets would be downloaded",
    )

    args = parser.parse_args()

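    # set the default base download directory if --download_dir was not provided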
    default_base_dir = args.download_dir
    if default_base_dir is None:
        default_base_dir = "data/robomimic/datasets"

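    # resolve the list of tasks to download, validating them against the registry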
    download_dataset_type = args.dataset_type
    download_tasks = args.tasks
    if "all" in download_tasks:
        assert len(download_tasks) == 1, "'all' should be the only --tasks argument, but got: {}".format(args.tasks)
        download_tasks = list(DATASET_REGISTRY[download_dataset_type].keys())
    else:
        for task in download_tasks:
            assert task in DATASET_REGISTRY[download_dataset_type], \
                "got unknown task {} for dataset type {}. Choose one of {}".format(
                    task, download_dataset_type, list(DATASET_REGISTRY[download_dataset_type].keys()))

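    # download each requested dataset (or just report what would be downloaded with --dry_run)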
    for task in download_tasks:
        download_dir = os.path.abspath(os.path.join(default_base_dir, task))
        download_path = os.path.join(download_dir, "{}.hdf5".format(task))
        print("\nDownloading dataset:\n dataset type: {}\n task: {}\n download path: {}"
              .format(download_dataset_type, task, download_path))
        url = DATASET_REGISTRY[download_dataset_type][task]["url"]
        if args.dry_run:
            print("\ndry run: skip download")
        else:
            # make sure the download directory exists, creating it if necessary
            os.makedirs(download_dir, exist_ok=True)
            print("")
            FileUtils.download_url_from_gdrive(
                url=url,
                download_dir=download_dir,
                check_overwrite=True,
            )
        print("")