SMILE / masking_generator.py

Upload 26 files

4940c8b verified 3 months ago

9.96 kB

	import numpy as np
	import random
	import ast

	class TubeMaskingGenerator:
	    """Random per-frame patch mask that is repeated unchanged for every frame.

	    ``input_size`` is unpacked as ``(frames, height, width)`` and ``mask_ratio``
	    is the fraction of one frame's ``height * width`` patches to mask; the
	    resulting count is truncated with ``int``.
	    """

	    def __init__(self, input_size, mask_ratio):
	        frames, height, width = input_size
	        patches_in_frame = height * width
	        masks_in_frame = int(mask_ratio * patches_in_frame)

	        self.frames = frames
	        self.height = height
	        self.width = width
	        self.num_patches_per_frame = patches_in_frame
	        self.total_patches = frames * patches_in_frame
	        self.num_masks_per_frame = masks_in_frame
	        self.total_masks = frames * masks_in_frame

	    def __repr__(self):
	        return "Maks: total patches {}, mask patches {}".format(
	            self.total_patches, self.total_masks
	        )

	    def __call__(self):
	        """Return a flat array of ``total_patches`` entries; 1 marks a masked patch."""
	        masked_count = self.num_masks_per_frame
	        visible_count = self.num_patches_per_frame - masked_count
	        # Start from "all visible, then all masked" and shuffle in place.
	        frame_pattern = np.concatenate((np.zeros(visible_count), np.ones(masked_count)))
	        np.random.shuffle(frame_pattern)
	        # Tiling a 1-D pattern repeats it back to back, once per frame.
	        return np.tile(frame_pattern, self.frames)


	class TubeletMaskingGenerator:
	    """Combine a random tube mask with masks derived from trajectory boxes.

	    In every mask array produced here, 1 means "masked" and 0 means "visible".

	    Args:
	        input_size: unpacked as ``(frames, height, width)`` of the patch grid.
	        mask_ratio: fraction of one frame's patches to mask (truncated with
	            ``int``); ``total_masks`` is that count times ``frames``.
	        visible_frames: ``None`` to pick one random visible frame per call, or
	            the frame indices whose object patches are protected. Accepts a
	            Python-literal string such as ``"[0, 3]"``, an already-parsed
	            sequence of ints, or a single int.
	        mask_type: ``"tube+picked_frame_visible"`` and ``"tube+traj_mask"``
	            select the special handling in ``__call__``; any other value
	            (including the default ``"tube"``) only ORs the object mask with
	            the tube mask before balancing.
	        traj_unmask_ratio: fraction of trajectory patches left visible when
	            ``mask_type == "tube+traj_mask"``.
	    """

	    def __init__(self, input_size, mask_ratio, visible_frames, mask_type="tube", traj_unmask_ratio=0.1):
	        self.tube_masking_generator = TubeMaskingGenerator(input_size, mask_ratio)
	        self.frames, self.height, self.width = input_size
	        self.num_patches_per_frame = self.height * self.width
	        self.total_patches = self.frames * self.num_patches_per_frame
	        self.num_masks_per_frame = int(mask_ratio * self.num_patches_per_frame)
	        self.total_masks = self.frames * self.num_masks_per_frame
	        # Box coordinates are divided by this value to obtain patch indices.
	        self.patch_size = 16
	        self.traj_unmask_ratio = traj_unmask_ratio

	        if visible_frames is None:
	            self.visible_frames = None
	        else:
	            # Strings are parsed as Python literals; parsed values pass through.
	            if isinstance(visible_frames, str):
	                parsed = ast.literal_eval(visible_frames)
	            else:
	                parsed = visible_frames
	            if isinstance(parsed, int):
	                parsed = [parsed]
	            self.visible_frames = [int(element) for element in parsed]

	        self.mask_type = mask_type

	    @staticmethod
	    def _sample_indices(candidates, count):
	        """Return ``count`` distinct entries of ``candidates`` chosen at random."""
	        count = int(count)
	        if count == 0:
	            # Nothing to draw; also avoids sampling from an empty population.
	            return candidates[:0]
	        chosen = np.random.choice(candidates.size, size=count, replace=False)
	        return candidates[chosen]

	    def _balance_num_masks(self, combined_mask,
	                           unmasked_object_patches_index,
	                           unmasked_non_object_patches_index,
	                           masked_object_patches_index,
	                           tube_masked_index=None,
	                           tube_unmasked_index=None):
	        """Flip entries of ``combined_mask`` in place until it has ``total_masks`` ones.

	        Each direction has a primary pool that is used first and a fallback pool
	        used only for the remainder:

	        * too many masks, no tube indices: unmask masked object patches;
	        * too few masks, no tube indices: mask non-object patches, then
	          protected (unmasked) object patches;
	        * too many masks, with tube indices: unmask tube-masked non-object
	          patches, then masked object patches;
	        * too few masks, with tube indices: mask tube-unmasked non-object
	          patches, then protected (unmasked) object patches.

	        Returns:
	            The same ``combined_mask`` array, modified in place.

	        Raises:
	            ValueError: from ``np.random.choice`` when a pool holds fewer
	                entries than must be drawn from it.
	        """
	        # Force integer dtype: an empty float array is not a valid index array.
	        unmasked_object = np.asarray(unmasked_object_patches_index, dtype=np.intp)
	        unmasked_non_object = np.asarray(unmasked_non_object_patches_index, dtype=np.intp)
	        masked_object = np.asarray(masked_object_patches_index, dtype=np.intp)

	        surplus = int(np.sum(combined_mask)) - self.total_masks
	        if surplus == 0:
	            return combined_mask
	        count = abs(surplus)
	        without_tube = tube_masked_index is None or tube_unmasked_index is None

	        if without_tube:
	            object_index = None
	        else:
	            object_index = np.union1d(masked_object, unmasked_object)

	        if surplus > 0:
	            # Too many masked patches: some must become visible.
	            fill_value = 0.
	            if without_tube:
	                primary, fallback = masked_object, None
	            else:
	                tube_masked = np.asarray(tube_masked_index, dtype=np.intp)
	                primary = np.setdiff1d(tube_masked, object_index)
	                fallback = masked_object
	        else:
	            # Too few masked patches: some visible ones must become masked.
	            fill_value = 1.
	            fallback = unmasked_object
	            if without_tube:
	                primary = unmasked_non_object
	            else:
	                tube_unmasked = np.asarray(tube_unmasked_index, dtype=np.intp)
	                primary = np.setdiff1d(tube_unmasked, object_index)

	        if fallback is None or count <= primary.size:
	            combined_mask[self._sample_indices(primary, count)] = fill_value
	        else:
	            # Use up the whole primary pool, then draw the rest from the fallback.
	            combined_mask[primary] = fill_value
	            remainder = count - primary.size
	            combined_mask[self._sample_indices(fallback, remainder)] = fill_value

	        return combined_mask

	    def __repr__(self):
	        repr_str = "Maks: total patches {}, mask patches {}".format(
	            self.total_patches, self.total_masks
	        )
	        return repr_str

	    def _patch_span(self, low, high, limit):
	        """Map a coordinate interval to a ``[start, stop)`` patch range clipped to ``limit``."""
	        start = max(int(np.floor(low / self.patch_size)), 0)
	        stop = min(int(np.ceil(high / self.patch_size)) + 1, limit)
	        return start, stop

	    # 1 in mask array means masked, 0 means unmasked
	    def __call__(self, traj_rois):
	        """Build the balanced mask for one clip.

	        Args:
	            traj_rois: array whose shape unpacks as ``(num_tubelet, num_frame,
	                box)``. ``num_frame`` must be even and equal ``2 * self.frames``,
	                because each consecutive frame pair is merged into one box.
	                Entries 0 and 2 of a box are used as the x extent, entries 1
	                and 3 as the y extent.

	        Returns:
	            A flat ``float32`` array of ``frames * height * width`` entries
	            containing exactly ``total_masks`` ones.
	        """
	        # Generate the plain tube mask first.
	        tube_mask = self.tube_masking_generator()

	        _, num_frame, _ = traj_rois.shape
	        assert num_frame % 2 == 0 and self.frames == (num_frame // 2)

	        # Frames whose object patches are protected from masking.
	        if self.visible_frames is None:
	            protected_frames = {np.random.randint(0, self.frames)}
	        else:
	            protected_frames = set(self.visible_frames)

	        # Object map: 1 = object patch to mask, 2 = protected object patch,
	        # 0 = non-object patch.
	        object_map = np.zeros((self.frames, self.height, self.width))
	        for roi in traj_rois:
	            for i in range(self.frames):
	                first, second = roi[2 * i], roi[2 * i + 1]
	                # Union of the boxes of the two frames sharing one temporal patch.
	                x_start, x_stop = self._patch_span(
	                    min(first[0], second[0]), max(first[2], second[2]), self.width
	                )
	                y_start, y_stop = self._patch_span(
	                    min(first[1], second[1]), max(first[3], second[3]), self.height
	                )
	                label = 2. if i in protected_frames else 1.
	                object_map[i][y_start:y_stop, x_start:x_stop] = label

	        object_map = object_map.flatten()
	        masked_object_patches_index = np.flatnonzero(object_map == 1.)
	        unmasked_non_object_patches_index = np.flatnonzero(object_map == 0.)
	        unmasked_object_patches_index = np.flatnonzero(object_map == 2.)

	        tube_masked_index = np.flatnonzero(tube_mask == 1.)
	        tube_unmasked_index = np.flatnonzero(tube_mask == 0.)

	        # Combine the object mask with the tube mask; protected patches count as
	        # 0 here, so they are masked only where the tube mask covers them.
	        combined_mask = np.logical_or(object_map == 1., tube_mask.astype(bool)).astype(np.float32)

	        if self.mask_type == "tube+picked_frame_visible":
	            # Unmask the protected patches even where the tube mask covered them.
	            combined_mask[unmasked_object_patches_index] = 0.

	        elif self.mask_type == "tube+traj_mask":
	            # Mask the whole trajectory, then re-open a random fraction of it.
	            traj_patches_index = np.union1d(masked_object_patches_index, unmasked_object_patches_index)
	            num_visible = int(self.traj_unmask_ratio * traj_patches_index.size)
	            unmasked_traj_patches_index = self._sample_indices(traj_patches_index, num_visible)

	            combined_mask[traj_patches_index] = 1.
	            combined_mask[unmasked_traj_patches_index] = 0.

	            # The balancing step must see the updated object partition.
	            unmasked_object_patches_index = unmasked_traj_patches_index
	            masked_object_patches_index = np.setdiff1d(traj_patches_index, unmasked_traj_patches_index)

	        # Balance the number of masked patches.
	        mask = self._balance_num_masks(combined_mask,
	                                       unmasked_object_patches_index,
	                                       unmasked_non_object_patches_index,
	                                       masked_object_patches_index,
	                                       tube_masked_index,
	                                       tube_unmasked_index)

	        assert np.sum(mask) == self.total_masks
	        return mask