# SMILE / dynamic_utils.py
# fmthoker's picture
# Upload 26 files
# 4940c8b verified
# raw
# history blame
# 5.46 kB
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import numpy as np
from typing import List
def sample_key_frames(num_frames: int,
                      key_frame_probs: List[float]) -> np.ndarray:
    """Sample the indices of key frames.

    The number of *inner* key frames (those strictly between the first and
    the last frame) is drawn at random: ``key_frame_probs[k]`` is the
    probability of drawing ``k`` inner key frames. The inner key frames are
    spread evenly over the clip and then shifted by a small random jitter.

    Args:
        num_frames (int): number of frames in the whole video.
        key_frame_probs (List[float]): probability of each possible number
            of inner key frames (index 0 -> none, index 1 -> one, ...).
            The values must sum to 1.0.

    Returns:
        frame_inds (np.ndarray): int32 array of key frame indices. Whenever
            inner key frames are drawn the result is sorted and free of
            duplicates. The first frame (0) and the last frame
            (num_frames - 1) are always included.

    Examples:
        >>> sample_key_frames(16, [1.0, ])
        array([ 0, 15], dtype=int32)
        >>> # Random result: either no inner key frame, or one near the
        >>> # middle of the clip (8 plus a jitter in [-2, 1]).
        >>> sample_key_frames(16, [0.5, 0.5])  # doctest: +SKIP
        array([ 0,  7, 15], dtype=int32)
    """
    # How many inner key frames to place.
    num_key_frames = int(
        np.random.choice(len(key_frame_probs), p=key_frame_probs))

    # Without inner key frames only the first and the last frame remain.
    if num_key_frames == 0:
        return np.array([0, num_frames - 1], dtype=np.int32)

    # Evenly spaced anchor positions for the inner key frames.
    avg_duration = num_frames / (num_key_frames + 1)
    ticks = np.array([int(avg_duration * i)
                      for i in range(1, num_key_frames + 1)], dtype=np.int32)

    # Add random jitter of up to a third of the spacing. The upper bound of
    # np.random.randint is exclusive, so the jitter lies in
    # [-jitter_range, jitter_range - 1].
    jitter_range = int(avg_duration / 3)
    if jitter_range > 0:
        jitter = np.random.randint(-jitter_range,
                                   jitter_range, size=len(ticks))
        ticks = ticks + jitter

    # Add the first and the last frame. np.unique removes duplicates and
    # already returns its result sorted, so no extra sort is needed. The
    # final cast keeps the dtype identical to the early-return path above.
    ticks = np.concatenate((ticks, np.array([0, num_frames - 1])), axis=0)
    return np.unique(ticks).astype(np.int32)
def extend_key_frame_to_all(array: np.ndarray,
                            key_frame_inds: np.ndarray,
                            interpolate: str = 'uniform') -> np.ndarray:
    """Interpolate the values between key frames.

    This function is used in some data augmentations for video clips. For
    example, we first decide the color distortion values in some key frames,
    then we interpolate the values for the remaining frames. This strategy
    makes the data augmentation smoother over the entire video clip.

    Args:
        array (np.ndarray): the values at the key frames, in shape of
            [K, *].
        key_frame_inds (np.ndarray): the frame indices of the key frames,
            in shape of [K, ]. The first index must be 0 and the last one
            must be positive.
        interpolate (str): interpolation type.
            'uniform' is linear interpolation;
            'accelerate' is constant acceleration starting from rest;
            'decelerate' is the reverse order of 'accelerate';
            'random' picks one of the three independently for every
            segment between two key frames (uniform with probability 0.7,
            accelerate and decelerate with probability 0.15 each).

    Returns:
        out_array (np.ndarray): the interpolated values, in shape of
            [N, *] with N = key_frame_inds[-1] + 1. The output uses the
            dtype of ``array``, so interpolated values are cast to that
            dtype when they are stored.

    Raises:
        AssertionError: if key_frame_inds does not start at 0, does not end
            at a positive index, or its length differs from len(array).
        NotImplementedError: if ``interpolate`` is not a supported type.

    Examples:
        >>> values = np.array([0.0, 5.0])
        >>> inds = np.array([0, 10])
        >>> extend_key_frame_to_all(values, inds)
        array([0. , 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. ])
        >>> extend_key_frame_to_all(values, inds, 'accelerate')
        array([0. , 0.05, 0.2 , 0.45, 0.8 , 1.25, 1.8 , 2.45, 3.2 , 4.05, 5.])
    """

    # Each helper receives the two key frame values as [1, *] slices and
    # returns index_delta + 1 rows covering both end points.

    def _uniform_interpolate(start_state, end_state, index_delta):
        # Same increment for every frame.
        delta_state = (end_state - start_state) * (1.0 / index_delta)
        return np.concatenate([start_state + step * delta_state
                               for step in range(index_delta + 1)], axis=0)

    def _accelerate_interpolate(start_state, end_state, index_delta):
        # s(t) = start + a * t^2 / 2, with a chosen so s(index_delta) = end.
        a = 2 * (end_state - start_state) / (index_delta ** 2)
        return np.concatenate([start_state + 0.5 * a * (step ** 2)
                               for step in range(index_delta + 1)], axis=0)

    def _decelerate_interpolate(start_state, end_state, index_delta):
        # Mirror image of the accelerate curve: measured back from the end.
        a = 2 * (start_state - end_state) / (index_delta ** 2)
        return np.concatenate(
            [end_state + 0.5 * a * ((index_delta - step) ** 2)
             for step in range(index_delta + 1)], axis=0)

    assert key_frame_inds[0] == 0 and key_frame_inds[-1] > 0
    num_key_frames = len(key_frame_inds)
    assert num_key_frames == len(array)

    # Resolve the interpolation type once instead of once per segment.
    # The tuple order matches the index drawn in 'random' mode.
    interpolators = (_uniform_interpolate,
                     _accelerate_interpolate,
                     _decelerate_interpolate)
    fixed_modes = {'uniform': _uniform_interpolate,
                   'accelerate': _accelerate_interpolate,
                   'decelerate': _decelerate_interpolate}
    if interpolate != 'random' and interpolate not in fixed_modes:
        raise NotImplementedError(
            'Unsupported interpolation type: {!r}'.format(interpolate))

    num_frames = key_frame_inds[-1] + 1
    out_array = np.zeros((num_frames, ) + array.shape[1:], dtype=array.dtype)

    for i in range(num_key_frames - 1):
        # Fill the values between key frame i and key frame i + 1.
        st_idx, end_idx = key_frame_inds[i:i + 2]

        if end_idx == st_idx:
            # A repeated key frame index spans no frames; interpolating
            # would divide by zero. Store the later key frame's value.
            out_array[st_idx] = array[i + 1]
            continue

        if interpolate == 'random':
            inter_func = interpolators[
                np.random.choice(3, p=[0.7, 0.15, 0.15])]
        else:
            inter_func = fixed_modes[interpolate]

        # The segment includes both end points; the shared end point is
        # written again (with the same value) by the next segment.
        out_array[st_idx:end_idx + 1] = inter_func(array[i:i + 1],
                                                   array[i + 1:i + 2],
                                                   end_idx - st_idx)
    return out_array