|
|
|
@ -1,4 +1,6 @@ |
|
|
|
|
import argparse |
|
|
|
|
import random |
|
|
|
|
import copy |
|
|
|
|
import os |
|
|
|
|
import os.path as osp |
|
|
|
|
from glob import glob |
|
|
|
@ -198,6 +200,20 @@ def create_file_list(file_list, path_tuples, sep=' '): |
|
|
|
|
f.write(line + '\n') |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def create_label_list(label_list, labels): |
|
|
|
|
""" |
|
|
|
|
Create label list. |
|
|
|
|
|
|
|
|
|
Args: |
|
|
|
|
label_list (str): Path of label list to create. |
|
|
|
|
labels (list[str]|tuple[str]]): Label names. |
|
|
|
|
""" |
|
|
|
|
|
|
|
|
|
with open(label_list, 'w') as f: |
|
|
|
|
for label in labels: |
|
|
|
|
f.write(label + '\n') |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def link_dataset(src, dst): |
|
|
|
|
""" |
|
|
|
|
Make a symbolic link to a dataset. |
|
|
|
@ -211,5 +227,57 @@ def link_dataset(src, dst): |
|
|
|
|
raise ValueError(f"{dst} exists and is not a directory.") |
|
|
|
|
elif not osp.exists(dst): |
|
|
|
|
os.makedirs(dst) |
|
|
|
|
src = osp.realpath(src) |
|
|
|
|
name = osp.basename(osp.normpath(src)) |
|
|
|
|
os.symlink(src, osp.join(dst, name), target_is_directory=True) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def random_split(samples, |
|
|
|
|
ratios=(0.7, 0.2, 0.1), |
|
|
|
|
inplace=True, |
|
|
|
|
drop_remainder=False): |
|
|
|
|
""" |
|
|
|
|
Randomly split the dataset into two or three subsets. |
|
|
|
|
|
|
|
|
|
Args: |
|
|
|
|
samples (list): All samples of the dataset. |
|
|
|
|
ratios (tuple[float], optional): If the length of `ratios` is 2, |
|
|
|
|
the two elements indicate the ratios of samples used for training |
|
|
|
|
and evaluation. If the length of `ratios` is 3, the three elements |
|
|
|
|
indicate the ratios of samples used for training, validation, and |
|
|
|
|
testing. Defaults to (0.7, 0.2, 0.1). |
|
|
|
|
inplace (bool, optional): Whether to shuffle `samples` in place. |
|
|
|
|
Defaults to True. |
|
|
|
|
drop_remainder (bool, optional): Whether to discard the remaining samples. |
|
|
|
|
If False, the remaining samples will be included in the last subset. |
|
|
|
|
For example, if `ratios` is (0.7, 0.1) and `drop_remainder` is False, |
|
|
|
|
the two subsets after splitting will contain 70% and 30% of the samples, |
|
|
|
|
respectively. Defaults to False. |
|
|
|
|
""" |
|
|
|
|
|
|
|
|
|
if not inplace: |
|
|
|
|
samples = copy.deepcopy(samples) |
|
|
|
|
|
|
|
|
|
if len(samples) == 0: |
|
|
|
|
raise ValueError("There are no samples!") |
|
|
|
|
|
|
|
|
|
if len(ratios) not in (2, 3): |
|
|
|
|
raise ValueError("`len(ratios)` must be 2 or 3!") |
|
|
|
|
|
|
|
|
|
random.shuffle(samples) |
|
|
|
|
|
|
|
|
|
n_samples = len(samples) |
|
|
|
|
acc_r = 0 |
|
|
|
|
st_idx, ed_idx = 0, 0 |
|
|
|
|
splits = [] |
|
|
|
|
for r in ratios: |
|
|
|
|
acc_r += r |
|
|
|
|
ed_idx = round(acc_r * n_samples) |
|
|
|
|
splits.append(samples[st_idx:ed_idx]) |
|
|
|
|
st_idx = ed_idx |
|
|
|
|
|
|
|
|
|
if ed_idx < len(ratios) and not drop_remainder: |
|
|
|
|
# Append remainder to the last split |
|
|
|
|
splits[-1].append(splits[ed_idx:]) |
|
|
|
|
|
|
|
|
|
return splits |