import os import glob import shutil from tutils import tfilename HOME_PATH="/quanquan/datasets/" def check_existing(img_path, label_path): if os.path.exists(img_path) and os.path.exists(label_path): return True else: if not os.path.exists(img_path): print("IMAGE Not exist: ", img_path) if not os.path.exists(label_path): print("LABEL Not exist: ", label_path) return False def get_availabel_files(names, label_names): llist = [[n,n2] for n,n2 in zip(names, label_names) if check_existing(n, n2)] names = [n[0] for n in llist] label_names = [n[1] for n in llist] return names, label_names def write_txt(img_paths, label_paths, meta_info, split, writing_mode='a+'): dataset_id = meta_info['dataset_id'] assert split in ['train', 'val', 'test'], f" split in ['train', 'val', 'test'] , but Got {split}" save_path = meta_info["save_txt_path"].replace("_train.txt", f"_{split}.txt") count = 0 with open(save_path, writing_mode) as f: for p1, p2 in zip(img_paths, label_paths): p1 = p1.replace(meta_info['home_path'], "") p2 = p2.replace(meta_info['home_path'], "") line = f"{p1}\t{p2}\n" # print(line, end=" ") f.write(line) count += 1 if count <= 0: raise ValueError(f"ID: {meta_info['dataset_id']}, \tTask: {meta_info['dataset_name']}\t, {count} files are writen.") print(f"ID: {meta_info['dataset_id']}, \tTask: {meta_info['dataset_name']}\t, {count} files are writen.\t Writing Over! write into ", save_path) def organize_in_nnunet_style(meta_info): dirpath = os.path.join(meta_info['home_path'], meta_info['dirpath']) if os.path.exists(os.path.join(dirpath, "imagesTr")) and os.path.exists(os.path.join(dirpath, "labelsTr")): img_paths = glob.glob(os.path.join(dirpath, "imagesTr", "*.nii.gz")) img_paths.sort() label_paths = [p.replace("imagesTr", "labelsTr")[:-12]+".nii.gz" for p in img_paths] img_paths, label_paths = get_availabel_files(img_paths, label_paths) write_txt(img_paths, label_paths, meta_info=meta_info, split='train', writing_mode="a+") if os.path.exists(os.path.join(dirpath, "imagesVa")) and os.path.exists(os.path.join(dirpath, "labelsVa")): img_paths = glob.glob(os.path.join(dirpath, "imagesVa", "*.nii.gz")) img_paths.sort() label_paths = [p.replace("imagesVa", "labelsVa")[:-12]+".nii.gz" for p in img_paths] img_paths, label_paths = get_availabel_files(img_paths, label_paths) write_txt(img_paths, label_paths, meta_info=meta_info, split='val', writing_mode="a+") if os.path.exists(os.path.join(dirpath, "imagesTs")) and os.path.exists(os.path.join(dirpath, "labelsTs")): img_paths = glob.glob(os.path.join(dirpath, "imagesTs", "*.nii.gz")) img_paths.sort() label_paths = [p.replace("imagesTs", "labelsTs")[:-12]+".nii.gz" for p in img_paths] img_paths, label_paths = get_availabel_files(img_paths, label_paths) write_txt(img_paths, label_paths, meta_info=meta_info, split='test', writing_mode="a+") def organize_in_style2(meta_info): dirpath = os.path.join(meta_info['home_path'], meta_info['dirpath']) if os.path.exists(os.path.join(dirpath, "imagesTr")) and os.path.exists(os.path.join(dirpath, "labelsTr")): img_paths = glob.glob(os.path.join(dirpath, "imagesTr", "*.nii.gz")) img_paths.sort() label_paths = [p.replace("imagesTr", "labelsTr") for p in img_paths] img_paths, label_paths = get_availabel_files(img_paths, label_paths) write_txt(img_paths, label_paths, meta_info=meta_info, split='train', writing_mode="a+") if os.path.exists(os.path.join(dirpath, "imagesVa")) and os.path.exists(os.path.join(dirpath, "labelsVa")): img_paths = glob.glob(os.path.join(dirpath, "imagesVa", "*.nii.gz")) img_paths.sort() label_paths = [p.replace("imagesVa", "labelsVa") for p in img_paths] img_paths, label_paths = get_availabel_files(img_paths, label_paths) write_txt(img_paths, label_paths, meta_info=meta_info, split='val', writing_mode="a+") if os.path.exists(os.path.join(dirpath, "imagesTs")) and os.path.exists(os.path.join(dirpath, "labelsTs")): img_paths = glob.glob(os.path.join(dirpath, "imagesTs", "*.nii.gz")) img_paths.sort() label_paths = [p.replace("imagesTs", "labelsTs") for p in img_paths] img_paths, label_paths = get_availabel_files(img_paths, label_paths) write_txt(img_paths, label_paths, meta_info=meta_info, split='test', writing_mode="a+") def organize_by_names(names_in, label_names_in, meta_info): assert len(names_in) > 0, f"Meta info: {meta_info}" names, label_names = get_availabel_files(names_in, label_names_in) assert len(names) > 0, f"Meta info: {meta_info}, \n {names_in[:2]} \n {label_names_in[:2]}" assert len(label_names) > 0, f"Meta info: {meta_info}, \n {names_in[:2]} \n {label_names_in[:2]}" # print("debug files", len(names)) if len(names) > 10: num_valid = min(int(len(names) // 10), 10) # print("num valid", num_valid) train_names = names[:-num_valid*2] valid_names = names[-num_valid*2:-num_valid] test_names = names[-num_valid:] train_labels = label_names[:-num_valid*2] valid_labels = label_names[-num_valid*2:-num_valid] test_labels = label_names[-num_valid:] write_txt(train_names, train_labels, meta_info=meta_info, split="train") write_txt(valid_names, valid_labels, meta_info=meta_info, split="val") write_txt(test_names, test_labels, meta_info=meta_info, split="test") else: write_txt(names, label_names, meta_info=meta_info, split="train") def clear_files(train_path): if os.path.exists(train_path): parent, name = os.path.split(train_path) shutil.move(train_path, tfilename(parent, "misc", name)) val_path = train_path.replace("_train.txt", "_val.txt") if os.path.exists(val_path): parent, name = os.path.split(val_path) shutil.move(val_path, os.path.join(parent, "misc", name)) test_path = train_path.replace("_train.txt", "_test.txt") if os.path.exists(test_path): parent, name = os.path.split(test_path) shutil.move(test_path, os.path.join(parent, "misc", name)) print("Files cleared!") # from tutils.nn.data import read # def convert_to_nii(paths): ################################################################################### ################################################################################### def get_BCV_Abdomen(save_path=None): meta_info = { "dataset_name": "BTCV", "dataset_id": "01", "modality": "CT", "home_path": HOME_PATH, "dirpath": "01_BCV-Abdomen/Training/", "save_txt_path": save_path, } names = glob.glob(os.path.join(meta_info['home_path'], meta_info['dirpath'], "img/*.nii.gz")) names.sort() label_names = [p.replace("img", "label") for p in names] organize_by_names(names, label_names, meta_info=meta_info) def get_AbdomenCT_1K(save_path): meta_info = { "dataset_name": "AbdomenCT-1K", "dataset_id": "08", "modality": "CT", "home_path": HOME_PATH, "dirpath": "08_AbdomenCT-1K", "save_txt_path": save_path, } # print(names) organize_in_nnunet_style(meta_info=meta_info) def get_AMOS(save_path): meta_info = { "dataset_name": "AMOS", "dataset_id": "09", "modality": "CT", "home_path": HOME_PATH, "dirpath": "09_AMOS", "save_txt_path": save_path, } organize_in_style2(meta_info) def get_MSD(save_path): meta_info = { "dataset_name": "MSD", # Decathlon "dataset_id": "10", "modality": "CT", "home_path": HOME_PATH, "parent_dirpath": "10_Decathlon", "dirpath": "", "save_txt_path": save_path, } subtasks = ["Task06_Lung", "Task08_HepaticVessel", "Task09_Spleen", "Task10_Colon"] for task in subtasks: # print("Processing ", task) meta_info_subtask = { "dataset_name":task, "dataset_id": f"{meta_info['dataset_id']}_{task[4:6]}", "home_path":HOME_PATH, "dirpath": f"{meta_info['parent_dirpath']}/{task}", "save_txt_path": save_path, } # print(meta_info_subtask) organize_in_style2(meta_info=meta_info_subtask) def get_MSD_MRI(save_path): meta_info = { "dataset_name": "MSD", # Decathlon "dataset_id": "10", "modality": "MRI", "home_path": HOME_PATH, "parent_dirpath": "10_Decathlon", "dirpath": "", "save_txt_path": save_path, } subtasks = ["Task02_Heart", "Task05_Prostate"] for task in subtasks: # print("Processing ", task) meta_info_subtask = { "dataset_name":task, "dataset_id": f"{meta_info['dataset_id']}_{task[4:6]}", "home_path":HOME_PATH, "dirpath": f"{meta_info['parent_dirpath']}/{task}", "save_txt_path": save_path, } # print(meta_info_subtask) organize_in_style2(meta_info=meta_info_subtask) def get_ASOCA(save_path): meta_info = { "dataset_name": "ASOCA", "dataset_id": "51", "modality": "CT", "home_path": HOME_PATH, "dirpath": "51_ASOCA", "save_txt_path": save_path, } names = glob.glob(os.path.join(meta_info['home_path'], meta_info['dirpath'], "image/*.nii.gz")) names.sort() label_names = [p.replace("/image", "/label") for p in names] # print(os.path.join(meta_info['home_path'], meta_info['dirpath'], "image/*.nii.gz") # print("debug ,", names) organize_by_names(names, label_names, meta_info=meta_info) def get_BCV_Cervix(save_path): meta_info = { "dataset_name": "BCV-Cervix", "dataset_id": "52", "modality": "CT", "home_path": HOME_PATH, "dirpath": "52_BCV-Cervix/Training/", "save_txt_path": save_path, } names = glob.glob(os.path.join(meta_info['home_path'], meta_info['dirpath'], "img/*.nii.gz")) names.sort() label_names = [p.replace("/img/", "/label/").replace("-Image", "-Mask") for p in names] organize_by_names(names, label_names, meta_info=meta_info) def get_NIHPancrease(save_path): meta_info = { "dataset_name": "NIHPancrease", "dataset_id": "53", "modality": "CT", "home_path": HOME_PATH, "dirpath": "53_NIHPancrease", "save_txt_path": save_path, } names = glob.glob(os.path.join(meta_info['home_path'], meta_info['dirpath'], "data/*.nii.gz") ) names.sort() label_names = [p.replace("/data/PANCREAS_", "/label/label") for p in names] organize_by_names(names, label_names, meta_info=meta_info) def get_CTPelvic(save_path): meta_info = { "dataset_name": "CTPelvic1K", "dataset_id": "54", "modality": "CT", "home_path": HOME_PATH, "dirpath": "54_CTPelvic1K", "save_txt_path": save_path, } names = [] names += glob.glob(os.path.join(meta_info['home_path'], meta_info['dirpath'], "CTPelvic1K_dataset1_data/*.nii.gz")) names += glob.glob(os.path.join(meta_info['home_path'], meta_info['dirpath'], "CTPelvic1K_dataset2_data/*.nii.gz")) names += glob.glob(os.path.join(meta_info['home_path'], meta_info['dirpath'], "CTPelvic1K_dataset3_data/*.nii.gz")) names += glob.glob(os.path.join(meta_info['home_path'], meta_info['dirpath'], "CTPelvic1K_dataset4_data/*.nii.gz")) names += glob.glob(os.path.join(meta_info['home_path'], meta_info['dirpath'], "CTPelvic1K_dataset5_data/*.nii.gz")) names += glob.glob(os.path.join(meta_info['home_path'], meta_info['dirpath'], "CTPelvic1K_dataset7_data/*.nii.gz")) names.sort() # xx_data.nii.gz xx_mask_4label.nii.gz label_names = [p.replace("_data/", "_mask/").replace("_data.nii.gz", "_mask_4label.nii.gz") for p in names] organize_by_names(names, label_names, meta_info=meta_info) def get_FLARE(save_path): meta_info = { "dataset_name": "FLARE", "dataset_id": "55", "modality": "CT", "home_path": HOME_PATH, "dirpath": "55_FLARE22Train", "save_txt_path": save_path, "class": ['liver', 'right kidney', 'spleen', 'pancrease', 'aorta','postcava','right adrenal gland','left darenal gland','gallbladder','esophagus','stomach','duodenum','left kidney'], } organize_in_nnunet_style(meta_info=meta_info) # def get_HAN(save_path): # meta_info = { # "dataset_name": "Head-and-neck", # "dataset_id": "56", # "modality": "CT", # "home_path": HOME_PATH, # "dirpath": "56_Head-and-Neck-challenge", # "save_txt_path": save_path, # } # names = glob.glob(os.path.join(meta_info['home_path'], meta_info['dirpath'], "data/*.nii.gz")) # names.sort() # label_names = [p.replace("/data/", "/label/") for p in names] # organize_by_names(names, label_names, meta_info=meta_info) # def get_StructSeg(save_path): # meta_info = { # "dataset_name": "StructSeg2019", # "dataset_id": "57", # "modality": "CT", # "home_path": HOME_PATH, # "dirpath": "57_StructSeg", # "save_txt_path": save_path, # } # names = glob.glob(os.path.join(meta_info['home_path'], meta_info['dirpath'], "HaN_OAR/data/*")) # names = [f"{name}/data.nii.gz" for name in names] # names.sort() # label_names = [p.replace("/data.nii.gz", "/label.nii.gz") for p in names] # organize_by_names(names, label_names, meta_info=meta_info) def get_CHAOS(save_path): meta_info = { "dataset_name": "CHAOS", "dataset_id": "58", "modality": "MRI", "home_path": HOME_PATH, "dirpath": "58_CHAOST2/chaos_MR_T2_normalized/", "save_txt_path": save_path, "class": ["liver", "right kidney", "left kidney", "spleen"], } names = glob.glob(os.path.join(meta_info['home_path'], meta_info['dirpath'], "image*.nii.gz")) names.sort() label_names = [p.replace("/image_", "/label_") for p in names] organize_by_names(names, label_names, meta_info=meta_info) def get_SABS(save_path): meta_info = { "dataset_name": "SABS", # BTCV ? "dataset_id": "59", "modality": "CT", "home_path": HOME_PATH, "dirpath": "59_SABS/sabs_CT_normalized/", "save_txt_path": save_path, "class": ["spleen", "right kidney", "left kidney", "gallbladder", "esophagus", "liver", "stomach", "aorta", "postcava", "portal vein and splenic vein", "pancrease", "right adrenal gland", "left adrenal gland"], } names = glob.glob(os.path.join(meta_info['home_path'], meta_info['dirpath'], "image_*.nii.gz")) names.sort() label_names = [p.replace("/image_", "/label_") for p in names] organize_by_names(names, label_names, meta_info=meta_info) def get_Totalseg(save_path): meta_info = { "dataset_name": "Totalseg", "dataset_id": "60", "modality": "CT", "home_path": HOME_PATH, # "dirpath": "nnUNet_raw/Dataset101_Totalseg", "dirpath": "60_Totalseg", "save_txt_path": save_path, "class": [], } organize_in_nnunet_style(meta_info=meta_info) def get_WORD(save_path): meta_info = { "dataset_name": "WORDs", # BTCV ? "dataset_id": "07", "modality": "CT", "home_path": HOME_PATH, "dirpath": "07_WORD/WORD-V0.1.0/", "save_txt_path": save_path, } organize_in_style2(meta_info=meta_info) def generate_all(): save_path="./datasets/dataset_list/all_train.txt" clear_files(save_path) get_BCV_Abdomen(save_path) get_AbdomenCT_1K(save_path) get_AMOS(save_path) get_MSD(save_path) # get_ASOCA() # get_BCV_Cervix() # # get_NIHPancrease() # bug in data ? # get_CTPelvic() # get_FLARE() # get_SABS() def generate_their(): save_path="./datasets/dataset_list/their_train.txt" clear_files(save_path) save_path="./datasets/dataset_list/their_train.txt" get_BCV_Abdomen(save_path) get_AbdomenCT_1K(save_path) get_AMOS(save_path) get_MSD(save_path) def generate_ours(): save_path="./datasets/dataset_list/ours_train.txt" get_ASOCA(save_path) get_BCV_Cervix(save_path) # get_NIHPancrease() # bug in data ? get_CTPelvic(save_path) get_FLARE(save_path) get_SABS(save_path) def generate_alp_dataset(): save_path = "./datasets/dataset_list/alp_train.txt" clear_files(save_path) get_SABS(save_path) get_CHAOS(save_path) if __name__ == "__main__": print(__file__) # generate_alp_dataset() save_path ="./datasets/dataset_list/totalseg_train.txt" clear_files(save_path) get_Totalseg(save_path) # save_path="./datasets/dataset_list/word_train.txt" print("Over")