Written by Hyojung Chang
1. Define a custom Dataset in the structure required by Torchvision
1-1) Parse the annotations of one image from the annotation.csv file
import pandas as pd

def parse_one_annot(filepath, filename):
    # Load the annotation file and collect the position and classname of each RoI
    # belonging to the given image, converting each classname to an integer label.
    # Labels start from 1 because 0 is reserved for the background.
    # 1 is covid-19, 2 is nodule and 3 is cancer.
    data = pd.read_csv(filepath)
    boxes_array = data[data["filename"] == filename][["minX", "minY", "maxX", "maxY"]].values
    classnames = data[data["filename"] == filename][["classname"]]
    classes = []
    for i in range(len(classnames)):
        if classnames.iloc[i, 0] == 'covid-19':
            classes.append(1)
        elif classnames.iloc[i, 0] == 'nodule':
            classes.append(2)
        elif classnames.iloc[i, 0] == 'cancer':
            classes.append(3)
    return boxes_array, classes
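As a quick illustration, the function returns the box coordinates and integer labels for a single image. The filename below is a placeholder, not a file from the actual dataset:

# Hypothetical call; 'patient_001.png' is a placeholder filename.
boxes, classes = parse_one_annot('/content/drive/My Drive/test/train.csv', 'patient_001.png')
# boxes   -> array of [minX, minY, maxX, maxY] rows, one per RoI
# classes -> matching integer labels, e.g. [1] for a covid-19 region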
1-2) Define our custom Dataset class
root: path where the images are stored
df_path: path where the annotation.csv file is stored
import os
import torch
from PIL import Image

class OpenDataset(torch.utils.data.Dataset):
    # Dataset class used to feed images and targets into the DataLoader.
    # transforms controls whether the image is preprocessed (horizontal flip, etc.)
    def __init__(self, root, df_path, transforms=None):
        self.root = root
        self.transforms = transforms
        self.df = df_path
        # One entry per unique image filename in the annotation file
        self.imgs = pd.read_csv(df_path)['filename'].drop_duplicates().tolist()

    def __getitem__(self, idx):
        # Load the image and its annotation information
        img_path = os.path.join(self.root, self.imgs[idx])
        if img_path.split('.')[-1] != 'png':
            img_path += '.png'
        img = Image.open(img_path).convert("RGB")
        box_list, classes = parse_one_annot(self.df, self.imgs[idx])

        # Convert to the tensor format expected during training
        boxes = torch.as_tensor(box_list, dtype=torch.float32)
        labels = torch.as_tensor(classes, dtype=torch.int64)
        image_id = torch.tensor([idx])
        # area is the area of each RoI box
        area_list = [(b[2] - b[0]) * (b[3] - b[1]) for b in box_list]
        areas = torch.as_tensor(area_list, dtype=torch.float32)
        # iscrowd = 1 marks a crowd region that is ignored during evaluation;
        # every RoI here is a single object, so all entries are 0
        iscrowd = torch.zeros((len(boxes),), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = areas
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        return len(self.imgs)
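A quick sanity check before wrapping the dataset in a DataLoader (train_root and the CSV path are the same placeholder paths used in the next step):

# Without transforms, __getitem__ returns the raw PIL image and the target dict.
dataset = OpenDataset(train_root, '/content/drive/My Drive/test/train.csv')
img, target = dataset[0]
print(len(dataset), target["boxes"].shape, target["labels"])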
1-3) Instantiate our training and validation datasets and wrap them in DataLoaders
dataset_train = OpenDataset(train_root, '/content/drive/My Drive/test/train.csv',
                            transforms=get_transform(train=True))
dataset_val = OpenDataset(val_root, '/content/drive/My Drive/test/val.csv',
                          transforms=get_transform(train=False))

# Randomly reorder the images in each dataset
torch.manual_seed(1)
indices_train = torch.randperm(len(dataset_train)).tolist()
indices_val = torch.randperm(len(dataset_val)).tolist()
dataset_train = torch.utils.data.Subset(dataset_train, indices_train)
dataset_val = torch.utils.data.Subset(dataset_val, indices_val)

# Define the DataLoaders
data_loader = torch.utils.data.DataLoader(
    dataset_train, batch_size=4, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn)
data_loader_val = torch.utils.data.DataLoader(
    dataset_val, batch_size=1, shuffle=False, num_workers=4,
    collate_fn=utils.collate_fn)

print("We have {} examples: {} for training and {} for validation".format(
    len(dataset_train) + len(dataset_val), len(dataset_train), len(dataset_val)))
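get_transform and utils.collate_fn are not defined in this post; they come from torchvision's object detection reference scripts (references/detection/transforms.py and utils.py), which are assumed to have been copied next to the notebook. A minimal sketch of get_transform under that assumption:

import transforms as T  # references/detection/transforms.py from torchvision
import utils            # references/detection/utils.py (provides collate_fn)

def get_transform(train):
    # Always convert the PIL image to a tensor; only the training split
    # additionally gets random horizontal flips (the boxes are flipped too).
    t = [T.ToTensor()]
    if train:
        t.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(t)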
2. Train and evaluate the model
2-1) Get the pre-trained model (Faster R-CNN with a ResNet-50 FPN backbone)
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

def get_instance_segmentation_model(num_classes):
    # Load a model pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    # Replace the classifier with a new one that has the
    # user-defined number of classes
    # Get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # Replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model
2-2) Set up the model
# Train on the GPU if one is available, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

num_classes = 4  # 3 classes (covid-19, nodule, cancer) + 1 background class
model = get_instance_segmentation_model(num_classes)
# Move the model to the GPU or CPU
model.to(device)

# Construct an optimizer over the trainable parameters
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.05, momentum=0.9, weight_decay=0.0005)

# Construct a learning rate scheduler that
# decreases the learning rate by 10x every 5 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
2-3) Train and evaluate the model
num_epochs = 40
for epoch in range(num_epochs):
    # Train for 1 epoch, printing progress every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # Update the learning rate
    lr_scheduler.step()
    # Evaluate on the validation data
    evaluate(model, data_loader_val, device=device)
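As with get_transform, train_one_epoch and evaluate are not part of the torchvision package itself; they come from the same detection reference scripts mentioned above, so the notebook is assumed to import them along these lines:

# Assumes engine.py from torchvision's references/detection directory
# has been copied into the working directory.
from engine import train_one_epoch, evaluate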
3. Test the model
dataset_test = OpenDataset(test_root, '/content/drive/My Drive/test/test.csv',
                           transforms=get_transform(train=False))

# Put the model in evaluation mode
model.eval()
for i in range(len(dataset_test)):
    img, target = dataset_test[i]
    label_boxes = target["boxes"].numpy()
    with torch.no_grad():
        prediction = model([img.to(device)])
    # Draw the ground-truth and predicted boxes on the image
    result = drawPrediction(img, label_boxes, prediction)
    display(result)  # show the drawn image in the notebook output
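drawPrediction is a custom visualization helper that is not shown in this post. A minimal sketch of what such a helper could look like, using PIL's ImageDraw; the signature and the 0.5 score threshold are assumptions, not the author's original code:

import torchvision.transforms.functional as F
from PIL import ImageDraw

def drawPrediction(img, label_boxes, prediction, score_threshold=0.5):
    # Hypothetical helper: ground-truth boxes in green, predicted boxes
    # with a score above the threshold in red.
    image = F.to_pil_image(img).convert("RGB")
    draw = ImageDraw.Draw(image)
    for x0, y0, x1, y1 in label_boxes:
        draw.rectangle([(x0, y0), (x1, y1)], outline="green", width=2)
    for box, score in zip(prediction[0]["boxes"], prediction[0]["scores"]):
        if score >= score_threshold:
            x0, y0, x1, y1 = box.tolist()
            draw.rectangle([(x0, y0), (x1, y1)], outline="red", width=2)
    return image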