From 7af1b4c266fef1a0554c2077509b3be16d972e1b Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Fri, 10 Sep 2021 14:34:09 +0200
Subject: [PATCH] Improved `detect.py` timing (#4741)

* Improved detect.py timing

* Eliminate 1 time_sync() call

* Inference-only time

* dash

* #Save section

* Cleanup
---
 detect.py | 24 ++++++++++++++----------
 val.py    | 16 ++++++++--------
 2 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/detect.py b/detect.py
index 8acd5bf71593..5cb131220e89 100644
--- a/detect.py
+++ b/detect.py
@@ -8,7 +8,6 @@
 
 import argparse
 import sys
-import time
 from pathlib import Path
 
 import cv2
@@ -123,8 +122,9 @@ def wrap_frozen_graph(gd, inputs, outputs):
     # Run inference
     if pt and device.type != 'cpu':
         model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.parameters())))  # run once
-    t0 = time.time()
+    dt, seen = [0.0, 0.0, 0.0], 0
     for path, img, im0s, vid_cap in dataset:
+        t1 = time_sync()
         if onnx:
             img = img.astype('float32')
         else:
@@ -133,9 +133,10 @@ def wrap_frozen_graph(gd, inputs, outputs):
         img = img / 255.0  # 0 - 255 to 0.0 - 1.0
         if len(img.shape) == 3:
             img = img[None]  # expand for batch dim
+        t2 = time_sync()
+        dt[0] += t2 - t1
 
         # Inference
-        t1 = time_sync()
         if pt:
             visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
             pred = model(img, augment=augment, visualize=visualize)[0]
@@ -162,17 +163,20 @@ def wrap_frozen_graph(gd, inputs, outputs):
             pred[..., 2] *= imgsz[1]  # w
             pred[..., 3] *= imgsz[0]  # h
             pred = torch.tensor(pred)
+        t3 = time_sync()
+        dt[1] += t3 - t2
 
         # NMS
         pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
-        t2 = time_sync()
+        dt[2] += time_sync() - t3
 
         # Second-stage classifier (optional)
         if classify:
             pred = apply_classifier(pred, modelc, img, im0s)
 
         # Process predictions
-        for i, det in enumerate(pred):  # detections per image
+        for i, det in enumerate(pred):  # per image
+            seen += 1
             if webcam:  # batch_size >= 1
                 p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count
             else:
@@ -209,8 +213,8 @@ def wrap_frozen_graph(gd, inputs, outputs):
                         if save_crop:
                             save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)
 
-            # Print time (inference + NMS)
-            print(f'{s}Done. ({t2 - t1:.3f}s)')
+            # Print time (inference-only)
+            print(f'{s}Done. ({t3 - t2:.3f}s)')
 
             # Stream results
             im0 = annotator.result()
@@ -237,15 +241,15 @@ def wrap_frozen_graph(gd, inputs, outputs):
                         vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                     vid_writer[i].write(im0)
 
+    # Print results
+    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
+    print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
     if save_txt or save_img:
         s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
         print(f"Results saved to {colorstr('bold', save_dir)}{s}")
-
     if update:
         strip_optimizer(weights)  # update model (to fix SourceChangeWarning)
-
-    print(f'Done. ({time.time() - t0:.3f}s)')
 
 
 def parse_opt():
     parser = argparse.ArgumentParser()
diff --git a/val.py b/val.py
index b7068e041e57..c8f503351ad9 100644
--- a/val.py
+++ b/val.py
@@ -154,22 +154,22 @@ def run(data,
     names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)}
     class_map = coco80_to_coco91_class() if is_coco else list(range(1000))
     s = ('%20s' + '%11s' * 6) % ('Class', 'Images', 'Labels', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
-    p, r, f1, mp, mr, map50, map, t0, t1, t2 = 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
+    dt, p, r, f1, mp, mr, map50, map = [0.0, 0.0, 0.0], 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
     loss = torch.zeros(3, device=device)
     jdict, stats, ap, ap_class = [], [], [], []
     for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
-        t_ = time_sync()
+        t1 = time_sync()
         img = img.to(device, non_blocking=True)
         img = img.half() if half else img.float()  # uint8 to fp16/32
         img /= 255.0  # 0 - 255 to 0.0 - 1.0
         targets = targets.to(device)
         nb, _, height, width = img.shape  # batch size, channels, height, width
-        t = time_sync()
-        t0 += t - t_
+        t2 = time_sync()
+        dt[0] += t2 - t1
 
         # Run model
         out, train_out = model(img, augment=augment)  # inference and training outputs
-        t1 += time_sync() - t
+        dt[1] += time_sync() - t2
 
         # Compute loss
         if compute_loss:
@@ -178,9 +178,9 @@ def run(data,
         # Run NMS
         targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device)  # to pixels
         lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
-        t = time_sync()
+        t3 = time_sync()
         out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls)
-        t2 += time_sync() - t
+        dt[2] += time_sync() - t3
 
         # Statistics per image
         for si, pred in enumerate(out):
@@ -247,7 +247,7 @@ def run(data,
             print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))
 
     # Print speeds
-    t = tuple(x / seen * 1E3 for x in (t0, t1, t2))  # speeds per image
+    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
    if not training:
         shape = (batch_size, 3, imgsz, imgsz)
         print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t)
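
Note: below is a minimal, self-contained sketch of the timing pattern this patch introduces in both detect.py and val.py, for readers who want to reuse it outside the repository. time_sync() mirrors the YOLOv5 helper in utils/torch_utils.py (synchronize CUDA before reading the wall clock); the dataset, model, and nms objects here are hypothetical stand-ins, not the repository's dataloader, detector, or non_max_suppression.

import time

import torch


def time_sync():
    # Mirrors YOLOv5's utils.torch_utils.time_sync: wait for pending CUDA
    # kernels so the clock measures finished GPU work, not queued work.
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


def nms(x):
    return x  # hypothetical stand-in for non_max_suppression


dataset = [torch.randint(0, 256, (3, 640, 640), dtype=torch.uint8) for _ in range(4)]  # stand-in images
model = torch.nn.Conv2d(3, 8, 3)  # stand-in for the detector

dt, seen = [0.0, 0.0, 0.0], 0  # accumulators: pre-process, inference, NMS
for img in dataset:
    t1 = time_sync()
    img = img[None].float() / 255.0  # pre-process: batch dim, uint8 to float, 0 - 255 to 0.0 - 1.0
    t2 = time_sync()
    dt[0] += t2 - t1

    pred = model(img)  # inference
    t3 = time_sync()
    dt[1] += t3 - t2

    pred = nms(pred)  # NMS
    dt[2] += time_sync() - t3
    seen += 1

t = tuple(x / seen * 1E3 for x in dt)  # per-image averages in milliseconds
print('Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image' % t)

Because each stage is bracketed by synchronized timestamps, dt[0], dt[1], and dt[2] accumulate exclusive per-stage totals; dividing by the number of images seen and scaling by 1E3 yields the per-image millisecond figures in the new 'Speed:' summary line.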