diff --git a/README.md b/README.md
index 80db91a..e58bfeb 100644
--- a/README.md
+++ b/README.md
@@ -2,53 +2,50 @@
 
 ![](https://habrastorage.org/webt/gy/-1/xd/gy-1xdtfz3_i7xxt-nqzl4mfhuw.jpeg)
 
-A Python wrapper for the YoloV5Face model, providing easy-to-use functionalities for face detection in images.
+A user-friendly Python wrapper for the YoloV5Face model that simplifies face detection in images. It offers a small API for quick integration into Python projects, plus options for handling varied face detection scenarios.
 
 ## Installation
 
-Install the YoloV5Face wrapper using pip:
+Install the YoloV5Face wrapper from PyPI using pip:
 
 ```bash
 pip install -U yolo5face
 ```
 
-## Inference
+## Face Detection: Standard and Enhanced
 
-Use the wrapper to quickly deploy face detection in your projects:
+The YoloV5Face wrapper supports both standard and enhanced face detection. Standard detection suits most use cases, while enhanced detection, which aggregates results over multiple target sizes, is better for images whose faces vary widely in size.
+
+### Getting Started
+
+To detect faces in an image:
 
-```bash
+```python
 from yolo5face.get_model import get_model
 import cv2
 
-model = get_model("yolov5n", device=-1, target_size=512, min_face=24)
+# Initialize the model
+model = get_model("yolov5n", device=-1, min_face=24)
 
-image = cv2.imread()
+# Load your image
+image = cv2.imread("path/to/image.jpg")
 image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
 
-boxes, key_points, scores = model(image)
-```
-
-* **device**: Specify device `cpu`, `cuda`, `mps` or integer for the number of cuda device.
-* **target_size**: The minimum size of the target image for detection.
-* **min_face**: The minimum face size in pixels. Faces smaller than this value will be ignored.
-
-## Enhanced Detection with Aggregated Target Sizes
-
-In addition to standard detection, this wrapper supports enhanced detection capabilities by aggregating results over multiple target sizes. This feature is especially useful in scenarios where face sizes vary significantly within the same image.
+# Standard detection
+boxes, key_points, scores = model(image, target_size=512)
 
-To use this feature:
-
-```bash
-from yolo5face.get_model import get_model
+# Enhanced detection (aggregates over multiple target sizes)
+enhanced_boxes, enhanced_key_points, enhanced_scores = model(image, target_size=[320, 640, 1280])
+```
 
-model = get_model("yolov5n", device=-1, target_size=[320, 640, 1280], min_face=24)
+Parameters:
 
-# Aggregate detections over the specified target sizes
-boxes, key_points, scores = model(image)
-```
+* **device**: The processing device: `cpu`, `cuda`, `mps`, or a CUDA device number.
+* **target_size**: The target size of the smaller image axis; pass a list of sizes for enhanced multi-scale detection.
+* **min_face**: The minimum face size in pixels. Smaller faces are ignored.
 
-This approach leverages multiple detections at different scales, followed by Non-Maximum Suppression, to provide a more comprehensive set of detections.
+Enhanced detection runs the model at multiple scales and merges the results with Non-Maximum Suppression, yielding a more comprehensive set of detections.
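+
+### Working with the Results
+
+Continuing the example above, here is a minimal sketch of drawing the detections, assuming `boxes` come back as `[x_min, y_min, x_max, y_max]` pixel coordinates and `key_points` as per-face lists of `(x, y)` landmarks (the output file name is just a placeholder):
+
+```python
+for box, face_points in zip(boxes, key_points):
+    x_min, y_min, x_max, y_max = (int(v) for v in box)
+    # Draw the face bounding box
+    cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
+    # Mark each facial landmark
+    for x, y in face_points:
+        cv2.circle(image, (int(x), int(y)), 2, (255, 0, 0), -1)
+
+# The snippet above converted the image to RGB, so convert back before saving
+cv2.imwrite("detections.jpg", cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
+```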
 
 ## License
 
-This YoloV5Face wrapper is released under the [MIT License](LICENSE)
+This YoloV5Face wrapper is released under the [MIT License](LICENSE).
diff --git a/tests/aggregate_test.py b/tests/aggregate_test.py
index 4483708..d00626b 100644
--- a/tests/aggregate_test.py
+++ b/tests/aggregate_test.py
@@ -4,7 +4,7 @@
 from tests.conftest import test_images as images
 from yolo5face.get_model import get_model
 
-model = get_model("yolov5n", device="cpu", target_size=[512, 1024])
+model = get_model("yolov5n", device="cpu")
 
 
 @mark.parametrize(
@@ -15,7 +15,7 @@
     ],
 )
 def test_face_detection(image, face):
-    boxes, points = model(image)[:2]
+    boxes, points = model(image, target_size=[512, 1024])[:2]
 
     for box_id, box in enumerate(boxes):
         assert len(DeepDiff(box, face[box_id]["box"])) == 0
diff --git a/tests/model_test.py b/tests/model_test.py
index 4e99c4b..5fdfba3 100644
--- a/tests/model_test.py
+++ b/tests/model_test.py
@@ -4,7 +4,7 @@
 from tests.conftest import test_images as images
 from yolo5face.get_model import get_model
 
-model = get_model("yolov5n", device="cpu", target_size=512)
+model = get_model("yolov5n", device="cpu")
 
 
 @mark.parametrize(
@@ -15,7 +15,7 @@
     ],
 )
 def test_face_detection(image, face):
-    boxes, points = model(image)[:2]
+    boxes, points = model(image, target_size=512)[:2]
 
     for box_id, box in enumerate(boxes):
         assert len(DeepDiff(box, face[box_id]["box"])) == 0
diff --git a/yolo5face/__init__.py b/yolo5face/__init__.py
index a73339b..00ec2dc 100644
--- a/yolo5face/__init__.py
+++ b/yolo5face/__init__.py
@@ -1 +1 @@
-__version__ = "0.0.8"
+__version__ = "0.0.9"
diff --git a/yolo5face/get_model.py b/yolo5face/get_model.py
index fdc7813..1cacd7b 100644
--- a/yolo5face/get_model.py
+++ b/yolo5face/get_model.py
@@ -28,7 +28,6 @@ def get_file_name(url: str) -> str:
 def get_model(
     model_name: str,
     device: str,
-    target_size: int,
     min_face: int = 24,
     weights_path: str = "~/.torch/models",
 ) -> YoloDetectorAggregator:
@@ -57,7 +56,6 @@ def get_model(
         device = torch.device("cpu")
 
     return YoloDetectorAggregator(
-        target_sizes=target_size,
         min_face=min_face,
         device=device,
         weights_name=weight_file_path,
diff --git a/yolo5face/yoloface/YoloDetectorAggregator.py b/yolo5face/yoloface/YoloDetectorAggregator.py
index 46fa607..0d0824c 100644
--- a/yolo5face/yoloface/YoloDetectorAggregator.py
+++ b/yolo5face/yoloface/YoloDetectorAggregator.py
@@ -8,27 +8,32 @@
 
 
 class YoloDetectorAggregator:
-    def __init__(self, target_sizes: int | list[int], **yolo_args: Any) -> None:
+    def __init__(self, **yolo_args: Any) -> None:
         self.yolo_args = yolo_args
-        self.target_sizes = target_sizes if isinstance(target_sizes, list) else [target_sizes]
+        self.detector = YoloDetector(**self.yolo_args)
 
     def nms(self, boxes: torch.Tensor, scores: torch.Tensor, iou_threshold: float = 0.5) -> torch.Tensor:
         """Applies Non-Maximum Suppression (NMS) to filter boxes."""
         return torch.ops.torchvision.nms(boxes.type(torch.float), scores.type(torch.float), iou_threshold)
 
-    def __call__(self, image: np.ndarray) -> tuple[list[BoxType], list[KeypointType], list[float]]:
+    def __call__(
+        self,
+        image: np.ndarray,
+        target_size: int | list[int],
+    ) -> tuple[list[BoxType], list[KeypointType], list[float]]:
         all_boxes, all_keypoints, all_scores = [], [], []
 
-        for size in self.target_sizes:
-            detector = YoloDetector(target_size=size, **self.yolo_args)
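+        # Accept either a single int or a list of sizes so that standard and
+        # enhanced (multi-scale) detection share one code path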
+        if isinstance(target_size, int):
+            target_size = [target_size]
 
-            boxes, keypoints, scores = detector(image)
+        for size in target_size:
+            boxes, keypoints, scores = self.detector(image, size)
             all_boxes.extend(boxes)
             all_keypoints.extend(keypoints)
             all_scores.extend(scores)
 
-        if len(self.target_sizes) > 1:
+        if len(target_size) > 1:
+            # Perform aggregation with NMS if multiple target sizes are used
             return self.aggregate_predictions(all_boxes, all_keypoints, all_scores)
diff --git a/yolo5face/yoloface/face_detector.py b/yolo5face/yoloface/face_detector.py
index 98c4532..1fcb614 100644
--- a/yolo5face/yoloface/face_detector.py
+++ b/yolo5face/yoloface/face_detector.py
@@ -24,20 +24,15 @@ def __init__(
         config_name: str,
         device: torch.device,
         min_face: int,
-        target_size: int | None = None,
     ):
         """
         weights_name: name of file with network weights in weights/ folder.
         config_name: name of .yaml config with network configuration from models/ folder.
         gpu : gpu number (int) or -1 or string for cpu.
         min_face : minimal face size in pixels.
-        target_size : target size of smaller image axis (choose lower for faster work). e.g. 480, 720, 1080.
-        None for original resolution.
-
         """
         self._class_path = Path(__file__).parent.absolute()
         self.device = device
-        self.target_size = target_size
         self.min_face = min_face
         self.detector = self.init_detector(weights_name, config_name)
 
@@ -48,17 +43,16 @@ def init_detector(self, weights_name: str, config_name: str) -> nn.Module:
         detector.load_state_dict(state_dict)
         return detector.to(self.device).float().eval()
 
-    def _preprocess(self, imgs: list[np.ndarray]) -> torch.Tensor:
+    def _preprocess(self, imgs: list[np.ndarray], target_size: int) -> torch.Tensor:
         """
         Preprocessing image before passing through the network. Resize and conversion to torch tensor.
         """
         pp_imgs = []
         for img in imgs:
             h0, w0 = img.shape[:2]  # orig hw
-            if self.target_size:
-                r = self.target_size / min(h0, w0)  # resize image to img_size
-                if r < 1:
-                    img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=cv2.INTER_AREA)
+            r = target_size / min(h0, w0)  # resize image to img_size
+            if r < 1:
+                img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=cv2.INTER_AREA)
 
             imgsz = check_img_size(max(img.shape[:2]), s=int(self.detector.stride.max()))  # check img_size
             img = letterbox(img, new_shape=imgsz)[0]
@@ -120,6 +114,7 @@ def _postprocess(
     def predict(
         self,
         imgs: np.ndarray | list[np.ndarray],
+        target_size: int,
         conf_thres: float = 0.7,
         iou_thres: float = 0.5,
     ) -> tuple[list[BoxType], list[KeypointType], list[float]]:
@@ -138,7 +133,7 @@
 
         origimgs = copy.deepcopy(images)
 
-        images = self._preprocess(images)
+        images = self._preprocess(images, target_size)
 
         with torch.inference_mode():  # change this with torch.no_grad() for pytorch <1.8 compatibility
             pred = self.detector(images)[0]
@@ -148,7 +143,8 @@
     def __call__(
         self,
         imgs: np.ndarray | list[np.ndarray],
+        target_size: int,
         conf_thres: float = 0.7,
         iou_thres: float = 0.5,
     ) -> tuple[list[BoxType], list[KeypointType], list[float]]:
-        return self.predict(imgs, conf_thres, iou_thres)
+        return self.predict(imgs, target_size, conf_thres, iou_thres)