Skip to content

Commit

Permalink
Only one model when aggregating (#17)
Browse files Browse the repository at this point in the history
  • Loading branch information
ternaus committed Nov 25, 2023
1 parent 1bb9e95 commit 991a143
Show file tree
Hide file tree
Showing 7 changed files with 47 additions and 51 deletions.
47 changes: 22 additions & 25 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,53 +2,50 @@

![](https://habrastorage.org/webt/gy/-1/xd/gy-1xdtfz3_i7xxt-nqzl4mfhuw.jpeg)

A Python wrapper for the YoloV5Face model, providing easy-to-use functionalities for face detection in images.
A user-friendly Python wrapper for the YoloV5Face model, designed to simplify face detection in images. This wrapper offers straightforward functionalities for quick integration into Python projects, along with customization options for handling various face detection scenarios.

## Installation

Install the YoloV5Face wrapper using pip:
Install the YoloV5Face wrapper using pip to easily incorporate it into your projects:

```bash
pip install -U yolo5face
```

## Inference
## Face Detection: Standard and Enhanced

Use the wrapper to quickly deploy face detection in your projects:
The YoloV5Face wrapper supports both standard and enhanced face detection. The standard detection is suitable for most use cases, while the enhanced detection, which aggregates results over multiple target sizes, is ideal for images with faces of varying sizes.

### Getting Started

To detect faces in an image:

```python
from yolo5face.get_model import get_model
import cv2

model = get_model("yolov5n", device=-1, target_size=512, min_face=24)
# Initialize the model
model = get_model("yolov5n", device=-1, min_face=24)

# Load your image
image = cv2.imread(<IMAGE_PATH>)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

boxes, key_points, scores = model(image)
```

* **device**: Specify device `cpu`, `cuda`, `mps` or integer for the number of cuda device.
* **target_size**: The minimum size of the target image for detection.
* **min_face**: The minimum face size in pixels. Faces smaller than this value will be ignored.

## Enhanced Detection with Aggregated Target Sizes

In addition to standard detection, this wrapper supports enhanced detection capabilities by aggregating results over multiple target sizes. This feature is especially useful in scenarios where face sizes vary significantly within the same image.
# Standard Detection
boxes, key_points, scores = model(image, target_size=512)

To use this feature:

```bash
from yolo5face.get_model import get_model
# Enhanced Detection (aggregating over multiple target sizes)
enhanced_boxes, enhanced_key_points, enhanced_scores = model(image, target_size=[320, 640, 1280])
```

model = get_model("yolov5n", device=-1, target_size=[320, 640, 1280], min_face=24)
Parameters:

# Aggregate detections over the specified target sizes
boxes, key_points, scores = model(image)
```
* **device**: Set the processing device (cpu, cuda, mps, or CUDA device number).
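* **target_size**: Passed when calling the model, not to `get_model`. For standard detection, give a single integer: an image whose shorter side is larger than this value is downscaled so that its shorter side matches it (smaller images are not upscaled). For enhanced detection, give a list of such sizes; detection is run once per size.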
* **min_face**: Minimum size of faces to detect in pixels. Smaller faces will be ignored.

This approach leverages multiple detections at different scales, followed by Non-Maximum Suppression, to provide a more comprehensive set of detections.
Enhanced detection runs the detector once for each target size, pools the detections from all sizes, and applies Non-Maximum Suppression to remove duplicates. With a single target size, no cross-scale aggregation is performed.

## License

This YoloV5Face wrapper is released under the [MIT License](LICENSE)
This YoloV5Face wrapper is released under the [MIT License](LICENSE).
4 changes: 2 additions & 2 deletions tests/aggregate_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from tests.conftest import test_images as images
from yolo5face.get_model import get_model

model = get_model("yolov5n", device="cpu", target_size=[512, 1024])
model = get_model("yolov5n", device="cpu")


@mark.parametrize(
Expand All @@ -15,7 +15,7 @@
],
)
def test_face_detection(image, face):
boxes, points = model(image)[:2]
boxes, points = model(image, target_size=[512, 1024])[:2]

for box_id, box in enumerate(boxes):
assert len(DeepDiff(box, face[box_id]["box"])) == 0
Expand Down
4 changes: 2 additions & 2 deletions tests/model_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from tests.conftest import test_images as images
from yolo5face.get_model import get_model

model = get_model("yolov5n", device="cpu", target_size=512)
model = get_model("yolov5n", device="cpu")


@mark.parametrize(
Expand All @@ -15,7 +15,7 @@
],
)
def test_face_detection(image, face):
boxes, points = model(image)[:2]
boxes, points = model(image, target_size=512)[:2]

for box_id, box in enumerate(boxes):
assert len(DeepDiff(box, face[box_id]["box"])) == 0
Expand Down
2 changes: 1 addition & 1 deletion yolo5face/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.0.8"
__version__ = "0.0.9"
2 changes: 0 additions & 2 deletions yolo5face/get_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ def get_file_name(url: str) -> str:
def get_model(
model_name: str,
device: str,
target_size: int,
min_face: int = 24,
weights_path: str = "~/.torch/models",
) -> YoloDetectorAggregator:
Expand Down Expand Up @@ -57,7 +56,6 @@ def get_model(
device = torch.device("cpu")

return YoloDetectorAggregator(
target_sizes=target_size,
min_face=min_face,
device=device,
weights_name=weight_file_path,
Expand Down
19 changes: 12 additions & 7 deletions yolo5face/yoloface/YoloDetectorAggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,32 @@


class YoloDetectorAggregator:
def __init__(self, target_sizes: int | list[int], **yolo_args: Any) -> None:
def __init__(self, **yolo_args: Any) -> None:
self.yolo_args = yolo_args
self.target_sizes = target_sizes if isinstance(target_sizes, list) else [target_sizes]
self.detector = YoloDetector(**self.yolo_args)

def nms(self, boxes: torch.Tensor, scores: torch.Tensor, iou_threshold: float = 0.5) -> torch.Tensor:
"""Applies Non-Maximum Suppression (NMS) to filter boxes."""
return torch.ops.torchvision.nms(boxes.type(torch.float), scores.type(torch.float), iou_threshold)

def __call__(self, image: np.ndarray) -> tuple[list[BoxType], list[KeypointType], list[float]]:
def __call__(
self,
image: np.ndarray,
target_size: int | list[int],
) -> tuple[list[BoxType], list[KeypointType], list[float]]:
all_boxes, all_keypoints, all_scores = [], [], []

for size in self.target_sizes:
detector = YoloDetector(target_size=size, **self.yolo_args)
if isinstance(target_size, int):
target_size = [target_size]

boxes, keypoints, scores = detector(image)
for size in target_size:
boxes, keypoints, scores = self.detector(image, size)

all_boxes.extend(boxes)
all_keypoints.extend(keypoints)
all_scores.extend(scores)

if len(self.target_sizes) > 1:
if len(target_size) > 1:
# Perform aggregation with NMS if multiple target sizes are used
return self.aggregate_predictions(all_boxes, all_keypoints, all_scores)

Expand Down
20 changes: 8 additions & 12 deletions yolo5face/yoloface/face_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,15 @@ def __init__(
config_name: str,
device: torch.device,
min_face: int,
target_size: int | None = None,
):
"""
weights_name: name of file with network weights in weights/ folder.
config_name: name of .yaml config with network configuration from models/ folder.
gpu : gpu number (int) or -1 or string for cpu.
min_face : minimal face size in pixels.
target_size : target size of smaller image axis (choose lower for faster work). e.g. 480, 720, 1080.
None for original resolution.
"""
self._class_path = Path(__file__).parent.absolute()
self.device = device
self.target_size = target_size
self.min_face = min_face

self.detector = self.init_detector(weights_name, config_name)
Expand All @@ -48,17 +43,16 @@ def init_detector(self, weights_name: str, config_name: str) -> nn.Module:
detector.load_state_dict(state_dict)
return detector.to(self.device).float().eval()

def _preprocess(self, imgs: list[np.ndarray]) -> torch.Tensor:
def _preprocess(self, imgs: list[np.ndarray], target_size: int) -> torch.Tensor:
"""
Preprocessing image before passing through the network. Resize and conversion to torch tensor.
"""
pp_imgs = []
for img in imgs:
h0, w0 = img.shape[:2] # orig hw
if self.target_size:
r = self.target_size / min(h0, w0) # resize image to img_size
if r < 1:
img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=cv2.INTER_AREA)
r = target_size / min(h0, w0) # resize image to img_size
if r < 1:
img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=cv2.INTER_AREA)

imgsz = check_img_size(max(img.shape[:2]), s=int(self.detector.stride.max())) # check img_size
img = letterbox(img, new_shape=imgsz)[0]
Expand Down Expand Up @@ -120,6 +114,7 @@ def _postprocess(
def predict(
self,
imgs: np.ndarray | list[np.ndarray],
target_size: int,
conf_thres: float = 0.7,
iou_thres: float = 0.5,
) -> tuple[list[BoxType], list[KeypointType], list[float]]:
Expand All @@ -138,7 +133,7 @@ def predict(

origimgs = copy.deepcopy(images)

images = self._preprocess(images)
images = self._preprocess(images, target_size)

with torch.inference_mode(): # change this with torch.no_grad() for pytorch <1.8 compatibility
pred = self.detector(images)[0]
Expand All @@ -148,7 +143,8 @@ def predict(
def __call__(
self,
imgs: np.ndarray | list[np.ndarray],
target_size: int,
conf_thres: float = 0.7,
iou_thres: float = 0.5,
) -> tuple[list[BoxType], list[KeypointType], list[float]]:
return self.predict(imgs, conf_thres, iou_thres)
return self.predict(imgs, target_size, conf_thres, iou_thres)

0 comments on commit 991a143

Please sign in to comment.