From e2829f4fca7683096524b03faaa7d84d1dea3696 Mon Sep 17 00:00:00 2001 From: id3s3c <51968606+id3s3c@users.noreply.github.com> Date: Thu, 27 May 2021 19:18:25 -0300 Subject: [PATCH] lower case names before remove accents (#31) Names that begin with accented characters were being removed from the lists. --- linkedin2username.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/linkedin2username.py b/linkedin2username.py index c6bdf66..af4cfda 100755 --- a/linkedin2username.py +++ b/linkedin2username.py @@ -596,6 +596,9 @@ def clean(raw_list): allowed_chars = re.compile('[^a-zA-Z -]') for name in raw_list: + # Lower-case everything to make it easier to de-duplicate. + name = name.lower() + # Try to transform non-English characters below. name = remove_accents(name) @@ -604,9 +607,6 @@ def clean(raw_list): # People like to feel special, I guess. name = allowed_chars.sub('', name) - # Lower-case everything to make it easier to de-duplicate. - name = name.lower() - # The line below tries to consolidate white space between words # and get rid of leading/trailing spaces. name = re.sub(r'\s+', ' ', name).strip()