Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing bug #537, image filenames and web links are not included in wo… #696

Merged
merged 45 commits into from
May 5, 2024
Merged
Changes from 1 commit
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
d18c8f4
Fixing bug #537, image filenames and web links are not included in wo…
laraconda May 14, 2023
3cdf4cf
Removing tags from word cloud.
laraconda May 30, 2023
bcde7cd
Fix issue #556 with hashtag symbols in URLs (#703)
laraconda Jun 10, 2023
33be8ad
Add menu item for inserting numbered lists (#706)
curioussushiroll Jun 15, 2023
dad16a4
Use webkit API 4.1 where available or 4.0 if not.
kathenas Jul 16, 2023
4e28373
Direct PDF export has long been unsupported.
jendrikseipp Jul 26, 2023
c7a19ee
Change to use new GitHub API for release tarball download.
kathenas Aug 3, 2023
002b6ca
Add release date.
jendrikseipp Aug 10, 2023
5b3a6f9
Update version.
jendrikseipp Aug 10, 2023
9bf1dee
add version 2.30 to debian/changelog
jendrikseipp Aug 10, 2023
ea0c240
Update translations.
jendrikseipp Aug 10, 2023
9f3f266
Ignore if weblate reset times out.
jendrikseipp Aug 10, 2023
5bef688
Remove generated files in clean target (Closes: #1046202).
kathenas Aug 14, 2023
43bdde3
Update roadmap.
jendrikseipp Aug 19, 2023
f6c40c9
Add a script to merge rednotebook files into a rednotebook (#687)
sjg20 Aug 20, 2023
47274fc
Fix style.
jendrikseipp Aug 20, 2023
f958b6f
Add acknowledgment.
jendrikseipp Aug 20, 2023
7047f15
rednotebook_merge: Avoid duplicate data when merging
sjg20 Aug 28, 2023
5e7887f
Add basic text replace functionality (#715)
curioussushiroll Sep 2, 2023
af765d4
Don't switch the current day while clearing the search results.
jendrikseipp Sep 2, 2023
95a3a0a
Update translation template.
jendrikseipp Sep 2, 2023
93800ae
Prepare for release.
jendrikseipp Sep 2, 2023
64d1917
Update version.
jendrikseipp Sep 2, 2023
1261ef1
add version 2.31 to debian/changelog
jendrikseipp Sep 2, 2023
7b0ed8c
Clean up watch file. (#729)
kathenas Nov 3, 2023
e4f9246
Test macOS 12 and 13 instead of 11 and 12. (#730)
jendrikseipp Nov 5, 2023
acc85e2
Bump pyinstaller from 5.0 to 5.13.1 in /win (#737)
dependabot[bot] Dec 11, 2023
4ecb842
Don't update homebrew dependents. (#739)
jendrikseipp Dec 18, 2023
90f1f86
Make Debian dependencies more explicit. (#741)
kathenas Jan 12, 2024
a6801ad
Add kudos and URLs to appdata file.
jendrikseipp Jan 17, 2024
222d2da
Revert project license to GPL2+.
jendrikseipp Feb 2, 2024
8460110
Allow hashtags that start with (but are longer than) hex color codes …
jendrikseipp Feb 17, 2024
3dc4f0a
Highlight hashtags and formatting within lists (fixes #744).
jendrikseipp Feb 17, 2024
2e6ea3c
Allow copying text in preview mode (fixes #732).
jendrikseipp Feb 17, 2024
2fe2021
Add release date.
jendrikseipp Feb 17, 2024
66def90
Fix regex.
jendrikseipp Feb 17, 2024
74831a6
Update version.
jendrikseipp Feb 17, 2024
d593101
add version 2.32 to debian/changelog
jendrikseipp Feb 17, 2024
e7eb1b0
Update it.po (#747)
albanobattistella Feb 18, 2024
6e87567
Upgrade to actions/checkout@v4.
jendrikseipp Mar 13, 2024
9d97c88
Fix CI on macOS 13. (#756)
jendrikseipp Mar 13, 2024
4c773a5
Update Occitan translation (#758)
Mejans Mar 29, 2024
0e486fe
Let tox invoke pre-commit hooks (#705)
laraconda May 5, 2024
4eb74ef
Polish.
jendrikseipp May 5, 2024
16a42a6
Merge branch 'master' into laraconda-filenames-cloud
jendrikseipp May 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 25 additions & 21 deletions rednotebook/gui/clouds.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@

def get_regex(word):
try:
return re.compile(f"{word}$", re.I)
return re.compile(f"{word}$", re.IGNORECASE)
except Exception:
logging.warning(f'"{word}" is not a valid regular expression')
return re.compile("^$")
Expand Down Expand Up @@ -81,13 +81,20 @@ def update_lists(self):
self.include_list = [word.lower() for word in self.include_list]
logging.info("Cloud include list: %s" % self.include_list)

# Ignore file and web links in the word cloud.
jendrikseipp marked this conversation as resolved.
Show resolved Hide resolved
self.special_ignore_words_tuple = ("file://.*", "https?://.*")
logging.info(f"Cloud special ignore regexes: {self.special_ignore_words_tuple}")

self.update_regexes()

def update_regexes(self):
logging.debug("Start compiling regexes")
logging.debug("Start compiling regexes: ignore, include and special")
self.regexes_ignore = [get_regex(word) for word in self.ignore_list]
self.regexes_include = [get_regex(word) for word in self.include_list]
logging.debug("Finished")
self.regexes_special_ignore_words = [
re.compile(regex) for regex in self.special_ignore_words_tuple
]
logging.debug("Finished compiling")

def update(self, force_update=False):
"""Public method that calls the private "_update"."""
Expand Down Expand Up @@ -119,7 +126,9 @@ def _update(self):

word_count_dict = self.journal.get_word_count_dict()
self.words = self._get_words_for_cloud(
word_count_dict, self.regexes_ignore, self.regexes_include
word_count_dict,
self.regexes_ignore + self.regexes_special_ignore_words,
self.regexes_include,
)

self.link_dict = self.tags + self.words
Expand Down Expand Up @@ -153,22 +162,17 @@ def _get_cloud_body(self, cloud_words):
return "\n".join(html_elements)

@staticmethod
def select_most_frequent_words(words_and_frequencies, count):
if count == 0:
return []

def get_collated_word(word_and_freq):
word, freq = word_and_freq
return locale.strxfrm(word)

def get_frequency(word_and_freq):
word, freq = word_and_freq
return freq

words_and_frequencies.sort(key=get_frequency, reverse=True)
words_and_frequencies = words_and_frequencies[:count]
words_and_frequencies.sort(key=get_collated_word)
return words_and_frequencies
def select_most_frequent_words(words_and_frequencies, nwords):
"""
Returns the 'nwords' most frequent words in 'words_and_frequencies'
jendrikseipp marked this conversation as resolved.
Show resolved Hide resolved
sorted by the locale
jendrikseipp marked this conversation as resolved.
Show resolved Hide resolved
"""
most_frequent_words = []
if nwords > 0:
words_and_frequencies.sort(key=lambda word_freq: word_freq[1], reverse=True)
most_frequent_words = words_and_frequencies[:nwords]
most_frequent_words.sort(key=lambda word_freq: locale.strxfrm(word_freq[0]))
return most_frequent_words

def _get_tags_for_cloud(self, tag_count_dict, ignores):
tags_and_frequencies = [
Expand All @@ -186,7 +190,7 @@ def _get_words_for_cloud(self, word_count_dict, ignores, includes):
for (word, freq) in word_count_dict.items()
if (len(word) > 4 or any(pattern.match(word) for pattern in includes))
and not
# filter words in ignore_list
# filter words and patterns in ignore_list
any(pattern.match(word) for pattern in ignores)
]
return self.select_most_frequent_words(words_and_frequencies, CLOUD_WORDS)
Expand Down