Skip to content

Commit

Permalink
[twitter] ignore 'Unavailable' media (#5736)
Browse files Browse the repository at this point in the history
… including geo-restricted content.

add an 'unavailable' option to re-enable downloading such media
  • Loading branch information
mikf committed Jun 20, 2024
1 parent 8452d04 commit f58b0e6
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 0 deletions.
11 changes: 11 additions & 0 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3896,6 +3896,17 @@ Description
Note: This requires at least 1 additional API call per initial Tweet.


extractor.twitter.unavailable
-----------------------------
Type
``bool``
Default
``false``
Description
Try to download media marked as ``Unavailable``,
e.g. ``Geoblocked`` videos.
By default, such media is skipped and a warning is logged.


extractor.twitter.include
-------------------------
Type
Expand Down
10 changes: 10 additions & 0 deletions gallery_dl/extractor/twitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def __init__(self, match):
self.user = match.group(1)

def _init(self):
self.unavailable = self.config("unavailable", False)
self.textonly = self.config("text-tweets", False)
self.retweets = self.config("retweets", False)
self.replies = self.config("replies", True)
Expand Down Expand Up @@ -143,6 +144,15 @@ def items(self):

def _extract_media(self, tweet, entities, files):
for media in entities:

if "ext_media_availability" in media:
ext = media["ext_media_availability"]
if ext.get("status") == "Unavailable":
self.log.warning("Media unavailable (%s - '%s')",
tweet["id_str"], ext.get("reason"))
if not self.unavailable:
continue

descr = media.get("ext_alt_text")
width = media["original_info"].get("width", 0)
height = media["original_info"].get("height", 0)
Expand Down
8 changes: 8 additions & 0 deletions test/results/twitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,14 @@
"content" : "Biden wants to replace lead pipes. He failed to mention that the unfunded mandate sets an almost impossible timeline, will cost billions, infringe on the rights of the States and their residents – all for benefits that may be entirely speculative. #sotu https://ag.ks.gov/media-center/news-releases/2024/02/09/kobach-leads-coalition-demanding-biden-drop-unnecessary-epa-rule",
},

{
"#url" : "https://x.com/jsports_motor/status/1801338077618524583",
"#comment" : "geo-restricted video (#5736)",
"#category": ("", "twitter", "tweet"),
"#class" : twitter.TwitterTweetExtractor,
"#count" : 0,
},

{
"#url" : "https://twitter.com/playpokemon/status/1263832915173048321/quotes",
"#category": ("", "twitter", "quotes"),
Expand Down

0 comments on commit f58b0e6

Please sign in to comment.