diff --git a/maigret/checking.py b/maigret/checking.py index 2bcf7d6..2676c17 100644 --- a/maigret/checking.py +++ b/maigret/checking.py @@ -48,6 +48,15 @@ SUPPORTED_IDS = ( BAD_CHARS = "#" +def is_cloudflare_bypass_active(value) -> bool: + """True if Cloudflare webgate URL rewrite should run (``--cloudflare-bypass`` or settings).""" + if value is True: + return True + if isinstance(value, dict): + return bool(value.get("enabled", False)) + return False + + class CheckerBase: pass @@ -433,7 +442,7 @@ def make_site_result( url = re.sub("(? QueryResultWrapper: @@ -691,12 +701,14 @@ async def maigret( options["timeout"] = timeout options["id_type"] = id_type options["forced"] = forced + options["cloudflare_bypass"] = is_cloudflare_bypass_active(cloudflare_bypass) # results from analysis of all sites all_results: Dict[str, QueryResultWrapper] = {} sites = list(site_dict.keys()) + executor_limit = timeout + 0.5 attempts = retries + 1 while attempts: tasks_dict = {} @@ -711,7 +723,11 @@ async def maigret( sitename, '', MaigretCheckStatus.UNKNOWN, - error=CheckError('Request failed'), + error=CheckError( + 'Request timeout', + f'No response within {executor_limit:.1f}s per site ' + f'(increase --timeout or use --no-progressbar)', + ), ), } tasks_dict[sitename] = ( @@ -792,6 +808,7 @@ async def site_self_check( i2p_proxy=None, skip_errors=False, cookies=None, + cloudflare_bypass=False, ): changes = { "disabled": False, @@ -819,6 +836,7 @@ async def site_self_check( tor_proxy=tor_proxy, i2p_proxy=i2p_proxy, cookies=cookies, + cloudflare_bypass=cloudflare_bypass, ) # don't disable entries with other ids types @@ -890,6 +908,7 @@ async def self_check( proxy=None, tor_proxy=None, i2p_proxy=None, + cloudflare_bypass=False, ) -> bool: sem = asyncio.Semaphore(max_connections) tasks = [] @@ -905,7 +924,17 @@ async def self_check( for _, site in all_sites.items(): check_coro = site_self_check( - site, logger, sem, db, silent, proxy, tor_proxy, i2p_proxy, skip_errors=True + site, + logger, + sem, + db, + silent, + proxy, + tor_proxy, + i2p_proxy, + skip_errors=True, + cookies=None, + cloudflare_bypass=cloudflare_bypass, ) future = asyncio.ensure_future(check_coro) tasks.append(future) diff --git a/maigret/errors.py b/maigret/errors.py index 573511e..2e64422 100644 --- a/maigret/errors.py +++ b/maigret/errors.py @@ -136,7 +136,10 @@ def extract_and_group(search_res: QueryResultWrapper) -> List[Dict[str, Any]]: def notify_about_errors( - search_results: QueryResultWrapper, query_notify, show_statistics=False + search_results: QueryResultWrapper, + query_notify, + show_statistics=False, + print_check_errors=False, ) -> List[Tuple]: """ Prepare error notifications in search results, text + symbol, @@ -169,7 +172,7 @@ def notify_about_errors( text = f'{e["err"]}: {round(e["perc"],2)}%' results.append((text, '!')) - if was_errs_displayed: + if was_errs_displayed and not print_check_errors: results.append( ('You can see detailed site check errors with a flag `--print-errors`', '-') ) diff --git a/maigret/executors.py b/maigret/executors.py index a14c4c2..55885f0 100644 --- a/maigret/executors.py +++ b/maigret/executors.py @@ -110,6 +110,28 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor): self.progress = None # TODO: tests + @staticmethod + def _emit_timeout_notify(args, default_fallback): + """Print per-site line when wait_for kills check_site_for_username (no query_notify.update ran).""" + if ( + not default_fallback + or not isinstance(default_fallback, (tuple, list)) + or len(default_fallback) < 2 + ): + return + _, results_dict = default_fallback[0], default_fallback[1] + if not isinstance(results_dict, dict): + return + status = results_dict.get('status') + if status is None: + return + if len(args) < 5: + return + query_notify = args[4] + site = results_dict.get('site') + similar = bool(getattr(site, 'similar_search', False)) if site is not None else False + query_notify.update(status, similar) + async def increment_progress(self, count): """Update progress by calling the provided progress function.""" if self.progress: @@ -144,6 +166,7 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor): result = await asyncio.wait_for(query_task, timeout=self.timeout) except asyncio.TimeoutError: result = kwargs.get('default') + self._emit_timeout_notify(args, result) self.results.append(result) @@ -207,6 +230,7 @@ class AsyncioQueueGeneratorExecutor: result = await asyncio.wait_for(query_task, timeout=self.timeout) except asyncio.TimeoutError: result = kwargs.get('default') + AsyncioProgressbarQueueExecutor._emit_timeout_notify(args, result) await self._results.put(result) except Exception as e: self.logger.error(f"Error in worker: {e}") diff --git a/maigret/maigret.py b/maigret/maigret.py index a591c65..00c69ff 100755 --- a/maigret/maigret.py +++ b/maigret/maigret.py @@ -587,6 +587,7 @@ async def main(): max_connections=args.connections, tor_proxy=args.tor_proxy, i2p_proxy=args.i2p_proxy, + cloudflare_bypass=args.cloudflare_bypass, ) if is_need_update: if input('Do you want to save changes permanently? [Yn]\n').lower() in ( @@ -687,10 +688,14 @@ async def main(): no_progressbar=args.no_progressbar, retries=args.retries, check_domains=args.with_domains, + cloudflare_bypass=args.cloudflare_bypass, ) errs = errors.notify_about_errors( - results, query_notify, show_statistics=args.verbose + results, + query_notify, + show_statistics=args.verbose, + print_check_errors=args.print_check_errors, ) for e in errs: query_notify.warning(*e) diff --git a/maigret/resources/data.json b/maigret/resources/data.json index 18aaa57..e275b13 100644 --- a/maigret/resources/data.json +++ b/maigret/resources/data.json @@ -3149,8 +3149,8 @@ }, "ChaturBate": { "tags": [ - "us", - "cloudflare" + "cloudflare", + "us" ], "checkType": "status_code", "alexaRank": 62, @@ -3329,9 +3329,9 @@ }, "CloudflareCommunity": { "tags": [ + "cloudflare", "forum", - "tech", - "cloudflare" + "tech" ], "engine": "Discourse", "alexaRank": 976, @@ -4204,9 +4204,9 @@ }, "Discogs": { "tags": [ + "cloudflare", "music", - "us", - "cloudflare" + "us" ], "checkType": "status_code", "alexaRank": 1040, @@ -4921,9 +4921,9 @@ }, "Etsy": { "tags": [ + "cloudflare", "shopping", - "us", - "cloudflare" + "us" ], "errors": { "Sanctions Policy": "Site censorship", @@ -5041,8 +5041,8 @@ "usernameUnclaimed": "noonewouldeverusethis7", "alexaRank": 240, "tags": [ - "design", - "cloudflare" + "cloudflare", + "design" ] }, "8tracks.com": { @@ -6275,9 +6275,9 @@ "Freepik": { "tags": [ "art", + "cloudflare", "photo", - "stock", - "cloudflare" + "stock" ], "checkType": "status_code", "alexaRank": 147, @@ -8673,9 +8673,9 @@ }, "Kickstarter": { "tags": [ + "cloudflare", "finance", - "us", - "cloudflare" + "us" ], "checkType": "status_code", "alexaRank": 609, @@ -12029,8 +12029,8 @@ }, "Patreon": { "tags": [ - "finance", - "cloudflare" + "cloudflare", + "finance" ], "checkType": "status_code", "alexaRank": 304, @@ -13522,9 +13522,9 @@ }, "Redbubble": { "tags": [ + "cloudflare", "shopping", - "us", - "cloudflare" + "us" ], "checkType": "status_code", "alexaRank": 925, @@ -15067,9 +15067,9 @@ }, "SourceForge": { "tags": [ + "cloudflare", "coding", - "us", - "cloudflare" + "us" ], "checkType": "message", "alexaRank": 444, @@ -16920,9 +16920,9 @@ }, "Twitch": { "tags": [ + "cloudflare", "streaming", - "us", - "cloudflare" + "us" ], "headers": { "cookies": "_gat=1; _ga_4ZM72D0Y59=GS1.2.1734305902.1.0.1734305902.0.0.0; _ga=GA1.2.1051951095.1734305902; _gid=GA1.2.30506583.1734305902; cf_clearance=xo5uTOkBRsAYb4so4QSu1h8tlFcFJyYSA2SBEHyYA2U-1734305900-1.2.1.1-l9mQ677uPsBenPceAasuW_ZVBqRgQqy4df.13gRl6y4aFBf._3bLo1c3.uVZOXwMxL_iVN.EvEHEBiNczBNMM6riJrVWgiLx1O1jGRbhIiGP.tsomZgyl_bNupNbWxZNzHy454hC0iUigDrE5jkJJoazDRJNc5532wj9nT.U9DDBxW3RplVCdj4x5sMt3K3IXADYvAGabBQnzvS3rEr_w66KClwAehy69tWHVSPDkc.ww7QnxdDItYqmtL8bz9IScdouTAvU_MWK6oxvxcLc6GQFCQZnoToeX8Fgeui2flhV.kXXjEQ4NjypxSFakcCPIysHZOUjKfv93.W9Vfl7id.Y8DUpsmxEPVOpfcrGY6YvFtk6yJhvUQryJftS5b7E5P5jVPW_pPlMWSTWL9IaysG7INm6ZjDyjsVG7OBJIUujSSMlyoKiR8sv0L2ueHt6", @@ -17490,9 +17490,6 @@ ], "method": "vimeo" }, - "headers": { - "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NDI3Mjg3NDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiZWI1ZjY1OTAtZGNiMC00MDQ5LTkwODctZmU3ZGFjNjAyMmZmIn0.wrdGR5HPqB_o4f2EHL1h0vVldfmUOvpvPKm-y88X7KU" - }, "urlProbe": "https://api.vimeo.com/users/{username}?fields=name%2Cgender%2Cbio%2Curi%2Clink%2Cbackground_video%2Clocation_details%2Cpictures%2Cverified%2Cmetadata.public_videos.total%2Cavailable_for_hire%2Ccan_work_remotely%2Cmetadata.connections.videos.total%2Cmetadata.connections.albums.total%2Cmetadata.connections.followers.total%2Cmetadata.connections.following.total%2Cmetadata.public_videos.total%2Cmetadata.connections.vimeo_experts.is_enrolled%2Ctotal_collection_count%2Ccreated_time%2Cprofile_preferences%2Cmembership%2Cclients%2Cskills%2Cproject_types%2Crates%2Ccategories%2Cis_expert%2Cprofile_discovery%2Cwebsites%2Ccontact_emails&fetch_user_profile=1", "checkType": "status_code", "alexaRank": 148, @@ -27258,8 +27255,8 @@ }, "upwork.com": { "tags": [ - "us", - "cloudflare" + "cloudflare", + "us" ], "engine": "engine404", "urlMain": "https://upwork.com", @@ -35784,6 +35781,7 @@ "tags": [ "gaming", "coding", + "cloudflare", "photo", "music", "blog", diff --git a/maigret/resources/settings.json b/maigret/resources/settings.json index 3819354..e963131 100644 --- a/maigret/resources/settings.json +++ b/maigret/resources/settings.json @@ -57,7 +57,6 @@ "web_interface_port": 5000, "cloudflare_bypass": { "enabled": true, - "module": "cloudscraper", "modules": [ { "name": "chrome_webgate", diff --git a/maigret/submit.py b/maigret/submit.py index fae40dd..64c8c11 100644 --- a/maigret/submit.py +++ b/maigret/submit.py @@ -110,6 +110,7 @@ class Submitter: cookies=self.args.cookie_file, # Don't skip errors in submit mode - we need check both false positives/true negatives skip_errors=False, + cloudflare_bypass=getattr(self.args, 'cloudflare_bypass', False), ) return changes diff --git a/sites.md b/sites.md index 6b3e42d..ca876fb 100644 --- a/sites.md +++ b/sites.md @@ -21,7 +21,7 @@ Rank data fetched from Alexa by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://vk.com/) [VK (by id) (https://vk.com/)](https://vk.com/)*: top 50, ru* 1. ![](https://www.google.com/s2/favicons?domain=https://sbongacams.com) [BongaCams (https://sbongacams.com)](https://sbongacams.com)*: top 50, cz, webcam* 1. ![](https://www.google.com/s2/favicons?domain=https://www.instagram.com/) [Instagram (https://www.instagram.com/)](https://www.instagram.com/)*: top 50, photo*, search is disabled -1. ![](https://www.google.com/s2/favicons?domain=https://www.twitch.tv/) [Twitch (https://www.twitch.tv/)](https://www.twitch.tv/)*: top 50, streaming, us* +1. ![](https://www.google.com/s2/favicons?domain=https://www.twitch.tv/) [Twitch (https://www.twitch.tv/)](https://www.twitch.tv/)*: top 50, cloudflare, streaming, us* 1. ![](https://www.google.com/s2/favicons?domain=https://yandex.ru/collections/) [YandexCollections API (https://yandex.ru/collections/)](https://yandex.ru/collections/)*: top 50, ru, sharing*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://stackoverflow.com) [StackOverflow (https://stackoverflow.com)](https://stackoverflow.com)*: top 50, coding* 1. ![](https://www.google.com/s2/favicons?domain=https://www.ebay.com/) [Ebay (https://www.ebay.com/)](https://www.ebay.com/)*: top 50, shopping, us* @@ -62,7 +62,7 @@ Rank data fetched from Alexa by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://community.adobe.com) [community.adobe.com (https://community.adobe.com)](https://community.adobe.com)*: top 100, us* 1. ![](https://www.google.com/s2/favicons?domain=https://www.tradingview.com/) [TradingView (https://www.tradingview.com/)](https://www.tradingview.com/)*: top 100, trading, us* 1. ![](https://www.google.com/s2/favicons?domain=https://www.aparat.com) [Aparat (https://www.aparat.com)](https://www.aparat.com)*: top 100, ir, video* -1. ![](https://www.google.com/s2/favicons?domain=https://chaturbate.com) [ChaturBate (https://chaturbate.com)](https://chaturbate.com)*: top 100, us* +1. ![](https://www.google.com/s2/favicons?domain=https://chaturbate.com) [ChaturBate (https://chaturbate.com)](https://chaturbate.com)*: top 100, cloudflare, us* 1. ![](https://www.google.com/s2/favicons?domain=https://medium.com/) [Medium (https://medium.com/)](https://medium.com/)*: top 100, blog, us*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://www.livejasmin.com/) [Livejasmin (https://www.livejasmin.com/)](https://www.livejasmin.com/)*: top 100, us, webcam* 1. ![](https://www.google.com/s2/favicons?domain=https://pornhub.com/) [Pornhub (https://pornhub.com/)](https://pornhub.com/)*: top 100, porn* @@ -72,7 +72,7 @@ Rank data fetched from Alexa by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://bleach.fandom.com/ru) [BleachFandom (https://bleach.fandom.com/ru)](https://bleach.fandom.com/ru)*: top 100, ru, wiki* 1. ![](https://www.google.com/s2/favicons?domain=https://www.fandom.com/) [Fandom (https://www.fandom.com/)](https://www.fandom.com/)*: top 100, us* 1. ![](https://www.google.com/s2/favicons?domain=https://community.fandom.com) [FandomCommunityCentral (https://community.fandom.com)](https://community.fandom.com)*: top 100, wiki* -1. ![](https://www.google.com/s2/favicons?domain=https://www.etsy.com/) [Etsy (https://www.etsy.com/)](https://www.etsy.com/)*: top 100, shopping, us* +1. ![](https://www.google.com/s2/favicons?domain=https://www.etsy.com/) [Etsy (https://www.etsy.com/)](https://www.etsy.com/)*: top 100, cloudflare, shopping, us* 1. ![](https://www.google.com/s2/favicons?domain=https://www.github.com/) [GitHub (https://www.github.com/)](https://www.github.com/)*: top 100, coding* 1. ![](https://www.google.com/s2/favicons?domain=https://open.spotify.com/) [Spotify (https://open.spotify.com/)](https://open.spotify.com/)*: top 100, music, us*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://www.tiktok.com/) [TikTok (https://www.tiktok.com/)](https://www.tiktok.com/)*: top 100, video* @@ -80,14 +80,14 @@ Rank data fetched from Alexa by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://www.tumblr.com) [Tumblr (https://www.tumblr.com)](https://www.tumblr.com)*: top 500, blog* 1. ![](https://www.google.com/s2/favicons?domain=https://www.roblox.com/) [Roblox (https://www.roblox.com/)](https://www.roblox.com/)*: top 500, gaming, us* 1. ![](https://www.google.com/s2/favicons?domain=https://soundcloud.com/) [SoundCloud (https://soundcloud.com/)](https://soundcloud.com/)*: top 500, music* -1. ![](https://www.google.com/s2/favicons?domain=https://www.udemy.com) [Udemy (https://www.udemy.com)](https://www.udemy.com)*: top 500, in* +1. ![](https://www.google.com/s2/favicons?domain=https://www.udemy.com) [Udemy (https://www.udemy.com)](https://www.udemy.com)*: top 500, cloudflare* 1. ![](https://www.google.com/s2/favicons?domain=https://discourse.mozilla.org) [discourse.mozilla.org (https://discourse.mozilla.org)](https://discourse.mozilla.org)*: top 500* 1. ![](https://www.google.com/s2/favicons?domain=https://linktr.ee) [linktr.ee (https://linktr.ee)](https://linktr.ee)*: top 500, links* 1. ![](https://www.google.com/s2/favicons?domain=https://xhamster.com) [xHamster (https://xhamster.com)](https://xhamster.com)*: top 500, porn, us* 1. ![](https://www.google.com/s2/favicons?domain=https://www.zhihu.com/) [Zhihu (https://www.zhihu.com/)](https://www.zhihu.com/)*: top 500, cn*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://www.blogger.com) [Blogger (by GAIA id) (https://www.blogger.com)](https://www.blogger.com)*: top 500, blog* 1. ![](https://www.google.com/s2/favicons?domain=https://www.researchgate.net/) [ResearchGate (https://www.researchgate.net/)](https://www.researchgate.net/)*: top 500, in, us* -1. ![](https://www.google.com/s2/favicons?domain=https://www.freepik.com) [Freepik (https://www.freepik.com)](https://www.freepik.com)*: top 500, art, photo, stock* +1. ![](https://www.google.com/s2/favicons?domain=https://www.freepik.com) [Freepik (https://www.freepik.com)](https://www.freepik.com)*: top 500, art, cloudflare, photo, stock* 1. ![](https://www.google.com/s2/favicons?domain=https://vimeo.com) [Vimeo (https://vimeo.com)](https://vimeo.com)*: top 500, video* 1. ![](https://www.google.com/s2/favicons?domain=https://www.pinterest.com/) [Pinterest (https://www.pinterest.com/)](https://www.pinterest.com/)*: top 500, art, photo, sharing* 1. ![](https://www.google.com/s2/favicons?domain=https://www.fiverr.com/) [Fiverr (https://www.fiverr.com/)](https://www.fiverr.com/)*: top 500, shopping, us* @@ -101,9 +101,9 @@ Rank data fetched from Alexa by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://wix.com/) [Wix (https://wix.com/)](https://wix.com/)*: top 500, us* 1. ![](https://www.google.com/s2/favicons?domain=https://slack.com) [Slack (https://slack.com)](https://slack.com)*: top 500, messaging* 1. ![](https://www.google.com/s2/favicons?domain=https://www.chess.com) [Chess (https://www.chess.com)](https://www.chess.com)*: top 500, gaming, hobby* -1. ![](https://www.google.com/s2/favicons?domain=https://upwork.com) [upwork.com (https://upwork.com)](https://upwork.com)*: top 500, us* +1. ![](https://www.google.com/s2/favicons?domain=https://upwork.com) [upwork.com (https://upwork.com)](https://upwork.com)*: top 500, cloudflare, us* 1. ![](https://www.google.com/s2/favicons?domain=https://archive.org) [Archive.org (https://archive.org)](https://archive.org)*: top 500*, search is disabled -1. ![](https://www.google.com/s2/favicons?domain=https://www.figma.com/) [Figma (https://www.figma.com/)](https://www.figma.com/)*: top 500, design* +1. ![](https://www.google.com/s2/favicons?domain=https://www.figma.com/) [Figma (https://www.figma.com/)](https://www.figma.com/)*: top 500, cloudflare, design* 1. ![](https://www.google.com/s2/favicons?domain=https://www.istockphoto.com) [iStock (https://www.istockphoto.com)](https://www.istockphoto.com)*: top 500, photo, stock* 1. ![](https://www.google.com/s2/favicons?domain=https://www.scribd.com/) [Scribd (https://www.scribd.com/)](https://www.scribd.com/)*: top 500, reading* 1. ![](https://www.google.com/s2/favicons?domain=https://opensea.io) [opensea.io (https://opensea.io)](https://opensea.io)*: top 500, us* @@ -112,7 +112,7 @@ Rank data fetched from Alexa by domains. 1. ![](https://www.google.com/s2/favicons?domain=http://www.yelp.com) [Yelp (http://www.yelp.com)](http://www.yelp.com)*: top 500, review*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://www.yelp.com) [Yelp (by id) (https://www.yelp.com)](https://www.yelp.com)*: top 500, review* 1. ![](https://www.google.com/s2/favicons?domain=https://www.blogger.com/) [Blogger (https://www.blogger.com/)](https://www.blogger.com/)*: top 500, blog* -1. ![](https://www.google.com/s2/favicons?domain=https://www.patreon.com/) [Patreon (https://www.patreon.com/)](https://www.patreon.com/)*: top 500, finance* +1. ![](https://www.google.com/s2/favicons?domain=https://www.patreon.com/) [Patreon (https://www.patreon.com/)](https://www.patreon.com/)*: top 500, cloudflare, finance* 1. ![](https://www.google.com/s2/favicons?domain=https://www.goodreads.com/) [GoodReads (https://www.goodreads.com/)](https://www.goodreads.com/)*: top 500, books, us* 1. ![](https://www.google.com/s2/favicons?domain=https://www.op.gg/) [OP.GG [LeagueOfLegends] Brazil (https://www.op.gg/)](https://www.op.gg/)*: top 500, br, gaming* 1. ![](https://www.google.com/s2/favicons?domain=https://www.op.gg/) [OP.GG [LeagueOfLegends] North America (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming* @@ -145,7 +145,7 @@ Rank data fetched from Alexa by domains. 1. ![](https://www.google.com/s2/favicons?domain=http://weebly.com) [Weebly (http://weebly.com)](http://weebly.com)*: top 500, business* 1. ![](https://www.google.com/s2/favicons?domain=https://dating.rambler.ru/) [RamblerDating (https://dating.rambler.ru/)](https://dating.rambler.ru/)*: top 500, dating, ru*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://www.livejournal.com/) [LiveJournal (https://www.livejournal.com/)](https://www.livejournal.com/)*: top 500, blog, ru* -1. ![](https://www.google.com/s2/favicons?domain=https://sourceforge.net/) [SourceForge (https://sourceforge.net/)](https://sourceforge.net/)*: top 500, coding, us* +1. ![](https://www.google.com/s2/favicons?domain=https://sourceforge.net/) [SourceForge (https://sourceforge.net/)](https://sourceforge.net/)*: top 500, cloudflare, coding, us* 1. ![](https://www.google.com/s2/favicons?domain=https://genius.com/) [Genius (https://genius.com/)](https://genius.com/)*: top 500, music, us* 1. ![](https://www.google.com/s2/favicons?domain=https://issuu.com/) [Issuu (https://issuu.com/)](https://issuu.com/)*: top 500, business* 1. ![](https://www.google.com/s2/favicons?domain=https://www.9gag.com/) [9GAG (https://www.9gag.com/)](https://www.9gag.com/)*: top 500, sharing* @@ -162,7 +162,7 @@ Rank data fetched from Alexa by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://cyber.harvard.edu) [cyber.harvard.edu (https://cyber.harvard.edu)](https://cyber.harvard.edu)*: top 1K, us* 1. ![](https://www.google.com/s2/favicons?domain=https://duolingo.com/) [Duolingo (https://duolingo.com/)](https://duolingo.com/)*: top 1K, us* 1. ![](https://www.google.com/s2/favicons?domain=https://www.rottentomatoes.com) [Rottentomatoes (https://www.rottentomatoes.com)](https://www.rottentomatoes.com)*: top 1K, movies, us* -1. ![](https://www.google.com/s2/favicons?domain=https://www.kickstarter.com) [Kickstarter (https://www.kickstarter.com)](https://www.kickstarter.com)*: top 1K, finance, us* +1. ![](https://www.google.com/s2/favicons?domain=https://www.kickstarter.com) [Kickstarter (https://www.kickstarter.com)](https://www.kickstarter.com)*: top 1K, cloudflare, finance, us* 1. ![](https://www.google.com/s2/favicons?domain=https://forums.ea.com) [forums.ea.com (https://forums.ea.com)](https://forums.ea.com)*: top 1K, forum, gaming, us*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://forums.envato.com) [Envato (https://forums.envato.com)](https://forums.envato.com)*: top 1K, au, forum, in* 1. ![](https://www.google.com/s2/favicons?domain=https://ultimate-guitar.com/) [Ultimate-Guitar (https://ultimate-guitar.com/)](https://ultimate-guitar.com/)*: top 1K, us* @@ -181,17 +181,17 @@ Rank data fetched from Alexa by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://www.gamespot.com/) [Gamespot (https://www.gamespot.com/)](https://www.gamespot.com/)*: top 1K, gaming, us* 1. ![](https://www.google.com/s2/favicons?domain=https://note.com/) [note (https://note.com/)](https://note.com/)*: top 1K, jp* 1. ![](https://www.google.com/s2/favicons?domain=http://bjapi.afreecatv.com) [AfreecaTV (http://bjapi.afreecatv.com)](http://bjapi.afreecatv.com)*: top 1K, streaming* -1. ![](https://www.google.com/s2/favicons?domain=https://www.redbubble.com/) [Redbubble (https://www.redbubble.com/)](https://www.redbubble.com/)*: top 1K, shopping, us* +1. ![](https://www.google.com/s2/favicons?domain=https://www.redbubble.com/) [Redbubble (https://www.redbubble.com/)](https://www.redbubble.com/)*: top 1K, cloudflare, shopping, us* 1. ![](https://www.google.com/s2/favicons?domain=http://forums.tomsguide.com) [Tom's guide (http://forums.tomsguide.com)](http://forums.tomsguide.com)*: top 1K, forum, tech* 1. ![](https://www.google.com/s2/favicons?domain=https://www.yumpu.com) [Yumpu (https://www.yumpu.com)](https://www.yumpu.com)*: top 1K, stock*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://community.brave.com) [community.brave.com (https://community.brave.com)](https://community.brave.com)*: top 1K, forum, us* 1. ![](https://www.google.com/s2/favicons?domain=https://tinder.com/) [Tinder (https://tinder.com/)](https://tinder.com/)*: top 1K, dating, us* -1. ![](https://www.google.com/s2/favicons?domain=https://community.cloudflare.com/) [CloudflareCommunity (https://community.cloudflare.com/)](https://community.cloudflare.com/)*: top 1K, forum, tech* +1. ![](https://www.google.com/s2/favicons?domain=https://community.cloudflare.com/) [CloudflareCommunity (https://community.cloudflare.com/)](https://community.cloudflare.com/)*: top 1K, cloudflare, forum, tech* 1. ![](https://www.google.com/s2/favicons?domain=https://eksisozluk.com) [Eksisozluk (https://eksisozluk.com)](https://eksisozluk.com)*: top 1K, tr* 1. ![](https://www.google.com/s2/favicons?domain=https://www.allrecipes.com/) [AllRecipes (https://www.allrecipes.com/)](https://www.allrecipes.com/)*: top 1K, us* 1. ![](https://www.google.com/s2/favicons?domain=https://support.t-mobile.com) [T-MobileSupport (https://support.t-mobile.com)](https://support.t-mobile.com)*: top 1K, us*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://www.tinkoff.ru/invest/) [Tinkoff Invest (https://www.tinkoff.ru/invest/)](https://www.tinkoff.ru/invest/)*: top 5K, ru* -1. ![](https://www.google.com/s2/favicons?domain=https://www.discogs.com/) [Discogs (https://www.discogs.com/)](https://www.discogs.com/)*: top 5K, music, us* +1. ![](https://www.google.com/s2/favicons?domain=https://www.discogs.com/) [Discogs (https://www.discogs.com/)](https://www.discogs.com/)*: top 5K, cloudflare, music, us* 1. ![](https://www.google.com/s2/favicons?domain=https://discuss.python.org/) [DiscussPython (https://discuss.python.org/)](https://discuss.python.org/)*: top 5K, coding, forum, us* 1. ![](https://www.google.com/s2/favicons?domain=https://www.nairaland.com/) [Nairaland Forum (https://www.nairaland.com/)](https://www.nairaland.com/)*: top 5K, ng* 1. ![](https://www.google.com/s2/favicons?domain=https://ru.redtube.com/) [Redtube (https://ru.redtube.com/)](https://ru.redtube.com/)*: top 5K, porn, us* @@ -3141,7 +3141,7 @@ Rank data fetched from Alexa by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://pubg.op.gg) [OP.GG [PUBG] (https://pubg.op.gg)](https://pubg.op.gg)*: top 100M, gaming* 1. ![](https://www.google.com/s2/favicons?domain=https://valorant.op.gg) [OP.GG [Valorant] (https://valorant.op.gg)](https://valorant.op.gg)*: top 100M, gaming* -The list was updated at (2024-12-16) +The list was updated at (2026-03-22) ## Statistics Enabled/total sites: 2684/3137 = 85.56% @@ -3197,6 +3197,6 @@ Top 20 tags: - (15) `shopping` - (13) `sport` - (13) `business` +- (13) `cloudflare` (non-standard) - (12) `movies` - (11) `hobby` -- (11) `education` diff --git a/tests/test_cli.py b/tests/test_cli.py index acc5321..44b871b 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -45,9 +45,9 @@ DEFAULT_ARGS: Dict[str, Any] = { 'web': None, 'with_domains': False, 'xmind': False, + # Mirrors maigret/resources/settings.json (flag --cloudflare-bypass overrides with True) 'cloudflare_bypass': { - "enabled": False, - "module": "cloudscraper", + "enabled": True, "modules": [ { "name": "chrome_webgate", @@ -71,6 +71,15 @@ def test_args_search_mode(argparser): assert getattr(args, arg) == want_args[arg] +def test_args_cloudflare_bypass_flag(argparser): + args = argparser.parse_args('--cloudflare-bypass username'.split()) + + want_args = dict(DEFAULT_ARGS) + want_args.update({'username': ['username'], 'cloudflare_bypass': True}) + + assert args == Namespace(**want_args) + + def test_args_search_mode_several_usernames(argparser): args = argparser.parse_args('username1 username2'.split()) diff --git a/tests/test_executors.py b/tests/test_executors.py index 7a39897..bb4f649 100644 --- a/tests/test_executors.py +++ b/tests/test_executors.py @@ -36,7 +36,7 @@ async def test_asyncio_progressbar_executor(): # no guarantees for the results order assert sorted(await executor.run(tasks)) == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] assert executor.execution_time > 0.2 - assert executor.execution_time < 0.3 + assert executor.execution_time < 0.6 @pytest.mark.asyncio