Update: CloudflareBypassForScraping

This commit is contained in:
Soxoj
2026-03-22 01:48:31 +01:00
parent a163278f89
commit 23c7757dcf
10 changed files with 120 additions and 52 deletions
+32 -3
View File
@@ -48,6 +48,15 @@ SUPPORTED_IDS = (
BAD_CHARS = "#"
def is_cloudflare_bypass_active(value) -> bool:
"""True if Cloudflare webgate URL rewrite should run (``--cloudflare-bypass`` or settings)."""
if value is True:
return True
if isinstance(value, dict):
return bool(value.get("enabled", False))
return False
class CheckerBase:
pass
@@ -433,7 +442,7 @@ def make_site_result(
url = re.sub("(?<!:)/+", "/", url)
url_probe = site.url_probe
if 'cloudflare' in site.tags:
if 'cloudflare' in site.tags and options.get("cloudflare_bypass"):
url_probe = 'http://localhost:8000/html?url=' + url
logger.info(f"Using cloudflare proxy for {site.name}")
@@ -593,6 +602,7 @@ async def maigret(
cookies=None,
retries=0,
check_domains=False,
cloudflare_bypass=False,
*args,
**kwargs,
) -> QueryResultWrapper:
@@ -691,12 +701,14 @@ async def maigret(
options["timeout"] = timeout
options["id_type"] = id_type
options["forced"] = forced
options["cloudflare_bypass"] = is_cloudflare_bypass_active(cloudflare_bypass)
# results from analysis of all sites
all_results: Dict[str, QueryResultWrapper] = {}
sites = list(site_dict.keys())
executor_limit = timeout + 0.5
attempts = retries + 1
while attempts:
tasks_dict = {}
@@ -711,7 +723,11 @@ async def maigret(
sitename,
'',
MaigretCheckStatus.UNKNOWN,
error=CheckError('Request failed'),
error=CheckError(
'Request timeout',
f'No response within {executor_limit:.1f}s per site '
f'(increase --timeout or use --no-progressbar)',
),
),
}
tasks_dict[sitename] = (
@@ -792,6 +808,7 @@ async def site_self_check(
i2p_proxy=None,
skip_errors=False,
cookies=None,
cloudflare_bypass=False,
):
changes = {
"disabled": False,
@@ -819,6 +836,7 @@ async def site_self_check(
tor_proxy=tor_proxy,
i2p_proxy=i2p_proxy,
cookies=cookies,
cloudflare_bypass=cloudflare_bypass,
)
# don't disable entries with other ids types
@@ -890,6 +908,7 @@ async def self_check(
proxy=None,
tor_proxy=None,
i2p_proxy=None,
cloudflare_bypass=False,
) -> bool:
sem = asyncio.Semaphore(max_connections)
tasks = []
@@ -905,7 +924,17 @@ async def self_check(
for _, site in all_sites.items():
check_coro = site_self_check(
site, logger, sem, db, silent, proxy, tor_proxy, i2p_proxy, skip_errors=True
site,
logger,
sem,
db,
silent,
proxy,
tor_proxy,
i2p_proxy,
skip_errors=True,
cookies=None,
cloudflare_bypass=cloudflare_bypass,
)
future = asyncio.ensure_future(check_coro)
tasks.append(future)
+5 -2
View File
@@ -136,7 +136,10 @@ def extract_and_group(search_res: QueryResultWrapper) -> List[Dict[str, Any]]:
def notify_about_errors(
search_results: QueryResultWrapper, query_notify, show_statistics=False
search_results: QueryResultWrapper,
query_notify,
show_statistics=False,
print_check_errors=False,
) -> List[Tuple]:
"""
Prepare error notifications in search results, text + symbol,
@@ -169,7 +172,7 @@ def notify_about_errors(
text = f'{e["err"]}: {round(e["perc"],2)}%'
results.append((text, '!'))
if was_errs_displayed:
if was_errs_displayed and not print_check_errors:
results.append(
('You can see detailed site check errors with a flag `--print-errors`', '-')
)
+24
View File
@@ -110,6 +110,28 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor):
self.progress = None
# TODO: tests
@staticmethod
def _emit_timeout_notify(args, default_fallback):
"""Print per-site line when wait_for kills check_site_for_username (no query_notify.update ran)."""
if (
not default_fallback
or not isinstance(default_fallback, (tuple, list))
or len(default_fallback) < 2
):
return
_, results_dict = default_fallback[0], default_fallback[1]
if not isinstance(results_dict, dict):
return
status = results_dict.get('status')
if status is None:
return
if len(args) < 5:
return
query_notify = args[4]
site = results_dict.get('site')
similar = bool(getattr(site, 'similar_search', False)) if site is not None else False
query_notify.update(status, similar)
async def increment_progress(self, count):
"""Update progress by calling the provided progress function."""
if self.progress:
@@ -144,6 +166,7 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor):
result = await asyncio.wait_for(query_task, timeout=self.timeout)
except asyncio.TimeoutError:
result = kwargs.get('default')
self._emit_timeout_notify(args, result)
self.results.append(result)
@@ -207,6 +230,7 @@ class AsyncioQueueGeneratorExecutor:
result = await asyncio.wait_for(query_task, timeout=self.timeout)
except asyncio.TimeoutError:
result = kwargs.get('default')
AsyncioProgressbarQueueExecutor._emit_timeout_notify(args, result)
await self._results.put(result)
except Exception as e:
self.logger.error(f"Error in worker: {e}")
+6 -1
View File
@@ -587,6 +587,7 @@ async def main():
max_connections=args.connections,
tor_proxy=args.tor_proxy,
i2p_proxy=args.i2p_proxy,
cloudflare_bypass=args.cloudflare_bypass,
)
if is_need_update:
if input('Do you want to save changes permanently? [Yn]\n').lower() in (
@@ -687,10 +688,14 @@ async def main():
no_progressbar=args.no_progressbar,
retries=args.retries,
check_domains=args.with_domains,
cloudflare_bypass=args.cloudflare_bypass,
)
errs = errors.notify_about_errors(
results, query_notify, show_statistics=args.verbose
results,
query_notify,
show_statistics=args.verbose,
print_check_errors=args.print_check_errors,
)
for e in errs:
query_notify.warning(*e)
+25 -27
View File
@@ -3149,8 +3149,8 @@
},
"ChaturBate": {
"tags": [
"us",
"cloudflare"
"cloudflare",
"us"
],
"checkType": "status_code",
"alexaRank": 62,
@@ -3329,9 +3329,9 @@
},
"CloudflareCommunity": {
"tags": [
"cloudflare",
"forum",
"tech",
"cloudflare"
"tech"
],
"engine": "Discourse",
"alexaRank": 976,
@@ -4204,9 +4204,9 @@
},
"Discogs": {
"tags": [
"cloudflare",
"music",
"us",
"cloudflare"
"us"
],
"checkType": "status_code",
"alexaRank": 1040,
@@ -4921,9 +4921,9 @@
},
"Etsy": {
"tags": [
"cloudflare",
"shopping",
"us",
"cloudflare"
"us"
],
"errors": {
"Sanctions Policy": "Site censorship",
@@ -5041,8 +5041,8 @@
"usernameUnclaimed": "noonewouldeverusethis7",
"alexaRank": 240,
"tags": [
"design",
"cloudflare"
"cloudflare",
"design"
]
},
"8tracks.com": {
@@ -6275,9 +6275,9 @@
"Freepik": {
"tags": [
"art",
"cloudflare",
"photo",
"stock",
"cloudflare"
"stock"
],
"checkType": "status_code",
"alexaRank": 147,
@@ -8673,9 +8673,9 @@
},
"Kickstarter": {
"tags": [
"cloudflare",
"finance",
"us",
"cloudflare"
"us"
],
"checkType": "status_code",
"alexaRank": 609,
@@ -12029,8 +12029,8 @@
},
"Patreon": {
"tags": [
"finance",
"cloudflare"
"cloudflare",
"finance"
],
"checkType": "status_code",
"alexaRank": 304,
@@ -13522,9 +13522,9 @@
},
"Redbubble": {
"tags": [
"cloudflare",
"shopping",
"us",
"cloudflare"
"us"
],
"checkType": "status_code",
"alexaRank": 925,
@@ -15067,9 +15067,9 @@
},
"SourceForge": {
"tags": [
"cloudflare",
"coding",
"us",
"cloudflare"
"us"
],
"checkType": "message",
"alexaRank": 444,
@@ -16920,9 +16920,9 @@
},
"Twitch": {
"tags": [
"cloudflare",
"streaming",
"us",
"cloudflare"
"us"
],
"headers": {
"cookies": "_gat=1; _ga_4ZM72D0Y59=GS1.2.1734305902.1.0.1734305902.0.0.0; _ga=GA1.2.1051951095.1734305902; _gid=GA1.2.30506583.1734305902; cf_clearance=xo5uTOkBRsAYb4so4QSu1h8tlFcFJyYSA2SBEHyYA2U-1734305900-1.2.1.1-l9mQ677uPsBenPceAasuW_ZVBqRgQqy4df.13gRl6y4aFBf._3bLo1c3.uVZOXwMxL_iVN.EvEHEBiNczBNMM6riJrVWgiLx1O1jGRbhIiGP.tsomZgyl_bNupNbWxZNzHy454hC0iUigDrE5jkJJoazDRJNc5532wj9nT.U9DDBxW3RplVCdj4x5sMt3K3IXADYvAGabBQnzvS3rEr_w66KClwAehy69tWHVSPDkc.ww7QnxdDItYqmtL8bz9IScdouTAvU_MWK6oxvxcLc6GQFCQZnoToeX8Fgeui2flhV.kXXjEQ4NjypxSFakcCPIysHZOUjKfv93.W9Vfl7id.Y8DUpsmxEPVOpfcrGY6YvFtk6yJhvUQryJftS5b7E5P5jVPW_pPlMWSTWL9IaysG7INm6ZjDyjsVG7OBJIUujSSMlyoKiR8sv0L2ueHt6",
@@ -17490,9 +17490,6 @@
],
"method": "vimeo"
},
"headers": {
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NDI3Mjg3NDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiZWI1ZjY1OTAtZGNiMC00MDQ5LTkwODctZmU3ZGFjNjAyMmZmIn0.wrdGR5HPqB_o4f2EHL1h0vVldfmUOvpvPKm-y88X7KU"
},
"urlProbe": "https://api.vimeo.com/users/{username}?fields=name%2Cgender%2Cbio%2Curi%2Clink%2Cbackground_video%2Clocation_details%2Cpictures%2Cverified%2Cmetadata.public_videos.total%2Cavailable_for_hire%2Ccan_work_remotely%2Cmetadata.connections.videos.total%2Cmetadata.connections.albums.total%2Cmetadata.connections.followers.total%2Cmetadata.connections.following.total%2Cmetadata.public_videos.total%2Cmetadata.connections.vimeo_experts.is_enrolled%2Ctotal_collection_count%2Ccreated_time%2Cprofile_preferences%2Cmembership%2Cclients%2Cskills%2Cproject_types%2Crates%2Ccategories%2Cis_expert%2Cprofile_discovery%2Cwebsites%2Ccontact_emails&fetch_user_profile=1",
"checkType": "status_code",
"alexaRank": 148,
@@ -27258,8 +27255,8 @@
},
"upwork.com": {
"tags": [
"us",
"cloudflare"
"cloudflare",
"us"
],
"engine": "engine404",
"urlMain": "https://upwork.com",
@@ -35784,6 +35781,7 @@
"tags": [
"gaming",
"coding",
"cloudflare",
"photo",
"music",
"blog",
-1
View File
@@ -57,7 +57,6 @@
"web_interface_port": 5000,
"cloudflare_bypass": {
"enabled": true,
"module": "cloudscraper",
"modules": [
{
"name": "chrome_webgate",
+1
View File
@@ -110,6 +110,7 @@ class Submitter:
cookies=self.args.cookie_file,
# Don't skip errors in submit mode - we need check both false positives/true negatives
skip_errors=False,
cloudflare_bypass=getattr(self.args, 'cloudflare_bypass', False),
)
return changes
+15 -15
View File
@@ -21,7 +21,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://vk.com/) [VK (by id) (https://vk.com/)](https://vk.com/)*: top 50, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://sbongacams.com) [BongaCams (https://sbongacams.com)](https://sbongacams.com)*: top 50, cz, webcam*
1. ![](https://www.google.com/s2/favicons?domain=https://www.instagram.com/) [Instagram (https://www.instagram.com/)](https://www.instagram.com/)*: top 50, photo*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.twitch.tv/) [Twitch (https://www.twitch.tv/)](https://www.twitch.tv/)*: top 50, streaming, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.twitch.tv/) [Twitch (https://www.twitch.tv/)](https://www.twitch.tv/)*: top 50, cloudflare, streaming, us*
1. ![](https://www.google.com/s2/favicons?domain=https://yandex.ru/collections/) [YandexCollections API (https://yandex.ru/collections/)](https://yandex.ru/collections/)*: top 50, ru, sharing*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://stackoverflow.com) [StackOverflow (https://stackoverflow.com)](https://stackoverflow.com)*: top 50, coding*
1. ![](https://www.google.com/s2/favicons?domain=https://www.ebay.com/) [Ebay (https://www.ebay.com/)](https://www.ebay.com/)*: top 50, shopping, us*
@@ -62,7 +62,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://community.adobe.com) [community.adobe.com (https://community.adobe.com)](https://community.adobe.com)*: top 100, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.tradingview.com/) [TradingView (https://www.tradingview.com/)](https://www.tradingview.com/)*: top 100, trading, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.aparat.com) [Aparat (https://www.aparat.com)](https://www.aparat.com)*: top 100, ir, video*
1. ![](https://www.google.com/s2/favicons?domain=https://chaturbate.com) [ChaturBate (https://chaturbate.com)](https://chaturbate.com)*: top 100, us*
1. ![](https://www.google.com/s2/favicons?domain=https://chaturbate.com) [ChaturBate (https://chaturbate.com)](https://chaturbate.com)*: top 100, cloudflare, us*
1. ![](https://www.google.com/s2/favicons?domain=https://medium.com/) [Medium (https://medium.com/)](https://medium.com/)*: top 100, blog, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.livejasmin.com/) [Livejasmin (https://www.livejasmin.com/)](https://www.livejasmin.com/)*: top 100, us, webcam*
1. ![](https://www.google.com/s2/favicons?domain=https://pornhub.com/) [Pornhub (https://pornhub.com/)](https://pornhub.com/)*: top 100, porn*
@@ -72,7 +72,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://bleach.fandom.com/ru) [BleachFandom (https://bleach.fandom.com/ru)](https://bleach.fandom.com/ru)*: top 100, ru, wiki*
1. ![](https://www.google.com/s2/favicons?domain=https://www.fandom.com/) [Fandom (https://www.fandom.com/)](https://www.fandom.com/)*: top 100, us*
1. ![](https://www.google.com/s2/favicons?domain=https://community.fandom.com) [FandomCommunityCentral (https://community.fandom.com)](https://community.fandom.com)*: top 100, wiki*
1. ![](https://www.google.com/s2/favicons?domain=https://www.etsy.com/) [Etsy (https://www.etsy.com/)](https://www.etsy.com/)*: top 100, shopping, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.etsy.com/) [Etsy (https://www.etsy.com/)](https://www.etsy.com/)*: top 100, cloudflare, shopping, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.github.com/) [GitHub (https://www.github.com/)](https://www.github.com/)*: top 100, coding*
1. ![](https://www.google.com/s2/favicons?domain=https://open.spotify.com/) [Spotify (https://open.spotify.com/)](https://open.spotify.com/)*: top 100, music, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.tiktok.com/) [TikTok (https://www.tiktok.com/)](https://www.tiktok.com/)*: top 100, video*
@@ -80,14 +80,14 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://www.tumblr.com) [Tumblr (https://www.tumblr.com)](https://www.tumblr.com)*: top 500, blog*
1. ![](https://www.google.com/s2/favicons?domain=https://www.roblox.com/) [Roblox (https://www.roblox.com/)](https://www.roblox.com/)*: top 500, gaming, us*
1. ![](https://www.google.com/s2/favicons?domain=https://soundcloud.com/) [SoundCloud (https://soundcloud.com/)](https://soundcloud.com/)*: top 500, music*
1. ![](https://www.google.com/s2/favicons?domain=https://www.udemy.com) [Udemy (https://www.udemy.com)](https://www.udemy.com)*: top 500, in*
1. ![](https://www.google.com/s2/favicons?domain=https://www.udemy.com) [Udemy (https://www.udemy.com)](https://www.udemy.com)*: top 500, cloudflare*
1. ![](https://www.google.com/s2/favicons?domain=https://discourse.mozilla.org) [discourse.mozilla.org (https://discourse.mozilla.org)](https://discourse.mozilla.org)*: top 500*
1. ![](https://www.google.com/s2/favicons?domain=https://linktr.ee) [linktr.ee (https://linktr.ee)](https://linktr.ee)*: top 500, links*
1. ![](https://www.google.com/s2/favicons?domain=https://xhamster.com) [xHamster (https://xhamster.com)](https://xhamster.com)*: top 500, porn, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.zhihu.com/) [Zhihu (https://www.zhihu.com/)](https://www.zhihu.com/)*: top 500, cn*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.blogger.com) [Blogger (by GAIA id) (https://www.blogger.com)](https://www.blogger.com)*: top 500, blog*
1. ![](https://www.google.com/s2/favicons?domain=https://www.researchgate.net/) [ResearchGate (https://www.researchgate.net/)](https://www.researchgate.net/)*: top 500, in, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.freepik.com) [Freepik (https://www.freepik.com)](https://www.freepik.com)*: top 500, art, photo, stock*
1. ![](https://www.google.com/s2/favicons?domain=https://www.freepik.com) [Freepik (https://www.freepik.com)](https://www.freepik.com)*: top 500, art, cloudflare, photo, stock*
1. ![](https://www.google.com/s2/favicons?domain=https://vimeo.com) [Vimeo (https://vimeo.com)](https://vimeo.com)*: top 500, video*
1. ![](https://www.google.com/s2/favicons?domain=https://www.pinterest.com/) [Pinterest (https://www.pinterest.com/)](https://www.pinterest.com/)*: top 500, art, photo, sharing*
1. ![](https://www.google.com/s2/favicons?domain=https://www.fiverr.com/) [Fiverr (https://www.fiverr.com/)](https://www.fiverr.com/)*: top 500, shopping, us*
@@ -101,9 +101,9 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://wix.com/) [Wix (https://wix.com/)](https://wix.com/)*: top 500, us*
1. ![](https://www.google.com/s2/favicons?domain=https://slack.com) [Slack (https://slack.com)](https://slack.com)*: top 500, messaging*
1. ![](https://www.google.com/s2/favicons?domain=https://www.chess.com) [Chess (https://www.chess.com)](https://www.chess.com)*: top 500, gaming, hobby*
1. ![](https://www.google.com/s2/favicons?domain=https://upwork.com) [upwork.com (https://upwork.com)](https://upwork.com)*: top 500, us*
1. ![](https://www.google.com/s2/favicons?domain=https://upwork.com) [upwork.com (https://upwork.com)](https://upwork.com)*: top 500, cloudflare, us*
1. ![](https://www.google.com/s2/favicons?domain=https://archive.org) [Archive.org (https://archive.org)](https://archive.org)*: top 500*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.figma.com/) [Figma (https://www.figma.com/)](https://www.figma.com/)*: top 500, design*
1. ![](https://www.google.com/s2/favicons?domain=https://www.figma.com/) [Figma (https://www.figma.com/)](https://www.figma.com/)*: top 500, cloudflare, design*
1. ![](https://www.google.com/s2/favicons?domain=https://www.istockphoto.com) [iStock (https://www.istockphoto.com)](https://www.istockphoto.com)*: top 500, photo, stock*
1. ![](https://www.google.com/s2/favicons?domain=https://www.scribd.com/) [Scribd (https://www.scribd.com/)](https://www.scribd.com/)*: top 500, reading*
1. ![](https://www.google.com/s2/favicons?domain=https://opensea.io) [opensea.io (https://opensea.io)](https://opensea.io)*: top 500, us*
@@ -112,7 +112,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=http://www.yelp.com) [Yelp (http://www.yelp.com)](http://www.yelp.com)*: top 500, review*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.yelp.com) [Yelp (by id) (https://www.yelp.com)](https://www.yelp.com)*: top 500, review*
1. ![](https://www.google.com/s2/favicons?domain=https://www.blogger.com/) [Blogger (https://www.blogger.com/)](https://www.blogger.com/)*: top 500, blog*
1. ![](https://www.google.com/s2/favicons?domain=https://www.patreon.com/) [Patreon (https://www.patreon.com/)](https://www.patreon.com/)*: top 500, finance*
1. ![](https://www.google.com/s2/favicons?domain=https://www.patreon.com/) [Patreon (https://www.patreon.com/)](https://www.patreon.com/)*: top 500, cloudflare, finance*
1. ![](https://www.google.com/s2/favicons?domain=https://www.goodreads.com/) [GoodReads (https://www.goodreads.com/)](https://www.goodreads.com/)*: top 500, books, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.op.gg/) [OP.GG [LeagueOfLegends] Brazil (https://www.op.gg/)](https://www.op.gg/)*: top 500, br, gaming*
1. ![](https://www.google.com/s2/favicons?domain=https://www.op.gg/) [OP.GG [LeagueOfLegends] North America (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming*
@@ -145,7 +145,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=http://weebly.com) [Weebly (http://weebly.com)](http://weebly.com)*: top 500, business*
1. ![](https://www.google.com/s2/favicons?domain=https://dating.rambler.ru/) [RamblerDating (https://dating.rambler.ru/)](https://dating.rambler.ru/)*: top 500, dating, ru*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.livejournal.com/) [LiveJournal (https://www.livejournal.com/)](https://www.livejournal.com/)*: top 500, blog, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://sourceforge.net/) [SourceForge (https://sourceforge.net/)](https://sourceforge.net/)*: top 500, coding, us*
1. ![](https://www.google.com/s2/favicons?domain=https://sourceforge.net/) [SourceForge (https://sourceforge.net/)](https://sourceforge.net/)*: top 500, cloudflare, coding, us*
1. ![](https://www.google.com/s2/favicons?domain=https://genius.com/) [Genius (https://genius.com/)](https://genius.com/)*: top 500, music, us*
1. ![](https://www.google.com/s2/favicons?domain=https://issuu.com/) [Issuu (https://issuu.com/)](https://issuu.com/)*: top 500, business*
1. ![](https://www.google.com/s2/favicons?domain=https://www.9gag.com/) [9GAG (https://www.9gag.com/)](https://www.9gag.com/)*: top 500, sharing*
@@ -162,7 +162,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://cyber.harvard.edu) [cyber.harvard.edu (https://cyber.harvard.edu)](https://cyber.harvard.edu)*: top 1K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://duolingo.com/) [Duolingo (https://duolingo.com/)](https://duolingo.com/)*: top 1K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.rottentomatoes.com) [Rottentomatoes (https://www.rottentomatoes.com)](https://www.rottentomatoes.com)*: top 1K, movies, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.kickstarter.com) [Kickstarter (https://www.kickstarter.com)](https://www.kickstarter.com)*: top 1K, finance, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.kickstarter.com) [Kickstarter (https://www.kickstarter.com)](https://www.kickstarter.com)*: top 1K, cloudflare, finance, us*
1. ![](https://www.google.com/s2/favicons?domain=https://forums.ea.com) [forums.ea.com (https://forums.ea.com)](https://forums.ea.com)*: top 1K, forum, gaming, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://forums.envato.com) [Envato (https://forums.envato.com)](https://forums.envato.com)*: top 1K, au, forum, in*
1. ![](https://www.google.com/s2/favicons?domain=https://ultimate-guitar.com/) [Ultimate-Guitar (https://ultimate-guitar.com/)](https://ultimate-guitar.com/)*: top 1K, us*
@@ -181,17 +181,17 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://www.gamespot.com/) [Gamespot (https://www.gamespot.com/)](https://www.gamespot.com/)*: top 1K, gaming, us*
1. ![](https://www.google.com/s2/favicons?domain=https://note.com/) [note (https://note.com/)](https://note.com/)*: top 1K, jp*
1. ![](https://www.google.com/s2/favicons?domain=http://bjapi.afreecatv.com) [AfreecaTV (http://bjapi.afreecatv.com)](http://bjapi.afreecatv.com)*: top 1K, streaming*
1. ![](https://www.google.com/s2/favicons?domain=https://www.redbubble.com/) [Redbubble (https://www.redbubble.com/)](https://www.redbubble.com/)*: top 1K, shopping, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.redbubble.com/) [Redbubble (https://www.redbubble.com/)](https://www.redbubble.com/)*: top 1K, cloudflare, shopping, us*
1. ![](https://www.google.com/s2/favicons?domain=http://forums.tomsguide.com) [Tom's guide (http://forums.tomsguide.com)](http://forums.tomsguide.com)*: top 1K, forum, tech*
1. ![](https://www.google.com/s2/favicons?domain=https://www.yumpu.com) [Yumpu (https://www.yumpu.com)](https://www.yumpu.com)*: top 1K, stock*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://community.brave.com) [community.brave.com (https://community.brave.com)](https://community.brave.com)*: top 1K, forum, us*
1. ![](https://www.google.com/s2/favicons?domain=https://tinder.com/) [Tinder (https://tinder.com/)](https://tinder.com/)*: top 1K, dating, us*
1. ![](https://www.google.com/s2/favicons?domain=https://community.cloudflare.com/) [CloudflareCommunity (https://community.cloudflare.com/)](https://community.cloudflare.com/)*: top 1K, forum, tech*
1. ![](https://www.google.com/s2/favicons?domain=https://community.cloudflare.com/) [CloudflareCommunity (https://community.cloudflare.com/)](https://community.cloudflare.com/)*: top 1K, cloudflare, forum, tech*
1. ![](https://www.google.com/s2/favicons?domain=https://eksisozluk.com) [Eksisozluk (https://eksisozluk.com)](https://eksisozluk.com)*: top 1K, tr*
1. ![](https://www.google.com/s2/favicons?domain=https://www.allrecipes.com/) [AllRecipes (https://www.allrecipes.com/)](https://www.allrecipes.com/)*: top 1K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://support.t-mobile.com) [T-MobileSupport (https://support.t-mobile.com)](https://support.t-mobile.com)*: top 1K, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.tinkoff.ru/invest/) [Tinkoff Invest (https://www.tinkoff.ru/invest/)](https://www.tinkoff.ru/invest/)*: top 5K, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://www.discogs.com/) [Discogs (https://www.discogs.com/)](https://www.discogs.com/)*: top 5K, music, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.discogs.com/) [Discogs (https://www.discogs.com/)](https://www.discogs.com/)*: top 5K, cloudflare, music, us*
1. ![](https://www.google.com/s2/favicons?domain=https://discuss.python.org/) [DiscussPython (https://discuss.python.org/)](https://discuss.python.org/)*: top 5K, coding, forum, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.nairaland.com/) [Nairaland Forum (https://www.nairaland.com/)](https://www.nairaland.com/)*: top 5K, ng*
1. ![](https://www.google.com/s2/favicons?domain=https://ru.redtube.com/) [Redtube (https://ru.redtube.com/)](https://ru.redtube.com/)*: top 5K, porn, us*
@@ -3141,7 +3141,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://pubg.op.gg) [OP.GG [PUBG] (https://pubg.op.gg)](https://pubg.op.gg)*: top 100M, gaming*
1. ![](https://www.google.com/s2/favicons?domain=https://valorant.op.gg) [OP.GG [Valorant] (https://valorant.op.gg)](https://valorant.op.gg)*: top 100M, gaming*
The list was updated at (2024-12-16)
The list was updated at (2026-03-22)
## Statistics
Enabled/total sites: 2684/3137 = 85.56%
@@ -3197,6 +3197,6 @@ Top 20 tags:
- (15) `shopping`
- (13) `sport`
- (13) `business`
- (13) `cloudflare` (non-standard)
- (12) `movies`
- (11) `hobby`
- (11) `education`
+11 -2
View File
@@ -45,9 +45,9 @@ DEFAULT_ARGS: Dict[str, Any] = {
'web': None,
'with_domains': False,
'xmind': False,
# Mirrors maigret/resources/settings.json (flag --cloudflare-bypass overrides with True)
'cloudflare_bypass': {
"enabled": False,
"module": "cloudscraper",
"enabled": True,
"modules": [
{
"name": "chrome_webgate",
@@ -71,6 +71,15 @@ def test_args_search_mode(argparser):
assert getattr(args, arg) == want_args[arg]
def test_args_cloudflare_bypass_flag(argparser):
args = argparser.parse_args('--cloudflare-bypass username'.split())
want_args = dict(DEFAULT_ARGS)
want_args.update({'username': ['username'], 'cloudflare_bypass': True})
assert args == Namespace(**want_args)
def test_args_search_mode_several_usernames(argparser):
args = argparser.parse_args('username1 username2'.split())
+1 -1
View File
@@ -36,7 +36,7 @@ async def test_asyncio_progressbar_executor():
# no guarantees for the results order
assert sorted(await executor.run(tasks)) == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
assert executor.execution_time > 0.2
assert executor.execution_time < 0.3
assert executor.execution_time < 0.6
@pytest.mark.asyncio