mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-07 06:24:35 +00:00
Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 23c7757dcf | |||
| a163278f89 | |||
| 5381a134f0 |
+36
-3
@@ -48,6 +48,15 @@ SUPPORTED_IDS = (
|
||||
BAD_CHARS = "#"
|
||||
|
||||
|
||||
def is_cloudflare_bypass_active(value) -> bool:
    """Tell whether the Cloudflare webgate URL rewrite should be applied.

    ``value`` is either the literal ``True`` (as produced by the
    ``--cloudflare-bypass`` flag) or a settings mapping, in which case its
    ``"enabled"`` entry is coerced to ``bool`` (a missing key counts as off).
    Every other value -- including truthy objects that are not exactly
    ``True`` -- leaves the bypass off.
    """
    if isinstance(value, dict):
        return bool(value.get("enabled", False))
    # Identity check on purpose: only the real ``True`` switches the bypass on.
    return value is True
|
||||
|
||||
|
||||
class CheckerBase:
|
||||
pass
|
||||
|
||||
@@ -431,6 +440,11 @@ def make_site_result(
|
||||
|
||||
# workaround to prevent slash errors
|
||||
url = re.sub("(?<!:)/+", "/", url)
|
||||
url_probe = site.url_probe
|
||||
|
||||
if 'cloudflare' in site.tags and options.get("cloudflare_bypass"):
|
||||
url_probe = 'http://localhost:8000/html?url=' + url
|
||||
logger.info(f"Using cloudflare proxy for {site.name}")
|
||||
|
||||
# always clearweb_checker for now
|
||||
checker = options["checkers"][site.protocol]
|
||||
@@ -472,7 +486,6 @@ def make_site_result(
|
||||
else:
|
||||
# URL of user on site (if it exists)
|
||||
results_site["url_user"] = url
|
||||
url_probe = site.url_probe
|
||||
if url_probe is None:
|
||||
# Probe URL is normal one seen by people out on the web.
|
||||
url_probe = url
|
||||
@@ -589,6 +602,7 @@ async def maigret(
|
||||
cookies=None,
|
||||
retries=0,
|
||||
check_domains=False,
|
||||
cloudflare_bypass=False,
|
||||
*args,
|
||||
**kwargs,
|
||||
) -> QueryResultWrapper:
|
||||
@@ -687,12 +701,14 @@ async def maigret(
|
||||
options["timeout"] = timeout
|
||||
options["id_type"] = id_type
|
||||
options["forced"] = forced
|
||||
options["cloudflare_bypass"] = is_cloudflare_bypass_active(cloudflare_bypass)
|
||||
|
||||
# results from analysis of all sites
|
||||
all_results: Dict[str, QueryResultWrapper] = {}
|
||||
|
||||
sites = list(site_dict.keys())
|
||||
|
||||
executor_limit = timeout + 0.5
|
||||
attempts = retries + 1
|
||||
while attempts:
|
||||
tasks_dict = {}
|
||||
@@ -707,7 +723,11 @@ async def maigret(
|
||||
sitename,
|
||||
'',
|
||||
MaigretCheckStatus.UNKNOWN,
|
||||
error=CheckError('Request failed'),
|
||||
error=CheckError(
|
||||
'Request timeout',
|
||||
f'No response within {executor_limit:.1f}s per site '
|
||||
f'(increase --timeout or use --no-progressbar)',
|
||||
),
|
||||
),
|
||||
}
|
||||
tasks_dict[sitename] = (
|
||||
@@ -788,6 +808,7 @@ async def site_self_check(
|
||||
i2p_proxy=None,
|
||||
skip_errors=False,
|
||||
cookies=None,
|
||||
cloudflare_bypass=False,
|
||||
):
|
||||
changes = {
|
||||
"disabled": False,
|
||||
@@ -815,6 +836,7 @@ async def site_self_check(
|
||||
tor_proxy=tor_proxy,
|
||||
i2p_proxy=i2p_proxy,
|
||||
cookies=cookies,
|
||||
cloudflare_bypass=cloudflare_bypass,
|
||||
)
|
||||
|
||||
# don't disable entries with other ids types
|
||||
@@ -886,6 +908,7 @@ async def self_check(
|
||||
proxy=None,
|
||||
tor_proxy=None,
|
||||
i2p_proxy=None,
|
||||
cloudflare_bypass=False,
|
||||
) -> bool:
|
||||
sem = asyncio.Semaphore(max_connections)
|
||||
tasks = []
|
||||
@@ -901,7 +924,17 @@ async def self_check(
|
||||
|
||||
for _, site in all_sites.items():
|
||||
check_coro = site_self_check(
|
||||
site, logger, sem, db, silent, proxy, tor_proxy, i2p_proxy, skip_errors=True
|
||||
site,
|
||||
logger,
|
||||
sem,
|
||||
db,
|
||||
silent,
|
||||
proxy,
|
||||
tor_proxy,
|
||||
i2p_proxy,
|
||||
skip_errors=True,
|
||||
cookies=None,
|
||||
cloudflare_bypass=cloudflare_bypass,
|
||||
)
|
||||
future = asyncio.ensure_future(check_coro)
|
||||
tasks.append(future)
|
||||
|
||||
+5
-2
@@ -136,7 +136,10 @@ def extract_and_group(search_res: QueryResultWrapper) -> List[Dict[str, Any]]:
|
||||
|
||||
|
||||
def notify_about_errors(
|
||||
search_results: QueryResultWrapper, query_notify, show_statistics=False
|
||||
search_results: QueryResultWrapper,
|
||||
query_notify,
|
||||
show_statistics=False,
|
||||
print_check_errors=False,
|
||||
) -> List[Tuple]:
|
||||
"""
|
||||
Prepare error notifications in search results, text + symbol,
|
||||
@@ -169,7 +172,7 @@ def notify_about_errors(
|
||||
text = f'{e["err"]}: {round(e["perc"],2)}%'
|
||||
results.append((text, '!'))
|
||||
|
||||
if was_errs_displayed:
|
||||
if was_errs_displayed and not print_check_errors:
|
||||
results.append(
|
||||
('You can see detailed site check errors with a flag `--print-errors`', '-')
|
||||
)
|
||||
|
||||
@@ -110,6 +110,28 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor):
|
||||
self.progress = None
|
||||
|
||||
# TODO: tests
|
||||
@staticmethod
def _emit_timeout_notify(args, default_fallback):
    """Report a timed-out check through the task's notifier, best-effort.

    The executors call this after ``asyncio.wait_for`` raises
    ``TimeoutError``; in that case the check coroutine was cut off before
    its own ``query_notify.update`` call could run, so the fallback result
    is reported here instead.

    Args:
        args: positional arguments of the timed-out task. The notifier is
            read from index 4 (presumably the ``query_notify`` argument of
            ``check_site_for_username`` -- confirm against its signature).
        default_fallback: the task's ``default`` value. Expected to be a
            tuple/list whose second item is a results dict carrying a
            ``'status'`` entry and, optionally, a ``'site'`` object.

    Returns:
        None. If any expectation above is not met the call is a silent
        no-op, so a task with a different argument layout never breaks the
        executor's timeout handling.
    """
    if not isinstance(default_fallback, (tuple, list)) or len(default_fallback) < 2:
        return

    results_dict = default_fallback[1]
    if not isinstance(results_dict, dict):
        return

    status = results_dict.get('status')
    if status is None or len(args) < 5:
        return

    # The fifth positional argument is only assumed to be the notifier;
    # skip quietly when it does not expose a callable ``update``.
    update = getattr(args[4], 'update', None)
    if not callable(update):
        return

    # ``getattr`` on a missing (None) site falls back to False on its own.
    site = results_dict.get('site')
    similar = bool(getattr(site, 'similar_search', False))
    update(status, similar)
|
||||
|
||||
async def increment_progress(self, count):
|
||||
"""Update progress by calling the provided progress function."""
|
||||
if self.progress:
|
||||
@@ -144,6 +166,7 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor):
|
||||
result = await asyncio.wait_for(query_task, timeout=self.timeout)
|
||||
except asyncio.TimeoutError:
|
||||
result = kwargs.get('default')
|
||||
self._emit_timeout_notify(args, result)
|
||||
|
||||
self.results.append(result)
|
||||
|
||||
@@ -207,6 +230,7 @@ class AsyncioQueueGeneratorExecutor:
|
||||
result = await asyncio.wait_for(query_task, timeout=self.timeout)
|
||||
except asyncio.TimeoutError:
|
||||
result = kwargs.get('default')
|
||||
AsyncioProgressbarQueueExecutor._emit_timeout_notify(args, result)
|
||||
await self._results.put(result)
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error in worker: {e}")
|
||||
|
||||
+12
-1
@@ -254,6 +254,12 @@ def setup_arguments_parser(settings: Settings):
|
||||
default=settings.domain_search,
|
||||
help="Enable (experimental) feature of checking domains on usernames.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--cloudflare-bypass",
|
||||
action="store_true",
|
||||
default=settings.cloudflare_bypass,
|
||||
help="Enable Cloudflare bypass (edit settings.json to configure)",
|
||||
)
|
||||
|
||||
filter_group = parser.add_argument_group(
|
||||
'Site filtering', 'Options to set site search scope'
|
||||
@@ -581,6 +587,7 @@ async def main():
|
||||
max_connections=args.connections,
|
||||
tor_proxy=args.tor_proxy,
|
||||
i2p_proxy=args.i2p_proxy,
|
||||
cloudflare_bypass=args.cloudflare_bypass,
|
||||
)
|
||||
if is_need_update:
|
||||
if input('Do you want to save changes permanently? [Yn]\n').lower() in (
|
||||
@@ -681,10 +688,14 @@ async def main():
|
||||
no_progressbar=args.no_progressbar,
|
||||
retries=args.retries,
|
||||
check_domains=args.with_domains,
|
||||
cloudflare_bypass=args.cloudflare_bypass,
|
||||
)
|
||||
|
||||
errs = errors.notify_about_errors(
|
||||
results, query_notify, show_statistics=args.verbose
|
||||
results,
|
||||
query_notify,
|
||||
show_statistics=args.verbose,
|
||||
print_check_errors=args.print_check_errors,
|
||||
)
|
||||
for e in errs:
|
||||
query_notify.warning(*e)
|
||||
|
||||
@@ -3149,6 +3149,7 @@
|
||||
},
|
||||
"ChaturBate": {
|
||||
"tags": [
|
||||
"cloudflare",
|
||||
"us"
|
||||
],
|
||||
"checkType": "status_code",
|
||||
@@ -3328,6 +3329,7 @@
|
||||
},
|
||||
"CloudflareCommunity": {
|
||||
"tags": [
|
||||
"cloudflare",
|
||||
"forum",
|
||||
"tech"
|
||||
],
|
||||
@@ -4202,6 +4204,7 @@
|
||||
},
|
||||
"Discogs": {
|
||||
"tags": [
|
||||
"cloudflare",
|
||||
"music",
|
||||
"us"
|
||||
],
|
||||
@@ -4918,6 +4921,7 @@
|
||||
},
|
||||
"Etsy": {
|
||||
"tags": [
|
||||
"cloudflare",
|
||||
"shopping",
|
||||
"us"
|
||||
],
|
||||
@@ -5037,6 +5041,7 @@
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"alexaRank": 240,
|
||||
"tags": [
|
||||
"cloudflare",
|
||||
"design"
|
||||
]
|
||||
},
|
||||
@@ -6270,6 +6275,7 @@
|
||||
"Freepik": {
|
||||
"tags": [
|
||||
"art",
|
||||
"cloudflare",
|
||||
"photo",
|
||||
"stock"
|
||||
],
|
||||
@@ -8667,6 +8673,7 @@
|
||||
},
|
||||
"Kickstarter": {
|
||||
"tags": [
|
||||
"cloudflare",
|
||||
"finance",
|
||||
"us"
|
||||
],
|
||||
@@ -12022,6 +12029,7 @@
|
||||
},
|
||||
"Patreon": {
|
||||
"tags": [
|
||||
"cloudflare",
|
||||
"finance"
|
||||
],
|
||||
"checkType": "status_code",
|
||||
@@ -13514,6 +13522,7 @@
|
||||
},
|
||||
"Redbubble": {
|
||||
"tags": [
|
||||
"cloudflare",
|
||||
"shopping",
|
||||
"us"
|
||||
],
|
||||
@@ -15058,6 +15067,7 @@
|
||||
},
|
||||
"SourceForge": {
|
||||
"tags": [
|
||||
"cloudflare",
|
||||
"coding",
|
||||
"us"
|
||||
],
|
||||
@@ -16910,10 +16920,14 @@
|
||||
},
|
||||
"Twitch": {
|
||||
"tags": [
|
||||
"cloudflare",
|
||||
"streaming",
|
||||
"us"
|
||||
],
|
||||
"urlProbe": "https://twitchtracker.com/{username}",
|
||||
"headers": {
|
||||
"cookies": "_gat=1; _ga_4ZM72D0Y59=GS1.2.1734305902.1.0.1734305902.0.0.0; _ga=GA1.2.1051951095.1734305902; _gid=GA1.2.30506583.1734305902; cf_clearance=xo5uTOkBRsAYb4so4QSu1h8tlFcFJyYSA2SBEHyYA2U-1734305900-1.2.1.1-l9mQ677uPsBenPceAasuW_ZVBqRgQqy4df.13gRl6y4aFBf._3bLo1c3.uVZOXwMxL_iVN.EvEHEBiNczBNMM6riJrVWgiLx1O1jGRbhIiGP.tsomZgyl_bNupNbWxZNzHy454hC0iUigDrE5jkJJoazDRJNc5532wj9nT.U9DDBxW3RplVCdj4x5sMt3K3IXADYvAGabBQnzvS3rEr_w66KClwAehy69tWHVSPDkc.ww7QnxdDItYqmtL8bz9IScdouTAvU_MWK6oxvxcLc6GQFCQZnoToeX8Fgeui2flhV.kXXjEQ4NjypxSFakcCPIysHZOUjKfv93.W9Vfl7id.Y8DUpsmxEPVOpfcrGY6YvFtk6yJhvUQryJftS5b7E5P5jVPW_pPlMWSTWL9IaysG7INm6ZjDyjsVG7OBJIUujSSMlyoKiR8sv0L2ueHt6",
|
||||
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
|
||||
},
|
||||
"checkType": "status_code",
|
||||
"alexaRank": 34,
|
||||
"urlMain": "https://www.twitch.tv/",
|
||||
@@ -17079,7 +17093,7 @@
|
||||
},
|
||||
"Udemy": {
|
||||
"tags": [
|
||||
"in"
|
||||
"cloudflare"
|
||||
],
|
||||
"checkType": "response_url",
|
||||
"alexaRank": 131,
|
||||
@@ -17476,9 +17490,6 @@
|
||||
],
|
||||
"method": "vimeo"
|
||||
},
|
||||
"headers": {
|
||||
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3MzQxMTc1NDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiNDc4Y2ZhZGUtZjI0Yy00MDVkLTliYWItN2RlNGEzNGM4MzI5In0.guN7Fg8dqq7EYdckrJ-6Rdkj_5MOl6FaC4YUSOceDpU"
|
||||
},
|
||||
"urlProbe": "https://api.vimeo.com/users/{username}?fields=name%2Cgender%2Cbio%2Curi%2Clink%2Cbackground_video%2Clocation_details%2Cpictures%2Cverified%2Cmetadata.public_videos.total%2Cavailable_for_hire%2Ccan_work_remotely%2Cmetadata.connections.videos.total%2Cmetadata.connections.albums.total%2Cmetadata.connections.followers.total%2Cmetadata.connections.following.total%2Cmetadata.public_videos.total%2Cmetadata.connections.vimeo_experts.is_enrolled%2Ctotal_collection_count%2Ccreated_time%2Cprofile_preferences%2Cmembership%2Cclients%2Cskills%2Cproject_types%2Crates%2Ccategories%2Cis_expert%2Cprofile_discovery%2Cwebsites%2Ccontact_emails&fetch_user_profile=1",
|
||||
"checkType": "status_code",
|
||||
"alexaRank": 148,
|
||||
@@ -27244,6 +27255,7 @@
|
||||
},
|
||||
"upwork.com": {
|
||||
"tags": [
|
||||
"cloudflare",
|
||||
"us"
|
||||
],
|
||||
"engine": "engine404",
|
||||
@@ -35769,6 +35781,7 @@
|
||||
"tags": [
|
||||
"gaming",
|
||||
"coding",
|
||||
"cloudflare",
|
||||
"photo",
|
||||
"music",
|
||||
"blog",
|
||||
|
||||
@@ -54,5 +54,15 @@
|
||||
"graph_report": false,
|
||||
"pdf_report": false,
|
||||
"html_report": false,
|
||||
"web_interface_port": 5000
|
||||
"web_interface_port": 5000,
|
||||
"cloudflare_bypass": {
|
||||
"enabled": true,
|
||||
"modules": [
|
||||
{
|
||||
"name": "chrome_webgate",
|
||||
"method": "url_rewrite",
|
||||
"url": "http://localhost:8000/html?url={url}&retries=1"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -110,6 +110,7 @@ class Submitter:
|
||||
cookies=self.args.cookie_file,
|
||||
# Don't skip errors in submit mode - we need to check both false positives/true negatives
|
||||
skip_errors=False,
|
||||
cloudflare_bypass=getattr(self.args, 'cloudflare_bypass', False),
|
||||
)
|
||||
return changes
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [VK (by id) (https://vk.com/)](https://vk.com/)*: top 50, ru*
|
||||
1.  [BongaCams (https://sbongacams.com)](https://sbongacams.com)*: top 50, cz, webcam*
|
||||
1.  [Instagram (https://www.instagram.com/)](https://www.instagram.com/)*: top 50, photo*, search is disabled
|
||||
1.  [Twitch (https://www.twitch.tv/)](https://www.twitch.tv/)*: top 50, streaming, us*
|
||||
1.  [Twitch (https://www.twitch.tv/)](https://www.twitch.tv/)*: top 50, cloudflare, streaming, us*
|
||||
1.  [YandexCollections API (https://yandex.ru/collections/)](https://yandex.ru/collections/)*: top 50, ru, sharing*, search is disabled
|
||||
1.  [StackOverflow (https://stackoverflow.com)](https://stackoverflow.com)*: top 50, coding*
|
||||
1.  [Ebay (https://www.ebay.com/)](https://www.ebay.com/)*: top 50, shopping, us*
|
||||
@@ -62,7 +62,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [community.adobe.com (https://community.adobe.com)](https://community.adobe.com)*: top 100, us*
|
||||
1.  [TradingView (https://www.tradingview.com/)](https://www.tradingview.com/)*: top 100, trading, us*
|
||||
1.  [Aparat (https://www.aparat.com)](https://www.aparat.com)*: top 100, ir, video*
|
||||
1.  [ChaturBate (https://chaturbate.com)](https://chaturbate.com)*: top 100, us*
|
||||
1.  [ChaturBate (https://chaturbate.com)](https://chaturbate.com)*: top 100, cloudflare, us*
|
||||
1.  [Medium (https://medium.com/)](https://medium.com/)*: top 100, blog, us*, search is disabled
|
||||
1.  [Livejasmin (https://www.livejasmin.com/)](https://www.livejasmin.com/)*: top 100, us, webcam*
|
||||
1.  [Pornhub (https://pornhub.com/)](https://pornhub.com/)*: top 100, porn*
|
||||
@@ -72,7 +72,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [BleachFandom (https://bleach.fandom.com/ru)](https://bleach.fandom.com/ru)*: top 100, ru, wiki*
|
||||
1.  [Fandom (https://www.fandom.com/)](https://www.fandom.com/)*: top 100, us*
|
||||
1.  [FandomCommunityCentral (https://community.fandom.com)](https://community.fandom.com)*: top 100, wiki*
|
||||
1.  [Etsy (https://www.etsy.com/)](https://www.etsy.com/)*: top 100, shopping, us*
|
||||
1.  [Etsy (https://www.etsy.com/)](https://www.etsy.com/)*: top 100, cloudflare, shopping, us*
|
||||
1.  [GitHub (https://www.github.com/)](https://www.github.com/)*: top 100, coding*
|
||||
1.  [Spotify (https://open.spotify.com/)](https://open.spotify.com/)*: top 100, music, us*, search is disabled
|
||||
1.  [TikTok (https://www.tiktok.com/)](https://www.tiktok.com/)*: top 100, video*
|
||||
@@ -80,14 +80,14 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Tumblr (https://www.tumblr.com)](https://www.tumblr.com)*: top 500, blog*
|
||||
1.  [Roblox (https://www.roblox.com/)](https://www.roblox.com/)*: top 500, gaming, us*
|
||||
1.  [SoundCloud (https://soundcloud.com/)](https://soundcloud.com/)*: top 500, music*
|
||||
1.  [Udemy (https://www.udemy.com)](https://www.udemy.com)*: top 500, in*
|
||||
1.  [Udemy (https://www.udemy.com)](https://www.udemy.com)*: top 500, cloudflare*
|
||||
1.  [discourse.mozilla.org (https://discourse.mozilla.org)](https://discourse.mozilla.org)*: top 500*
|
||||
1.  [linktr.ee (https://linktr.ee)](https://linktr.ee)*: top 500, links*
|
||||
1.  [xHamster (https://xhamster.com)](https://xhamster.com)*: top 500, porn, us*
|
||||
1.  [Zhihu (https://www.zhihu.com/)](https://www.zhihu.com/)*: top 500, cn*, search is disabled
|
||||
1.  [Blogger (by GAIA id) (https://www.blogger.com)](https://www.blogger.com)*: top 500, blog*
|
||||
1.  [ResearchGate (https://www.researchgate.net/)](https://www.researchgate.net/)*: top 500, in, us*
|
||||
1.  [Freepik (https://www.freepik.com)](https://www.freepik.com)*: top 500, art, photo, stock*
|
||||
1.  [Freepik (https://www.freepik.com)](https://www.freepik.com)*: top 500, art, cloudflare, photo, stock*
|
||||
1.  [Vimeo (https://vimeo.com)](https://vimeo.com)*: top 500, video*
|
||||
1.  [Pinterest (https://www.pinterest.com/)](https://www.pinterest.com/)*: top 500, art, photo, sharing*
|
||||
1.  [Fiverr (https://www.fiverr.com/)](https://www.fiverr.com/)*: top 500, shopping, us*
|
||||
@@ -101,9 +101,9 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Wix (https://wix.com/)](https://wix.com/)*: top 500, us*
|
||||
1.  [Slack (https://slack.com)](https://slack.com)*: top 500, messaging*
|
||||
1.  [Chess (https://www.chess.com)](https://www.chess.com)*: top 500, gaming, hobby*
|
||||
1.  [upwork.com (https://upwork.com)](https://upwork.com)*: top 500, us*
|
||||
1.  [upwork.com (https://upwork.com)](https://upwork.com)*: top 500, cloudflare, us*
|
||||
1.  [Archive.org (https://archive.org)](https://archive.org)*: top 500*, search is disabled
|
||||
1.  [Figma (https://www.figma.com/)](https://www.figma.com/)*: top 500, design*
|
||||
1.  [Figma (https://www.figma.com/)](https://www.figma.com/)*: top 500, cloudflare, design*
|
||||
1.  [iStock (https://www.istockphoto.com)](https://www.istockphoto.com)*: top 500, photo, stock*
|
||||
1.  [Scribd (https://www.scribd.com/)](https://www.scribd.com/)*: top 500, reading*
|
||||
1.  [opensea.io (https://opensea.io)](https://opensea.io)*: top 500, us*
|
||||
@@ -112,7 +112,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Yelp (http://www.yelp.com)](http://www.yelp.com)*: top 500, review*, search is disabled
|
||||
1.  [Yelp (by id) (https://www.yelp.com)](https://www.yelp.com)*: top 500, review*
|
||||
1.  [Blogger (https://www.blogger.com/)](https://www.blogger.com/)*: top 500, blog*
|
||||
1.  [Patreon (https://www.patreon.com/)](https://www.patreon.com/)*: top 500, finance*
|
||||
1.  [Patreon (https://www.patreon.com/)](https://www.patreon.com/)*: top 500, cloudflare, finance*
|
||||
1.  [GoodReads (https://www.goodreads.com/)](https://www.goodreads.com/)*: top 500, books, us*
|
||||
1.  [OP.GG [LeagueOfLegends] Brazil (https://www.op.gg/)](https://www.op.gg/)*: top 500, br, gaming*
|
||||
1.  [OP.GG [LeagueOfLegends] North America (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming*
|
||||
@@ -145,7 +145,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Weebly (http://weebly.com)](http://weebly.com)*: top 500, business*
|
||||
1.  [RamblerDating (https://dating.rambler.ru/)](https://dating.rambler.ru/)*: top 500, dating, ru*, search is disabled
|
||||
1.  [LiveJournal (https://www.livejournal.com/)](https://www.livejournal.com/)*: top 500, blog, ru*
|
||||
1.  [SourceForge (https://sourceforge.net/)](https://sourceforge.net/)*: top 500, coding, us*
|
||||
1.  [SourceForge (https://sourceforge.net/)](https://sourceforge.net/)*: top 500, cloudflare, coding, us*
|
||||
1.  [Genius (https://genius.com/)](https://genius.com/)*: top 500, music, us*
|
||||
1.  [Issuu (https://issuu.com/)](https://issuu.com/)*: top 500, business*
|
||||
1.  [9GAG (https://www.9gag.com/)](https://www.9gag.com/)*: top 500, sharing*
|
||||
@@ -162,7 +162,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [cyber.harvard.edu (https://cyber.harvard.edu)](https://cyber.harvard.edu)*: top 1K, us*
|
||||
1.  [Duolingo (https://duolingo.com/)](https://duolingo.com/)*: top 1K, us*
|
||||
1.  [Rottentomatoes (https://www.rottentomatoes.com)](https://www.rottentomatoes.com)*: top 1K, movies, us*
|
||||
1.  [Kickstarter (https://www.kickstarter.com)](https://www.kickstarter.com)*: top 1K, finance, us*
|
||||
1.  [Kickstarter (https://www.kickstarter.com)](https://www.kickstarter.com)*: top 1K, cloudflare, finance, us*
|
||||
1.  [forums.ea.com (https://forums.ea.com)](https://forums.ea.com)*: top 1K, forum, gaming, us*, search is disabled
|
||||
1.  [Envato (https://forums.envato.com)](https://forums.envato.com)*: top 1K, au, forum, in*
|
||||
1.  [Ultimate-Guitar (https://ultimate-guitar.com/)](https://ultimate-guitar.com/)*: top 1K, us*
|
||||
@@ -181,17 +181,17 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Gamespot (https://www.gamespot.com/)](https://www.gamespot.com/)*: top 1K, gaming, us*
|
||||
1.  [note (https://note.com/)](https://note.com/)*: top 1K, jp*
|
||||
1.  [AfreecaTV (http://bjapi.afreecatv.com)](http://bjapi.afreecatv.com)*: top 1K, streaming*
|
||||
1.  [Redbubble (https://www.redbubble.com/)](https://www.redbubble.com/)*: top 1K, shopping, us*
|
||||
1.  [Redbubble (https://www.redbubble.com/)](https://www.redbubble.com/)*: top 1K, cloudflare, shopping, us*
|
||||
1.  [Tom's guide (http://forums.tomsguide.com)](http://forums.tomsguide.com)*: top 1K, forum, tech*
|
||||
1.  [Yumpu (https://www.yumpu.com)](https://www.yumpu.com)*: top 1K, stock*, search is disabled
|
||||
1.  [community.brave.com (https://community.brave.com)](https://community.brave.com)*: top 1K, forum, us*
|
||||
1.  [Tinder (https://tinder.com/)](https://tinder.com/)*: top 1K, dating, us*
|
||||
1.  [CloudflareCommunity (https://community.cloudflare.com/)](https://community.cloudflare.com/)*: top 1K, forum, tech*
|
||||
1.  [CloudflareCommunity (https://community.cloudflare.com/)](https://community.cloudflare.com/)*: top 1K, cloudflare, forum, tech*
|
||||
1.  [Eksisozluk (https://eksisozluk.com)](https://eksisozluk.com)*: top 1K, tr*
|
||||
1.  [AllRecipes (https://www.allrecipes.com/)](https://www.allrecipes.com/)*: top 1K, us*
|
||||
1.  [T-MobileSupport (https://support.t-mobile.com)](https://support.t-mobile.com)*: top 1K, us*, search is disabled
|
||||
1.  [Tinkoff Invest (https://www.tinkoff.ru/invest/)](https://www.tinkoff.ru/invest/)*: top 5K, ru*
|
||||
1.  [Discogs (https://www.discogs.com/)](https://www.discogs.com/)*: top 5K, music, us*
|
||||
1.  [Discogs (https://www.discogs.com/)](https://www.discogs.com/)*: top 5K, cloudflare, music, us*
|
||||
1.  [DiscussPython (https://discuss.python.org/)](https://discuss.python.org/)*: top 5K, coding, forum, us*
|
||||
1.  [Nairaland Forum (https://www.nairaland.com/)](https://www.nairaland.com/)*: top 5K, ng*
|
||||
1.  [Redtube (https://ru.redtube.com/)](https://ru.redtube.com/)*: top 5K, porn, us*
|
||||
@@ -3141,7 +3141,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [OP.GG [PUBG] (https://pubg.op.gg)](https://pubg.op.gg)*: top 100M, gaming*
|
||||
1.  [OP.GG [Valorant] (https://valorant.op.gg)](https://valorant.op.gg)*: top 100M, gaming*
|
||||
|
||||
The list was updated at (2024-12-13)
|
||||
The list was updated at (2026-03-22)
|
||||
## Statistics
|
||||
|
||||
Enabled/total sites: 2684/3137 = 85.56%
|
||||
@@ -3197,6 +3197,6 @@ Top 20 tags:
|
||||
- (15) `shopping`
|
||||
- (13) `sport`
|
||||
- (13) `business`
|
||||
- (13) `cloudflare` (non-standard)
|
||||
- (12) `movies`
|
||||
- (11) `hobby`
|
||||
- (11) `education`
|
||||
|
||||
@@ -45,6 +45,17 @@ DEFAULT_ARGS: Dict[str, Any] = {
|
||||
'web': None,
|
||||
'with_domains': False,
|
||||
'xmind': False,
|
||||
# Mirrors maigret/resources/settings.json (flag --cloudflare-bypass overrides with True)
|
||||
'cloudflare_bypass': {
|
||||
"enabled": True,
|
||||
"modules": [
|
||||
{
|
||||
"name": "chrome_webgate",
|
||||
"method": "url_rewrite",
|
||||
"url": "http://localhost:8000/html?url={url}&retries=1"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -60,6 +71,15 @@ def test_args_search_mode(argparser):
|
||||
assert getattr(args, arg) == want_args[arg]
|
||||
|
||||
|
||||
def test_args_cloudflare_bypass_flag(argparser):
    """Passing ``--cloudflare-bypass`` yields ``True`` instead of the default value."""
    parsed = argparser.parse_args(['--cloudflare-bypass', 'username'])

    # Defaults with only the two entries this command line is expected to change.
    expected = {
        **DEFAULT_ARGS,
        'username': ['username'],
        'cloudflare_bypass': True,
    }

    assert parsed == Namespace(**expected)
|
||||
|
||||
|
||||
def test_args_search_mode_several_usernames(argparser):
|
||||
args = argparser.parse_args('username1 username2'.split())
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ async def test_asyncio_progressbar_executor():
|
||||
# no guarantees for the results order
|
||||
assert sorted(await executor.run(tasks)) == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
|
||||
assert executor.execution_time > 0.2
|
||||
assert executor.execution_time < 0.3
|
||||
assert executor.execution_time < 0.6
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
Reference in New Issue
Block a user