Added Crypto/Web3 site checks (#2457)

This commit is contained in:
Soxoj
2026-04-04 16:49:12 +02:00
committed by GitHub
parent 75289c78bf
commit e8f4318e5d
4 changed files with 97 additions and 50 deletions
+1 -1
View File
@@ -209,7 +209,7 @@ class AsyncioQueueGeneratorExecutor:
result = kwargs.get('default')
await self._results.put(result)
except Exception as e:
self.logger.error(f"Error in worker: {e}")
self.logger.error(f"Error in worker: {e}", exc_info=True)
finally:
self.queue.task_done()
+73 -33
View File
@@ -1880,6 +1880,9 @@
"alexaRank": 407,
"tags": [
"social"
],
"protection": [
"tls_fingerprint"
]
},
"Slack": {
@@ -3942,26 +3945,19 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"OpenSea": {
"disabled": true,
"tags": [
"crypto",
"nft"
],
"checkType": "message",
"presenseStrs": [
"username\\",
"lastSale",
"publicUsername",
"name"
],
"absenceStrs": [
"This page is lost.</h1>"
"checkType": "status_code",
"protection": [
"tls_fingerprint"
],
"alexaRank": 1842,
"urlMain": "https://opensea.io",
"url": "https://opensea.io/accounts/{username}",
"usernameClaimed": "admin",
"usernameUnclaimed": "noonewouldeverusethis7",
"alexaRank": 1842
"url": "https://opensea.io/{username}",
"usernameClaimed": "opensea",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Speakerdeck": {
"tags": [
@@ -16401,25 +16397,6 @@
"usernameClaimed": "green",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"fragment.com": {
"absenceStrs": [
"data-username=",
"data-item-title="
],
"presenseStrs": [
"tm-datetime",
"tm-wallet"
],
"url": "https://fragment.com/username/{username}",
"urlMain": "https://fragment.com",
"usernameClaimed": "yazheg",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message",
"tags": [
"crypto"
],
"alexaRank": 127213
},
"artinvestment": {
"tags": [
"forum",
@@ -35085,6 +35062,69 @@
"urlMain": "https://ctftime.org/",
"usernameClaimed": "1",
"usernameUnclaimed": "999999999"
},
"Warpcast": {
"url": "https://warpcast.com/{username}",
"urlProbe": "https://client.warpcast.com/v2/user-by-username?username={username}",
"urlMain": "https://warpcast.com",
"checkType": "status_code",
"usernameClaimed": "dwr.eth",
"usernameUnclaimed": "noonewouldever",
"tags": [
"crypto",
"social"
]
},
"Fragment": {
"url": "https://fragment.com/username/{username}",
"urlMain": "https://fragment.com",
"checkType": "message",
"presenseStrs": [
"tm-wallet"
],
"absenceStrs": [
"Available"
],
"usernameClaimed": "durov",
"usernameUnclaimed": "noonewouldeverusethis77777",
"tags": [
"crypto",
"messaging"
],
"alexaRank": 127213
},
"Paragraph": {
"url": "https://paragraph.com/@{username}",
"urlProbe": "https://paragraph.com/api/blogs/@{username}",
"urlMain": "https://paragraph.com",
"checkType": "status_code",
"usernameClaimed": "vitalik",
"usernameUnclaimed": "noonewouldever",
"tags": [
"blog",
"crypto"
]
},
"Tonometerbot": {
"url": "https://tonometerbot.com/@/{username}",
"urlMain": "https://tonometerbot.com",
"checkType": "status_code",
"usernameClaimed": "jaga1985",
"usernameUnclaimed": "noonewouldever",
"tags": [
"crypto"
]
},
"Spatial": {
"url": "https://www.spatial.io/@{username}",
"urlMain": "https://www.spatial.io",
"checkType": "status_code",
"usernameClaimed": "rammy",
"usernameUnclaimed": "noonewouldever",
"tags": [
"crypto",
"gaming"
]
}
},
"engines": {
+3
View File
@@ -71,7 +71,10 @@ class URLMatcher:
def ascii_data_display(data: str) -> Any:
    """Parse *data* as a Python literal, falling back to the raw string.

    Args:
        data: Text that may contain a Python literal (list, dict, number,
            quoted string, ...).

    Returns:
        The value produced by ``ast.literal_eval`` when *data* is a valid
        literal; otherwise *data* itself, unchanged.
    """
    try:
        return ast.literal_eval(data)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # literal_eval is documented to raise any of these on malformed
        # input (e.g. "{[1]: 2}" -> TypeError for the unhashable key), so
        # treat all of them as "not a literal" and show the text as-is.
        return data
def get_dict_ascii_tree(items, prepend="", new_line=True):
+19 -15
View File
@@ -1,5 +1,5 @@
## List of supported sites (search methods): total 3153
## List of supported sites (search methods): total 3157
Rank data fetched from Majestic Million by domains.
@@ -199,7 +199,7 @@ Rank data fetched from Majestic Million by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://www.dreamstime.com) [Dreamstime (https://www.dreamstime.com)](https://www.dreamstime.com)*: top 5K, art, photo, stock*
1. ![](https://www.google.com/s2/favicons?domain=https://www.instapaper.com/) [Instapaper (https://www.instapaper.com/)](https://www.instapaper.com/)*: top 5K, reading*
1. ![](https://www.google.com/s2/favicons?domain=https://www.wattpad.com/) [Wattpad (https://www.wattpad.com/)](https://www.wattpad.com/)*: top 5K, reading, writing*
1. ![](https://www.google.com/s2/favicons?domain=https://opensea.io) [OpenSea (https://opensea.io)](https://opensea.io)*: top 5K, crypto, nft*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://opensea.io) [OpenSea (https://opensea.io)](https://opensea.io)*: top 5K, crypto, nft*
1. ![](https://www.google.com/s2/favicons?domain=https://speakerdeck.com) [Speakerdeck (https://speakerdeck.com)](https://speakerdeck.com)*: top 5K, sharing*
1. ![](https://www.google.com/s2/favicons?domain=http://wikimapia.org) [WikimapiaProfile (http://wikimapia.org)](http://wikimapia.org)*: top 5K, maps, ru*
1. ![](https://www.google.com/s2/favicons?domain=http://wikimapia.org) [WikimapiaSearch (http://wikimapia.org)](http://wikimapia.org)*: top 5K, maps, ru*
@@ -748,7 +748,7 @@ Rank data fetched from Majestic Million by domains.
1. ![](https://www.google.com/s2/favicons?domain=http://www.droidforums.net/) [Droidforums (http://www.droidforums.net/)](http://www.droidforums.net/)*: top 10M, forum*
1. ![](https://www.google.com/s2/favicons?domain=https://www.vsemayki.ru/) [Vsemayki (https://www.vsemayki.ru/)](https://www.vsemayki.ru/)*: top 10M, ru*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.clozemaster.com) [Clozemaster (https://www.clozemaster.com)](https://www.clozemaster.com)*: top 10M, education*
1. ![](https://www.google.com/s2/favicons?domain=https://fragment.com) [fragment.com (https://fragment.com)](https://fragment.com)*: top 10M, crypto*
1. ![](https://www.google.com/s2/favicons?domain=https://fragment.com) [Fragment (https://fragment.com)](https://fragment.com)*: top 10M, crypto, messaging*
1. ![](https://www.google.com/s2/favicons?domain=https://nothing.community/) [Nothing Community (https://nothing.community/)](https://nothing.community/)*: top 10M, forum*
1. ![](https://www.google.com/s2/favicons?domain=https://www.old-games.ru) [Old-games (https://www.old-games.ru)](https://www.old-games.ru)*: top 10M, pt, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://getmyuni.com/) [GetMyUni (https://getmyuni.com/)](https://getmyuni.com/)*: top 10M, in*, search is disabled
@@ -3156,25 +3156,29 @@ Rank data fetched from Majestic Million by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://www.curseforge.com.) [Curse Forge (https://www.curseforge.com.)](https://www.curseforge.com.)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://pentesterlab.com/) [PentesterLab (https://pentesterlab.com/)](https://pentesterlab.com/)*: top 100M, hacking*
1. ![](https://www.google.com/s2/favicons?domain=https://ctftime.org/) [CTFtime (https://ctftime.org/)](https://ctftime.org/)*: top 100M, hacking*
1. ![](https://www.google.com/s2/favicons?domain=https://warpcast.com) [Warpcast (https://warpcast.com)](https://warpcast.com)*: top 100M, crypto, social*
1. ![](https://www.google.com/s2/favicons?domain=https://paragraph.com) [Paragraph (https://paragraph.com)](https://paragraph.com)*: top 100M, blog, crypto*
1. ![](https://www.google.com/s2/favicons?domain=https://tonometerbot.com) [Tonometerbot (https://tonometerbot.com)](https://tonometerbot.com)*: top 100M, crypto*
1. ![](https://www.google.com/s2/favicons?domain=https://www.spatial.io) [Spatial (https://www.spatial.io)](https://www.spatial.io)*: top 100M, crypto, gaming*
The list was updated at (2026-04-02)
The list was updated at (2026-04-03)
## Statistics
Enabled/total sites: 2545/3153 = 80.72%
Enabled/total sites: 2550/3157 = 80.77%
Incomplete message checks: 341/2545 = 13.4% (false positive risks)
Incomplete message checks: 341/2550 = 13.37% (false positive risks)
Status code checks: 634/2545 = 24.91% (false positive risks)
Status code checks: 639/2550 = 25.06% (false positive risks)
False positive risk (total): 38.31%
False positive risk (total): 38.43%
Sites with probing: 500px, Armchairgm, BinarySearch (disabled), BleachFandom, Bluesky, BongaCams, Boosty, BuyMeACoffee, Calendly, Cent, Chess, Code Sandbox, Code Snippet Wiki, DailyMotion, Discord, Diskusjon.no, Disqus, Docker Hub, Duolingo, FandomCommunityCentral, GitHub, GitLab, Google Plus (archived), Gravatar, Hashnode, Holopin, Imgur, Issuu, Keybase, Kick, Kvinneguiden, LeetCode, Lesswrong, Livejasmin, LocalCryptos (disabled), MicrosoftLearn, MixCloud, Monkeytype, Niftygateway, Omg.lol, Picsart, Plurk, Polarsteps, Rarible, Reddit, Reddit Search (Pushshift) (disabled), RoyalCams, Scratch, Soop, SportsTracker, Spotify (disabled), StackOverflow, Substack, TAP'D, Topcoder, Trello, Twitch, Twitter, Twitter Shadowban (disabled), UnstoppableDomains, Vimeo, Warframe Market, Weibo, Yapisal (disabled), YouNow, en.brickimedia.org, nightbot, notabug.org, qiwi.me (disabled)
Sites with probing: 500px, Armchairgm, BinarySearch (disabled), BleachFandom, Bluesky, BongaCams, Boosty, BuyMeACoffee, Calendly, Cent, Chess, Code Sandbox, Code Snippet Wiki, DailyMotion, Discord, Diskusjon.no, Disqus, Docker Hub, Duolingo, FandomCommunityCentral, GitHub, GitLab, Google Plus (archived), Gravatar, Hashnode, Holopin, Imgur, Issuu, Keybase, Kick, Kvinneguiden, LeetCode, Lesswrong, Livejasmin, LocalCryptos (disabled), MicrosoftLearn, MixCloud, Monkeytype, Niftygateway, Omg.lol, Paragraph, Picsart, Plurk, Polarsteps, Rarible, Reddit, Reddit Search (Pushshift) (disabled), RoyalCams, Scratch, Soop, SportsTracker, Spotify (disabled), StackOverflow, Substack, TAP'D, Topcoder, Trello, Twitch, Twitter, Twitter Shadowban (disabled), UnstoppableDomains, Vimeo, Warframe Market, Warpcast, Weibo, Yapisal (disabled), YouNow, en.brickimedia.org, nightbot, notabug.org, qiwi.me (disabled)
Sites with activation: Spotify (disabled), Twitter, Vimeo, Weibo
Top 20 profile URLs:
- (710) `{urlMain}/index/8-0-{username} (uCoz)`
- (317) `/{username}`
- (319) `/{username}`
- (221) `{urlMain}{urlSubpath}/members/?username={username} (XenForo)`
- (173) `/user/{username}`
- (138) `/profile/{username}`
@@ -3183,7 +3187,7 @@ Top 20 profile URLs:
- (118) `/u/{username}`
- (92) `/users/{username}`
- (87) `{urlMain}/u/{username}/summary (Discourse)`
- (66) `/@{username}`
- (68) `/@{username}`
- (55) `/wiki/User:{username}`
- (45) `SUBDOMAIN`
- (41) `/members/?username={username}`
@@ -3198,21 +3202,21 @@ Top 20 profile URLs:
Top 20 tags:
- (1066) `NO_TAGS` (non-standard)
- (754) `forum`
- (124) `gaming`
- (125) `gaming`
- (77) `coding`
- (59) `photo`
- (46) `tech`
- (44) `social`
- (45) `social`
- (41) `news`
- (38) `blog`
- (39) `blog`
- (34) `music`
- (32) `shopping`
- (25) `sharing`
- (25) `crypto`
- (23) `video`
- (23) `education`
- (22) `finance`
- (21) `art`
- (21) `freelance`
- (21) `crypto`
- (18) `hobby`
- (17) `sport`