diff --git a/maigret/resources/data.json b/maigret/resources/data.json
index 30e3f10..30cd298 100644
--- a/maigret/resources/data.json
+++ b/maigret/resources/data.json
@@ -73,10 +73,8 @@
},
"123rf": {
"tags": [
- "images",
- "in",
- "ru",
- "us"
+ "photo",
+ "ru"
],
"checkType": "response_url",
"alexaRank": 1068,
@@ -416,8 +414,7 @@
},
"About.me": {
"tags": [
- "in",
- "social"
+ "blog"
],
"checkType": "status_code",
"alexaRank": 11450,
@@ -2164,8 +2161,7 @@
},
"BuzzFeed": {
"tags": [
- "social",
- "us"
+ "news"
],
"checkType": "status_code",
"alexaRank": 509,
@@ -3274,7 +3270,8 @@
},
"Diary.ru": {
"tags": [
- "ru"
+ "ru",
+ "blog"
],
"checkType": "message",
"absenceStrs": "
— @\u0434\u043d\u0435\u0432\u043d\u0438\u043a\u0438: \u0430\u0441\u043e\u0446\u0438\u0430\u043b\u044c\u043d\u0430\u044f \u0441\u0435\u0442\u044c",
@@ -3346,7 +3343,7 @@
},
"Discogs": {
"tags": [
- "us"
+ "music"
],
"checkType": "status_code",
"alexaRank": 899,
@@ -3398,9 +3395,7 @@
},
"Disqus": {
"tags": [
- "discussion",
- "global",
- "us"
+ "discussion"
],
"checkType": "status_code",
"alexaRank": 836,
@@ -3804,9 +3799,7 @@
},
"Empflix": {
"tags": [
- "de",
- "porno",
- "us"
+ "porn"
],
"checkType": "response_url",
"alexaRank": 11804,
@@ -4306,7 +4299,7 @@
"FilmWeb": {
"disabled": true,
"tags": [
- "films",
+ "movies",
"pl"
],
"checkType": "message",
@@ -4320,7 +4313,7 @@
"Filmogs": {
"disabled": true,
"tags": [
- "films"
+ "movies"
],
"checkType": "status_code",
"url": "https://www.filmo.gs/users/{username}",
@@ -4444,9 +4437,7 @@
},
"Flickr": {
"tags": [
- "images",
- "in",
- "us"
+ "photo"
],
"checkType": "status_code",
"alexaRank": 936,
@@ -4587,7 +4578,7 @@
},
"FortniteTracker": {
"tags": [
- "us"
+ "gaming"
],
"checkType": "status_code",
"alexaRank": 8125,
@@ -4809,10 +4800,7 @@
},
"Foursquare": {
"tags": [
- "global",
- "in",
- "social",
- "us"
+ "geosocial"
],
"checkType": "status_code",
"alexaRank": 3540,
@@ -5153,9 +5141,6 @@
"usernameUnclaimed": "noonewouldeverusethis77777"
},
"Twitter Shadowban": {
- "tags": [
- "jp"
- ],
"urlProbe": "https://shadowban.eu/.api/{username}",
"checkType": "message",
"presenseStrs": [
@@ -5420,8 +5405,7 @@
},
"Giphy": {
"tags": [
- "image",
- "us",
+ "photo",
"video"
],
"checkType": "status_code",
@@ -5444,8 +5428,7 @@
},
"GitHub": {
"tags": [
- "coding",
- "us"
+ "coding"
],
"regexCheck": "^[a-zA-Z0-9](?:[a-zA-Z0-9]|-(?=[a-zA-Z0-9])){0,38}$",
"urlProbe": "https://api.github.com/users/{username}",
@@ -5458,8 +5441,7 @@
},
"GitLab": {
"tags": [
- "coding",
- "in"
+ "coding"
],
"urlProbe": "https://gitlab.com/api/v4/users?username={username}",
"checkType": "message",
@@ -5484,8 +5466,7 @@
"Gitmemory": {
"tags": [
"coding",
- "global",
- "in"
+ "github"
],
"checkType": "message",
"absenceStrs": "Oops,404",
@@ -5568,8 +5549,7 @@
},
"Gog": {
"tags": [
- "global",
- "us"
+ "gaming"
],
"checkType": "status_code",
"alexaRank": 1980,
@@ -5794,8 +5774,8 @@
},
"Gramho": {
"tags": [
- "global",
- "jp"
+ "instagram",
+ "photo"
],
"checkType": "status_code",
"alexaRank": 3253,
@@ -5806,9 +5786,7 @@
},
"Gravatar": {
"tags": [
- "global",
- "images",
- "in"
+ "photo"
],
"urlProbe": "http://en.gravatar.com/{username}.json",
"checkType": "message",
@@ -6007,8 +5985,7 @@
},
"HackerOne": {
"tags": [
- "hacker",
- "in"
+ "hacking"
],
"checkType": "message",
"absenceStrs": "Page not found",
@@ -6359,10 +6336,6 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"IFTTT": {
- "tags": [
- "misc",
- "us"
- ],
"regexCheck": "^[A-Za-z0-9]{3,35}$",
"checkType": "message",
"absenceStrs": "The requested page or file does not exist",
@@ -6456,8 +6429,7 @@
},
"ImageShack": {
"tags": [
- "images",
- "in"
+ "photo"
],
"checkType": "response_url",
"alexaRank": 10418,
@@ -6599,9 +6571,7 @@
},
"Instagram": {
"tags": [
- "global",
- "photos",
- "us"
+ "photo"
],
"errors": {
"Login \u2022 Instagram": "Login required"
@@ -7002,7 +6972,7 @@
"Kinogo": {
"tags": [
"by",
- "films"
+ "movies"
],
"checkType": "status_code",
"alexaRank": 20379,
@@ -7203,7 +7173,7 @@
},
"Launchpad": {
"tags": [
- "us"
+ "tech"
],
"checkType": "status_code",
"alexaRank": 14448,
@@ -7304,8 +7274,7 @@
"Libraries": {
"tags": [
"coding",
- "global",
- "in"
+ "github"
],
"regexCheck": "^[^\\.]+$",
"checkType": "status_code",
@@ -7522,6 +7491,7 @@
"LiveLib": {
"tags": [
"reading",
+ "books",
"ru"
],
"checkType": "status_code",
@@ -7671,11 +7641,8 @@
"LostFilmHD": {
"disabled": true,
"tags": [
- "es",
- "films",
- "pl",
- "ru",
- "ua"
+ "movies",
+ "ru"
],
"engine": "uCoz",
"alexaRank": 11625,
@@ -7719,8 +7686,6 @@
"Loveplanet": {
"tags": [
"dating",
- "gb",
- "it",
"ru"
],
"checkType": "message",
@@ -8086,8 +8051,7 @@
},
"Medium": {
"tags": [
- "news",
- "us"
+ "blog"
],
"checkType": "message",
"presenseStrs": [
@@ -8277,8 +8241,7 @@
},
"MixCloud": {
"tags": [
- "music",
- "us"
+ "music"
],
"urlProbe": "https://api.mixcloud.com/{username}/",
"checkType": "status_code",
@@ -8634,8 +8597,7 @@
},
"My.Mail.ru@OK": {
"tags": [
- "ru",
- "social"
+ "ru"
],
"type": "ok_id",
"checkType": "message",
@@ -8651,8 +8613,7 @@
},
"My.Mail.ru@VK": {
"tags": [
- "ru",
- "social"
+ "ru"
],
"type": "vk_id",
"checkType": "message",
@@ -8668,8 +8629,7 @@
},
"My.Mail.ru@bk.ru": {
"tags": [
- "ru",
- "social"
+ "ru"
],
"checkType": "message",
"absenceStrs": [
@@ -8684,8 +8644,7 @@
},
"My.Mail.ru@gmail.com": {
"tags": [
- "ru",
- "social"
+ "ru"
],
"checkType": "message",
"absenceStrs": [
@@ -8700,8 +8659,7 @@
},
"My.Mail.ru@list.ru": {
"tags": [
- "ru",
- "social"
+ "ru"
],
"checkType": "message",
"absenceStrs": [
@@ -8716,8 +8674,7 @@
},
"My.Mail.ru@mail.ru": {
"tags": [
- "ru",
- "social"
+ "ru"
],
"checkType": "message",
"absenceStrs": [
@@ -8732,8 +8689,7 @@
},
"My.Mail.ru@ya.ru": {
"tags": [
- "ru",
- "social"
+ "ru"
],
"checkType": "message",
"absenceStrs": [
@@ -8748,8 +8704,7 @@
},
"My.Mail.ru@yandex.ru": {
"tags": [
- "ru",
- "social"
+ "ru"
],
"checkType": "message",
"absenceStrs": [
@@ -8873,9 +8828,7 @@
},
"Myspace": {
"tags": [
- "in",
- "social",
- "us"
+ "blog"
],
"checkType": "status_code",
"alexaRank": 1824,
@@ -9219,11 +9172,7 @@
},
"Noblogs": {
"tags": [
- "global",
- "in",
- "it",
- "pk",
- "us"
+ "blog"
],
"checkType": "status_code",
"presenseStrs": [
@@ -9414,8 +9363,7 @@
},
"OpenStreetMap": {
"tags": [
- "in",
- "social"
+ "maps"
],
"regexCheck": "^[^\\.]+$",
"checkType": "status_code",
@@ -9673,8 +9621,7 @@
},
"Pastebin": {
"tags": [
- "sharing",
- "us"
+ "sharing"
],
"checkType": "response_url",
"alexaRank": 2132,
@@ -9849,8 +9796,8 @@
},
"Periscope": {
"tags": [
- "us",
- "video"
+ "video",
+ "streaming"
],
"checkType": "status_code",
"alexaRank": 52346,
@@ -9885,8 +9832,7 @@
"Photobucket": {
"disabled": true,
"tags": [
- "images",
- "us"
+ "photo"
],
"regexCheck": "\\w{4,32}",
"checkType": "message",
@@ -9936,10 +9882,8 @@
},
"Picuki": {
"tags": [
- "global",
"instagram",
- "photo",
- "us"
+ "photo"
],
"checkType": "message",
"absenceStrs": [
@@ -10270,9 +10214,7 @@
},
"Pornhub": {
"tags": [
- "global",
- "porno",
- "us"
+ "porn"
],
"checkType": "status_code",
"alexaRank": 62,
@@ -10964,9 +10906,8 @@
},
"Reddit": {
"tags": [
- "discussions",
- "news",
- "us"
+ "discussion",
+ "news"
],
"checkType": "status_code",
"presenseStrs": [
@@ -10991,9 +10932,7 @@
},
"Redtube": {
"tags": [
- "global",
- "porno",
- "us"
+ "porn"
],
"checkType": "status_code",
"alexaRank": 752,
@@ -11175,7 +11114,7 @@
},
"Roblox": {
"tags": [
- "us"
+ "gaming"
],
"checkType": "message",
"absenceStrs": "Page cannot be found or no longer exists",
@@ -11258,9 +11197,7 @@
},
"Rottentomatoes": {
"tags": [
- "films",
- "global",
- "us"
+ "movies"
],
"checkType": "status_code",
"alexaRank": 592,
@@ -11959,8 +11896,8 @@
},
"SlideShare": {
"tags": [
- "in",
- "presos"
+ "sharing",
+ "documents"
],
"checkType": "status_code",
"alexaRank": 172,
@@ -12131,8 +12068,7 @@
},
"SoundCloud": {
"tags": [
- "music",
- "us"
+ "music"
],
"checkType": "status_code",
"alexaRank": 116,
@@ -12193,7 +12129,8 @@
},
"Spaces": {
"tags": [
- "ru"
+ "ru",
+ "blog"
],
"checkType": "status_code",
"alexaRank": 48402,
@@ -12289,8 +12226,7 @@
},
"Spotify": {
"tags": [
- "music",
- "us"
+ "music"
],
"errors": {
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
@@ -12375,8 +12311,7 @@
},
"Steam": {
"tags": [
- "gaming",
- "us"
+ "gaming"
],
"checkType": "message",
"absenceStrs": "The specified profile could not be found",
@@ -12769,8 +12704,7 @@
},
"Taringa": {
"tags": [
- "ar",
- "social"
+ "ar"
],
"checkType": "message",
"absenceStrs": "Moved Permanently",
@@ -12782,7 +12716,6 @@
},
"TechPowerUp": {
"tags": [
- "global",
"us"
],
"checkType": "message",
@@ -12817,11 +12750,6 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Telegram": {
- "tags": [
- "global",
- "in",
- "us"
- ],
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_]{4,}$",
"checkType": "message",
"absenceStrs": [
@@ -12918,8 +12846,8 @@
},
"TheGuardian": {
"tags": [
- "global",
- "us"
+ "us",
+ "news"
],
"checkType": "message",
"absenceStrs": "public profile | Identity | The Guardian",
@@ -12943,9 +12871,7 @@
},
"TheSimsResource": {
"tags": [
- "de",
"gaming",
- "global",
"us"
],
"checkType": "status_code",
@@ -13198,7 +13124,8 @@
},
"Toster": {
"tags": [
- "ru"
+ "ru",
+ "coding"
],
"checkType": "status_code",
"alexaRank": 1405,
@@ -13506,9 +13433,7 @@
},
"Tumblr": {
"tags": [
- "blogs",
- "global",
- "us"
+ "blog"
],
"regexCheck": "^[^\\.]+$",
"checkType": "status_code",
@@ -13531,8 +13456,7 @@
},
"Twitch": {
"tags": [
- "streaming",
- "us"
+ "streaming"
],
"urlProbe": "https://m.twitch.tv/{username}",
"checkType": "status_code",
@@ -13543,15 +13467,11 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Twitter": {
- "tags": [
- "global",
- "us"
- ],
"headers": {
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
- "x-guest-token": "1361823183884742664"
+ "x-guest-token": "1362149064209559554"
},
"errors": {
"Bad guest token": "x-guest-token update required"
@@ -13678,7 +13598,8 @@
},
"Untappd": {
"tags": [
- "us"
+ "networking",
+ "geosocial"
],
"checkType": "status_code",
"alexaRank": 25581,
@@ -13916,7 +13837,7 @@
"video"
],
"headers": {
- "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTM1MTk4ODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.4vI4t-JUbkcEDSoiydNz5dagG9xSKc-Clh2FOaoaXUg"
+ "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTM1OTc1MjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.dBxgHYOlLckB2zBh3mgINMKRXCIkWnAUQKUhn27_Zj0"
},
"activation": {
"url": "https://vimeo.com/_rv/viewer",
@@ -14201,8 +14122,8 @@
},
"Wattpad": {
"tags": [
- "in",
- "reading"
+ "reading",
+ "writing"
],
"checkType": "message",
"absenceStrs": "userError-404",
@@ -14248,9 +14169,7 @@
},
"We Heart It": {
"tags": [
- "blogs",
- "global",
- "in",
+ "blog",
"photo"
],
"checkType": "message",
@@ -14551,8 +14470,7 @@
},
"WordPress": {
"tags": [
- "blog",
- "us"
+ "blog"
],
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
"checkType": "response_url",
@@ -14716,8 +14634,7 @@
},
"Xvideos": {
"tags": [
- "porno",
- "us"
+ "porn"
],
"checkType": "status_code",
"alexaRank": 114,
@@ -14784,7 +14701,8 @@
},
"YandexBugbounty": {
"tags": [
- "ru"
+ "ru",
+ "hacking"
],
"checkType": "status_code",
"alexaRank": 46,
@@ -14842,7 +14760,8 @@
},
"YandexMusic": {
"tags": [
- "ru"
+ "ru",
+ "music"
],
"headers": {
"Referer": "https://music.yandex.ru/users/test/playlists"
@@ -14940,8 +14859,7 @@
},
"YouPorn": {
"tags": [
- "porno",
- "us"
+ "porn"
],
"checkType": "message",
"presenseStrs": [
@@ -15052,8 +14970,7 @@
},
"Zomato": {
"tags": [
- "food",
- "in"
+ "geosocial"
],
"headers": {
"Accept-Language": "en-US,en;q=0.9"
@@ -15246,8 +15163,8 @@
},
"authorSTREAM": {
"tags": [
- "in",
- "presos"
+ "documents",
+ "sharing"
],
"checkType": "status_code",
"alexaRank": 6489,
@@ -16148,7 +16065,7 @@
},
"forums.drom.ru": {
"tags": [
- "auto",
+ "forum",
"ru"
],
"engine": "vBulletin",
@@ -16417,7 +16334,9 @@
},
"Habr": {
"tags": [
- "ru"
+ "ru",
+ "blog",
+ "discussion"
],
"checkType": "status_code",
"alexaRank": 1405,
@@ -16432,11 +16351,9 @@
"usernameClaimed": "admin",
"usernameUnclaimed": "noonewouldeverusethis7"
},
- "hackster": {
+ "Hackster": {
"tags": [
- "de",
- "in",
- "us"
+ "tech"
],
"checkType": "status_code",
"alexaRank": 19719,
@@ -16739,7 +16656,7 @@
},
"labpentestit": {
"tags": [
- "cybersec",
+ "hacking",
"ru"
],
"checkType": "response_url",
@@ -16780,8 +16697,7 @@
},
"last.fm": {
"tags": [
- "music",
- "us"
+ "music"
],
"checkType": "status_code",
"alexaRank": 2058,
@@ -17436,9 +17352,11 @@
"usernameClaimed": "apple",
"usernameUnclaimed": "noonewouldeverusethis7"
},
- "pikabu": {
+ "Pikabu": {
"tags": [
- "ru"
+ "ru",
+ "blog",
+ "discussion"
],
"checkType": "status_code",
"alexaRank": 1349,
@@ -18436,9 +18354,7 @@
},
"xHamster": {
"tags": [
- "de",
- "porno",
- "us"
+ "porn"
],
"checkType": "status_code",
"alexaRank": 141,
@@ -22883,7 +22799,8 @@
},
"GitHubGist": {
"tags": [
- "us"
+ "sharing",
+ "coding"
],
"engine": "engineRedirect",
"alexaRank": 86,
diff --git a/maigret/sites.py b/maigret/sites.py
index fc8cbd4..9035a86 100644
--- a/maigret/sites.py
+++ b/maigret/sites.py
@@ -7,7 +7,16 @@ import sys
import requests
-from .utils import CaseConverter, URLMatcher
+from .utils import CaseConverter, URLMatcher, is_country_tag
+
+# TODO: move to data.json
+SUPPORTED_TAGS = [
+ 'gaming', 'coding', 'photo', 'music', 'blog', 'finance', 'freelance', 'dating',
+ 'tech', 'forum', 'porn', 'erotic', 'webcam', 'video', 'movies', 'hacking', 'art',
+ 'discussion', 'sharing', 'writing', 'wiki', 'business', 'shopping', 'sport',
+ 'books', 'news', 'documents', 'travel', 'maps', 'hobby', 'apps', 'classified',
+ 'career', 'geosocial', 'streaming', 'education', 'networking', 'torrent',
+]
class MaigretEngine:
@@ -329,6 +338,7 @@ class MaigretDatabase:
disabled_count = 0
total_count = len(sites_dict)
urls = {}
+ tags = {}
for _, site in sites_dict.items():
if site.disabled:
@@ -345,11 +355,26 @@ class MaigretDatabase:
urls[url] = urls.get(url, 0) + 1
+ if not site.tags:
+ tags['NO_TAGS'] = tags.get('NO_TAGS', 0) + 1
+
+ for tag in site.tags:
+ if is_country_tag(tag):
# currently do not display country tags
+ continue
+ tags[tag] = tags.get(tag, 0) + 1
+
output += f'Enabled/total sites: {total_count-disabled_count}/{total_count}\n'
output += 'Top sites\' profile URLs:\n'
for url, count in sorted(urls.items(), key=lambda x: x[1], reverse=True)[:20]:
if count == 1:
break
output += f'{count}\t{url}\n'
+ output += 'Top sites\' tags:\n'
+ for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True):
+ mark = ''
+ if not tag in SUPPORTED_TAGS:
+ mark = ' (non-standard)'
+ output += f'{count}\t{tag}{mark}\n'
return output
\ No newline at end of file