mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-07 06:24:35 +00:00
Tags updated, added tests for tags
Added several sites. Updated socid_extractor version to avoid bug #150.
This commit is contained in:
+79
-10
@@ -13042,7 +13042,7 @@
|
|||||||
"us"
|
"us"
|
||||||
],
|
],
|
||||||
"headers": {
|
"headers": {
|
||||||
"authorization": "Bearer BQCOP-4T9UEvQLhkgFJ7d8Fyt65Nofw1U0l-pJaUHf0AbhOJv3pPtL6tOZtvtANYeoc4aXeMBhxTEoGk7eo"
|
"authorization": "Bearer BQA1fXOw4_oZPkyh94NHZm4Fwtwb5CJgmaep1bpLJ-Jvrrgi1-FSEsYwih7SyDAItMVDiSZVvn5pq7XTHuc"
|
||||||
},
|
},
|
||||||
"errors": {
|
"errors": {
|
||||||
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
|
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
|
||||||
@@ -14470,7 +14470,7 @@
|
|||||||
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
|
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
|
||||||
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
|
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
|
||||||
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
|
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
|
||||||
"x-guest-token": "1392232718130466822"
|
"x-guest-token": "1393520076536549382"
|
||||||
},
|
},
|
||||||
"errors": {
|
"errors": {
|
||||||
"Bad guest token": "x-guest-token update required"
|
"Bad guest token": "x-guest-token update required"
|
||||||
@@ -14877,7 +14877,7 @@
|
|||||||
"video"
|
"video"
|
||||||
],
|
],
|
||||||
"headers": {
|
"headers": {
|
||||||
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjA3NzAwNDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.ehmje1TJRvUpW-_wseK5uXNhHykq2jHHh1LBCGFGLyQ"
|
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjEwNzcwMDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.pM-K99b6xokwvi8NBFN3ZdG9jjS7vf54DVxv8yKDJEY"
|
||||||
},
|
},
|
||||||
"activation": {
|
"activation": {
|
||||||
"url": "https://vimeo.com/_rv/viewer",
|
"url": "https://vimeo.com/_rv/viewer",
|
||||||
@@ -25270,7 +25270,7 @@
|
|||||||
"usernameClaimed": "blue",
|
"usernameClaimed": "blue",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
"tags": [
|
"tags": [
|
||||||
"ccbghdifbdchjinnlfejcitcuelgfncjnieuvccbcdut"
|
"networking"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"wblitz.net": {
|
"wblitz.net": {
|
||||||
@@ -25470,7 +25470,7 @@
|
|||||||
"usernameClaimed": "ekostyle",
|
"usernameClaimed": "ekostyle",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
"tags": [
|
"tags": [
|
||||||
"ccbghdifbdchubbbbietggvjbujdlujekugftcgfdufj"
|
"blog"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"www.kinokopilka.pro": {
|
"www.kinokopilka.pro": {
|
||||||
@@ -27367,7 +27367,7 @@
|
|||||||
"checkType": "message",
|
"checkType": "message",
|
||||||
"alexaRank": 7237,
|
"alexaRank": 7237,
|
||||||
"tags": [
|
"tags": [
|
||||||
"bussiness"
|
"business"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"nelubit.ru": {
|
"nelubit.ru": {
|
||||||
@@ -27554,7 +27554,7 @@
|
|||||||
"checkType": "message",
|
"checkType": "message",
|
||||||
"alexaRank": 461,
|
"alexaRank": 461,
|
||||||
"tags": [
|
"tags": [
|
||||||
" stock",
|
"stock",
|
||||||
"photo"
|
"photo"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -27573,7 +27573,7 @@
|
|||||||
"checkType": "message",
|
"checkType": "message",
|
||||||
"alexaRank": 9936,
|
"alexaRank": 9936,
|
||||||
"tags": [
|
"tags": [
|
||||||
" stock",
|
"stock",
|
||||||
"photo"
|
"photo"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -27584,7 +27584,7 @@
|
|||||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
"alexaRank": 83900,
|
"alexaRank": 83900,
|
||||||
"tags": [
|
"tags": [
|
||||||
" photo",
|
"photo",
|
||||||
"blog"
|
"blog"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -27822,9 +27822,78 @@
|
|||||||
"checkType": "message",
|
"checkType": "message",
|
||||||
"alexaRank": 39894,
|
"alexaRank": 39894,
|
||||||
"tags": [
|
"tags": [
|
||||||
" networking",
|
"networking",
|
||||||
"fashion"
|
"fashion"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
"99designs.com": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"mobile-only"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"profileUrl"
|
||||||
|
],
|
||||||
|
"url": "https://99designs.com/profiles/{username}",
|
||||||
|
"urlMain": "https://99designs.com",
|
||||||
|
"usernameClaimed": "t6s",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"alexaRank": 4149,
|
||||||
|
"tags": [
|
||||||
|
"design",
|
||||||
|
"photo"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Expono": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"404 - Page not found<"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"page-user-badge"
|
||||||
|
],
|
||||||
|
"url": "http://www.expono.com/{username}",
|
||||||
|
"urlMain": "http://www.expono.com",
|
||||||
|
"usernameClaimed": "snila",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"alexaRank": 155759,
|
||||||
|
"tags": [
|
||||||
|
"photo"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"picturepush.com": {
|
||||||
|
"absenceStrs": [
|
||||||
|
".stage img"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"loginname"
|
||||||
|
],
|
||||||
|
"url": "https://{username}.picturepush.com/",
|
||||||
|
"urlMain": "https://picturepush.com",
|
||||||
|
"usernameClaimed": "yoskark",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"alexaRank": 107053,
|
||||||
|
"tags": [
|
||||||
|
"photo"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Purephoto": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"Not found <span"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"profile_sidebar"
|
||||||
|
],
|
||||||
|
"url": "https://www.purephoto.com/{username}",
|
||||||
|
"urlMain": "https://www.purephoto.com",
|
||||||
|
"usernameClaimed": "garretsuhrie",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"alexaRank": 936968,
|
||||||
|
"tags": [
|
||||||
|
"photo"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
|
|||||||
+2
-1
@@ -61,9 +61,10 @@ SUPPORTED_TAGS = [
|
|||||||
"military",
|
"military",
|
||||||
"auto",
|
"auto",
|
||||||
"gambling",
|
"gambling",
|
||||||
"business",
|
|
||||||
"cybercriminal",
|
"cybercriminal",
|
||||||
"review",
|
"review",
|
||||||
|
"bookmarks",
|
||||||
|
"design",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
+3
-3
@@ -255,7 +255,7 @@ async def check_features_manually(
|
|||||||
features = input("If features was not detected correctly, write it manually: ")
|
features = input("If features was not detected correctly, write it manually: ")
|
||||||
|
|
||||||
if features:
|
if features:
|
||||||
presence_list = features.split(",")
|
presence_list = list(map(str.strip, features.split(",")))
|
||||||
|
|
||||||
absence_list = sorted(b_minus_a, key=get_match_ratio, reverse=True)[
|
absence_list = sorted(b_minus_a, key=get_match_ratio, reverse=True)[
|
||||||
:top_features_count
|
:top_features_count
|
||||||
@@ -264,7 +264,7 @@ async def check_features_manually(
|
|||||||
features = input("If features was not detected correctly, write it manually: ")
|
features = input("If features was not detected correctly, write it manually: ")
|
||||||
|
|
||||||
if features:
|
if features:
|
||||||
absence_list = features.split(",")
|
absence_list = list(map(str.strip, features.split(",")))
|
||||||
|
|
||||||
site_data = {
|
site_data = {
|
||||||
"absenceStrs": absence_list,
|
"absenceStrs": absence_list,
|
||||||
@@ -355,7 +355,7 @@ async def submit_dialog(db, url_exists, cookie_file, logger):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
chosen_site.name = input("Change site name if you want: ") or chosen_site.name
|
chosen_site.name = input("Change site name if you want: ") or chosen_site.name
|
||||||
chosen_site.tags = input("Site tags: ").split(',')
|
chosen_site.tags = list(map(str.strip, input("Site tags: ").split(',')))
|
||||||
rank = get_alexa_rank(chosen_site.url_main)
|
rank = get_alexa_rank(chosen_site.url_main)
|
||||||
if rank:
|
if rank:
|
||||||
print(f'New alexa rank: {rank}')
|
print(f'New alexa rank: {rank}')
|
||||||
|
|||||||
+1
-1
@@ -26,7 +26,7 @@ python-socks==1.1.2
|
|||||||
requests>=2.24.0
|
requests>=2.24.0
|
||||||
requests-futures==1.0.0
|
requests-futures==1.0.0
|
||||||
six==1.15.0
|
six==1.15.0
|
||||||
socid-extractor>=0.0.19
|
socid-extractor>=0.0.20
|
||||||
soupsieve==2.1
|
soupsieve==2.1
|
||||||
stem==1.8.0
|
stem==1.8.0
|
||||||
torrequest==0.1.0
|
torrequest==0.1.0
|
||||||
|
|||||||
@@ -0,0 +1,15 @@
|
|||||||
|
"""Maigret data test functions"""
|
||||||
|
|
||||||
|
from maigret.utils import is_country_tag
|
||||||
|
from maigret.sites import SUPPORTED_TAGS
|
||||||
|
|
||||||
|
|
||||||
|
def test_tags_validity(default_db):
    """Verify that the sites database uses only supported tags.

    Walks the tags of every site in ``default_db.sites``; tags for which
    ``is_country_tag`` returns a true value are skipped, and every remaining
    tag must be listed in ``SUPPORTED_TAGS``.
    """
    unsupported = {
        tag
        for site in default_db.sites
        for tag in site.tags
        if not is_country_tag(tag) and tag not in SUPPORTED_TAGS
    }

    # An empty set means no site carries an unknown tag.
    assert unsupported == set()
|
||||||
Reference in New Issue
Block a user