Tags updated, added tests for tags

Added several sites
Updated socid_extractor version to avoid bug #150
This commit is contained in:
Soxoj
2021-05-15 14:51:30 +03:00
parent aa6cd0eca9
commit c9219d91ec
5 changed files with 100 additions and 15 deletions
+79 -10
View File
@@ -13042,7 +13042,7 @@
"us" "us"
], ],
"headers": { "headers": {
"authorization": "Bearer BQCOP-4T9UEvQLhkgFJ7d8Fyt65Nofw1U0l-pJaUHf0AbhOJv3pPtL6tOZtvtANYeoc4aXeMBhxTEoGk7eo" "authorization": "Bearer BQA1fXOw4_oZPkyh94NHZm4Fwtwb5CJgmaep1bpLJ-Jvrrgi1-FSEsYwih7SyDAItMVDiSZVvn5pq7XTHuc"
}, },
"errors": { "errors": {
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn" "Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
@@ -14470,7 +14470,7 @@
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"", "sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA", "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36", "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
"x-guest-token": "1392232718130466822" "x-guest-token": "1393520076536549382"
}, },
"errors": { "errors": {
"Bad guest token": "x-guest-token update required" "Bad guest token": "x-guest-token update required"
@@ -14877,7 +14877,7 @@
"video" "video"
], ],
"headers": { "headers": {
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjA3NzAwNDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.ehmje1TJRvUpW-_wseK5uXNhHykq2jHHh1LBCGFGLyQ" "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjEwNzcwMDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.pM-K99b6xokwvi8NBFN3ZdG9jjS7vf54DVxv8yKDJEY"
}, },
"activation": { "activation": {
"url": "https://vimeo.com/_rv/viewer", "url": "https://vimeo.com/_rv/viewer",
@@ -25270,7 +25270,7 @@
"usernameClaimed": "blue", "usernameClaimed": "blue",
"usernameUnclaimed": "noonewouldeverusethis7", "usernameUnclaimed": "noonewouldeverusethis7",
"tags": [ "tags": [
"ccbghdifbdchjinnlfejcitcuelgfncjnieuvccbcdut" "networking"
] ]
}, },
"wblitz.net": { "wblitz.net": {
@@ -25470,7 +25470,7 @@
"usernameClaimed": "ekostyle", "usernameClaimed": "ekostyle",
"usernameUnclaimed": "noonewouldeverusethis7", "usernameUnclaimed": "noonewouldeverusethis7",
"tags": [ "tags": [
"ccbghdifbdchubbbbietggvjbujdlujekugftcgfdufj" "blog"
] ]
}, },
"www.kinokopilka.pro": { "www.kinokopilka.pro": {
@@ -27367,7 +27367,7 @@
"checkType": "message", "checkType": "message",
"alexaRank": 7237, "alexaRank": 7237,
"tags": [ "tags": [
"bussiness" "business"
] ]
}, },
"nelubit.ru": { "nelubit.ru": {
@@ -27554,7 +27554,7 @@
"checkType": "message", "checkType": "message",
"alexaRank": 461, "alexaRank": 461,
"tags": [ "tags": [
" stock", "stock",
"photo" "photo"
] ]
}, },
@@ -27573,7 +27573,7 @@
"checkType": "message", "checkType": "message",
"alexaRank": 9936, "alexaRank": 9936,
"tags": [ "tags": [
" stock", "stock",
"photo" "photo"
] ]
}, },
@@ -27584,7 +27584,7 @@
"usernameUnclaimed": "noonewouldeverusethis7", "usernameUnclaimed": "noonewouldeverusethis7",
"alexaRank": 83900, "alexaRank": 83900,
"tags": [ "tags": [
" photo", "photo",
"blog" "blog"
] ]
}, },
@@ -27822,9 +27822,78 @@
"checkType": "message", "checkType": "message",
"alexaRank": 39894, "alexaRank": 39894,
"tags": [ "tags": [
" networking", "networking",
"fashion" "fashion"
] ]
},
"99designs.com": {
"absenceStrs": [
"mobile-only"
],
"presenseStrs": [
"profileUrl"
],
"url": "https://99designs.com/profiles/{username}",
"urlMain": "https://99designs.com",
"usernameClaimed": "t6s",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message",
"alexaRank": 4149,
"tags": [
"design",
"photo"
]
},
"Expono": {
"absenceStrs": [
"404 - Page not found<"
],
"presenseStrs": [
"page-user-badge"
],
"url": "http://www.expono.com/{username}",
"urlMain": "http://www.expono.com",
"usernameClaimed": "snila",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message",
"alexaRank": 155759,
"tags": [
"photo"
]
},
"picturepush.com": {
"absenceStrs": [
".stage img"
],
"presenseStrs": [
"loginname"
],
"url": "https://{username}.picturepush.com/",
"urlMain": "https://picturepush.com",
"usernameClaimed": "yoskark",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message",
"alexaRank": 107053,
"tags": [
"photo"
]
},
"Purephoto": {
"absenceStrs": [
"Not found <span"
],
"presenseStrs": [
"profile_sidebar"
],
"url": "https://www.purephoto.com/{username}",
"urlMain": "https://www.purephoto.com",
"usernameClaimed": "garretsuhrie",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message",
"alexaRank": 936968,
"tags": [
"photo"
]
} }
}, },
"engines": { "engines": {
+2 -1
View File
@@ -61,9 +61,10 @@ SUPPORTED_TAGS = [
"military", "military",
"auto", "auto",
"gambling", "gambling",
"business",
"cybercriminal", "cybercriminal",
"review", "review",
"bookmarks",
"design",
] ]
+3 -3
View File
@@ -255,7 +255,7 @@ async def check_features_manually(
features = input("If features was not detected correctly, write it manually: ") features = input("If features was not detected correctly, write it manually: ")
if features: if features:
presence_list = features.split(",") presence_list = list(map(str.strip, features.split(",")))
absence_list = sorted(b_minus_a, key=get_match_ratio, reverse=True)[ absence_list = sorted(b_minus_a, key=get_match_ratio, reverse=True)[
:top_features_count :top_features_count
@@ -264,7 +264,7 @@ async def check_features_manually(
features = input("If features was not detected correctly, write it manually: ") features = input("If features was not detected correctly, write it manually: ")
if features: if features:
absence_list = features.split(",") absence_list = list(map(str.strip, features.split(",")))
site_data = { site_data = {
"absenceStrs": absence_list, "absenceStrs": absence_list,
@@ -355,7 +355,7 @@ async def submit_dialog(db, url_exists, cookie_file, logger):
return False return False
chosen_site.name = input("Change site name if you want: ") or chosen_site.name chosen_site.name = input("Change site name if you want: ") or chosen_site.name
chosen_site.tags = input("Site tags: ").split(',') chosen_site.tags = list(map(str.strip, input("Site tags: ").split(',')))
rank = get_alexa_rank(chosen_site.url_main) rank = get_alexa_rank(chosen_site.url_main)
if rank: if rank:
print(f'New alexa rank: {rank}') print(f'New alexa rank: {rank}')
+1 -1
View File
@@ -26,7 +26,7 @@ python-socks==1.1.2
requests>=2.24.0 requests>=2.24.0
requests-futures==1.0.0 requests-futures==1.0.0
six==1.15.0 six==1.15.0
socid-extractor>=0.0.19 socid-extractor>=0.0.20
soupsieve==2.1 soupsieve==2.1
stem==1.8.0 stem==1.8.0
torrequest==0.1.0 torrequest==0.1.0
+15
View File
@@ -0,0 +1,15 @@
"""Maigret data test functions"""
from maigret.utils import is_country_tag
from maigret.sites import SUPPORTED_TAGS
def test_tags_validity(default_db):
    """Check that every non-country tag used by a site is a supported tag.

    Walks all sites in ``default_db``, ignores tags for which
    ``is_country_tag`` returns a truthy value, and gathers the remaining
    tags that are missing from ``SUPPORTED_TAGS``. The test passes only
    when that collection is empty.
    """
    unsupported = {
        tag
        for site in default_db.sites
        for tag in site.tags
        if not is_country_tag(tag) and tag not in SUPPORTED_TAGS
    }

    # Comparing against an empty set makes pytest print the offending tags.
    assert unsupported == set()