mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-07 06:24:35 +00:00
Merge pull request #151 from soxoj/tags-socid-extractor
Tags updated, added tests for tags
This commit is contained in:
+79
-10
@@ -13042,7 +13042,7 @@
|
||||
"us"
|
||||
],
|
||||
"headers": {
|
||||
"authorization": "Bearer BQCOP-4T9UEvQLhkgFJ7d8Fyt65Nofw1U0l-pJaUHf0AbhOJv3pPtL6tOZtvtANYeoc4aXeMBhxTEoGk7eo"
|
||||
"authorization": "Bearer BQA1fXOw4_oZPkyh94NHZm4Fwtwb5CJgmaep1bpLJ-Jvrrgi1-FSEsYwih7SyDAItMVDiSZVvn5pq7XTHuc"
|
||||
},
|
||||
"errors": {
|
||||
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
|
||||
@@ -14470,7 +14470,7 @@
|
||||
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
|
||||
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
|
||||
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
|
||||
"x-guest-token": "1392232718130466822"
|
||||
"x-guest-token": "1393520076536549382"
|
||||
},
|
||||
"errors": {
|
||||
"Bad guest token": "x-guest-token update required"
|
||||
@@ -14877,7 +14877,7 @@
|
||||
"video"
|
||||
],
|
||||
"headers": {
|
||||
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjA3NzAwNDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.ehmje1TJRvUpW-_wseK5uXNhHykq2jHHh1LBCGFGLyQ"
|
||||
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjEwNzcwMDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.pM-K99b6xokwvi8NBFN3ZdG9jjS7vf54DVxv8yKDJEY"
|
||||
},
|
||||
"activation": {
|
||||
"url": "https://vimeo.com/_rv/viewer",
|
||||
@@ -25270,7 +25270,7 @@
|
||||
"usernameClaimed": "blue",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"tags": [
|
||||
"ccbghdifbdchjinnlfejcitcuelgfncjnieuvccbcdut"
|
||||
"networking"
|
||||
]
|
||||
},
|
||||
"wblitz.net": {
|
||||
@@ -25470,7 +25470,7 @@
|
||||
"usernameClaimed": "ekostyle",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"tags": [
|
||||
"ccbghdifbdchubbbbietggvjbujdlujekugftcgfdufj"
|
||||
"blog"
|
||||
]
|
||||
},
|
||||
"www.kinokopilka.pro": {
|
||||
@@ -27367,7 +27367,7 @@
|
||||
"checkType": "message",
|
||||
"alexaRank": 7237,
|
||||
"tags": [
|
||||
"bussiness"
|
||||
"business"
|
||||
]
|
||||
},
|
||||
"nelubit.ru": {
|
||||
@@ -27554,7 +27554,7 @@
|
||||
"checkType": "message",
|
||||
"alexaRank": 461,
|
||||
"tags": [
|
||||
" stock",
|
||||
"stock",
|
||||
"photo"
|
||||
]
|
||||
},
|
||||
@@ -27573,7 +27573,7 @@
|
||||
"checkType": "message",
|
||||
"alexaRank": 9936,
|
||||
"tags": [
|
||||
" stock",
|
||||
"stock",
|
||||
"photo"
|
||||
]
|
||||
},
|
||||
@@ -27584,7 +27584,7 @@
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"alexaRank": 83900,
|
||||
"tags": [
|
||||
" photo",
|
||||
"photo",
|
||||
"blog"
|
||||
]
|
||||
},
|
||||
@@ -27822,9 +27822,78 @@
|
||||
"checkType": "message",
|
||||
"alexaRank": 39894,
|
||||
"tags": [
|
||||
" networking",
|
||||
"networking",
|
||||
"fashion"
|
||||
]
|
||||
},
|
||||
"99designs.com": {
|
||||
"absenceStrs": [
|
||||
"mobile-only"
|
||||
],
|
||||
"presenseStrs": [
|
||||
"profileUrl"
|
||||
],
|
||||
"url": "https://99designs.com/profiles/{username}",
|
||||
"urlMain": "https://99designs.com",
|
||||
"usernameClaimed": "t6s",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"checkType": "message",
|
||||
"alexaRank": 4149,
|
||||
"tags": [
|
||||
"design",
|
||||
"photo"
|
||||
]
|
||||
},
|
||||
"Expono": {
|
||||
"absenceStrs": [
|
||||
"404 - Page not found<"
|
||||
],
|
||||
"presenseStrs": [
|
||||
"page-user-badge"
|
||||
],
|
||||
"url": "http://www.expono.com/{username}",
|
||||
"urlMain": "http://www.expono.com",
|
||||
"usernameClaimed": "snila",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"checkType": "message",
|
||||
"alexaRank": 155759,
|
||||
"tags": [
|
||||
"photo"
|
||||
]
|
||||
},
|
||||
"picturepush.com": {
|
||||
"absenceStrs": [
|
||||
".stage img"
|
||||
],
|
||||
"presenseStrs": [
|
||||
"loginname"
|
||||
],
|
||||
"url": "https://{username}.picturepush.com/",
|
||||
"urlMain": "https://picturepush.com",
|
||||
"usernameClaimed": "yoskark",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"checkType": "message",
|
||||
"alexaRank": 107053,
|
||||
"tags": [
|
||||
"photo"
|
||||
]
|
||||
},
|
||||
"Purephoto": {
|
||||
"absenceStrs": [
|
||||
"Not found <span"
|
||||
],
|
||||
"presenseStrs": [
|
||||
"profile_sidebar"
|
||||
],
|
||||
"url": "https://www.purephoto.com/{username}",
|
||||
"urlMain": "https://www.purephoto.com",
|
||||
"usernameClaimed": "garretsuhrie",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"checkType": "message",
|
||||
"alexaRank": 936968,
|
||||
"tags": [
|
||||
"photo"
|
||||
]
|
||||
}
|
||||
},
|
||||
"engines": {
|
||||
|
||||
+2
-1
@@ -61,9 +61,10 @@ SUPPORTED_TAGS = [
|
||||
"military",
|
||||
"auto",
|
||||
"gambling",
|
||||
"business",
|
||||
"cybercriminal",
|
||||
"review",
|
||||
"bookmarks",
|
||||
"design",
|
||||
]
|
||||
|
||||
|
||||
|
||||
+3
-3
@@ -255,7 +255,7 @@ async def check_features_manually(
|
||||
features = input("If features was not detected correctly, write it manually: ")
|
||||
|
||||
if features:
|
||||
presence_list = features.split(",")
|
||||
presence_list = list(map(str.strip, features.split(",")))
|
||||
|
||||
absence_list = sorted(b_minus_a, key=get_match_ratio, reverse=True)[
|
||||
:top_features_count
|
||||
@@ -264,7 +264,7 @@ async def check_features_manually(
|
||||
features = input("If features was not detected correctly, write it manually: ")
|
||||
|
||||
if features:
|
||||
absence_list = features.split(",")
|
||||
absence_list = list(map(str.strip, features.split(",")))
|
||||
|
||||
site_data = {
|
||||
"absenceStrs": absence_list,
|
||||
@@ -355,7 +355,7 @@ async def submit_dialog(db, url_exists, cookie_file, logger):
|
||||
return False
|
||||
|
||||
chosen_site.name = input("Change site name if you want: ") or chosen_site.name
|
||||
chosen_site.tags = input("Site tags: ").split(',')
|
||||
chosen_site.tags = list(map(str.strip, input("Site tags: ").split(',')))
|
||||
rank = get_alexa_rank(chosen_site.url_main)
|
||||
if rank:
|
||||
print(f'New alexa rank: {rank}')
|
||||
|
||||
+1
-1
@@ -26,7 +26,7 @@ python-socks==1.1.2
|
||||
requests>=2.24.0
|
||||
requests-futures==1.0.0
|
||||
six==1.15.0
|
||||
socid-extractor>=0.0.19
|
||||
socid-extractor>=0.0.20
|
||||
soupsieve==2.1
|
||||
stem==1.8.0
|
||||
torrequest==0.1.0
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
"""Maigret data test functions"""
|
||||
|
||||
from maigret.utils import is_country_tag
|
||||
from maigret.sites import SUPPORTED_TAGS
|
||||
|
||||
|
||||
def test_tags_validity(default_db):
    """Check that every non-country tag in the sites database is supported.

    Walks all sites of the ``default_db`` fixture, ignores tags that
    ``is_country_tag`` recognises, and gathers the remaining tags that are
    missing from ``SUPPORTED_TAGS``. The test passes only when nothing was
    gathered; comparing against an empty set makes the offending tags show
    up in the assertion failure output.
    """
    unsupported = set()

    for site in default_db.sites:
        # Country tags are validated separately, so drop them first.
        regular_tags = (tag for tag in site.tags if not is_country_tag(tag))
        unsupported.update(
            tag for tag in regular_tags if tag not in SUPPORTED_TAGS
        )

    assert unsupported == set()
|
||||
Reference in New Issue
Block a user