mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-06 14:08:59 +00:00
Added some scripts (#355)
This commit is contained in:
@@ -0,0 +1,36 @@
|
||||
import sys
|
||||
import difflib
|
||||
import requests
|
||||
|
||||
|
||||
# Both page URLs are required positional arguments; exit with a usage hint
# instead of an IndexError traceback when one of them is missing.
if len(sys.argv) < 3:
    sys.exit(f"usage: {sys.argv[0]} URL_A URL_B")

# requests applies no timeout by default, so an unresponsive host would
# otherwise block the script forever.
REQUEST_TIMEOUT = 30  # seconds

# Response bodies (decoded text) of the two pages being compared.
a = requests.get(sys.argv[1], timeout=REQUEST_TIMEOUT).text
b = requests.get(sys.argv[2], timeout=REQUEST_TIMEOUT).text
|
||||
|
||||
|
||||
# Cut each page at every double quote and keep the distinct fragments.
tokens_a = set(a.split('"'))
tokens_b = set(b.split('"'))

# Fragments that appear in exactly one of the two pages.
a_minus_b = tokens_a - tokens_b
b_minus_a = tokens_b - tokens_a

# Print both one-sided sets first, then the size of each.
for one_sided in (a_minus_b, b_minus_a):
    print(one_sided)

for one_sided in (a_minus_b, b_minus_a):
    print(len(one_sided))
|
||||
|
||||
# Lowercase keywords (English and Russian) that page fragments are scored
# against by get_match_ratio().
desired_strings = [
    "username", "not found", "пользователь", "profile", "lastname",
    "firstname", "biography", "birthday", "репутация", "информация",
    "e-mail",
]


def get_match_ratio(x: str) -> float:
    """Return the best similarity between *x* and any desired string.

    *x* is lowercased, compared with every entry of ``desired_strings``
    using ``difflib.SequenceMatcher.ratio()``, and the highest ratio is
    returned rounded to two decimal places (0.0 = nothing in common,
    1.0 = identical to one of the keywords).

    Only *x* is lowercased; the keywords are compared exactly as written.
    """
    # Lowercase once instead of once per keyword.
    lowered = x.lower()
    best = max(
        difflib.SequenceMatcher(a=lowered, b=y).ratio()
        for y in desired_strings
    )
    return round(best, 2)
|
||||
|
||||
|
||||
# NOTE(review): RATIO is not referenced by any code visible here; presumably
# it was meant as a cut-off for the match ratio. Confirm before relying on it.
RATIO = 0.6

# For each one-sided fragment set, show the ten fragments that score highest
# against the desired strings (best match first).
for fragments in (a_minus_b, b_minus_a):
    ranked = sorted(fragments, key=get_match_ratio, reverse=True)
    print(ranked[:10])
|
||||
Reference in New Issue
Block a user