mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-06 22:19:01 +00:00
36 lines
818 B
Python
36 lines
818 B
Python
import sys
|
|
import difflib
|
|
import requests
|
|
|
|
|
|
# Usage: <script> URL_A URL_B
# Downloads both pages and reports which double-quote-delimited tokens
# occur in only one of them.
if len(sys.argv) < 3:
    sys.exit("usage: {} URL_A URL_B".format(sys.argv[0]))

# Without an explicit timeout, requests can block forever on a server that
# never answers.
REQUEST_TIMEOUT = 30  # seconds

# NOTE: HTTP status codes are not checked; the response body is diffed
# whatever the status is.
a = requests.get(sys.argv[1], timeout=REQUEST_TIMEOUT).text
b = requests.get(sys.argv[2], timeout=REQUEST_TIMEOUT).text

# Splitting on '"' turns every quoted fragment (and the text between quoted
# fragments) into a separate token; sets drop duplicates.
tokens_a = set(a.split('"'))
tokens_b = set(b.split('"'))

# Tokens unique to each page.
a_minus_b = tokens_a.difference(tokens_b)
b_minus_a = tokens_b.difference(tokens_a)

print(a_minus_b)
print(b_minus_a)

print(len(a_minus_b))
print(len(b_minus_a))

# Lower-case keywords that get_match_ratio() scores tokens against.
desired_strings = [
    "username",
    "not found",
    "пользователь",
    "profile",
    "lastname",
    "firstname",
    "biography",
    "birthday",
    "репутация",
    "информация",
    "e-mail",
]
|
|
|
|
|
|
def get_match_ratio(x, candidates=None):
    """Return the best similarity score between ``x`` and a set of keywords.

    ``x`` is lower-cased and compared against every keyword with
    ``difflib.SequenceMatcher``; the highest ``ratio()`` is returned,
    rounded to two decimal places.

    Args:
        x: The string (token) to score.
        candidates: Optional iterable of keywords to compare against.
            When omitted, the module-level ``desired_strings`` list is
            used. Keywords are compared as given (they are not
            lower-cased).

    Returns:
        A float in ``[0.0, 1.0]``. ``0.0`` is returned when there are no
        keywords to compare against.
    """
    if candidates is None:
        candidates = desired_strings

    # Lower-case once rather than once per keyword.
    needle = x.lower()

    # ``default`` keeps an empty keyword list from raising ValueError.
    best = max(
        (difflib.SequenceMatcher(a=needle, b=y).ratio() for y in candidates),
        default=0.0,
    )
    return round(best, 2)
|
|
|
|
|
|
# Not referenced anywhere in the visible script; presumably meant as a
# similarity cutoff -- TODO confirm before relying on it.
RATIO = 0.6

# For each one-sided token set, show the ten tokens that look most like one
# of the keywords in desired_strings (best match first).
for token_diff in (a_minus_b, b_minus_a):
    ranked = sorted(token_diff, key=get_match_ratio, reverse=True)
    print(ranked[:10])