From 86d51bced00d721c3b4d95fb255ab7bc74a70bc3 Mon Sep 17 00:00:00 2001 From: Soxoj <31013580+soxoj@users.noreply.github.com> Date: Mon, 25 Nov 2024 14:41:34 +0100 Subject: [PATCH] Added 7 sites, implemented integration with Marple, docs update (#1881) * Added 5 sites, implemented integration with Marple * Added 2 more sites, updated docs * Updated sites list --- README.md | 6 +-- docs/source/development.rst | 4 ++ maigret/resources/data.json | 101 ++++++++++++++++++++++++++++++++++++ maigret/submit.py | 65 +++++++++++++++++++++++ sites.md | 27 ++++++---- 5 files changed, 189 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 25db58e..feceea3 100644 --- a/README.md +++ b/README.md @@ -119,11 +119,9 @@ Use `maigret --help` to get full options description. Also options [are document ## Contributing -Contribution guidelines can be found [here](CONTRIBUTING) - Maigret has open-source code, so you may contribute your own sites by adding them to `data.json` file, or bring changes to it's code! -If you want to contribute, don't forget to activate statistics update hook, command for it would look like this: `git config --local core.hooksPath .githooks/` -You should make your git commits from your maigret git repo folder, or else the hook wouldn't find the statistics update script. + +For more information about development and contribution, please read the [development documentation](https://maigret.readthedocs.io/en/latest/development.html). 
## Demo with page parsing and recursive username search diff --git a/docs/source/development.rst b/docs/source/development.rst index 618e678..cffcfe1 100644 --- a/docs/source/development.rst +++ b/docs/source/development.rst @@ -58,6 +58,10 @@ Use the following commands to check Maigret: How to fix false-positives ----------------------------------------------- +If you want to work with the sites database, don't forget to activate the statistics update git hook. The command for it looks like this: ``git config --local core.hooksPath .githooks/``. + +Make your git commits from within your maigret git repo folder; otherwise the hook won't find the statistics update script. + 1. Determine the problematic site. If you already know which site has a false-positive and want to fix it specifically, go to the next step. diff --git a/maigret/resources/data.json b/maigret/resources/data.json index ebaf348..684fd51 100644 --- a/maigret/resources/data.json +++ b/maigret/resources/data.json @@ -34808,6 +34808,107 @@ "tags": [ "crypto" ] + }, + "sst.hiberworld.com": { + "checkType": "message", + "absenceStrs": [ + "User not found" + ], + "presenceStrs": [ + "email", + "birthdate", + "role", + "Profile Image", + "User" + ], + "url": "https://sst.hiberworld.com/user/{username}", + "urlMain": "https://sst.hiberworld.com/user/{username}", + "usernameClaimed": "pixelpwnz", + "usernameUnclaimed": "foxefwvigz" + }, + "DeepDreamGenerator": { + "checkType": "message", + "absenceStrs": [ + "Page not found" + ], + "presenseStrs": [ + "user-name", + "profile-cover", + "user-info" + ], + "url": "https://deepdreamgenerator.com/u/{username}", + "urlMain": "https://deepdreamgenerator.com", + "usernameClaimed": "sparkles99", + "usernameUnclaimed": "lyazybfqoh" + }, + "PeriscopeTv": { + "checkType": "message", + "absenceStrs": [ + "error-fill" + ], + "presenseStrs": [ + "profile", + "ProfileAuthor", + "ProfileUsername" + ], + "url": "https://www.pscp.tv/{username}", + "urlMain": 
async def add_site(self, site):
    """Interactively verify and tune a candidate site, then save it.

    The site is first run through ``self.site_self_check``. If that check
    reports the site as disabled, nothing is saved. Otherwise the user can
    edit the site's fields in a menu loop before the site is stored with
    ``self.db.update_site``.

    :param site: site object to add; the attributes listed in the edit menu
        are read with ``getattr`` and updated in place with ``setattr``.
    :return: ``{"valid": True}`` when the site was saved,
        ``{"valid": False, "reason": "checks_failed"}`` when the self-check
        disabled it, or ``{"valid": False, "reason": "manual block"}`` when
        the user rejected it from the menu.
    """
    # Menu keys are strings because they are compared with raw input() text.
    editable_fields = {
        '1': 'name',
        '2': 'tags',
        '3': 'url',
        '4': 'url_main',
        '5': 'username_claimed',
        '6': 'username_unclaimed',
        '7': 'presense_strs',
        '8': 'absence_strs',
    }
    # Fields whose value is entered as a comma-separated list.
    list_fields = ('tags', 'presense_strs', 'absence_strs')

    sem = asyncio.Semaphore(1)
    print(f"{Fore.BLUE}{Style.BRIGHT}[*] Adding site {site.name}, let's check it...{Style.RESET_ALL}")
    print(site.json)

    result = await self.site_self_check(site, sem)
    if result["disabled"]:
        print(
            f"Checks failed for {site.name}, please, verify them manually."
        )
        return {
            "valid": False,
            "reason": "checks_failed",
        }

    while True:
        print("\nAvailable fields to edit:")
        for num, field in editable_fields.items():
            current_value = getattr(site, field)
            print(f"{num}. {field} (current: {current_value})")

        print("0. finish editing")
        print("10. reject and block domain")

        # The prompt lists every accepted option, including 10.
        choice = input("\nSelect field number to edit (0-8, 10): ").strip()

        if choice == '0':
            break

        if choice == '10':
            return {
                "valid": False,
                "reason": "manual block",
            }

        field = editable_fields.get(choice)
        if field is None:
            # Tell the user instead of silently re-printing the menu.
            print(f"Unknown option: {choice!r}")
            continue

        current_value = getattr(site, field)
        new_value = input(f"Enter new value for {field} (current: {current_value}): ").strip()

        if field in list_fields:
            # "".split(',') yields [''], which is truthy; drop empty items so
            # that blank input keeps the current value, as for scalar fields.
            new_value = [item.strip() for item in new_value.split(',') if item.strip()]

        if new_value:
            setattr(site, field, new_value)
            print(f"Updated {field} to: {new_value}")

    self.logger.info(site.json)
    self.db.update_site(site)
    return {
        "valid": True,
    }
![](https://www.google.com/s2/favicons?domain=https://promptbase.com) [PromptBase (https://promptbase.com)](https://promptbase.com)*: top 100M, ai* 1. ![](https://www.google.com/s2/favicons?domain=https://ngl.link) [ngl.link (https://ngl.link)](https://ngl.link)*: top 100M, q&a* 1. ![](https://www.google.com/s2/favicons?domain=https://bitpapa.com) [bitpapa.com (https://bitpapa.com)](https://bitpapa.com)*: top 100M, crypto* +1. ![](https://www.google.com/s2/favicons?domain=https://sst.hiberworld.com/user/{username}) [sst.hiberworld.com (https://sst.hiberworld.com/user/{username})](https://sst.hiberworld.com/user/{username})*: top 100M* +1. ![](https://www.google.com/s2/favicons?domain=https://deepdreamgenerator.com) [DeepDreamGenerator (https://deepdreamgenerator.com)](https://deepdreamgenerator.com)*: top 100M* +1. ![](https://www.google.com/s2/favicons?domain=https://www.pscp.tv) [PeriscopeTv (https://www.pscp.tv)](https://www.pscp.tv)*: top 100M* +1. ![](https://www.google.com/s2/favicons?domain=https://fanscout.com) [fanscout.com (https://fanscout.com)](https://fanscout.com)*: top 100M* +1. ![](https://www.google.com/s2/favicons?domain=https://app.samsungfood.com) [app.samsungfood.com (https://app.samsungfood.com)](https://app.samsungfood.com)*: top 100M* +1. ![](https://www.google.com/s2/favicons?domain=https://www.dimensional.me) [DimensionalMe (https://www.dimensional.me)](https://www.dimensional.me)*: top 100M* +1. 
![](https://www.google.com/s2/favicons?domain=https://www.portal-pisarski.pl) [www.portal-pisarski.pl (https://www.portal-pisarski.pl)](https://www.portal-pisarski.pl)*: top 100M* -The list was updated at (2024-11-24 03:58:29.843092+00:00 UTC) +The list was updated at (2024-11-24 20:52:43.081221+00:00 UTC) ## Statistics -Enabled/total sites: 2671/3103 = 86.08% +Enabled/total sites: 2678/3110 = 86.11% -Incomplete message checks: 404/2671 = 15.13% (false positive risks) +Incomplete message checks: 405/2678 = 15.12% (false positive risks) -Status code checks: 720/2671 = 26.96% (false positive risks) +Status code checks: 720/2678 = 26.89% (false positive risks) -False positive risk (total): 42.09% +False positive risk (total): 42.01% Top 20 profile URLs: - (796) `{urlMain}/index/8-0-{username} (uCoz)` -- (296) `/{username}` +- (299) `/{username}` - (221) `{urlMain}{urlSubpath}/members/?username={username} (XenForo)` -- (158) `/user/{username}` +- (159) `/user/{username}` - (133) `{urlMain}{urlSubpath}/member.php?username={username} (vBulletin)` - (127) `{urlMain}{urlSubpath}/search.php?author={username} (phpBB/Search)` - (117) `/profile/{username}` -- (108) `/u/{username}` +- (110) `/u/{username}` - (88) `/users/{username}` - (87) `{urlMain}/u/{username}/summary (Discourse)` - (54) `/wiki/User:{username}` @@ -3141,7 +3148,7 @@ Top 20 profile URLs: - (17) `/search.php?keywords=&terms=all&author={username}` Top 20 tags: -- (326) `NO_TAGS` (non-standard) +- (327) `NO_TAGS` (non-standard) - (307) `forum` - (50) `gaming` - (26) `coding`