Added 7 sites, implemented integration with Marple, docs update (#1881)

* Added 5 sites, implemented integration with Marple

* Added 2 more sites, updated docs

* Updated sites list
This commit is contained in:
Soxoj
2024-11-25 14:41:34 +01:00
committed by GitHub
parent 54b864f167
commit 86d51bced0
5 changed files with 189 additions and 14 deletions
+2 -4
View File
@@ -119,11 +119,9 @@ Use `maigret --help` to get full options description. Also options [are document
## Contributing ## Contributing
Contribution guidelines can be found [here](CONTRIBUTING)
Maigret has open-source code, so you may contribute your own sites by adding them to the `data.json` file, or bring changes to its code! Maigret has open-source code, so you may contribute your own sites by adding them to the `data.json` file, or bring changes to its code!
If you want to contribute, don't forget to activate statistics update hook, command for it would look like this: `git config --local core.hooksPath .githooks/`
You should make your git commits from your maigret git repo folder, or else the hook wouldn't find the statistics update script. For more information about development and contribution, please read the [development documentation](https://maigret.readthedocs.io/en/latest/development.html).
## Demo with page parsing and recursive username search ## Demo with page parsing and recursive username search
+4
View File
@@ -58,6 +58,10 @@ Use the following commands to check Maigret:
How to fix false-positives How to fix false-positives
----------------------------------------------- -----------------------------------------------
If you want to work with the sites database, don't forget to activate the statistics update git hook with the following command: ``git config --local core.hooksPath .githooks/``.
Make your git commits from inside your Maigret git repository folder; otherwise the hook won't be able to find the statistics update script.
1. Determine the problematic site. 1. Determine the problematic site.
If you already know which site has a false-positive and want to fix it specifically, go to the next step. If you already know which site has a false-positive and want to fix it specifically, go to the next step.
+101
View File
@@ -34808,6 +34808,107 @@
"tags": [ "tags": [
"crypto" "crypto"
] ]
},
"sst.hiberworld.com": {
"checkType": "message",
"absenceStrs": [
"User not found"
],
"presenseStrs": [
"email",
"birthdate",
"role",
"Profile Image",
"User"
],
"url": "https://sst.hiberworld.com/user/{username}",
"urlMain": "https://sst.hiberworld.com",
"usernameClaimed": "pixelpwnz",
"usernameUnclaimed": "foxefwvigz"
},
"DeepDreamGenerator": {
"checkType": "message",
"absenceStrs": [
"Page not found"
],
"presenseStrs": [
"user-name",
"profile-cover",
"user-info"
],
"url": "https://deepdreamgenerator.com/u/{username}",
"urlMain": "https://deepdreamgenerator.com",
"usernameClaimed": "sparkles99",
"usernameUnclaimed": "lyazybfqoh"
},
"PeriscopeTv": {
"checkType": "message",
"absenceStrs": [
"error-fill"
],
"presenseStrs": [
"profile",
"ProfileAuthor",
"ProfileUsername"
],
"url": "https://www.pscp.tv/{username}",
"urlMain": "https://www.pscp.tv",
"usernameClaimed": "moonlitraven",
"usernameUnclaimed": "higfjqmiez"
},
"fanscout.com": {
"checkType": "message",
"absenceStrs": [
"This page is under construction"
],
"presenseStrs": [
"birthday cake"
],
"url": "https://fanscout.com/{username}",
"urlMain": "https://fanscout.com",
"usernameClaimed": "moonlitraven",
"usernameUnclaimed": "sicuoozvul"
},
"app.samsungfood.com": {
"checkType": "message",
"absenceStrs": [
">User not found</h1></div>"
],
"presenseStrs": [
"alternateName",
"totalTime"
],
"url": "https://app.samsungfood.com/u/{username}",
"urlMain": "https://app.samsungfood.com",
"usernameClaimed": "moonlitraven",
"usernameUnclaimed": "onpigjbowo"
},
"DimensionalMe": {
"checkType": "message",
"absenceStrs": [
"error_main_"
],
"presenseStrs": [
"userName",
"publicProfile"
],
"url": "https://www.dimensional.me/{username}",
"urlMain": "https://www.dimensional.me",
"usernameClaimed": "sparkles99",
"usernameUnclaimed": "hbtybxpuon"
},
"www.portal-pisarski.pl": {
"checkType": "message",
"absenceStrs": [
"obrazki/404.png"
],
"presenseStrs": [
"profil/"
],
"url": "https://www.portal-pisarski.pl/profil/{username}",
"urlMain": "https://www.portal-pisarski.pl",
"usernameClaimed": "sparkles99",
"usernameUnclaimed": "hlwifvxnqw"
} }
}, },
"engines": { "engines": {
+65
View File
@@ -6,6 +6,7 @@ from xml.etree import ElementTree
from aiohttp import TCPConnector, ClientSession from aiohttp import TCPConnector, ClientSession
import requests import requests
import cloudscraper import cloudscraper
from colorama import Fore, Style
from .activation import import_aiohttp_cookies from .activation import import_aiohttp_cookies
from .checking import maigret from .checking import maigret
@@ -334,6 +335,70 @@ class Submitter:
site = MaigretSite(url_mainpage.split("/")[-1], site_data) site = MaigretSite(url_mainpage.split("/")[-1], site_data)
return site return site
async def add_site(self, site):
    """Self-check a candidate site, let the user edit it, then save it.

    The site is first run through ``self.site_self_check``. If that check
    reports the site as disabled, nothing is saved. Otherwise an interactive
    console menu lets the user adjust selected attributes of ``site`` before
    it is passed to ``self.db.update_site``.

    Args:
        site: Site object to add. It must expose ``name`` and ``json`` plus
            the editable attributes listed in ``editable_fields`` below.

    Returns:
        dict: ``{"valid": True}`` when the site was saved;
        ``{"valid": False, "reason": "checks_failed"}`` when the self-check
        disabled the site; ``{"valid": False, "reason": "manual block"}``
        when the user chose to reject it. What happens to a rejected domain
        is up to the caller.
    """
    # Menu number typed by the user -> attribute name on the site object.
    editable_fields = {
        '1': 'name',
        '2': 'tags',
        '3': 'url',
        '4': 'url_main',
        '5': 'username_claimed',
        '6': 'username_unclaimed',
        '7': 'presense_strs',
        '8': 'absence_strs',
    }
    # Attributes that are entered as a comma-separated list.
    list_fields = ('tags', 'presense_strs', 'absence_strs')

    sem = asyncio.Semaphore(1)
    print(f"{Fore.BLUE}{Style.BRIGHT}[*] Adding site {site.name}, let's check it...{Style.RESET_ALL}")
    print(site.json)

    result = await self.site_self_check(site, sem)
    if result["disabled"]:
        print(
            f"Checks failed for {site.name}, please, verify them manually."
        )
        return {
            "valid": False,
            "reason": "checks_failed",
        }

    while True:
        print("\nAvailable fields to edit:")
        for num, field in editable_fields.items():
            current_value = getattr(site, field)
            print(f"{num}. {field} (current: {current_value})")
        print("0. finish editing")
        print("10. reject and block domain")

        choice = input(
            "\nSelect field number to edit (0-8, or 10 to reject): "
        ).strip()

        if choice == '0':
            break

        if choice == '10':
            return {
                "valid": False,
                "reason": "manual block",
            }

        if choice not in editable_fields:
            print(f"Unknown option: {choice!r}")
            continue

        field = editable_fields[choice]
        current_value = getattr(site, field)
        raw_value = input(
            f"Enter new value for {field} (current: {current_value}): "
        ).strip()

        if field in list_fields:
            # Drop empty items so that blank input (or a trailing comma)
            # does not turn into [''] and overwrite the current value.
            new_value = [
                item.strip() for item in raw_value.split(',') if item.strip()
            ]
        else:
            new_value = raw_value

        if new_value:
            setattr(site, field, new_value)
            print(f"Updated {field} to: {new_value}")
        else:
            print(f"Empty input, {field} left unchanged")

    self.logger.info(site.json)
    self.db.update_site(site)

    return {
        "valid": True,
    }
async def dialog(self, url_exists, cookie_file): async def dialog(self, url_exists, cookie_file):
domain_raw = self.URL_RE.sub("", url_exists).strip().strip("/") domain_raw = self.URL_RE.sub("", url_exists).strip().strip("/")
domain_raw = domain_raw.split("/")[0] domain_raw = domain_raw.split("/")[0]
+17 -10
View File
@@ -1,5 +1,5 @@
## List of supported sites (search methods): total 3103 ## List of supported sites (search methods): total 3110
Rank data fetched from Alexa by domains. Rank data fetched from Alexa by domains.
@@ -3106,27 +3106,34 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://promptbase.com) [PromptBase (https://promptbase.com)](https://promptbase.com)*: top 100M, ai* 1. ![](https://www.google.com/s2/favicons?domain=https://promptbase.com) [PromptBase (https://promptbase.com)](https://promptbase.com)*: top 100M, ai*
1. ![](https://www.google.com/s2/favicons?domain=https://ngl.link) [ngl.link (https://ngl.link)](https://ngl.link)*: top 100M, q&a* 1. ![](https://www.google.com/s2/favicons?domain=https://ngl.link) [ngl.link (https://ngl.link)](https://ngl.link)*: top 100M, q&a*
1. ![](https://www.google.com/s2/favicons?domain=https://bitpapa.com) [bitpapa.com (https://bitpapa.com)](https://bitpapa.com)*: top 100M, crypto* 1. ![](https://www.google.com/s2/favicons?domain=https://bitpapa.com) [bitpapa.com (https://bitpapa.com)](https://bitpapa.com)*: top 100M, crypto*
1. ![](https://www.google.com/s2/favicons?domain=https://sst.hiberworld.com) [sst.hiberworld.com (https://sst.hiberworld.com)](https://sst.hiberworld.com)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://deepdreamgenerator.com) [DeepDreamGenerator (https://deepdreamgenerator.com)](https://deepdreamgenerator.com)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://www.pscp.tv) [PeriscopeTv (https://www.pscp.tv)](https://www.pscp.tv)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://fanscout.com) [fanscout.com (https://fanscout.com)](https://fanscout.com)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://app.samsungfood.com) [app.samsungfood.com (https://app.samsungfood.com)](https://app.samsungfood.com)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://www.dimensional.me) [DimensionalMe (https://www.dimensional.me)](https://www.dimensional.me)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://www.portal-pisarski.pl) [www.portal-pisarski.pl (https://www.portal-pisarski.pl)](https://www.portal-pisarski.pl)*: top 100M*
The list was updated at (2024-11-24 03:58:29.843092+00:00 UTC) The list was updated at (2024-11-24 20:52:43.081221+00:00 UTC)
## Statistics ## Statistics
Enabled/total sites: 2671/3103 = 86.08% Enabled/total sites: 2678/3110 = 86.11%
Incomplete message checks: 404/2671 = 15.13% (false positive risks) Incomplete message checks: 405/2678 = 15.12% (false positive risks)
Status code checks: 720/2671 = 26.96% (false positive risks) Status code checks: 720/2678 = 26.89% (false positive risks)
False positive risk (total): 42.09% False positive risk (total): 42.01%
Top 20 profile URLs: Top 20 profile URLs:
- (796) `{urlMain}/index/8-0-{username} (uCoz)` - (796) `{urlMain}/index/8-0-{username} (uCoz)`
- (296) `/{username}` - (299) `/{username}`
- (221) `{urlMain}{urlSubpath}/members/?username={username} (XenForo)` - (221) `{urlMain}{urlSubpath}/members/?username={username} (XenForo)`
- (158) `/user/{username}` - (159) `/user/{username}`
- (133) `{urlMain}{urlSubpath}/member.php?username={username} (vBulletin)` - (133) `{urlMain}{urlSubpath}/member.php?username={username} (vBulletin)`
- (127) `{urlMain}{urlSubpath}/search.php?author={username} (phpBB/Search)` - (127) `{urlMain}{urlSubpath}/search.php?author={username} (phpBB/Search)`
- (117) `/profile/{username}` - (117) `/profile/{username}`
- (108) `/u/{username}` - (110) `/u/{username}`
- (88) `/users/{username}` - (88) `/users/{username}`
- (87) `{urlMain}/u/{username}/summary (Discourse)` - (87) `{urlMain}/u/{username}/summary (Discourse)`
- (54) `/wiki/User:{username}` - (54) `/wiki/User:{username}`
@@ -3141,7 +3148,7 @@ Top 20 profile URLs:
- (17) `/search.php?keywords=&terms=all&author={username}` - (17) `/search.php?keywords=&terms=all&author={username}`
Top 20 tags: Top 20 tags:
- (326) `NO_TAGS` (non-standard) - (327) `NO_TAGS` (non-standard)
- (307) `forum` - (307) `forum`
- (50) `gaming` - (50) `gaming`
- (26) `coding` - (26) `coding`