Added new sites through auto-submit; some fixes

This commit is contained in:
Soxoj
2021-03-18 23:21:33 +03:00
parent f9c9af5f41
commit 8c700b9810
6 changed files with 3866 additions and 3589 deletions
+9 -11
View File
@@ -1,15 +1,13 @@
# HTTP Cookie File downloaded with cookies.txt by Genuinous @genuinous
# This file can be used by wget, curl, aria2c and other standard compliant tools.
# Usage Examples:
# 1) wget -x --load-cookies cookies.txt "https://xss.is/search/"
# 2) curl --cookie cookies.txt "https://xss.is/search/"
# 3) aria2c --load-cookies cookies.txt "https://xss.is/search/"
# 1) wget -x --load-cookies cookies.txt "https://pixabay.com/users/blue-156711/"
# 2) curl --cookie cookies.txt "https://pixabay.com/users/blue-156711/"
# 3) aria2c --load-cookies cookies.txt "https://pixabay.com/users/blue-156711/"
#
xss.is FALSE / TRUE 0 xf_csrf PMnZNsr42HETwYEr
xss.is FALSE / TRUE 0 xf_from_search google
xss.is FALSE / TRUE 1642709308 xf_user 215268%2CZNKB_-64Wk-BOpsdtLYy-1UxfS5zGpxWaiEGUhmX
xss.is FALSE / TRUE 0 xf_session sGdxJtP_sKV0LCG8vUQbr6cL670_EFWM
.xss.is TRUE / FALSE 0 muchacho_cache ["00fbb0f2772c9596b0483d6864563cce"]
.xss.is TRUE / FALSE 0 muchacho_png ["00fbb0f2772c9596b0483d6864563cce"]
.xss.is TRUE / FALSE 0 muchacho_etag ["00fbb0f2772c9596b0483d6864563cce"]
.xss.is TRUE / FALSE 1924905600 2e66e4dd94a7a237d0d1b4d50f01e179_evc ["00fbb0f2772c9596b0483d6864563cce"]
.pixabay.com TRUE / TRUE 1618356838 __cfduid d56929cd50d11474f421b849df5758a881615764837
.pixabay.com TRUE / TRUE 1615766638 __cf_bm ea8f7c565b44d749f65500f0e45176cebccaeb09-1615764837-1800-AYJIXh2boDJ6HPf44JI9fnteWABHOVvkxiSccACP9EiS1E58UDTGhViXtqjFfVE0QRj1WowP4ss2DzCs+pW+qUc=
pixabay.com FALSE / FALSE 0 anonymous_user_id c1e4ee09-5674-4252-aa94-8c47b1ea80ab
pixabay.com FALSE / FALSE 1647214439 csrftoken vfetTSvIul7gBlURt6s985JNM18GCdEwN5MWMKqX4yI73xoPgEj42dbNefjGx5fr
pixabay.com FALSE / FALSE 1647300839 client_width 1680
pixabay.com FALSE / FALSE 748111764839 is_human 1
+1 -1
View File
@@ -253,7 +253,7 @@ async def main():
site_data = get_top_sites_for_id(args.id_type)
if args.new_site_to_submit:
is_submitted = await submit_dialog(db, args.new_site_to_submit)
is_submitted = await submit_dialog(db, args.new_site_to_submit, args.cookie_file)
if is_submitted:
db.save_to_file(args.db_file)
+2035 -1773
View File
File diff suppressed because it is too large Load Diff
+9 -3
View File
@@ -85,7 +85,7 @@ async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=F
return changes
async def submit_dialog(db, url_exists):
async def submit_dialog(db, url_exists, cookie_file):
domain_raw = URL_RE.sub('', url_exists).strip().strip('/')
domain_raw = domain_raw.split('/')[0]
@@ -107,8 +107,14 @@ async def submit_dialog(db, url_exists):
url_user = url_exists.replace(supposed_username, '{username}')
url_not_exists = url_exists.replace(supposed_username, non_exist_username)
a = requests.get(url_exists).text
b = requests.get(url_not_exists).text
# cookies
cookie_dict = None
if cookie_file:
cookie_jar = await import_aiohttp_cookies(cookie_file)
cookie_dict = {c.key: c.value for c in cookie_jar}
a = requests.get(url_exists, cookies=cookie_dict).text
b = requests.get(url_not_exists, cookies=cookie_dict).text
tokens_a = set(a.split('"'))
tokens_b = set(b.split('"'))
+1803 -1798
View File
File diff suppressed because it is too large Load Diff
+9 -3
View File
@@ -20,8 +20,9 @@ RANKS.update({
'5000': '5K',
'10000': '10K',
'100000': '100K',
'10000000': '1M',
'50000000': '10M',
'10000000': '10M',
'50000000': '50M',
'100000000': '100M',
})
SEMAPHORE = threading.Semaphore(10)
@@ -58,8 +59,9 @@ def get_rank(domain_to_query, site, print_errors=True):
def get_step_rank(rank):
def get_readable_rank(r):
return RANKS[str(r)]
valid_step_ranks = sorted(map(int, RANKS.keys()))
if rank == 0:
if rank == 0 or rank == sys.maxsize:
return get_readable_rank(valid_step_ranks[-1])
else:
return get_readable_rank(list(filter(lambda x: x >= rank, valid_step_ranks))[0])
@@ -73,6 +75,8 @@ if __name__ == '__main__':
help="JSON file with sites data to update.")
parser.add_argument('--empty-only', help='update only sites without rating', action='store_true')
parser.add_argument('--exclude-engine', help='do not update score with certain engine',
action="append", dest="exclude_engine_list", default=[])
pool = list()
@@ -92,6 +96,8 @@ Rank data fetched from Alexa by domains.
url_main = site.url_main
if site.alexa_rank < sys.maxsize and args.empty_only:
continue
if args.exclude_engine_list and site.engine in args.exclude_engine_list:
continue
site.alexa_rank = 0
th = threading.Thread(target=get_rank, args=(url_main, site))
pool.append((site.name, url_main, th))