# Source code for asrch.commands._send

"""
Send command
"""

import logging
import random
import time
from typing import Optional

from selenium import webdriver
from selenium.common.exceptions import (
    ElementNotVisibleException,
    NoSuchElementException,
    TimeoutException,
)
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

from asrch.modules.logging_formatter import ColorFormatter
from asrch.utils.constants import Colors

# Firefox options object, created once when the module is imported.
options = Options()


# Module logger, named after this module.
c_log = logging.getLogger(__name__)

# Stream handler that carries this module's log records.
sh = logging.StreamHandler()

# Record layout: timestamp | level name padded to 8 characters | message.
c_form = ColorFormatter("%(asctime)s|%(levelname)8s|%(message)s")

sh.setFormatter(c_form)

# Attached at import time, so every record from c_log goes through sh.
c_log.addHandler(sh)

# CONSTANTS
# Pages the search functions open before typing the query into the search box.
DDG_URL: str = "https://lite.duckduckgo.com/html"
GGL_URL: str = "https://www.google.com/search"

# URLs collected by search_ddg, in the order they were found; search_engines
# indexes into this list to resolve the entry number chosen by the user.
url_index: list[str] = []






def search_ddg(
    query: str, header: bool, proxy: Optional[str], log: bool
) -> list[str]:
    """Search DuckDuckGo Lite for ``query`` and collect the links on the page.

    A Firefox driver is started, ``query`` is typed into the search box and
    every ``<a href>`` found after submitting is recorded once. Each unique
    URL is also appended to the module-level ``url_index`` list.

    Selenium ``NoSuchElementException``, ``ElementNotVisibleException`` and
    ``TimeoutException`` are caught and logged, not raised; whatever was
    collected up to that point is still returned.

    :param query: Text typed into the search box.
    :param header: When true the browser window is shown; when false Firefox
        is started with ``--headless``.
    :param proxy: Only logged when set; it is not applied to the driver.
    :param log: When true the module logger is set to ``DEBUG``, otherwise
        to ``WARNING``.
    :return: A list whose first element is an empty string, followed by one
        formatted "title + URL" entry per unique link.
    :rtype: list[str]
    """
    output: list[str] = [""]

    c_log.setLevel(logging.DEBUG if log else logging.WARNING)

    # Build the options per call: adding "--headless" to the shared
    # module-level object made the flag pile up and stay set for later calls
    # that asked for a visible browser.
    driver_options = Options()
    if header:  # pragma: no cover
        c_log.info("Headless false")
    else:
        c_log.info("Headless true")
        driver_options.add_argument("--headless")

    if proxy is not None:  # pragma: no cover
        c_log.info("Proxy set")

    with webdriver.Firefox(options=driver_options) as driver:
        try:
            c_log.debug("Opening Driver")
            c_log.debug(DDG_URL)
            driver.get(DDG_URL)

            c_log.debug("Loooking for search bar")
            text_area = WebDriverWait(driver, 10).until(
                EC.visibility_of_element_located((By.CLASS_NAME, "search__input"))
            )

            # A WebDriverWait that is never given .until() does not wait at
            # all, so the short random pauses are done with time.sleep.
            time.sleep(random.randrange(0, 2))
            text_area.send_keys(query)
            c_log.debug("Sending %s", query)
            time.sleep(random.randrange(0, 2))
            text_area.send_keys(Keys.ENTER)

            # Fixed pause so the results page can load before scraping links.
            time.sleep(2)

            # NOTE(review): a URL blacklist ("spreadprivacy", "javascript:",
            # "bing", "bingj", "yahoo", "google", "duckduckgo") used to be
            # defined here, but its loop only re-added existing members to
            # the same set, so nothing was ever filtered. It is left out to
            # keep the output unchanged; confirm which links should really be
            # dropped before enabling any filtering.
            seen_urls: set = set()
            for lnk in driver.find_elements(By.XPATH, "//a[@href]"):
                _url = lnk.get_attribute("href")
                if _url in seen_urls:
                    continue
                seen_urls.add(_url)
                url_index.append(_url)
                # Guard against a missing text attribute instead of crashing.
                _title = (lnk.get_attribute("text") or "").strip()
                output.append(
                    f"\033[1m {Colors.MINT}{_title}\033[0m\n{_url}\n"
                )
        except NoSuchElementException as e:
            c_log.error(e)
            c_log.error("No element\n\t'-> continuing")
        except ElementNotVisibleException as e:
            c_log.error(e)
            c_log.error("Element not visible\n\t'-> continuing")
        except TimeoutException as e:
            c_log.error(e)
            c_log.error("Timed out\n\t'-> continuing")

    return output
def search_ggl(
    query: str, header: bool, proxy: Optional[str], log: bool
) -> list[str]:
    """Search Google for ``query`` and collect the links on the page.

    A Firefox driver is started, ``query`` is typed into the search box and
    every ``<a href>`` found after submitting is recorded once.

    Selenium ``NoSuchElementException``, ``ElementNotVisibleException`` and
    ``TimeoutException`` are caught and logged, not raised; whatever was
    collected up to that point is still returned.

    :param query: Text typed into the search box.
    :param header: When true the browser window is shown; when false Firefox
        is started with ``--headless``.
    :param proxy: Only logged when set; it is not applied to the driver.
    :param log: When true the module logger is set to ``DEBUG``, otherwise
        to ``WARNING``.
    :return: A list whose first element is an empty string, followed by one
        formatted "title + URL" entry per unique link.
    :rtype: list[str]
    """
    output: list[str] = [""]

    c_log.setLevel(logging.DEBUG if log else logging.WARNING)

    # Build the options per call: adding "--headless" to the shared
    # module-level object made the flag pile up and stay set for later calls
    # that asked for a visible browser.
    driver_options = Options()
    if header:  # pragma: no cover
        c_log.info("Headless false")
    else:
        c_log.info("Headless true")
        driver_options.add_argument("--headless")

    if proxy is not None:  # pragma: no cover
        c_log.info("Proxy set")

    with webdriver.Firefox(options=driver_options) as driver:
        try:
            c_log.debug("Opening Driver")
            c_log.debug(GGL_URL)
            driver.get(GGL_URL)

            c_log.debug("Loooking for search bar")
            text_area = WebDriverWait(driver, 10).until(
                EC.visibility_of_element_located((By.CLASS_NAME, "gLFyf"))
            )

            # A WebDriverWait that is never given .until() does not wait at
            # all, so the intended pauses are done with time.sleep.
            time.sleep(2)
            text_area.send_keys(query)
            c_log.debug("Sending %s", query)
            time.sleep(random.randrange(0, 2))
            text_area.send_keys(Keys.ENTER)

            # Fixed pause so the results page can load before scraping links.
            time.sleep(3)

            seen_urls: set = set()
            for lnk in driver.find_elements(By.XPATH, "//a[@href]"):
                _url = lnk.get_attribute("href")
                if _url in seen_urls:
                    continue
                seen_urls.add(_url)
                # Guard against a missing text attribute instead of crashing.
                _title = (lnk.get_attribute("text") or "").strip()
                output.append(
                    f"\033[1m {Colors.MINT}{_title}\033[0m\n{_url}\n"
                )
        except NoSuchElementException as e:
            c_log.error(e)
            c_log.error("No element\n\t'-> continuing")
        except ElementNotVisibleException as e:
            c_log.error(e)
            c_log.error("Element not visible\n\t'-> continuing")
        except TimeoutException as e:
            c_log.error(e)
            c_log.error("Timed out\n\t'-> continuing")

    return output
def search_engines(
    query: str,
    header: bool = False,
    proxy: Optional[str] = None,
    log: bool = True,
    browse: bool = False,
) -> Optional[str]:
    """Run ``query`` through DuckDuckGo and Google and present the results.

    Without ``browse`` the formatted entries from both engines are joined
    with newlines and returned. With ``browse`` the entries are printed as a
    numbered list and the user is asked to pick one; the URL stored for that
    entry is returned.

    Only entries produced by ``search_ddg`` have a stored URL in
    ``url_index``; picking any other entry number re-prompts.

    :param query: Search text forwarded to both engines.
    :param header: Forwarded to both engines (true shows the browser window).
    :param proxy: Forwarded to both engines.
    :param log: Forwarded to both engines (true enables debug logging).
    :param browse: When true, prompt for an entry instead of returning the
        whole listing.
    :return: The joined listing, the selected URL, or ``None`` when the user
        quits with ``q`` or ``0``.
    """
    # url_index is shared by the whole module and only ever grows, so note
    # where the URLs belonging to this call begin.
    first_url = len(url_index)

    output: list[str] = []
    output.extend(search_ddg(query, header=header, proxy=proxy, log=log))
    ddg_url_count = len(url_index) - first_url

    # Collect links from Google
    output.extend(search_ggl(query, header=header, proxy=proxy, log=log))

    if not browse:
        return "\n".join(output)

    for index, line in enumerate(output, start=1):
        print(f"{index}. {line}")

    prompt = (
        "\n[q]uit, [h]istory ([<] back in history 1) "
        "or Enter page number: "
    )
    while True:
        answer = input(prompt).strip()
        if answer == "q":
            return None
        try:
            page_num = int(answer)
        except ValueError:
            # Covers "h", "<" and anything else non-numeric; they used to
            # crash the prompt with an uncaught ValueError.
            print(f"Unrecognised input: {answer!r}")
            continue
        if page_num == 0:
            # 0 has always ended the prompt loop without a selection.
            return None

        # Entry 1 is the empty placeholder that opens the DuckDuckGo list, so
        # entry N maps to the (N - 2)th URL stored by this call.
        position = page_num - 2
        if not 0 <= position < ddg_url_count:
            print(f"No stored URL for entry {page_num}")
            continue

        target = url_index[first_url + position]
        print(f"Navigating to {target}")
        formatted_string = (
            f"{Colors.PASTEL_PINK}[<----] {Colors.PASTEL_MINT}[---->] "
            f"{Colors.PASTEL_CYAN}{target}\n"
            f"{Colors.PASTEL_BLUE}[HISTORY]\n"
        )
        print(formatted_string)
        return target