如果需要搜索引擎获取到搜索的内容,又不想支付其他费用,同时要求返回的格式为json格式,则可以使用 DuckDuckGo API
代码如下:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import argparse
import json
import time
from itertools import islice
from typing import Any, Union

from duckduckgo_search import DDGS

# Maximum number of times a single query is sent before giving up.
DUCKDUCKGO_MAX_ATTEMPTS = 3


def _drop_unencodable(value: Any) -> Any:
    """Return *value* with characters that cannot be UTF-8 encoded removed.

    Strings are cleaned directly; lists and dicts are rebuilt with every
    member cleaned recursively; any other value is returned unchanged.
    """
    if isinstance(value, str):
        return value.encode("utf-8", "ignore").decode("utf-8")
    if isinstance(value, list):
        return [_drop_unencodable(item) for item in value]
    if isinstance(value, dict):
        return {
            _drop_unencodable(key): _drop_unencodable(item)
            for key, item in value.items()
        }
    return value


def safe_duckduckdo_results(results: Union[str, list]) -> str:
    """Return the results of a DuckDuckGo search in a safe format.

    Args:
        results (Union[str, list]): The search results, either as text that
            is already serialized or as a list of result entries.

    Returns:
        str: The results as text containing only UTF-8 encodable characters.
            A list is serialized with ``json.dumps``; a string is returned
            with unencodable characters dropped.
    """
    if isinstance(results, list):
        # Entries may be dicts rather than plain strings, so clean them
        # recursively instead of calling ``.encode`` on each element.
        return json.dumps(_drop_unencodable(results))
    return results.encode("utf-8", "ignore").decode("utf-8")


def web_search(query: str, num_results: int = 8) -> str:
    """Return the results of a DuckDuckGo search as JSON text.

    Args:
        query (str): The search query.
        num_results (int): The maximum number of results to return.

    Returns:
        str: A JSON array of results (indented by 4, non-ASCII characters
            kept as-is). ``"[]"`` when the query is empty, when
            ``num_results`` is 0, or when every attempt came back empty.

    Raises:
        ValueError: If ``num_results`` is negative.
    """
    # Settle the trivial cases before any network request is made.
    if not query:
        return json.dumps([])
    if num_results < 0:
        raise ValueError("num_results must be a non-negative integer")
    if num_results == 0:
        return json.dumps([])

    search_results = []
    for attempt in range(1, DUCKDUCKGO_MAX_ATTEMPTS + 1):
        results = DDGS().text(query)
        search_results = list(islice(results, num_results))
        if search_results:
            break
        # An empty answer is retried; there is no point in waiting once the
        # final attempt has already been made.
        if attempt < DUCKDUCKGO_MAX_ATTEMPTS:
            time.sleep(1)

    serialized = json.dumps(search_results, ensure_ascii=False, indent=4)
    return safe_duckduckdo_results(serialized)


def main() -> None:
    """Parse the command line, run the search and print the results."""
    parser = argparse.ArgumentParser(description='Web search using DuckDuckGo')
    parser.add_argument('query', type=str, help='The search query')
    parser.add_argument('--num_results', type=int, default=8, help='The number of results to return')
    args = parser.parse_args()

    results = web_search(args.query, args.num_results)
    # The JSON text is parsed back so the decoded Python objects are printed.
    results = json.loads(results)
    print(results)


if __name__ == "__main__":
    main()