From a09672f1a5f69f4bdbd9025c41dd48ff516d166d Mon Sep 17 00:00:00 2001 From: Scott Wallace Date: Wed, 29 May 2019 19:27:47 +0100 Subject: [PATCH] Fleshed out Searx role --- .../roles/searx_server/files/settings.yaml | 794 ++++++++++++++++++ .ansible/roles/searx_server/files/uwsgi.ini | 27 + .ansible/roles/searx_server/tasks/main.yaml | 47 +- 3 files changed, 863 insertions(+), 5 deletions(-) create mode 100644 .ansible/roles/searx_server/files/settings.yaml create mode 100644 .ansible/roles/searx_server/files/uwsgi.ini diff --git a/.ansible/roles/searx_server/files/settings.yaml b/.ansible/roles/searx_server/files/settings.yaml new file mode 100644 index 0000000..9f79032 --- /dev/null +++ b/.ansible/roles/searx_server/files/settings.yaml @@ -0,0 +1,794 @@ +general: + debug : False # Debug mode, only for development + instance_name : "searx" # displayed name + +search: + safe_search : 0 # Filter results. 0: None, 1: Moderate, 2: Strict + autocomplete : "startpage" # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "startpage", "wikipedia" - leave blank to turn it off by default + language : "en-US" + ban_time_on_fail : 5 # ban time in seconds after engine errors + max_ban_time_on_fail : 120 # max ban time in seconds after engine errors + +server: + port : 8889 + bind_address : "::" # address to listen on + secret_key : "56ef02b698cd6995ace2156778a5697d" # change this! + base_url : False # Set custom base_url. Possible values: False or "https://your.custom.host/location/" + image_proxy : True # Proxying image results through searx + http_protocol_version : "1.0" # 1.0 and 1.1 are supported + +ui: + static_path : "/usr/share/python3-searx/static" # Custom static path - leave it blank if you didn't change + templates_path : "/usr/share/python3-searx/templates" # Custom templates path - leave it blank if you didn't change + default_theme : oscar # ui theme + default_locale : "" # Default interface locale - leave blank to detect from browser information or use codes from the 'locales' config section + theme_args : + oscar_style : logicodev # default style of oscar + +# searx supports result proxification using an external service: https://github.com/asciimoo/morty +# uncomment below section if you have running morty proxy +#result_proxy: +# url : http://127.0.0.1:3000/ +# key : your_morty_proxy_key + +outgoing: # communication with search engines + request_timeout : 5.0 # seconds + useragent_suffix : "" # suffix of searx_useragent, could contain informations like an email address to the administrator + pool_connections : 100 # Number of different hosts + pool_maxsize : 40 # Number of simultaneous requests by host +# uncomment below section if you want to use a proxy +# see http://docs.python-requests.org/en/latest/user/advanced/#proxies +# SOCKS proxies are also supported: see http://docs.python-requests.org/en/master/user/advanced/#socks +# proxies : +# http : http://127.0.0.1:8080 +# https: http://127.0.0.1:8080 +# uncomment below section only if you have more than one network interface +# which can be the source of outgoing search requests +# source_ips: +# - 1.1.1.1 +# - 1.1.1.2 + +engines: + # - name: apk mirror + # engine: apkmirror + # timeout: 4.0 + # shortcut: apkm + # disabled: True + + - name : arch linux wiki + engine : archlinux + shortcut : al + + - name : archive is + engine : xpath + search_url : https://archive.is/{query} + url_xpath : (//div[@class="TEXT-BLOCK"]/a)/@href + title_xpath : (//div[@class="TEXT-BLOCK"]/a) + content_xpath : //div[@class="TEXT-BLOCK"]/ul/li + categories : general + timeout : 7.0 + disabled : True + shortcut : ai + + - name : arxiv + engine : arxiv + shortcut : arx + categories : science + timeout : 4.0 + + - name : base + engine : base + shortcut : bs + + - name : wikipedia + engine : wikipedia + shortcut : wp + base_url : 'https://{language}.wikipedia.org/' + + - name : bing + engine : bing + shortcut : bi + + - name : bing images + engine : bing_images + shortcut : bii + + - name : bing news + engine : bing_news + shortcut : bin + + - name : bing videos + engine : bing_videos + shortcut : biv + + - name : bitbucket + engine : xpath + paging : True + search_url : https://bitbucket.org/repo/all/{pageno}?name={query} + url_xpath : //article[@class="repo-summary"]//a[@class="repo-link"]/@href + title_xpath : //article[@class="repo-summary"]//a[@class="repo-link"] + content_xpath : //article[@class="repo-summary"]/p + categories : it + timeout : 4.0 + disabled : True + shortcut : bb + + - name : ccc-tv + engine : xpath + paging : False + search_url : https://media.ccc.de/search/?q={query} + url_xpath : //div[@class="caption"]/h3/a/@href + title_xpath : //div[@class="caption"]/h3/a/text() + content_xpath : //div[@class="caption"]/h4/@title + categories : videos + disabled : True + shortcut : c3tv + + - name : crossref + engine : json_engine + paging : True + search_url : https://search.crossref.org/dois?q={query}&page={pageno} + url_query : doi + title_query : title + content_query : fullCitation + categories : science + shortcut : cr + + - name : currency + engine : currency_convert + categories : general + shortcut : cc + + - name : deezer + engine : deezer + shortcut : dz + + - name : deviantart + engine : deviantart + shortcut : da + timeout: 3.0 + + - name : ddg definitions + engine : duckduckgo_definitions + shortcut : ddd + weight : 2 + disabled : True + + - name : digbt + engine : digbt + shortcut : dbt + timeout : 6.0 + disabled : True + + - name : digg + engine : digg + shortcut : dg + + - name : erowid + engine : xpath + paging : True + first_page_num : 0 + page_size : 30 + search_url : https://www.erowid.org/search.php?q={query}&s={pageno} + url_xpath : //dl[@class="results-list"]/dt[@class="result-title"]/a/@href + title_xpath : //dl[@class="results-list"]/dt[@class="result-title"]/a/text() + content_xpath : //dl[@class="results-list"]/dd[@class="result-details"] + categories : general + shortcut : ew + disabled : True + + - name : wikidata + engine : wikidata + shortcut : wd + timeout : 3.0 + weight : 2 + + - name : duckduckgo + engine : duckduckgo + shortcut : ddg + disabled : True + + - name : duckduckgo images + engine : duckduckgo_images + shortcut : ddi + timeout: 3.0 + disabled : True + + - name : etymonline + engine : xpath + paging : True + search_url : http://etymonline.com/?search={query}&p={pageno} + url_xpath : //a[contains(@class, "word--")]/@href + title_xpath : //p[contains(@class, "word__name--")]/text() + content_xpath : //section[contains(@class, "word__defination")]/object + first_page_num : 0 + shortcut : et + disabled : True + + - name : faroo + engine : faroo + shortcut : fa + disabled : True + + - name : 1x + engine : www1x + shortcut : 1x + disabled : True + + - name : fdroid + engine : fdroid + shortcut : fd + disabled : True + + - name : flickr + categories : images + shortcut : fl +# You can use the engine using the official stable API, but you need an API key +# See : https://www.flickr.com/services/apps/create/ +# engine : flickr +# api_key: 'apikey' # required! +# Or you can use the html non-stable engine, activated by default + engine : flickr_noapi + + - name : free software directory + engine : mediawiki + shortcut : fsd + categories : it + base_url : https://directory.fsf.org/ + number_of_results : 5 +# what part of a page matches the query string: title, text, nearmatch +# title - query matches title, text - query matches the text of page, nearmatch - nearmatch in title + search_type : title + timeout : 5.0 + disabled : True + + - name : frinkiac + engine : frinkiac + shortcut : frk + disabled : True + + - name : genius + engine : genius + shortcut : gen + + - name : gigablast + engine : gigablast + shortcut : gb + timeout : 3.0 + disabled: True + + - name : gentoo + engine : gentoo + shortcut : ge + + - name : gitlab + engine : json_engine + paging : True + search_url : https://gitlab.com/api/v4/projects?search={query}&page={pageno} + url_query : web_url + title_query : name_with_namespace + content_query : description + page_size : 20 + categories : it + shortcut : gl + timeout : 10.0 + disabled : True + + - name : github + engine : github + shortcut : gh + + - name : google + engine : google + shortcut : go + disabled : True + + - name : google images + engine : google_images + shortcut : goi + + - name : google news + engine : google_news + shortcut : gon + + - name : google videos + engine : google_videos + shortcut : gov + + - name : google scholar + engine : xpath + paging : True + search_url : https://scholar.google.com/scholar?start={pageno}&q={query}&hl=en&as_sdt=0,5&as_vis=1 + results_xpath : //div[contains(@class, "gs_r")]/div[@class="gs_ri"] + url_xpath : .//h3/a/@href + title_xpath : .//h3/a + content_xpath : .//div[@class="gs_rs"] + suggestion_xpath : //div[@id="gs_res_ccl_top"]//a/b + page_size : 10 + first_page_num : 0 + categories : science + shortcut : gos + + - name : google play apps + engine : xpath + search_url : https://play.google.com/store/search?q={query}&c=apps + url_xpath : //a[@class="title"]/@href + title_xpath : //a[@class="title"] + content_xpath : //a[@class="subtitle"] + categories : files + shortcut : gpa + disabled : True + + - name : google play movies + engine : xpath + search_url : https://play.google.com/store/search?q={query}&c=movies + url_xpath : //a[@class="title"]/@href + title_xpath : //a[@class="title"]/@title + content_xpath : //a[contains(@class, "subtitle")] + categories : videos + shortcut : gpm + disabled : True + + - name : google play music + engine : xpath + search_url : https://play.google.com/store/search?q={query}&c=music + url_xpath : //a[@class="title"]/@href + title_xpath : //a[@class="title"] + content_xpath : //a[@class="subtitle"] + categories : music + shortcut : gps + disabled : True + + - name : geektimes + engine : xpath + paging : True + search_url : https://geektimes.ru/search/page{pageno}/?q={query} + url_xpath : //article[contains(@class, "post")]//a[@class="post__title_link"]/@href + title_xpath : //article[contains(@class, "post")]//a[@class="post__title_link"] + content_xpath : //article[contains(@class, "post")]//div[contains(@class, "post__text")] + categories : it + timeout : 4.0 + disabled : True + shortcut : gt + + - name : habrahabr + engine : xpath + paging : True + search_url : https://habrahabr.ru/search/page{pageno}/?q={query} + url_xpath : //article[contains(@class, "post")]//a[@class="post__title_link"]/@href + title_xpath : //article[contains(@class, "post")]//a[@class="post__title_link"] + content_xpath : //article[contains(@class, "post")]//div[contains(@class, "post__text")] + categories : it + timeout : 4.0 + disabled : True + shortcut : habr + + - name : hoogle + engine : json_engine + paging : True + search_url : https://www.haskell.org/hoogle/?mode=json&hoogle={query}&start={pageno} + results_query : results + url_query : location + title_query : self + content_query : docs + page_size : 20 + categories : it + shortcut : ho + + - name : ina + engine : ina + shortcut : in + timeout : 6.0 + disabled : True + + - name: kickass + engine : kickass + shortcut : kc + timeout : 4.0 + disabled : True + + - name : library genesis + engine : xpath + search_url : http://libgen.io/search.php?req={query} + url_xpath : //a[contains(@href,"bookfi.net")]/@href + title_xpath : //a[contains(@href,"book/")]/text()[1] + content_xpath : //td/a[1][contains(@href,"=author")]/text() + categories : general + timeout : 7.0 + disabled : True + shortcut : lg + + - name : lobste.rs + engine : xpath + search_url : https://lobste.rs/search?utf8=%E2%9C%93&q={query}&what=stories&order=relevance + results_xpath : //li[contains(@class, "story")] + url_xpath : .//span[@class="link"]/a/@href + title_xpath : .//span[@class="link"]/a + content_xpath : .//a[@class="domain"] + categories : it + shortcut : lo + + - name : microsoft academic + engine : microsoft_academic + categories : science + shortcut : ma + + - name : mixcloud + engine : mixcloud + shortcut : mc + + - name : nyaa + engine : nyaa + shortcut : nt + disabled : True + + - name : acgsou + engine : acgsou + shortcut : acg + disabled : True + timeout: 5.0 + + - name : openairedatasets + engine : json_engine + paging : True + search_url : http://api.openaire.eu/search/datasets?format=json&page={pageno}&size=10&title={query} + results_query : response/results/result + url_query : metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ + title_query : metadata/oaf:entity/oaf:result/title/$ + content_query : metadata/oaf:entity/oaf:result/description/$ + categories : science + shortcut : oad + timeout: 5.0 + + - name : openairepublications + engine : json_engine + paging : True + search_url : http://api.openaire.eu/search/publications?format=json&page={pageno}&size=10&title={query} + results_query : response/results/result + url_query : metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ + title_query : metadata/oaf:entity/oaf:result/title/$ + content_query : metadata/oaf:entity/oaf:result/description/$ + categories : science + shortcut : oap + timeout: 5.0 + + - name : openstreetmap + engine : openstreetmap + shortcut : osm + + - name : openrepos + engine : xpath + paging : True + search_url : https://openrepos.net/search/node/{query}?page={pageno} + url_xpath : //li[@class="search-result"]//h3[@class="title"]/a/@href + title_xpath : //li[@class="search-result"]//h3[@class="title"]/a + content_xpath : //li[@class="search-result"]//div[@class="search-snippet-info"]//p[@class="search-snippet"] + categories : files + timeout : 4.0 + disabled : True + shortcut : or + + - name : pdbe + engine : pdbe + shortcut : pdb +# Hide obsolete PDB entries. +# Default is not to hide obsolete structures +# hide_obsolete : False + + - name : photon + engine : photon + shortcut : ph + + - name : piratebay + engine : piratebay + shortcut : tpb + url: https://pirateproxy.red/ + timeout : 3.0 + + - name : pubmed + engine : pubmed + shortcut : pub + categories: science + timeout : 3.0 + + - name : qwant + engine : qwant + shortcut : qw + categories : general + disabled : True + + - name : qwant images + engine : qwant + shortcut : qwi + categories : images + + - name : qwant news + engine : qwant + shortcut : qwn + categories : news + + - name : qwant social + engine : qwant + shortcut : qws + categories : social media + + - name : reddit + engine : reddit + shortcut : re + page_size : 25 + timeout : 10.0 + + - name : scanr structures + shortcut: scs + engine : scanr_structures + disabled : True + + - name : soundcloud + engine : soundcloud + shortcut : sc + + - name : stackoverflow + engine : stackoverflow + shortcut : st + + - name : searchcode doc + engine : searchcode_doc + shortcut : scd + + - name : searchcode code + engine : searchcode_code + shortcut : scc + disabled : True + + - name : framalibre + engine : framalibre + shortcut : frl + disabled : True + +# - name : searx +# engine : searx_engine +# shortcut : se +# instance_urls : +# - http://127.0.0.1:8888/ +# - ... +# disabled : True + + - name : semantic scholar + engine : xpath + paging : True + search_url : https://www.semanticscholar.org/search?q={query}&sort=relevance&page={pageno}&ae=false + results_xpath : //article + url_xpath : .//div[@class="search-result-title"]/a/@href + title_xpath : .//div[@class="search-result-title"]/a + content_xpath : .//div[@class="search-result-abstract"] + shortcut : se + categories : science + + - name : spotify + engine : spotify + shortcut : stf + + - name : subtitleseeker + engine : subtitleseeker + shortcut : ss +# The language is an option. You can put any language written in english +# Examples : English, French, German, Hungarian, Chinese... +# language : English + + - name : startpage + engine : startpage + shortcut : sp + timeout : 6.0 + + - name : swisscows + engine : swisscows + shortcut : sw + disabled : True + + - name : tokyotoshokan + engine : tokyotoshokan + shortcut : tt + timeout : 6.0 + disabled : True + + - name : torrentz + engine : torrentz + shortcut : tor + url: https://torrentz2.eu/ + timeout : 3.0 + + - name : twitter + engine : twitter + shortcut : tw + +# maybe in a fun category +# - name : uncyclopedia +# engine : mediawiki +# shortcut : unc +# base_url : https://uncyclopedia.wikia.com/ +# number_of_results : 5 + +# tmp suspended - too slow, too many errors +# - name : urbandictionary +# engine : xpath +# search_url : http://www.urbandictionary.com/define.php?term={query} +# url_xpath : //*[@class="word"]/@href +# title_xpath : //*[@class="def-header"] +# content_xpath : //*[@class="meaning"] +# shortcut : ud + + # - name : unsplash + # engine : unsplash + # disabled: True + # shortcut : us + + - name : yahoo + engine : yahoo + shortcut : yh + disabled : True + + - name : yandex + engine : yandex + shortcut : yn + disabled : True + + - name : yahoo news + engine : yahoo_news + shortcut : yhn + + - name : youtube + shortcut : yt + # You can use the engine using the official stable API, but you need an API key + # See : https://console.developers.google.com/project + # engine : youtube_api + # api_key: 'apikey' # required! + # Or you can use the html non-stable engine, activated by default + engine : youtube_noapi + + - name : dailymotion + engine : dailymotion + shortcut : dm + + - name : vimeo + engine : vimeo + shortcut : vm + + - name : wolframalpha + shortcut : wa + # You can use the engine using the official stable API, but you need an API key + # See : http://products.wolframalpha.com/api/ + # engine : wolframalpha_api + # api_key: '' # required! + engine : wolframalpha_noapi + timeout: 6.0 + categories : science + + - name : seedpeer + engine : seedpeer + shortcut: speu + categories: files, music, videos + disabled: True + + - name : dictzone + engine : dictzone + shortcut : dc + + - name : mymemory translated + engine : translated + shortcut : tl + timeout : 5.0 + disabled : True + # You can use without an API key, but you are limited to 1000 words/day + # See : http://mymemory.translated.net/doc/usagelimits.php + # api_key : '' + + - name : voat + engine: xpath + shortcut: vo + categories: social media + search_url : https://searchvoat.co/?t={query} + url_xpath : //div[@class="entry"]/p/a[@class="title"]/@href + title_xpath : //div[@class="entry"]/p/a[@class="title"] + content_xpath : //div[@class="entry"]/p/span[@class="domain"] + timeout : 10.0 + disabled : True + + - name : 1337x + engine : 1337x + shortcut : 1337x + disabled : True + + - name : Duden + engine : duden + shortcut : du + disabled : True + + - name : seznam + shortcut: szn + engine: xpath + paging : True + search_url : https://search.seznam.cz/?q={query}&count=10&from={pageno} + results_xpath: //div[@class="Page-content"]//div[@class="Result "] + url_xpath : ./h3/a/@href + title_xpath : ./h3 + content_xpath : .//p[@class="Result-description"] + first_page_num : 0 + page_size : 10 + disabled : True + + - name : mojeek + shortcut: mjk + engine: xpath + paging : True + search_url : https://www.mojeek.com/search?q={query}&s={pageno} + results_xpath: /html/body//div[@class="results"]/ul[@class="results-standard"]/li + url_xpath : ./h2/a/@href + title_xpath : ./h2 + content_xpath : ./p[@class="s"] + suggestion_xpath : /html/body//div[@class="top-info"]/p[@class="top-info spell"]/a + first_page_num : 1 + page_size : 10 + disabled : True + +# - name : yacy +# engine : yacy +# shortcut : ya +# base_url : 'http://localhost:8090' +# number_of_results : 5 +# timeout : 3.0 + +# Doku engine lets you access to any Doku wiki instance: +# A public one or a privete/corporate one. +# - name : ubuntuwiki +# engine : doku +# shortcut : uw +# base_url : 'http://doc.ubuntu-fr.org' + +locales: + en : English + ar : العَرَبِيَّة (Arabic) + bg : Български (Bulgarian) + ca : Català (Catalan) + cs : Čeština (Czech) + cy : Cymraeg (Welsh) + da : Dansk (Danish) + de : Deutsch (German) + el_GR : Ελληνικά (Greek_Greece) + eo : Esperanto (Esperanto) + es : Español (Spanish) + eu : Euskara (Basque) + fa_IR : (fārsī) فارسى (Persian) + fi : Suomi (Finnish) + fil : Wikang Filipino (Filipino) + fr : Français (French) + gl : Galego (Galician) + he : עברית (Hebrew) + hr : Hrvatski (Croatian) + hu : Magyar (Hungarian) + it : Italiano (Italian) + ja : 日本語 (Japanese) + nl : Nederlands (Dutch) + nl_BE : Vlaams (Dutch_Belgium) + pl : Polski (Polish) + pt : Português (Portuguese) + pt_BR : Português (Portuguese_Brazil) + ro : Română (Romanian) + ru : Русский (Russian) + sk : Slovenčina (Slovak) + sl : Slovenski (Slovene) + sr : српски (Serbian) + sv : Svenska (Swedish) + te : తెలుగు (telugu) + tr : Türkçe (Turkish) + uk : українська мова (Ukrainian) + vi : tiếng việt (㗂越) + zh : 中文 (Chinese) + zh_TW : 國語 (Taiwanese Mandarin) + +doi_resolvers : + oadoi.org : 'https://oadoi.org/' + doi.org : 'https://doi.org/' + doai.io : 'http://doai.io/' + sci-hub.tw : 'http://sci-hub.tw/' + +default_doi_resolver : 'oadoi.org' diff --git a/.ansible/roles/searx_server/files/uwsgi.ini b/.ansible/roles/searx_server/files/uwsgi.ini new file mode 100644 index 0000000..79da2b7 --- /dev/null +++ b/.ansible/roles/searx_server/files/uwsgi.ini @@ -0,0 +1,27 @@ +[uwsgi] +# Who will run the code +uid = www-data +gid = www-data + +# disable logging for privacy +#disable-logging = true + +# Number of workers (usually CPU count) +workers = 4 + +# The right granted on the created socket +chmod-socket = 666 +http-socket = [::]:8888 + +# Plugin to use and interpretor config +single-interpreter = true +master = true +plugin = python3 +enable-threads = true +lazy-apps = true + +# Module to import +module = searx.webapp + +pythonpath = /usr/lib/python3/dist-packages/searx/ +chdir = /usr/lib/python3/dist-packages/searx/ diff --git a/.ansible/roles/searx_server/tasks/main.yaml b/.ansible/roles/searx_server/tasks/main.yaml index 86a5c8b..a583e30 100644 --- a/.ansible/roles/searx_server/tasks/main.yaml +++ b/.ansible/roles/searx_server/tasks/main.yaml @@ -1,8 +1,45 @@ -- name: 'Searx: package' +- name: 'Searx' tags: - install - searx - package: - name: searx - state: latest - become: yes + block: + - name: 'Searx: package' + package: + name: "{{ item }}" + state: latest + become: yes + loop: + - searx + - uwsgi + - uwsgi-plugin-python3 + + - name: 'Searx: Config directory' + file: + state: directory + path: '/etc/searx' + become: yes + + - name: 'Searx: Config' + copy: + dest: '/etc/searx/settings.yml' + src: 'files/settings.yaml' + become: yes + + - name: 'Searx: UWSGI integration' + copy: + dest: '/etc/uwsgi/apps-available/searx.ini' + src: 'files/uwsgi.ini' + become: yes + + - name: 'Searx: UWSGI enable' + file: + dest: '/etc/uwsgi/apps-enabled/searx.ini' + src: '/etc/uwsgi/apps-available/searx.ini' + state: link + become: yes + + - name: 'Searx: UWSGI service' + systemd: + name: 'uwsgi' + state: started + become: yes