Changeset - 3d7ba590f6f5
[Not reviewed]
default
0 4 0
Mads Kiilerich (mads) - 5 years ago 2021-05-09 22:34:02
mads@kiilerich.com
Grafted from: 9355431c99ac
auth: only use X- headers instead of REMOTE_ADDR if explicitly told so in remote_addr_header

Before, X-Forwarded-For (and others) headers would *always* be trusted blindly,
also in setups without a proxy server. It would thus in some cases be
possible for users to fake their IP, and thus potentially be possible to bypass
IP restrictions configured in Kallithea.

Fixed by making it configurable which WSGI environment variable to use for the
remote address. Users can configure remote_addr_header to for example
HTTP_X_FORWARDED_FOR instead of using the default REMOTE_ADDR.

This change is a bit similar to what is going on in the https_fixup middleware,
but is doing a bit more of what for example is happening in similar code in
werkzeug/middleware/proxy_fix.py .
4 files changed with 18 insertions and 16 deletions:
0 comments (0 inline, 0 general)
development.ini
Show inline comments
 
@@ -66,96 +66,99 @@ smtp_use_tls = false
 
#host = 127.0.0.1
 
host = 0.0.0.0
 
port = 5000
 

	
 
## Gearbox serve uses the Waitress web server ##
 
use = egg:waitress#main
 
## avoid multi threading
 
threads = 1
 
## allow push of repos bigger than the default of 1 GB
 
max_request_body_size = 107374182400
 
## use poll instead of select, fixes fd limits, may not work on old
 
## windows systems.
 
#asyncore_use_poll = True
 

	
 
## middleware for hosting the WSGI application under a URL prefix
 
#[filter:proxy-prefix]
 
#use = egg:PasteDeploy#prefix
 
#prefix = /<your-prefix>
 

	
 
[app:main]
 
use = egg:kallithea
 
## enable proxy prefix middleware
 
#filter-with = proxy-prefix
 

	
 
full_stack = true
 
static_files = true
 

	
 
## Internationalization (see setup documentation for details)
 
## By default, the languages requested by the browser are used if available, with English as default.
 
## Set i18n.enabled=false to disable automatic language choice.
 
#i18n.enabled = true
 
## To Force a language, set i18n.enabled=false and specify the language in i18n.lang.
 
## Valid values are the names of subdirectories in kallithea/i18n with a LC_MESSAGES/kallithea.mo
 
#i18n.lang = en
 

	
 
cache_dir = %(here)s/data
 
index_dir = %(here)s/data/index
 

	
 
## uncomment and set this path to use archive download cache
 
archive_cache_dir = %(here)s/data/tarballcache
 

	
 
## change this to unique ID for security
 
#app_instance_uuid = VERY-SECRET
 
app_instance_uuid = development-not-secret
 

	
 
## cut off limit for large diffs (size in bytes)
 
cut_off_limit = 256000
 

	
 
## WSGI environment variable to get the IP address of the client (default REMOTE_ADDR)
 
#remote_addr_variable = HTTP_X_FORWARDED_FOR
 

	
 
## always pretend the client connected using HTTPS (default false)
 
#force_https = true
 

	
 
## use Strict-Transport-Security headers (default false)
 
#use_htsts = true
 

	
 
## number of commits stats will parse on each iteration
 
commit_parse_limit = 25
 

	
 
## Path to Python executable to be used for git hooks.
 
## This value will be written inside the git hook scripts as the text
 
## after '#!' (shebang). When empty or not defined, the value of
 
## 'sys.executable' at the time of installation of the git hooks is
 
## used, which is correct in many cases but for example not when using uwsgi.
 
## If you change this setting, you should reinstall the Git hooks via
 
## Admin > Settings > Remap and Rescan.
 
#git_hook_interpreter = /srv/kallithea/venv/bin/python3
 

	
 
## path to git executable
 
git_path = git
 

	
 
## git rev filter option, --all is the default filter, if you need to
 
## hide all refs in changelog switch this to --branches --tags
 
#git_rev_filter = --branches --tags
 

	
 
## RSS feed options
 
rss_cut_off_limit = 256000
 
rss_items_per_page = 10
 
rss_include_diff = false
 

	
 
## options for showing and identifying changesets
 
show_sha_length = 12
 
show_revision_number = false
 

	
 
## Canonical URL to use when creating full URLs in UI and texts.
 
## Useful when the site is available under different names or protocols.
 
## Defaults to what is provided in the WSGI environment.
 
#canonical_url = https://kallithea.example.com/repos
 

	
 
## gist URL alias, used to create nicer urls for gist. This should be an
 
## url that does rewrites to _admin/gists/<gistid>.
 
## example: http://gist.example.com/{gistid}. Empty means use the internal
 
## Kallithea url, ie. http[s]://kallithea.example.com/_admin/gists/<gistid>
 
gist_alias_url =
 

	
 
## default encoding used to convert from and to unicode
 
## can be also a comma separated list of encoding in case of mixed encodings
 
default_encoding = utf-8
docs/setup.rst
Show inline comments
 
@@ -378,98 +378,102 @@ Kallithea will in most setups work perfe
 
Celery), executing all tasks in the web server process. Some tasks can however
 
take some time to run and it can be better to run such tasks asynchronously in
 
a separate process so the web server can focus on serving web requests.
 

	
 
For installation and configuration of Celery, see the `Celery documentation`_.
 
Note that Celery requires a message broker service like RabbitMQ_ (recommended)
 
or Redis_.
 

	
 
The use of Celery is configured in the Kallithea ini configuration file.
 
To enable it, simply set::
 

	
 
  use_celery = true
 

	
 
and add or change the ``celery.*`` configuration variables.
 

	
 
Configuration settings are prefixed with 'celery.', so for example setting
 
`broker_url` in Celery means setting `celery.broker_url` in the configuration
 
file.
 

	
 
To start the Celery process, run::
 

	
 
  kallithea-cli celery-run -c my.ini
 

	
 
Extra options to the Celery worker can be passed after ``--`` - see ``-- -h``
 
for more info.
 

	
 
.. note::
 
   Make sure you run this command from the same virtualenv, and with the same
 
   user that Kallithea runs.
 

	
 

	
 
Proxy setups
 
------------
 

	
 
When Kallithea is processing HTTP requests from a user, it will see and use
 
some of the basic properties of the connection, both at the TCP/IP level and at
 
the HTTP level. The WSGI server will provide this information to Kallithea in
 
the "environment".
 

	
 
In some setups, a proxy server will take requests from users and forward
 
them to the actual Kallithea server. The proxy server will thus be the
 
immediate client of the Kallithea WSGI server, and Kallithea will basically see
 
it as such. To make sure Kallithea sees the request as it arrived from the
 
client to the proxy server, the proxy server must be configured to
 
somehow pass the original information on to Kallithea, and Kallithea must be
 
configured to pick that information up and trust it.
 

	
 
Kallithea will by default rely on its WSGI server to provide the IP of the
 
client in the WSGI environment as ``REMOTE_ADDR``, but it can also
 
get it from the ``X-Real-IP`` or ``X-Forwarded-For`` HTTP headers.
 
client in the WSGI environment as ``REMOTE_ADDR``, but it can be configured to
 
get it from an HTTP header that has been set by the proxy server. For
 
example, if the proxy server puts the client IP in the ``X-Forwarded-For``
 
HTTP header, set::
 

	
 
    remote_addr_variable = HTTP_X_FORWARDED_FOR
 

	
 
Kallithea will by default rely on finding the protocol (``http`` or ``https``)
 
in the WSGI environment as ``wsgi.url_scheme``. If the proxy server puts
 
the protocol of the client request in the ``X-Url-Scheme``,
 
``X-Forwarded-Scheme``, or ``X-Forwarded-Proto`` HTTP header,
 
Kallithea can be configured to trust these headers by setting::
 

	
 
    https_fixup = true
 

	
 

	
 
HTTPS support
 
-------------
 

	
 
Kallithea will by default generate URLs based on the WSGI environment.
 

	
 
Alternatively, you can use some special configuration settings to control
 
directly which scheme/protocol Kallithea will use when generating URLs:
 

	
 
- With ``https_fixup = true``, the scheme will be taken from the
 
  ``X-Url-Scheme``, ``X-Forwarded-Scheme`` or ``X-Forwarded-Proto`` HTTP header
 
  (default ``http``).
 
- With ``force_https = true``, the scheme will be seen as ``https``.
 
- With ``use_htsts = true``, Kallithea will set ``Strict-Transport-Security`` when using https.
 

	
 
.. _nginx_virtual_host:
 

	
 

	
 
Nginx virtual host example
 
--------------------------
 

	
 
Sample config for Nginx using proxy:
 

	
 
.. code-block:: nginx
 

	
 
    upstream kallithea {
 
        server 127.0.0.1:5000;
 
        # add more instances for load balancing
 
        #server 127.0.0.1:5001;
 
        #server 127.0.0.1:5002;
 
    }
 

	
 
    ## gist alias
 
    server {
 
       listen          443;
 
       server_name     gist.example.com;
 
       access_log      /var/log/nginx/gist.access.log;
 
       error_log       /var/log/nginx/gist.error.log;
 

	
kallithea/controllers/base.py
Show inline comments
 
@@ -36,110 +36,102 @@ import warnings
 

	
 
import decorator
 
import paste.auth.basic
 
import paste.httpexceptions
 
import paste.httpheaders
 
import webob.exc
 
from tg import TGController, config, render_template, request, response, session
 
from tg import tmpl_context as c
 
from tg.i18n import ugettext as _
 

	
 
import kallithea
 
from kallithea.lib import auth_modules, ext_json, webutils
 
from kallithea.lib.auth import AuthUser, HasPermissionAnyMiddleware
 
from kallithea.lib.exceptions import UserCreationError
 
from kallithea.lib.utils import get_repo_slug, is_valid_repo
 
from kallithea.lib.utils2 import AttributeDict, asbool, ascii_bytes, safe_int, safe_str, set_hook_environment
 
from kallithea.lib.vcs.exceptions import ChangesetDoesNotExistError, EmptyRepositoryError, RepositoryError
 
from kallithea.lib.webutils import url
 
from kallithea.model import db, meta
 
from kallithea.model.scm import ScmModel
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
def render(template_path):
 
    return render_template({'url': url}, 'mako', template_path)
 

	
 

	
 
def _filter_proxy(ip):
 
    """
 
    HTTP_X_FORWARDED_FOR headers can have multiple IP addresses, with the
 
    leftmost being the original client. Each proxy that is forwarding the
 
    request will usually add the IP address it sees the request coming from.
 

	
 
    The client might have provided a fake leftmost value before hitting the
 
    first proxy, so if we have a proxy that is adding one IP address, we can
 
    only trust the rightmost address.
 
    """
 
    if ',' in ip:
 
        _ips = ip.split(',')
 
        _first_ip = _ips[-1].strip()
 
        log.debug('Got multiple IPs %s, using %s', ','.join(_ips), _first_ip)
 
        return _first_ip
 
    return ip
 

	
 

	
 
def get_ip_addr(environ):
 
    proxy_key = 'HTTP_X_REAL_IP'
 
    proxy_key2 = 'HTTP_X_FORWARDED_FOR'
 
    def_key = 'REMOTE_ADDR'
 

	
 
    ip = environ.get(proxy_key)
 
    if ip:
 
        return _filter_proxy(ip)
 

	
 
    ip = environ.get(proxy_key2)
 
    if ip:
 
        return _filter_proxy(ip)
 

	
 
    ip = environ.get(def_key, '0.0.0.0')
 
    return _filter_proxy(ip)
 
    """The web server will set REMOTE_ADDR to the unfakeable IP layer client IP address.
 
    If using a proxy server, make it possible to use another value, such as
 
    the X-Forwarded-For header, by setting `remote_addr_variable = HTTP_X_FORWARDED_FOR`.
 
    """
 
    remote_addr_variable = kallithea.CONFIG.get('remote_addr_variable', 'REMOTE_ADDR')
 
    return _filter_proxy(environ.get(remote_addr_variable, '0.0.0.0'))
 

	
 

	
 
def get_path_info(environ):
 
    """Return PATH_INFO from environ ... using tg.original_request if available.
 

	
 
    In Python 3 WSGI, PATH_INFO is a unicode str, but kind of contains encoded
 
    bytes. The code points are guaranteed to only use the lower 8 bit bits, and
 
    encoding the string with the 1:1 encoding latin1 will give the
 
    corresponding byte string ... which then can be decoded to proper unicode.
 
    """
 
    org_req = environ.get('tg.original_request')
 
    if org_req is not None:
 
        environ = org_req.environ
 
    return safe_str(environ['PATH_INFO'].encode('latin1'))
 

	
 

	
 
def log_in_user(user, remember, is_external_auth, ip_addr):
 
    """
 
    Log a `User` in and update session and cookies. If `remember` is True,
 
    the session cookie is set to expire in a year; otherwise, it expires at
 
    the end of the browser session.
 

	
 
    Returns populated `AuthUser` object.
 
    """
 
    # It should not be possible to explicitly log in as the default user.
 
    assert not user.is_default_user, user
 

	
 
    auth_user = AuthUser.make(dbuser=user, is_external_auth=is_external_auth, ip_addr=ip_addr)
 
    if auth_user is None:
 
        return None
 

	
 
    user.update_lastlogin()
 
    meta.Session().commit()
 

	
 
    # Start new session to prevent session fixation attacks.
 
    session.invalidate()
 
    session['authuser'] = cookie = auth_user.to_cookie()
 

	
 
    # If they want to be remembered, update the cookie.
 
    # NOTE: Assumes that beaker defaults to browser session cookie.
 
    if remember:
 
        t = datetime.datetime.now() + datetime.timedelta(days=365)
 
        session._set_cookie_expires(t)
 

	
 
    session.save()
 

	
 
    log.info('user %s is now authenticated and stored in '
 
             'session, session attrs %s', user.username, cookie)
kallithea/templates/ini/template.ini.mako
Show inline comments
 
@@ -129,96 +129,99 @@ auto-procname = true                   ;
 
lazy = true                            ; App *must* be loaded in workers - db connections can't be shared
 
workers = 4                            ; On demand scaling up to this many worker processes
 
cheaper = 1                            ; Initial and on demand scaling down to this many worker processes
 
max-requests = 1000                    ; Graceful reload of worker processes to avoid leaks
 

	
 
<%text>##</%text> Tweak defaults:
 
strict = true                          ; Fail on unknown config directives
 
enable-threads = true                  ; Enable Python threads (not threaded workers)
 
vacuum = true                          ; Delete sockets during shutdown
 
single-interpreter = true
 
die-on-term = true                     ; Shutdown when receiving SIGTERM (default is respawn)
 
need-app = true                        ; Exit early if no app can be loaded.
 
reload-on-exception = true             ; Don't assume that the application worker can process more requests after a severe error
 

	
 
%endif
 
<%text>##</%text> middleware for hosting the WSGI application under a URL prefix
 
#[filter:proxy-prefix]
 
#use = egg:PasteDeploy#prefix
 
#prefix = /<your-prefix>
 

	
 
[app:main]
 
use = egg:kallithea
 
<%text>##</%text> enable proxy prefix middleware
 
#filter-with = proxy-prefix
 

	
 
full_stack = true
 
static_files = true
 

	
 
<%text>##</%text> Internationalization (see setup documentation for details)
 
<%text>##</%text> By default, the languages requested by the browser are used if available, with English as default.
 
<%text>##</%text> Set i18n.enabled=false to disable automatic language choice.
 
#i18n.enabled = true
 
<%text>##</%text> To Force a language, set i18n.enabled=false and specify the language in i18n.lang.
 
<%text>##</%text> Valid values are the names of subdirectories in kallithea/i18n with a LC_MESSAGES/kallithea.mo
 
#i18n.lang = en
 

	
 
cache_dir = %(here)s/data
 
index_dir = %(here)s/data/index
 

	
 
<%text>##</%text> uncomment and set this path to use archive download cache
 
archive_cache_dir = %(here)s/data/tarballcache
 

	
 
<%text>##</%text> change this to unique ID for security
 
app_instance_uuid = ${uuid()}
 

	
 
<%text>##</%text> cut off limit for large diffs (size in bytes)
 
cut_off_limit = 256000
 

	
 
<%text>##</%text> WSGI environment variable to get the IP address of the client (default REMOTE_ADDR)
 
#remote_addr_variable = HTTP_X_FORWARDED_FOR
 

	
 
<%text>##</%text> always pretend the client connected using HTTPS (default false)
 
#force_https = true
 

	
 
<%text>##</%text> use Strict-Transport-Security headers (default false)
 
#use_htsts = true
 

	
 
<%text>##</%text> number of commits stats will parse on each iteration
 
commit_parse_limit = 25
 

	
 
<%text>##</%text> Path to Python executable to be used for git hooks.
 
<%text>##</%text> This value will be written inside the git hook scripts as the text
 
<%text>##</%text> after '#!' (shebang). When empty or not defined, the value of
 
<%text>##</%text> 'sys.executable' at the time of installation of the git hooks is
 
<%text>##</%text> used, which is correct in many cases but for example not when using uwsgi.
 
<%text>##</%text> If you change this setting, you should reinstall the Git hooks via
 
<%text>##</%text> Admin > Settings > Remap and Rescan.
 
#git_hook_interpreter = /srv/kallithea/venv/bin/python3
 
%if git_hook_interpreter:
 
git_hook_interpreter = ${git_hook_interpreter}
 
%endif
 

	
 
<%text>##</%text> path to git executable
 
git_path = git
 

	
 
<%text>##</%text> git rev filter option, --all is the default filter, if you need to
 
<%text>##</%text> hide all refs in changelog switch this to --branches --tags
 
#git_rev_filter = --branches --tags
 

	
 
<%text>##</%text> RSS feed options
 
rss_cut_off_limit = 256000
 
rss_items_per_page = 10
 
rss_include_diff = false
 

	
 
<%text>##</%text> options for showing and identifying changesets
 
show_sha_length = 12
 
show_revision_number = false
 

	
 
<%text>##</%text> Canonical URL to use when creating full URLs in UI and texts.
 
<%text>##</%text> Useful when the site is available under different names or protocols.
 
<%text>##</%text> Defaults to what is provided in the WSGI environment.
 
#canonical_url = https://kallithea.example.com/repos
 

	
 
<%text>##</%text> gist URL alias, used to create nicer urls for gist. This should be an
 
<%text>##</%text> url that does rewrites to _admin/gists/<gistid>.
 
<%text>##</%text> example: http://gist.example.com/{gistid}. Empty means use the internal
 
<%text>##</%text> Kallithea url, ie. http[s]://kallithea.example.com/_admin/gists/<gistid>
 
gist_alias_url =
 

	
0 comments (0 inline, 0 general)