Changeset - 52816813cbec
[Not reviewed]
default
0 3 1
Mads Kiilerich (mads) - 5 years ago 2020-11-13 01:04:30
mads@kiilerich.com
Grafted from: acc6ecf5e73f
docs: describe, visualize, and verify internal code structure and layering

Try to describe something that isn't entirely there yet.

deps.py will help track and minimize violations through checks and
visualization in deps.svg .
4 files changed with 389 insertions and 0 deletions:
0 comments (0 inline, 0 general)
.hgignore
Show inline comments
 
@@ -53,3 +53,6 @@ syntax: regexp
 
^\.pytest_cache$
 
^venv$
 
/__pycache__$
 
^deps\.dot$
 
^deps\.svg$
 
^deps\.txt$
docs/contributing.rst
Show inline comments
 
@@ -72,6 +72,94 @@ review and inclusion, via the mailing li
 
.. _contributing-tests:
 

	
 

	
 
Internal dependencies
 
---------------------
 

	
 
We try to keep the code base clean and modular and avoid circular dependencies.
 
Code should only invoke code in layers below itself.
 

	
 
Imports should import whole modules ``from`` their parent module, perhaps
 
``as`` a shortened name. Avoid imports ``from`` modules.
 

	
 
To avoid cycles and partially initialized modules, ``__init__.py`` should *not*
 
contain any non-trivial imports. The top level of a module should *not* be a
 
facade for the module functionality.
 

	
 
Common code for a module is often in ``base.py``.
 

	
 
The important part of the dependency graph is approximately linear. In the
 
following list, modules may only depend on modules below them:
 

	
 
``tests``
 
  Just get the job done - anything goes.
 

	
 
``bin/`` & ``config/`` & ``alembic/``
 
  The main entry points, defined in ``setup.py``. Note: The TurboGears template
 
  use ``config`` for the high WSGI application - this is not for low level
 
  configuration.
 

	
 
``controllers/``
 
  The top level web application, with TurboGears using the ``root`` controller
 
  as entry point, and ``routing`` dispatching to other controllers.
 

	
 
``templates/**.html``
 
  The "view", rendering to HTML. Invoked by controllers which can pass them
 
  anything from lower layers - especially ``helpers`` available as ``h`` will
 
  cut through all layers, and ``c`` gives access to global variables.
 

	
 
``lib/helpers.py``
 
  High level helpers, exposing everything to templates as ``h``. It depends on
 
  everything and has a huge dependency chain, so it should not be used for
 
  anything else. TODO.
 

	
 
``controlles/base.py``
 
  The base class of controllers, with lots of model knowledge.
 

	
 
``lib/auth.py``
 
  All things related to authentication. TODO.
 

	
 
``lib/utils.py``
 
  High level utils with lots of model knowledge. TODO.
 

	
 
``lib/hooks.py``
 
  Hooks into "everything" to give centralized logging to database, cache
 
  invalidation, and extension handling. TODO.
 

	
 
``model/``
 
  Convenience business logic wrappers around database models.
 

	
 
``model/db.py``
 
  Defines the database schema and provides some additional logic.
 

	
 
``model/scm.py``
 
  All things related to anything. TODO.
 

	
 
SQLAlchemy
 
  Database session and transaction in thread-local variables.
 

	
 
``lib/utils2.py``
 
  Low level utils specific to Kallithea.
 

	
 
``lib/webutils.py``
 
  Low level generic utils with awareness of the TurboGears environment.
 

	
 
TurboGears
 
  Request, response and state like i18n gettext in thread-local variables.
 
  External dependency with global state - usage should be minimized.
 

	
 
``lib/vcs/``
 
  Previously an independent library. No awareness of web, database, or state.
 

	
 
``lib/*``
 
  Various "pure" functionality not depending on anything else.
 

	
 
``__init__``
 
  Very basic Kallithea constants - some of them are set very early based on ``.ini``.
 

	
 
This is not exactly how it is right now, but we aim for something like that.
 
Especially the areas marked as TODO have some problems that need untangling.
 

	
 

	
 
Running tests
 
-------------
 

	
scripts/deps.py
Show inline comments
 
new file 100755
 
#!/usr/bin/env python3
 

	
 

	
 
import re
 
import sys
 

	
 

	
 
ignored_modules = set('''
 
argparse
 
base64
 
bcrypt
 
binascii
 
bleach
 
calendar
 
celery
 
celery
 
chardet
 
click
 
collections
 
configparser
 
copy
 
csv
 
ctypes
 
datetime
 
dateutil
 
decimal
 
decorator
 
difflib
 
distutils
 
docutils
 
email
 
errno
 
fileinput
 
functools
 
getpass
 
grp
 
hashlib
 
hmac
 
html
 
http
 
imp
 
importlib
 
inspect
 
io
 
ipaddr
 
IPython
 
isapi_wsgi
 
itertools
 
json
 
kajiki
 
ldap
 
logging
 
mako
 
markdown
 
mimetypes
 
mock
 
msvcrt
 
multiprocessing
 
operator
 
os
 
paginate
 
paginate_sqlalchemy
 
pam
 
paste
 
pkg_resources
 
platform
 
posixpath
 
pprint
 
pwd
 
pyflakes
 
pytest
 
pytest_localserver
 
random
 
re
 
routes
 
setuptools
 
shlex
 
shutil
 
smtplib
 
socket
 
ssl
 
stat
 
string
 
struct
 
subprocess
 
sys
 
tarfile
 
tempfile
 
textwrap
 
tgext
 
threading
 
time
 
traceback
 
traitlets
 
types
 
urllib
 
urlobject
 
uuid
 
warnings
 
webhelpers2
 
webob
 
webtest
 
whoosh
 
win32traceutil
 
zipfile
 
'''.split())
 

	
 
top_modules = set('''
 
kallithea.alembic
 
kallithea.bin
 
kallithea.config
 
kallithea.controllers
 
kallithea.templates.py
 
scripts
 
'''.split())
 

	
 
bottom_external_modules = set('''
 
tg
 
mercurial
 
sqlalchemy
 
alembic
 
formencode
 
pygments
 
dulwich
 
beaker
 
psycopg2
 
docs
 
setup
 
conftest
 
'''.split())
 

	
 
normal_modules = set('''
 
kallithea
 
kallithea.lib.celerylib.tasks
 
kallithea.lib
 
kallithea.lib.auth
 
kallithea.lib.auth_modules
 
kallithea.lib.base
 
kallithea.lib.celerylib
 
kallithea.lib.db_manage
 
kallithea.lib.helpers
 
kallithea.lib.hooks
 
kallithea.lib.indexers
 
kallithea.lib.utils
 
kallithea.lib.utils2
 
kallithea.lib.vcs
 
kallithea.lib.webutils
 
kallithea.model
 
kallithea.model.scm
 
kallithea.templates.py
 
'''.split())
 

	
 
shown_modules = normal_modules | top_modules
 

	
 
# break the chains somehow - this is a cleanup TODO list
 
known_violations = [
 
('kallithea.lib.auth_modules', 'kallithea.lib.auth'),  # needs base&facade
 
('kallithea.lib.utils', 'kallithea.model'),  # clean up utils
 
('kallithea.lib.utils', 'kallithea.model.db'),
 
('kallithea.lib.utils', 'kallithea.model.scm'),
 
('kallithea.lib.celerylib.tasks', 'kallithea.lib.helpers'),
 
('kallithea.lib.celerylib.tasks', 'kallithea.lib.hooks'),
 
('kallithea.lib.celerylib.tasks', 'kallithea.lib.indexers'),
 
('kallithea.lib.celerylib.tasks', 'kallithea.model'),
 
('kallithea.model', 'kallithea.lib.auth'),  # auth.HasXXX
 
('kallithea.model', 'kallithea.lib.auth_modules'),  # validators
 
('kallithea.model', 'kallithea.lib.helpers'),
 
('kallithea.model', 'kallithea.lib.hooks'),  # clean up hooks
 
('kallithea.model', 'kallithea.model.scm'),
 
('kallithea.model.scm', 'kallithea.lib.hooks'),
 
]
 

	
 
extra_edges = [
 
('kallithea.config', 'kallithea.controllers'),  # through TG
 
('kallithea.lib.auth', 'kallithea.lib.auth_modules'),  # custom loader
 
]
 

	
 

	
 
def normalize(s):
 
    """Given a string with dot path, return the string it should be shown as."""
 
    parts = s.replace('.__init__', '').split('.')
 
    short_2 = '.'.join(parts[:2])
 
    short_3 = '.'.join(parts[:3])
 
    short_4 = '.'.join(parts[:4])
 
    if parts[0] in ['scripts', 'contributor_data', 'i18n_utils']:
 
        return 'scripts'
 
    if short_3 == 'kallithea.model.meta':
 
        return 'kallithea.model.db'
 
    if parts[:4] == ['kallithea', 'lib', 'vcs', 'ssh']:
 
        return 'kallithea.lib.vcs.ssh'
 
    if short_4 in shown_modules:
 
        return short_4
 
    if short_3 in shown_modules:
 
        return short_3
 
    if short_2 in shown_modules:
 
        return short_2
 
    if short_2 == 'kallithea.tests':
 
        return None
 
    if parts[0] in ignored_modules:
 
        return None
 
    assert parts[0] in bottom_external_modules, parts
 
    return parts[0]
 

	
 

	
 
def main(filenames):
 
    if not filenames or filenames[0].startswith('-'):
 
        print('''\
 
Usage:
 
    hg files 'set:!binary()&grep("^#!.*python")' 'set:**.py' | xargs scripts/deps.py
 
    dot -Tsvg deps.dot > deps.svg
 
        ''')
 
        raise SystemExit(1)
 

	
 
    files_imports = dict()  # map filenames to its imports
 
    import_deps = set()  # set of tuples with module name and its imports
 
    for fn in filenames:
 
        with open(fn) as f:
 
            s = f.read()
 

	
 
        dot_name = (fn[:-3] if fn.endswith('.py') else fn).replace('/', '.')
 
        file_imports = set()
 
        for m in re.finditer(r'^ *(?:from ([^ ]*) import (?:([a-zA-Z].*)|\(([^)]*)\))|import (.*))$', s, re.MULTILINE):
 
            m_from, m_from_import, m_from_import2, m_import = m.groups()
 
            if m_from:
 
                pre = m_from + '.'
 
                if pre.startswith('.'):
 
                    pre = dot_name.rsplit('.', 1)[0] + pre
 
                importlist = m_from_import or m_from_import2
 
            else:
 
                pre = ''
 
                importlist = m_import
 
            for imp in importlist.split('#', 1)[0].split(','):
 
                full_imp = pre + imp.strip().split(' as ', 1)[0]
 
                file_imports.add(full_imp)
 
                import_deps.add((dot_name, full_imp))
 
        files_imports[fn] = file_imports
 

	
 
    # dump out all deps for debugging and analysis
 
    with open('deps.txt', 'w') as f:
 
        for fn, file_imports in sorted(files_imports.items()):
 
            for file_import in sorted(file_imports):
 
                if file_import.split('.', 1)[0] in ignored_modules:
 
                    continue
 
                f.write('%s: %s\n' % (fn, file_import))
 

	
 
    # find leafs that haven't been ignored - they are the important external dependencies and shown in the bottom row
 
    only_imported = set(
 
        set(normalize(b) for a, b in import_deps) -
 
        set(normalize(a) for a, b in import_deps) -
 
        set([None, 'kallithea'])
 
    )
 

	
 
    normalized_dep_edges = set()
 
    for dot_name, full_imp in import_deps:
 
        a = normalize(dot_name)
 
        b = normalize(full_imp)
 
        if a is None or b is None or a == b:
 
            continue
 
        normalized_dep_edges.add((a, b))
 
        #print((dot_name, full_imp, a, b))
 
    normalized_dep_edges.update(extra_edges)
 

	
 
    unseen_shown_modules = shown_modules.difference(a for a, b in normalized_dep_edges).difference(b for a, b in normalized_dep_edges)
 
    assert not unseen_shown_modules, unseen_shown_modules
 

	
 
    with open('deps.dot', 'w') as f:
 
        f.write('digraph {\n')
 
        f.write('subgraph { rank = same; %s}\n' % ''.join('"%s"; ' % s for s in sorted(top_modules)))
 
        f.write('subgraph { rank = same; %s}\n' % ''.join('"%s"; ' % s for s in sorted(only_imported)))
 
        for a, b in sorted(normalized_dep_edges):
 
            f.write('  "%s" -> "%s"%s\n' % (a, b, ' [color=red]' if (a, b) in known_violations else ' [color=green]' if (a, b) in extra_edges else ''))
 
        f.write('}\n')
 

	
 
    # verify dependencies by untangling dependency chain bottom-up:
 
    todo = set(normalized_dep_edges)
 
    for x in known_violations:
 
        todo.remove(x)
 

	
 
    while todo:
 
        depending = set(a for a, b in todo)
 
        depended = set(b for a, b in todo)
 
        drop = depended - depending
 
        if not drop:
 
            print('ERROR: cycles:', len(todo))
 
            for x in sorted(todo):
 
                print('%s,' % (x,))
 
            raise SystemExit(1)
 
        #for do_b in sorted(drop):
 
        #    print('Picking', do_b, '- unblocks:', ' '.join(a for a, b in sorted((todo)) if b == do_b))
 
        todo = set((a, b) for a, b in todo if b in depending)
 
        #print()
 

	
 

	
 
if __name__ == '__main__':
 
    main(sys.argv[1:])
scripts/run-all-cleanup
Show inline comments
 
@@ -5,6 +5,9 @@
 
set -e
 
set -x
 

	
 
hg files 'set:!binary()&grep("^#!.*python")' 'set:**.py' | xargs scripts/deps.py
 
dot -Tsvg deps.dot > deps.svg
 

	
 
scripts/docs-headings.py
 
scripts/generate-ini.py
 
scripts/whitespacecleanup.sh
0 comments (0 inline, 0 general)