Source code for pylicense3.cli
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# pylicense (https://github.com/ftalbrecht/pylicense): pylicense
# Copyright Holders: Felix Albrecht
# License: BSD 2-Clause License (http://opensource.org/licenses/BSD-2-Clause)
"""
Add header to a given file.
Usage:
pylicense [-hv] [--help] [--verbose] --cfg=CONFIG_FILE PATH
Arguments:
PATH Directory or file to process.
Options:
-h, --help Show this message.
-v, --verbose Be verbose.
"""
from docopt import docopt
from collections import defaultdict
import subprocess
import os
import re
import fnmatch
[docs]def process_dir(dirname, config):
if os.path.isfile(dirname):
yield (dirname, '')
elif os.path.isdir(dirname):
include = re.compile('|'.join(fnmatch.translate(p) for p in config.include_patterns))
exclude = None
if len(config.exclude_patterns) > 0:
exclude = re.compile('|'.join(fnmatch.translate(p) for p in config.exclude_patterns))
os.chdir(dirname)
for root, _, files in os.walk(dirname):
for abspath in sorted([os.path.join(root, f) for f in files]):
if (
include.match(abspath)
and (not exclude or not exclude.match(abspath))
and not os.path.islink(abspath)
):
yield (abspath, dirname)
else:
raise Exception
[docs]def get_git_authors(filename, root):
authors = {}
try:
cmd = 'git log --use-mailmap --follow --pretty=format:"%aN %ad" --date=format:%Y {} | sort | uniq'.format(
filename
)
out = subprocess.check_output(cmd, shell=True, universal_newlines=True, cwd=root)
git_info = sorted(out.splitlines())
years_per_author = defaultdict(set)
for year_and_author in git_info:
year_and_author = year_and_author.strip().split(' ')
assert len(year_and_author) > 1 # otherwise we have either no name or no year
author = ' '.join([word for word in year_and_author[:-1]])
years_per_author[author].add(int(year_and_author[-1]))
# parse years
for author, years in years_per_author.items():
years = sorted(years)
assert len(years) > 0
year_ranges = []
start_year = years[0]
end_year = -1
for ii in range(1, len(years)):
current_year = years[ii]
if current_year == years[ii - 1] + 1:
end_year = current_year
else: # current_year > years[ii - 1], since these are sorted
year_ranges.append((start_year, end_year) if end_year > start_year else start_year)
end_year = -1
start_year = current_year
if end_year > start_year:
year_ranges.append((start_year, end_year))
elif len(year_ranges) == 0 or start_year != years[0]:
year_ranges.append(start_year)
def years_to_string(year_range):
if isinstance(year_range, tuple):
assert len(year_range) == 2
return '{} - {}'.format(year_range[0], year_range[1])
else:
return '{}'.format(year_range)
assert len(year_ranges)
authors[author] = years_to_string(year_ranges[0])
for ii in range(1, len(year_ranges)):
authors[author] += ', ' + years_to_string(year_ranges[ii])
except KeyError as e:
raise GitError('failed to extract authors from git history!')
return authors
[docs]def read_current_header(source_iter, prefix, project_name, copyright_statement, license_str, url, lead_in, lead_out):
header = {'shebang': None, 'encoding': None, 'comments': []}
warning = ''
could_be_an_author = False
while True:
line = next(source_iter)
if line is None:
break
dirt_to_remove = ['\xef', '\xbb', '\xbf']
while len(line) > 0 and line[0] in dirt_to_remove:
for dirt in dirt_to_remove:
line = line.lstrip(dirt)
if len(line) == 0:
break
if line.startswith('#!') and len(line.strip()) > 2:
header['shebang'] = line.strip()
continue
if (lead_in and lead_in in line) or (lead_out and lead_out in line):
continue
if not line.startswith(prefix):
break
else:
can_be_discarded = ['Copyright', 'copyright', 'License']
for ii in (project_name, copyright_statement, license_str):
for ll in ii.split('\n'):
can_be_discarded.append(ll.strip().lstrip(prefix).strip())
if re.match('.*coding[:=]\s*', line):
header['encoding'] = line[len(prefix) :]
elif any([line[len(prefix) :].strip().startswith(discard) for discard in can_be_discarded]):
continue
elif line[len(prefix) :].strip().startswith(url):
continue
elif any([line[len(prefix) :].strip().startswith(some_url) for some_url in ('http://', 'https://')]):
warning = 'dropping url \'{}\'!'.format(line[len(prefix) :].strip())
elif line[len(prefix) :].strip().startswith('Authors:'): # the following header lines may be authors
could_be_an_author = True
continue
elif could_be_an_author:
if (
line[len(prefix) :].startswith(' ') and line.strip()[-1] == ')'
): # we just have to assume that this is an author line
continue
else:
could_be_an_author = False
# from now on this is a comment
header['comments'].append(line)
else:
header['comments'].append(line)
return header, warning, line
[docs]def write_header(
target, header, authors, license_str, prefix, project_name, url, max_width, copyright_statement, lead_in, lead_out
):
shebang, encoding = header['shebang'], header['encoding']
if shebang:
target.write(shebang + '\n')
if encoding:
target.write(prefix + ' ' + encoding + '\n')
if shebang or encoding:
target.write(prefix + '\n')
if lead_in:
target.write(lead_in + '\n')
# project name and url
line = prefix + ' ' + project_name
if url is not None:
if len(line) + len(url) + len('().') <= max_width:
target.write(u'{line} ({url}).\n'.format(line=line, url=url))
else:
target.write(line + '\n')
if max_width - len(prefix) - 1 - len(url):
target.write(u'{prefix} {url}\n'.format(prefix=prefix, url=url))
else:
target.write(u'{prefix} {url}\n'.format(prefix=prefix, url=url))
# copyright statement
target.write(prefix + ' ' + copyright_statement + '\n')
# license_str
l_str = '\n{}'.format(prefix).join(license_str.split('\n'))
target.write(u'{} License: {}\n'.format(prefix, l_str))
# authors, either as dict with years or only a contribution team
if isinstance(authors, str):
target.write(f'{prefix} {authors}\n')
else:
target.write(prefix + ' Authors:\n')
max_author_length = 0
for author in authors:
max_author_length = max(max_author_length, len(author))
for author in sorted(authors.keys()):
year = '(' + authors[author] + ')'
if len(prefix) + 4 + max_author_length + len(year) <= max_width:
for ii in range(max_author_length - len(author)):
author += ' '
target.write(u'{} {} {}\n'.format(prefix, author, year))
# comments
def prune_first_empty_comments(ll):
first_real_comment_line = False
ret = []
for line in ll:
line = line.strip()
if first_real_comment_line:
ret.append(line)
elif len(line) >= len(line) and len(line[len(prefix) :].strip()) > 0:
first_real_comment_line = True
ret.append(line)
return ret
comments = header['comments']
if comments and len(comments) > 0:
comments.reverse()
comments = prune_first_empty_comments(comments)
comments.reverse()
comments = prune_first_empty_comments(comments)
if len(comments) > 0:
target.write(prefix + '\n')
for comment in comments:
target.write(comment + '\n')
if lead_out:
target.write(lead_out + '\n')
[docs]def process_file(filename, config, root):
# parse config
assert hasattr(config, 'name')
project_name = config.name.strip()
assert hasattr(config, 'license')
license_str = config.license
url = getattr(config, 'url', None)
copyright_statement = getattr(
config, 'copyright_statement', 'The copyright lies with the authors of this file (see below).'
)
max_width = getattr(config, 'max_width', 78)
prefix = getattr(config, 'prefix', '#')
lead_out = getattr(config, 'lead_out', None)
lead_in = getattr(config, 'lead_in', None)
authors = getattr(config, 'contributors_team', get_git_authors(filename, root))
source = open(filename).readlines()
source.append(None)
source_iter = iter(source)
print('*' * 88)
print(license_str)
print('*' * 88)
header, warning, last_header_line = read_current_header(
source_iter, prefix, project_name, copyright_statement, license_str, url, lead_in, lead_out
)
line = last_header_line
# write new file
with open(filename, 'w') as target:
# skip lines containing only whitespace
while line is not None and line.isspace():
line = next(source_iter)
write_header(
target,
header,
authors,
license_str,
prefix,
project_name,
url,
max_width,
copyright_statement,
lead_in,
lead_out,
)
target.write('\n')
# copy all remaining content
while line is not None:
target.write(line)
line = next(source_iter)
return warning
[docs]def main():
# parse arguments
args = docopt(__doc__)
verbose = '--verbose' in args
cfg = args['--cfg']
import importlib.util
spec = importlib.util.spec_from_file_location("config", cfg)
config = importlib.util.module_from_spec(spec)
spec.loader.exec_module(config)
for filename, dirname in process_dir(args['PATH'], config):
print('{}: '.format(filename[(len(dirname)) :]), end='')
try:
res = process_file(filename, config, dirname if dirname != '' else '.')
print('{}'.format(res if len(res) else 'success'))
except GitError as e:
print(e)
if __name__ == '__main__':
main()