Source code for pylicense3.cli

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# pylicense (https://github.com/ftalbrecht/pylicense): pylicense
# Copyright Holders: Felix Albrecht
# License: BSD 2-Clause License (http://opensource.org/licenses/BSD-2-Clause)

"""
Add header to a given file.

Usage:
    pylicense [-hv] [--help] [--verbose] --cfg=CONFIG_FILE PATH


Arguments:
    PATH            Directory or file to process.

Options:
    -h, --help      Show this message.

    -v, --verbose   Be verbose.
"""

from docopt import docopt
from collections import defaultdict
import subprocess
import os
import re
import fnmatch


class GitError(Exception):
    """Raised when author information cannot be extracted from the git history."""
def process_dir(dirname, config):
    """Yield ``(path, base_dir)`` pairs for every file that should be processed.

    Args:
        dirname: Path to a single file or to a directory that is walked
            recursively.
        config: Object providing ``include_patterns`` and ``exclude_patterns``,
            two sequences of ``fnmatch``-style globs. Patterns are matched
            against the full path of each file, not just its base name.

    Yields:
        ``(dirname, '')`` when ``dirname`` is a single file; otherwise
        ``(path, dirname)`` for every regular (non-symlink) file below
        ``dirname`` that matches an include pattern and no exclude pattern.

    Raises:
        Exception: If ``dirname`` is neither an existing file nor a directory.

    Note:
        When ``dirname`` is a directory the process' working directory is
        changed to it as a side effect.
    """
    if os.path.isfile(dirname):
        yield (dirname, '')
        return
    if not os.path.isdir(dirname):
        raise Exception(f"'{dirname}' is neither a file nor a directory")

    # A relative path must be resolved *before* chdir; otherwise os.walk would
    # look for ``dirname`` inside itself and silently find nothing.
    # Absolute paths are passed through untouched.
    if not os.path.isabs(dirname):
        dirname = os.path.abspath(dirname)

    include = re.compile('|'.join(fnmatch.translate(p) for p in config.include_patterns))
    exclude = None
    if config.exclude_patterns:
        exclude = re.compile('|'.join(fnmatch.translate(p) for p in config.exclude_patterns))

    os.chdir(dirname)
    for root, _, files in os.walk(dirname):
        for path in sorted(os.path.join(root, name) for name in files):
            if not include.match(path):
                continue
            if exclude is not None and exclude.match(path):
                continue
            if os.path.islink(path):
                continue
            yield (path, dirname)
def _format_year_ranges(years):
    """Render years as a string, merging consecutive runs, e.g. ``'2013 - 2015, 2018'``."""
    ordered = sorted(years)
    ranges = []
    first = last = ordered[0]
    for year in ordered[1:]:
        if year == last + 1:
            last = year
            continue
        ranges.append((first, last))
        first = last = year
    ranges.append((first, last))
    return ', '.join(
        '{} - {}'.format(lo, hi) if hi > lo else '{}'.format(lo) for lo, hi in ranges
    )


def get_git_authors(filename, root):
    """Collect the authors of ``filename`` and their active years from ``git log``.

    Args:
        filename: Path of the file whose history is inspected.
        root: Directory in which the git command is executed.

    Returns:
        A dict mapping each author name to a string listing the years in which
        that author committed to the file; consecutive years are merged into
        ``'start - end'`` ranges and separated by ``', '``. The dict is empty
        if git reports no history for the file.

    Raises:
        GitError: If the git command line fails or its output cannot be parsed
            as ``<author name> <year>`` lines.
    """
    import shlex  # local import: only needed to quote the file name for the shell

    # The file name is quoted (and separated by ``--``) so that spaces or shell
    # metacharacters in it can neither break nor extend the command.
    cmd = 'git log --use-mailmap --follow --pretty=format:"%aN %ad" --date=format:%Y -- {} | sort | uniq'.format(
        shlex.quote(filename)
    )
    try:
        out = subprocess.check_output(cmd, shell=True, universal_newlines=True, cwd=root)
    except subprocess.CalledProcessError as err:
        raise GitError('failed to extract authors from git history!') from err

    years_per_author = defaultdict(set)
    for entry in sorted(out.splitlines()):
        entry = entry.strip()
        if not entry:
            continue
        # the year is the last space-separated token, everything before it is the name
        author, separator, year = entry.rpartition(' ')
        if not separator:
            # either the name or the year is missing
            raise GitError('failed to extract authors from git history!')
        try:
            years_per_author[author].add(int(year))
        except ValueError as err:
            raise GitError('failed to extract authors from git history!') from err

    return {author: _format_year_ranges(years) for author, years in years_per_author.items()}
def read_current_header(source_iter, prefix, project_name, copyright_statement, license_str, url, lead_in, lead_out):
    """Consume the existing header from ``source_iter`` and classify its lines.

    Lines are read until the first one that does not belong to the header.
    Lines that the new header will regenerate (project name, copyright,
    license, url, author list) are dropped; everything else that starts with
    ``prefix`` is kept as a comment.

    Args:
        source_iter: Iterator over the lines of the file; a ``None`` item (or
            exhaustion of the iterator) marks the end of the file.
        prefix: Comment prefix every header line starts with.
        project_name: Project name; header lines starting with it are dropped.
        copyright_statement: Copyright text; header lines starting with any of
            its lines are dropped.
        license_str: License text; header lines starting with any of its lines
            are dropped.
        url: Project url or ``None``; header lines starting with it are dropped.
        lead_in: Optional marker; lines containing it are skipped.
        lead_out: Optional marker; lines containing it are skipped.

    Returns:
        A tuple ``(header, warning, line)`` where ``header`` is a dict with the
        keys ``'shebang'``, ``'encoding'`` (both ``str`` or ``None``) and
        ``'comments'`` (list of kept lines), ``warning`` is a message about a
        dropped foreign url (``''`` if there was none) and ``line`` is the
        first line after the header (``None`` at end of input).
    """
    header = {'shebang': None, 'encoding': None, 'comments': []}
    warning = ''
    could_be_an_author = False

    # Built once instead of per line. Empty entries are skipped because
    # ``str.startswith('')`` is always true and would discard every comment.
    discardable = ['Copyright', 'copyright', 'License']
    for text in (project_name, copyright_statement, license_str):
        for part in text.split('\n'):
            part = part.strip().lstrip(prefix).strip()
            if part:
                discardable.append(part)
    discardable = tuple(discardable)

    while True:
        line = next(source_iter, None)
        if line is None:
            break
        # drop a byte order mark, either mis-decoded as latin-1 or as U+FEFF
        line = line.lstrip('\xef\xbb\xbf\ufeff')
        if not line:
            break
        if line.startswith('#!') and len(line.strip()) > 2:
            header['shebang'] = line.strip()
            continue
        if (lead_in and lead_in in line) or (lead_out and lead_out in line):
            continue
        if not line.startswith(prefix):
            break

        body = line[len(prefix):]
        text = body.strip()
        if re.match(r'.*coding[:=]\s*', line):
            # stored stripped so that writing it back does not add whitespace
            header['encoding'] = text
        elif text.startswith(discardable):
            continue
        elif url and text.startswith(url):
            continue
        elif text.startswith(('http://', 'https://')):
            warning = 'dropping url \'{}\'!'.format(text)
        elif text.startswith('Authors:'):
            # the following header lines may be authors
            could_be_an_author = True
        elif could_be_an_author and body.startswith(' ') and text.endswith(')'):
            # we just have to assume that this is an author line
            continue
        else:
            # from now on this is a comment
            could_be_an_author = False
            header['comments'].append(line)
    return header, warning, line
def write_header(
    target, header, authors, license_str, prefix, project_name, url, max_width, copyright_statement, lead_in, lead_out
):
    """Write a freshly generated header to ``target``.

    The header consists of, in this order: shebang and encoding line (if
    present in ``header``), ``lead_in``, project name and url, copyright
    statement, license, authors, the preserved comments and ``lead_out``.

    Args:
        target: Object with a ``write(str)`` method.
        header: Dict with the keys ``'shebang'``, ``'encoding'`` and
            ``'comments'``. It is not modified.
        authors: Either a string that is written verbatim, or a dict mapping
            author names to a string of years.
        license_str: License text, possibly spanning several lines.
        prefix: Comment prefix put in front of every generated line.
        project_name: Name of the project.
        url: Project url or ``None``.
        max_width: Maximum line width used to decide whether name and url fit
            on one line and whether author names are padded.
        copyright_statement: Copyright text.
        lead_in: Optional line written before the generated part.
        lead_out: Optional line written after the generated part.
    """
    shebang = header['shebang']
    # Stripped so that an encoding captured together with its surrounding
    # whitespace/newline does not produce a stray blank line.
    encoding = header['encoding'].strip() if header['encoding'] else None
    if shebang:
        target.write(shebang + '\n')
    if encoding:
        target.write(prefix + ' ' + encoding + '\n')
    if shebang or encoding:
        target.write(prefix + '\n')
    if lead_in:
        target.write(lead_in + '\n')

    # project name and url
    line = prefix + ' ' + project_name
    if url is None:
        target.write(line + '\n')
    elif len(line) + len(url) + len(' ().') <= max_width:
        target.write(u'{line} ({url}).\n'.format(line=line, url=url))
    else:
        target.write(line + '\n')
        target.write(u'{prefix} {url}\n'.format(prefix=prefix, url=url))

    # copyright statement
    target.write(prefix + ' ' + copyright_statement + '\n')

    # license_str: continuation lines get the prefix, too
    l_str = '\n{}'.format(prefix).join(license_str.split('\n'))
    target.write(u'{} License: {}\n'.format(prefix, l_str))

    # authors, either as dict with years or only a contribution team
    if isinstance(authors, str):
        target.write(f'{prefix} {authors}\n')
    else:
        target.write(prefix + ' Authors:\n')
        max_author_length = max((len(author) for author in authors), default=0)
        for author in sorted(authors):
            year = '(' + authors[author] + ')'
            name = author
            # align the years in one column as long as the line stays short enough
            if len(prefix) + 4 + max_author_length + len(year) <= max_width:
                name = author.ljust(max_author_length)
            target.write(u'{} {} {}\n'.format(prefix, name, year))

    # comments: drop leading and trailing lines that hold nothing but the
    # prefix; work on a copy so the caller's list is left untouched
    comments = [comment.strip() for comment in (header['comments'] or [])]
    while comments and not comments[0][len(prefix):].strip():
        del comments[0]
    while comments and not comments[-1][len(prefix):].strip():
        del comments[-1]
    if comments:
        target.write(prefix + '\n')
        for comment in comments:
            target.write(comment + '\n')

    if lead_out:
        target.write(lead_out + '\n')
def process_file(filename, config, root):
    """Replace the header of ``filename`` with a freshly generated one.

    Args:
        filename: Path of the file to rewrite in place.
        config: Configuration object. ``name`` and ``license`` are required;
            ``url``, ``copyright_statement``, ``max_width``, ``prefix``,
            ``lead_in``, ``lead_out`` and ``contributors_team`` are optional.
        root: Directory in which git is run to determine the authors; only
            used when ``config`` has no ``contributors_team``.

    Returns:
        The warning produced while reading the old header (``''`` if none).

    Raises:
        GitError: Propagated from ``get_git_authors``.
    """
    import io  # local import: used to assemble the new content in memory

    # parse config
    assert hasattr(config, 'name'), "config is missing 'name'"
    project_name = config.name.strip()
    assert hasattr(config, 'license'), "config is missing 'license'"
    license_str = config.license
    url = getattr(config, 'url', None)
    copyright_statement = getattr(
        config, 'copyright_statement', 'The copyright lies with the authors of this file (see below).'
    )
    max_width = getattr(config, 'max_width', 78)
    prefix = getattr(config, 'prefix', '#')
    lead_out = getattr(config, 'lead_out', None)
    lead_in = getattr(config, 'lead_in', None)
    # only query git when no contributors team is configured
    if hasattr(config, 'contributors_team'):
        authors = config.contributors_team
    else:
        authors = get_git_authors(filename, root)

    with open(filename) as source_file:
        source = source_file.readlines()
    source.append(None)  # sentinel marking the end of the file
    source_iter = iter(source)

    # NOTE(review): this banner looks like leftover debug output; kept as is.
    print('*' * 88)
    print(license_str)
    print('*' * 88)

    header, warning, last_header_line = read_current_header(
        source_iter, prefix, project_name, copyright_statement, license_str, url, lead_in, lead_out
    )
    line = last_header_line

    # skip lines containing only whitespace
    while line is not None and line.isspace():
        line = next(source_iter, None)

    # Assemble the new content in memory first: the file is only opened for
    # writing once everything succeeded, so a failure cannot truncate it.
    rewritten = io.StringIO()
    write_header(
        rewritten,
        header,
        authors,
        license_str,
        prefix,
        project_name,
        url,
        max_width,
        copyright_statement,
        lead_in,
        lead_out,
    )
    rewritten.write('\n')
    # copy all remaining content
    while line is not None:
        rewritten.write(line)
        line = next(source_iter, None)

    # write new file
    with open(filename, 'w') as target:
        target.write(rewritten.getvalue())
    return warning
def main():
    """Command line entry point: add or refresh the header of all selected files.

    Parses the command line with docopt, loads the configuration module given
    by ``--cfg`` and processes ``PATH``. For every file one line is printed:
    the file name followed by the warning returned for it, ``success`` if
    there was none, or the error message if the git history could not be read.

    The ``--verbose`` flag is accepted but currently has no effect.
    """
    import importlib.util

    # parse arguments
    args = docopt(__doc__)
    cfg = args['--cfg']

    # load the configuration file as a module named "config"
    spec = importlib.util.spec_from_file_location("config", cfg)
    if spec is None or spec.loader is None:
        # no loader is available for this path, e.g. an unknown file suffix
        raise SystemExit("cannot load configuration from '{}'".format(cfg))
    config = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(config)

    for filename, dirname in process_dir(args['PATH'], config):
        # print the path relative to the processed directory
        print('{}: '.format(filename[len(dirname):]), end='')
        try:
            res = process_file(filename, config, dirname if dirname != '' else '.')
        except GitError as e:
            print(e)
        else:
            print('{}'.format(res if len(res) else 'success'))
# run the command line interface when executed as a script
if __name__ == '__main__':
    main()