Source code for snippet_fmt
#!/usr/bin/env python3
#
# __init__.py
"""
Format and validate code snippets in reStructuredText files.
"""
#
# Copyright © 2021 Dominic Davis-Foster <dominic@davis-foster.co.uk>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
#
# Parts based on https://github.com/asottile/blacken-docs
# Copyright (c) 2018 Anthony Sottile
# MIT Licensed
#
# stdlib
import contextlib
import os
import re
import textwrap
from typing import Dict, Iterator, List, Match, NamedTuple, Optional
# 3rd party
import click
import entrypoints # type: ignore[import-untyped]
import tokenize_rt # type: ignore[import-untyped]
from consolekit.terminal_colours import ColourTrilean, resolve_color_default
from consolekit.utils import coloured_diff
from domdf_python_tools.paths import PathPlus
from domdf_python_tools.stringlist import StringList
from domdf_python_tools.typing import PathLike
from formate.utils import syntaxerror_for_file
# this package
import snippet_fmt.docstring
from snippet_fmt.config import SnippetFmtConfigDict
from snippet_fmt.formatters import Formatter, format_ini, format_json, format_python, format_toml, noformat
# Package metadata.
__author__: str = "Dominic Davis-Foster"
__copyright__: str = "2021 Dominic Davis-Foster"
__license__: str = "MIT License"
__version__: str = "0.2.0"
__email__: str = "dominic@davis-foster.co.uk"
# Names exported as the public API of this module.
__all__ = (
"CodeBlockError",
"DocstringReformatter",
"PyReformatter",
"RSTReformatter",
"Reformatter",
"reformat_docstrings",
"reformat_file",
)
# Matches the run of one or more newline characters at the very end of a string.
# Used to preserve a code block's trailing newlines when it is reformatted.
TRAILING_NL_RE = re.compile(r'\n+\Z', re.MULTILINE)
class CodeBlockError(NamedTuple):
    """
    Represents an exception raised when parsing and reformatting a code block.
    """

    #: The character offset where the exception was raised.
    offset: int

    #: The exception itself.
    exc: Exception
# TODO: reformatter for docstrings
class Reformatter:
    """
    Base class for reformatters.

    :param source: The file content.
    :param filename: The file being formatted, for display in error messages.
    :param config: The ``snippet_fmt`` configuration, parsed from a TOML file (or similar).

    .. versionadded:: 0.2.0
    """

    #: The filename being reformatted, as a POSIX-style path.
    filename: str

    #: The ``snippet_fmt`` configuration, parsed from a TOML file (or similar).
    config: SnippetFmtConfigDict

    #: Errors collected while formatting code blocks; reported at the end of :meth:`run`.
    errors: List[CodeBlockError]

    def __init__(self, source: str, filename: str, config: SnippetFmtConfigDict):
        self.filename = filename
        self.config = config
        self._unformatted_source = source
        # Populated by ``run()``; ``None`` means ``run()`` has not been called yet.
        self._reformatted_source: Optional[str] = None
        self.errors = []

        # Built-in formatters, keyed by lowercase language name.
        self._formatters: Dict[str, Formatter] = {
            "bash": noformat,
            "python": format_python,
            "python3": format_python,
            "toml": format_toml,
            "ini": format_ini,
            "json": format_json,
        }

        self.load_extra_formatters()

    def compile_regex(self) -> re.Pattern:
        """
        Compile the regular expression for finding directives.

        The pattern exposes the named groups ``before`` (the directive line, its options
        and any following blank lines), ``indent`` (the directive's indentation),
        ``lang`` (the optional language argument), ``code`` (the directive's body) and
        ``body_indent`` (the extra indentation of the body relative to the directive).

        .. versionadded:: 0.2.0
        """

        directives = '|'.join(self.config["directives"])

        return re.compile(
            rf'(?P<before>'
            rf'^(?P<indent>[ \t]*)\.\.[ \t]*('
            rf'({directives})::\s*(?P<lang>[A-Za-z0-9-_]+)?)\n'
            rf'((?P=indent)[ \t]+:.*\n)*'  # Limitation: should be `(?P=body_indent)` rather than `[ \t]+`
            rf'\n*'
            rf')'
            rf'(?P<code>^((?P=indent)(?P<body_indent>[ \t]+).*)?\n(^((?P=indent)(?P=body_indent).*)?\n)*)',
            re.MULTILINE,
        )

    def run(self) -> bool:
        """
        Run the reformatter.

        Any errors collected while formatting are reported via :meth:`report_error`.

        :return: Whether the file was changed.
        """

        content = StringList(self._unformatted_source)
        # Normalise the trailing blank line before matching (``StringList.blankline``).
        content.blankline(ensure_single=True)

        pattern = self.compile_regex()
        self._reformatted_source = pattern.sub(self.process_match, str(content))

        for error in self.errors:
            self.report_error(error)

        return self._reformatted_source != self._unformatted_source

    def report_error(self, error: CodeBlockError) -> None:
        """
        Print the error message to stderr, prefixed with the filename and line number.

        :param error:

        .. versionadded:: 0.2.0
        """

        # Convert the character offset into a 1-based line number.
        lineno = self._unformatted_source[:error.offset].count('\n') + 1
        click.echo(f"{self.filename}:{lineno}: {error.exc.__class__.__name__}: {error.exc}", err=True)

    def process_match(self, match: Match[str]) -> str:
        """
        Process a :meth:`re.Match <re.Match.expand>` for a single code block.

        :param match:

        :return: The replacement text for the match: the directive followed by the
            (possibly reformatted) code, re-indented to its original level.
        """

        if match["body_indent"] is None:
            # The directive has no indented body (e.g. it is followed by a blank line and
            # then unindented text), so ``code`` consists solely of blank lines.
            # There is nothing to format and no indentation to restore; concatenating
            # ``indent`` with the unset ``body_indent`` would raise a TypeError.
            return match.group(0)

        lang = match.group("lang")

        if lang in self.config["languages"]:
            lang_config = self.config["languages"][lang]
            # TODO: show warning if not found and in "strict" mode
            formatter = self._formatters.get(lang.lower(), noformat)
        else:
            # Languages not enabled in the configuration are passed through ``noformat``.
            lang_config = {}
            formatter = noformat

        # Remember the trailing newlines so they can be restored after formatting.
        trailing_ws_match = TRAILING_NL_RE.search(match["code"])
        assert trailing_ws_match
        trailing_ws = trailing_ws_match.group()

        code = textwrap.dedent(match["code"])

        # If the formatter raises, the error is recorded and the dedented,
        # unformatted code is used instead.
        with self._collect_error(match):
            with syntaxerror_for_file(self.filename):
                code = formatter(code, **lang_config)

        code = textwrap.indent(code, match["indent"] + match["body_indent"])
        return f'{match["before"]}{code.rstrip()}{trailing_ws}'

    def get_diff(self) -> str:
        """
        Returns the diff between the original and reformatted file content.

        :raises ValueError: (via :meth:`to_string`) if :meth:`run` has not been called.
        """

        # Based on yapf
        # Apache 2.0 License

        after = self.to_string().split('\n')
        before = self._unformatted_source.split('\n')

        return coloured_diff(
            before,
            after,
            os.fspath(self.filename),
            os.fspath(self.filename),
            "(original)",
            "(reformatted)",
            lineterm='',
        )

    def to_string(self) -> str:
        """
        Return the reformatted file as a string.

        :raises ValueError: if :meth:`run` has not been called yet.
        """

        if self._reformatted_source is None:
            raise ValueError("'Reformatter.run()' must be called first!")

        return self._reformatted_source

    @contextlib.contextmanager
    def _collect_error(self, match: Match[str]) -> Iterator[None]:
        # Record any exception raised in the ``with`` body, together with the offset
        # of the start of the match, rather than letting it propagate.
        try:
            yield
        except Exception as e:
            self.errors.append(CodeBlockError(match.start(), e))

    def load_extra_formatters(self) -> None:
        """
        Load custom formatters defined via entry points.

        Formatters are read from the ``snippet_fmt.formatters`` entry point group and
        added to the formatter table under the entry point's name.
        Entry points which are malformed or cannot be imported are silently skipped.
        """

        group = "snippet_fmt.formatters"

        for distro_config, _ in entrypoints.iter_files_distros():
            if group in distro_config:
                for name, epstr in distro_config[group].items():
                    with contextlib.suppress(entrypoints.BadEntryPoint, ImportError):  # pylint: disable=W8205
                        # TODO: show warning for bad entry point if verbose, or "strict"?
                        ep = entrypoints.EntryPoint.from_string(epstr, name)
                        self._formatters[name] = ep.load()
class RSTReformatter(Reformatter):
    """
    Reformat code snippets in a reStructuredText file.

    The file is read when the reformatter is constructed.

    :param filename: The filename to reformat.
    :param config: The ``snippet_fmt`` configuration, parsed from a TOML file (or similar).
    """

    #: The filename being reformatted.
    file_to_format: PathPlus

    def __init__(self, filename: PathLike, config: SnippetFmtConfigDict):
        self.file_to_format = PathPlus(filename)
        # The base class receives the file's content and its POSIX-style path.
        super().__init__(self.file_to_format.read_text(), self.file_to_format.as_posix(), config)

    def to_file(self) -> None:
        """
        Write the reformatted source to the original file.

        :raises ValueError: (via ``to_string()``) if ``run()`` has not been called.
        """

        self.file_to_format.write_text(self.to_string())
class DocstringReformatter(Reformatter):
    """
    Reformat code snippets in a docstring from a Python file.

    :param token: The docstring token to format.
    :param filename: The filename being reformatted.
    :param config: The ``snippet_fmt`` configuration, parsed from a TOML file (or similar).

    .. versionadded:: 0.2.0
    """

    #: The docstring token being reformatted.
    token: tokenize_rt.Token

    #: Letters before the string e.g. ``f``, ``u``, ``r``, ``fr``
    prefix_char: str

    #: Quotes used for the docstring, e.g. ``'`` or ``"""``
    quote_char: str

    #: The docstring's indentation.
    indent: str

    def __init__(self, token: tokenize_rt.Token, filename: PathLike, config: SnippetFmtConfigDict):
        self.token = token

        # Split the string literal into its prefix, quotes, indentation and content;
        # only the content is handed to the base class for reformatting.
        prefix_char, quote_char, indent, docstring = snippet_fmt.docstring.get_parts(token.src)
        self.prefix_char = prefix_char
        self.quote_char = quote_char
        self.indent = indent

        super().__init__(docstring, PathPlus(filename).as_posix(), config)

    def report_error(self, error: CodeBlockError) -> None:
        """
        Print the error message to stderr.

        The line number within the docstring is offset by the line of the docstring token.

        :param error:
        """

        lineno = self._unformatted_source[:error.offset].count('\n') + 1

        click.echo(
            f"{self.filename}:{lineno+self.token.line-1}: {error.exc.__class__.__name__}: {error.exc}",
            err=True,
        )

    def get_diff(self) -> str:
        """
        Returns the diff between the original and reformatted docstring.

        The diff itself is produced by ``snippet_fmt.docstring.diff``.
        """

        after = self.to_string().split('\n')

        return snippet_fmt.docstring.diff(
            self.token,
            after,
            os.fspath(self.filename),
        )

    def to_string(self) -> str:
        """
        Return the reformatted docstring as a string literal, including its prefix and quotes.

        :raises ValueError: if :meth:`run` has not been called yet.
        """

        if self._reformatted_source is None:
            raise ValueError("'Reformatter.run()' must be called first!")

        parts = [
            self.prefix_char,
            self.quote_char,
            textwrap.indent(self._reformatted_source, self.indent).rstrip(),
        ]

        if len(self.quote_char) == 3:
            # Triple-quoted docstrings get their closing quotes on a new, indented line.
            parts.append('\n')
            parts.append(self.indent)

        parts.append(self.quote_char)

        return ''.join(parts)

    def to_token(self) -> tokenize_rt.Token:
        """
        Return the docstring as a token for ``tokenize_rt``.

        The new token keeps the line and byte offset of the original token.
        """

        return tokenize_rt.Token(
            name="STRING",
            src=self.to_string(),
            line=self.token.line,
            utf8_byte_offset=self.token.utf8_byte_offset,
        )

    def run(self) -> bool:
        """
        Run the reformatter.

        :return: Whether the docstring was changed.
        """

        content = StringList(self._unformatted_source)

        if len(self.quote_char) == 3:
            # Only triple-quoted docstrings have their trailing blank line normalised.
            content.blankline(ensure_single=True)

        pattern = self.compile_regex()
        self._reformatted_source = pattern.sub(self.process_match, str(content))

        for error in self.errors:
            self.report_error(error)

        return self._reformatted_source != self._unformatted_source
class PyReformatter(RSTReformatter):
    """
    Reformat code snippets in docstrings in a Python file.

    :param filename: The filename to reformat.
    :param config: The ``snippet_fmt`` configuration, parsed from a TOML file (or similar).

    .. versionadded:: 0.2.0
    """

    def run(self) -> bool:
        """
        Run the reformatter.

        Each ``DOCSTRING`` token in the file is reformatted with a
        ``DocstringReformatter``, and the source is reassembled from the tokens.

        :return: Whether the file was changed.
        """

        original_tokens = snippet_fmt.docstring.get_tokens(self._unformatted_source)
        tokens: List[tokenize_rt.Token] = []
        file_ret = False

        for token in original_tokens:
            if token.name == "DOCSTRING":
                r = DocstringReformatter(token, self.file_to_format, self.config)

                with syntaxerror_for_file(self.filename):
                    if r.run():
                        # Swap in the reformatted docstring.
                        token = r.to_token()
                        file_ret = True

            tokens.append(token)

        self._reformatted_source = tokenize_rt.tokens_to_src(tokens)

        # Sanity checks: the rebuilt source differs from the original
        # if, and only if, at least one docstring was changed.
        if file_ret:
            assert self._reformatted_source != self._unformatted_source
            return True
        else:
            assert self._reformatted_source == self._unformatted_source
            return False
def reformat_file(
    filename: PathLike,
    config: SnippetFmtConfigDict,
    colour: ColourTrilean = None,
) -> int:
    """
    Reformat the given reStructuredText file, and show the diff if changes were made.

    If the file was changed the reformatted content is written back to it.

    :param filename: The filename to reformat.
    :param config: The ``snippet-fmt`` configuration, parsed from a TOML file (or similar).
    :param colour: Whether to force coloured output on (:py:obj:`True`) or off (:py:obj:`False`).

    :return: Whether the file was changed.
    """

    r = RSTReformatter(filename, config)

    ret = r.run()

    if ret:
        click.echo(r.get_diff(), color=resolve_color_default(colour))
        r.to_file()

    return ret
def reformat_docstrings(
    filename: PathLike,
    config: SnippetFmtConfigDict,
    colour: ColourTrilean = None,
) -> int:
    """
    Reformat docstrings in the given Python file, and show the diff if changes were made.

    A diff is shown for each docstring that was changed.
    If any docstring was changed the file is rewritten.

    :param filename: The filename to reformat.
    :param config: The ``snippet-fmt`` configuration, parsed from a TOML file (or similar).
    :param colour: Whether to force coloured output on (:py:obj:`True`) or off (:py:obj:`False`).

    :return: Whether the file was changed.

    .. versionadded:: 0.2.0
    """

    file = PathPlus(filename)
    source = file.read_text()

    original_tokens = snippet_fmt.docstring.get_tokens(source)
    tokens: List[tokenize_rt.Token] = []
    file_ret = False

    for token in original_tokens:
        if token.name == "DOCSTRING":
            r = DocstringReformatter(token, file, config)

            with syntaxerror_for_file(file.name):
                if r.run():
                    # Swap in the reformatted docstring and show what changed.
                    token = r.to_token()
                    click.echo(r.get_diff(), color=resolve_color_default(colour))
                    file_ret = True

        tokens.append(token)

    new_source = tokenize_rt.tokens_to_src(tokens)

    # Sanity checks: the rebuilt source differs from the original
    # if, and only if, at least one docstring was changed.
    if file_ret:
        file.write_text(new_source)
        assert new_source != source
        return True
    else:
        assert new_source == source
        return False