rnorth · dmitry-timofeev · Feb 14, 2020 · Feb 14, 2020 · Feb 17, 2020 · Feb 17, 2020
diff --git a/.gitignore b/.gitignore
@@ -7,3 +7,4 @@ __pycache__/
 .eggs/
 .pytest_cache/
 .DS_Store
+.venv/
diff --git a/README.md b/README.md
@@ -3,4 +3,116 @@
 A plugin for mkdocs that allows some advanced 'includes' functionality to be used for embedded code blocks.
 This is effectively an extended Markdown format, but is intended to degrade gracefully when rendered with a different renderer. 
 
-This README will be extended to include examples at a later date.
+## Installation
+
+1. Add dependency on the plugin:
+
+```requirements.txt
+-e git+https://github.com/rnorth/mkdocs-codeinclude-plugin#egg=mkdocs_codeinclude_plugin
+```
+
+You have to use a Git dependency specification until the plugin is published on PyPI.
+
+2. Add `codeinclude` to the list of your MkDocs plugins (typically listed in `mkdocs.yml`):
+
+```yaml
+plugins:
+  - codeinclude
+```
+
+## Usage
+
+A codeinclude block resembles a regular markdown link surrounded by a pair of XML comments, e.g.:
+
+<!-- 
+To prevent this from being rendered as a codeinclude when rendering this page, we use HTML tags.
+See this in its rendered form to understand its actual appearance, or look at other pages in the
+docs.
+-->
+
+<pre><code>&lt;!--codeinclude--&gt;
+[Human readable title for snippet](./relative_path_to_example_code.java) targeting_expression
+&lt;!--/codeinclude--&gt;
+</code></pre>
+
+Where `targeting_expression` could be:
+
+* `block:someString` or
+* `inside_block:someString`
+
+If these are provided, the macro will seek out any line containing the token `someString` and grab the next curly brace
+delimited block that it finds. `block` will grab the starting line and closing brace, whereas `inside_block` will omit 
+these. If no `targeting_expression` is provided, the whole file is included.
+
+e.g., given:
+```java
+
+public class FooService {
+
+    public void doFoo() {
+        foo.doSomething();
+    }
+
+}
+```
+
+If we use `block:doFoo` as our targeting expression, we will have the following content included into our page:
+
+```java
+public void doFoo() {
+    foo.doSomething();
+}
+```
+
+Whereas using `inside_block:doFoo` we would just have the inner content of the method included:
+
+```java
+foo.doSomething();
+```
+
+Note that:
+
+* Any included code will have its indentation reduced (common leading whitespace is removed)
+* Every line in the source file will be searched for an instance of the token (e.g. `doFoo`). If more than one line
+  includes that token, then potentially more than one block could be targeted for inclusion. It is advisable to use a
+  specific, unique token to avoid unexpected behaviour.
+
+When we wish to include a section of code that does not naturally appear within braces, we can simply insert our token,
+with matching braces, in a comment. 
+While a little ugly, this has the benefit of working in any context, even in languages that do not use
+curly braces, and is easy to understand. 
+For example:
+
+```java
+public class FooService {
+
+    public void boringMethod() {
+        doSomethingBoring();
+
+        // doFoo {
+        doTheThingThatWeActuallyWantToShow();
+        // }
+    }
+
+}
+```
+
+will be rendered as:
+
+```java
+doTheThingThatWeActuallyWantToShow();
+```
+
+## Building the Project
+
+Install the dependencies:
+
+```shell
+pip install -r requirements.txt
+pip install nose # Optionally, install nose to run the tests
+```
+
+Run the tests:
+```shell
+nosetests
+```
diff --git a/codeinclude/languages.py b/codeinclude/languages.py
@@ -0,0 +1,20 @@
+from pygments.lexers import get_lexer_for_filename
+from pygments.util import ClassNotFound
+
+
+def get_lang_class(filename: str) -> str:
+    """Map a file name to the Pygments language alias used for highlighting.
+
+    The alias is the first entry in the `aliases` list of the lexer that
+    Pygments selects for `filename`. Pygments backs codehilite, a widely used
+    code highlighting extension:
+    https://squidfunk.github.io/mkdocs-material/extensions/codehilite/
+
+    Pygments aliases are expected to be compatible with the highlight.js language
+    classes used by some MkDocs themes:
+    https://www.mkdocs.org/user-guide/styling-your-docs/#built-in-themes
+    The 'Language -> Language Classes' table of highlight.js is at
+    https://github.com/highlightjs/highlight.js#supported-languages
+
+    Returns:
+        The first alias of the matching lexer, or "none" when Pygments has no
+        lexer for the file name.
+    """
+    try:
+        aliases = get_lexer_for_filename(filename).aliases
+    except ClassNotFound:
+        # Unknown file type: fall back to a fence without highlighting.
+        return "none"
+    return aliases[0]
diff --git a/codeinclude/plugin.py b/codeinclude/plugin.py
@@ -2,9 +2,12 @@
 import os
 import shlex
 import textwrap
+from dataclasses import dataclass
+from typing import List
 
 from mkdocs.plugins import BasePlugin
 from codeinclude.resolver import select
+from codeinclude.languages import get_lang_class
 
 RE_START = r"""(?x)
     ^
@@ -22,21 +25,104 @@
     $
 """
 
-RE_SNIPPET = r"""(?x)
+# Matches one snippet reference inside a codeinclude block: optional leading
+# whitespace, a markdown link `[title](filename)`, and an optional targeting
+# token made of word characters, colons and hyphens. The token may follow the
+# link after spaces, tabs or a newline. Multiline mode lets `^`/`$` anchor at
+# line boundaries, because the pattern is run over the whole block content.
+RE_SNIPPET = r"""(?xm)
     ^
     (?P<leading_space>\s*)
     \[(?P<title>[^\]]*)\]\((?P<filename>[^)]+)\)
-    ([\t ]+(?P<params>.*))?
+    ([\t\n ]+(?P<params>[\w:-]+))?
     (?P<ignored_trailing_space>\s*)
     $
 """
 
 
-def get_substitute(page, title, filename, lines, block, inside_block):
+class CodeIncludePlugin(BasePlugin):
+    def on_page_markdown(self, markdown, page, config, site_navigation=None, **kwargs):
+        """Return `markdown` with every codeinclude block replaced by its rendered code.
+
+        The blocks are located in the page source, a replacement is computed for
+        each of them relative to `page`, and the replacements are spliced back
+        into the text. `config`, `site_navigation` and `kwargs` are not used here.
+        """
 
+        blocks = find_code_include_blocks(markdown)
+        substitutes = get_substitutes(blocks, page)
+        return substitute(markdown, substitutes)
+
+
+@dataclass
+class CodeIncludeBlock:
+    """A codeinclude region found in a markdown document."""
+
+    # Zero-based index of the line holding the start marker.
+    first_line_index: int
+    # Zero-based index of the line holding the end marker.
+    last_line_index: int
+    # The lines from the start marker to the end marker inclusive, joined by "\n".
+    content: str
+
+
+def find_code_include_blocks(markdown: str) -> List[CodeIncludeBlock]:
+    """Locate every codeinclude block in `markdown`.
+
+    The text is scanned line by line for a line matching RE_START followed
+    later by a line matching RE_END. A start marker that is never closed
+    produces no block.
+
+    Returns:
+        The blocks in document order. Each one records the zero-based indices
+        of its start and end marker lines and the text between them, markers
+        included.
+
+    Raises:
+        ValueError: if a start marker appears inside an open block, or an end
+            marker appears outside any block.
+    """
+    found = []
+    # Index of the currently open start marker; None while outside a block.
+    first = None
+    source_lines = markdown.splitlines()
+    for index, line in enumerate(source_lines):
+        if re.match(RE_START, line):
+            if first is not None:
+                raise ValueError(f"Found two consecutive code-include starts: at lines {first} and {index}")
+            first = index
+            continue
+        if not re.match(RE_END, line):
+            continue
+        if first is None:
+            raise ValueError(f"Found code-include end without preceding start at line {index}")
+        content = '\n'.join(source_lines[first:index + 1])
+        found.append(CodeIncludeBlock(first, index, content))
+        first = None
+    return found
+
+
+@dataclass
+class Replacement:
+    """Text that takes the place of a range of lines in a markdown document."""
+
+    # Zero-based index of the first line to replace.
+    first_line_index: int
+    # Zero-based index of the last line to replace (inclusive).
+    last_line_index: int
+    # The text inserted instead of the replaced lines.
+    content: str
+
+
+def get_substitutes(blocks: List[CodeIncludeBlock], page) -> List[Replacement]:
+    """Render the replacement text for each codeinclude block.
+
+    Every snippet link matched by RE_SNIPPET inside a block is turned into a
+    code fence by get_substitute() and re-indented with the whitespace captured
+    before the link. The fences of one block are concatenated in order.
+
+    Args:
+        blocks: the codeinclude blocks to render.
+        page: the page being processed; passed through to get_substitute().
+
+    Returns:
+        One Replacement per block, carrying the block's first and last line
+        indices and the rendered content (empty if the block has no snippet).
+
+    Raises:
+        ValueError: if a targeting token contains no colon.
+    """
+    replacements = []
+    for ci_block in blocks:
+        rendered_snippets = []
+        for snippet_match in re.finditer(RE_SNIPPET, ci_block.content):
+            title = snippet_match.group("title")
+            filename = snippet_match.group("filename")
+            indent = snippet_match.group("leading_space")
+            raw_params = snippet_match.group("params")
+
+            if raw_params:
+                # Split on the first colon only, so that a value which itself
+                # contains colons (e.g. `block:Foo::bar`) stays in one piece
+                # instead of breaking the key/value pairing.
+                params = dict(token.split(":", 1) for token in shlex.split(raw_params))
+            else:
+                params = {}
+
+            code_block = get_substitute(
+                page,
+                title,
+                filename,
+                params.get("lines", ""),
+                params.get("block", ""),
+                params.get("inside_block", ""),
+            )
+            # re-indent
+            rendered_snippets.append(re.sub("^", indent, code_block, flags=re.MULTILINE))
+
+        replacements.append(
+            Replacement(ci_block.first_line_index, ci_block.last_line_index, "".join(rendered_snippets))
+        )
+    return replacements
+
+
+def get_substitute(page, title, filename, lines, block, inside_block):
+    # Compute the fence header
+    lang_code = get_lang_class(filename)
+    header = lang_code
+    title = title.strip()
+    if len(title) > 0:
+        header += f' tab="{title}"'
+
+    # Select the code content
     page_parent_dir = os.path.dirname(page.file.abs_src_path)
     import_path = os.path.join(page_parent_dir, filename)
-    with open(import_path) as f:
+    # Always use UTF-8, as it is the recommended default for source file encodings.
+    with open(import_path, encoding='UTF-8') as f:
         content = f.read()
 
     selected_content = select(
@@ -45,56 +131,32 @@ def get_substitute(page, title, filename, lines, block, inside_block):
 
     dedented = textwrap.dedent(selected_content)
 
-    return '\n```java tab="' + title + '"\n' + dedented + "\n```\n\n"
-
-
-class CodeIncludePlugin(BasePlugin):
-    def on_page_markdown(self, markdown, page, config, site_navigation=None, **kwargs):
-        "Provide a hook for defining functions from an external module"
-
-        active = False
-        results = ""
-        for line in markdown.splitlines():
-            boundary = False
-
-            # detect end
-            if active and re.match(RE_END, line):
-                active = False
-                boundary = True
-
-            # handle each line of a codeinclude zone
-            if active:
-                snippet_match = re.match(RE_SNIPPET, line)
-                if snippet_match:
-                    title = snippet_match.group("title")
-                    filename = snippet_match.group("filename")
-                    indent = snippet_match.group("leading_space")
-                    raw_params = snippet_match.group("params")
-
-                    if raw_params:
-                        params = dict(token.split(":") for token in shlex.split(raw_params))
-                        lines = params.get("lines", "")
-                        block = params.get("block", "")
-                        inside_block = params.get("inside_block", "")
-                    else:
-                        lines = ""
-                        block = ""
-                        inside_block = ""
-
-                    code_block = get_substitute(
-                        page, title, filename, lines, block, inside_block
-                    )
-                    # re-indent
-                    code_block = re.sub("^", indent, code_block, flags=re.MULTILINE)
-                    results += code_block
-
-            # detect start
-            if re.match(RE_START, line):
-                active = True
-                boundary = True
-
-            # outside a codeinclude zone and ignoring the boundaries
-            if not active and not boundary:
-                results += line + "\n"
-
-        return results
+    return f'''
+```{header}
+{dedented}
+```
+
+'''
+
+
+def substitute(markdown: str, substitutes: List[Replacement]) -> str:
+    """Splice the replacements into `markdown` and return the resulting text.
+
+    Lines from a replacement's first line index through its last line index
+    are dropped and the replacement content is emitted in their place. Every
+    other line is kept and terminated with "\\n".
+
+    Args:
+        markdown: the original page text.
+        substitutes: the replacements to apply, keyed by their first line index;
+            if two share a first line, the later one wins.
+    """
+    by_first_line = {s.first_line_index: s for s in substitutes}
+
+    pieces = []
+    lines = markdown.splitlines()
+    index = 0
+    while index < len(lines):
+        replacement = by_first_line.get(index)
+        if replacement is None:
+            # Keep the input line
+            pieces.append(lines[index] + "\n")
+            index += 1
+        else:
+            # Replace the codeinclude fragment starting at this line and
+            # resume right after its last line
+            pieces.append(replacement.content)
+            index = replacement.last_line_index + 1
+    return "".join(pieces)
diff --git a/codeinclude/resolver.py b/codeinclude/resolver.py
@@ -43,21 +43,31 @@ def select(
             delim_count -= line.count("}")
 
     if inside_block:
-        i = 0
         delim_count = 0
-        for line in text.splitlines():
+        inside_matching = False
+        for line_number, line in enumerate(text.splitlines(), start=1):
             first_line_of_block = False
-            i = i + 1
+            # Detect the block beginning
             if inside_block in line and delim_count <= 0:
                 delim_count = 0
                 first_line_of_block = True
-                delim_count += line.count("{")
+                inside_matching = True
 
+            # Don't process lines that are outside the matching block
+            if not inside_matching:
+                continue
+
+            # Count the brackets in the line
+            delim_count += line.count("{")
             delim_count -= line.count("}")
 
-            if delim_count > 0 and not first_line_of_block:
-                delim_count += line.count("{")
-                selected_lines.append(i)
+            # If we closed the opening bracket (= dropped below 0), the matching block has ended
+            if delim_count <= 0:
+                inside_matching = False
+
+            # Append the lines inside the matching block, skipping the first matching
+            if inside_matching and not first_line_of_block:
+                selected_lines.append(line_number)
 
     if from_token and to_token:
         i = 0

diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1 @@
+-e .
-Original file line number
+Diff line change
@@ Expand Up / @@ -7,3 +7,4 @@ __pycache__/ @@
     .eggs/
     .pytest_cache/
     .DS_Store
+    .venv/