Skip to content

enhancements to body parsing #29

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 6, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ jobs:

- uses: actions/upload-artifact@v2
with:
name: .mypy-coverage_${{ matrix.platform }}_${{ matrix.python-version }}
path: .mypy-coverage/
name: .coverage.mypy_${{ matrix.platform }}_${{ matrix.python-version }}
path: .coverage.mypy/


pypi:
Expand Down
1 change: 1 addition & 0 deletions orgparse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ def load(path, env=None):
:rtype: :class:`orgparse.node.OrgRootNode`

"""
path = str(path) # in case of pathlib.Path
if isinstance(path, basestring):
orgfile = codecs.open(path, encoding='utf8')
filename = path
Expand Down
116 changes: 116 additions & 0 deletions orgparse/extra.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import re
from typing import List, Sequence, Dict, Iterator, Iterable, Union, Optional


# Matches an org-mode table separator line such as ``|---+---|``: optional
# leading whitespace, then runs of dashes joined by ``+`` between two pipes.
RE_TABLE_SEPARATOR = re.compile(r'\s*\|(\-+\+)*\-+\|')
# Matches the start of a table data row: optional leading whitespace, a pipe,
# at least one non-pipe character (captured as group 1), then another pipe.
# The cell group is deliberately NOT repeated: ``([^|]+)+`` is ambiguous, so a
# line that starts with ``|`` but never closes the cell would make the regex
# engine backtrack exponentially.  A single ``[^|]+`` accepts exactly the same
# lines and captures the same text.
RE_TABLE_ROW = re.compile(r'\s*\|([^|]+)\|')
# When true, surrounding whitespace is stripped from every parsed table cell.
STRIP_CELL_WHITESPACE = True


Row = Sequence[str]

class Table:
    """
    A single org-mode table, backed by its raw source lines.

    Lines matching ``RE_TABLE_SEPARATOR`` split the table into blocks; every
    other line is parsed into a row, i.e. a list of cell strings.
    """

    def __init__(self, lines: List[str]) -> None:
        """
        :arg lines: raw text lines making up the table, separators included.
        """
        self._lines = lines

    @property
    def blocks(self) -> Iterator[Sequence[Row]]:
        """
        Iterate over the groups of rows delimited by separator lines.

        A separator that opens the table does not produce an empty leading
        block and no empty trailing block is produced; an empty block is
        yielded only when one separator directly follows another.
        """
        current: List[Row] = []
        seen_separator = False
        for row in self._pre_rows():
            if row is None:
                # Only the very first separator may be swallowed silently,
                # and only when no rows precede it.
                if seen_separator or current:
                    yield current
                seen_separator = True
                current = []
            else:
                current.append(row)
        if current:
            yield current

    def __iter__(self) -> Iterator[Row]:
        """Iterate over the data rows; same as :attr:`rows`."""
        return self.rows

    @property
    def rows(self) -> Iterator[Row]:
        """Iterate over all data rows, skipping separator lines."""
        for row in self._pre_rows():
            if row is not None:
                yield row

    def _pre_rows(self) -> Iterator[Optional[Row]]:
        """
        Parse every raw line in order.

        Yields ``None`` for a separator line and a list of cell strings for
        anything else.
        """
        for line in self._lines:
            if RE_TABLE_SEPARATOR.match(line):
                yield None
                continue
            # Drop the outer pipes, then split the remainder into cells.
            cells = line.strip().strip('|').split('|')
            if STRIP_CELL_WHITESPACE:
                cells = [cell.strip() for cell in cells]
            yield cells
        # TODO use iparse helper?

    @property
    def as_dicts(self) -> 'AsDictHelper':
        """
        View the table as dictionaries keyed by column name.

        Column names are taken from the heading, so the table must consist of
        exactly two blocks: a single heading row followed by the data rows.

        :raises RuntimeError: if the table does not have exactly two blocks,
            if the heading block is not a single row, or if the heading
            contains duplicate column names.
        """
        blocks = list(self.blocks)
        if len(blocks) != 2:
            raise RuntimeError('Need two-block table to non-ambiguously guess column names')
        header_rows, data_rows = blocks
        if len(header_rows) != 1:
            raise RuntimeError(f'Need single row heading to guess column names, got: {header_rows}')
        columns = header_rows[0]
        # Raised explicitly (rather than asserted) so the check survives
        # ``python -O`` and matches the other validation errors above.
        if len(set(columns)) != len(columns):
            raise RuntimeError(f'Duplicate column names: {columns}')
        return AsDictHelper(
            columns=columns,
            rows=data_rows,
        )


class AsDictHelper:
    """
    Presents table rows as dictionaries keyed by column name.

    ``columns`` holds the column names; iterating the helper produces one
    ``{column: cell}`` dictionary per row.
    """

    def __init__(self, columns: Sequence[str], rows: Sequence[Row]) -> None:
        """
        :arg columns: column names, in table order.
        :arg rows: data rows, each a sequence of cell strings.
        """
        self._rows = rows
        self.columns = columns

    def __iter__(self) -> Iterator[Dict[str, str]]:
        """Yield one dictionary per row, pairing column names with cells."""
        # zip() stops at the shorter side, so surplus cells (or surplus
        # column names) are dropped without an error.
        for cells in self._rows:
            yield dict(zip(self.columns, cells))


class Gap:
    """Marker for a stretch of text that is not a recognised rich element."""
    # todo later, add indices etc


Rich = Union[Table, Gap]
def to_rich_text(text: str) -> Iterator[Rich]:
    '''
    Split org-mode text into 'rich' elements (tables, later lists etc.)
    interleaved with gaps.

    Consecutive lines that look like table rows or table separators are
    collected into one :class:`Table`; every other run of lines is reported
    as a single :class:`Gap`, which does not keep the lines it covers.

    NOTE: do not rely on the number of items produced by this function,
    it might change in the future when more types are supported.

    At the moment only tables are supported.
    '''
    pending: List[str] = []
    in_table = False  # kind of the lines currently held in ``pending``

    for line in text.splitlines(keepends=True):
        is_table_line = bool(RE_TABLE_ROW.match(line) or RE_TABLE_SEPARATOR.match(line))
        if is_table_line != in_table:
            # The kind of line changed: flush whatever was collected so far.
            if pending:
                finished, pending = pending, []
                yield Table(finished) if in_table else Gap()
            in_table = is_table_line
        pending.append(line)

    # Flush the trailing run, if any.
    if pending:
        yield Table(pending) if in_table else Gap()
82 changes: 50 additions & 32 deletions orgparse/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from .date import OrgDate, OrgDateClock, OrgDateRepeatedTask, parse_sdc
from .inline import to_plain_text
from .extra import to_rich_text, Rich
from .utils.py3compat import PY3, unicode


Expand Down Expand Up @@ -353,6 +354,8 @@ class OrgBaseNode(Sequence):
5
"""

_body_lines: List[str] # set by the child classes

def __init__(self, env, index=None) -> None:
"""
Create an :class:`OrgBaseNode` object.
Expand Down Expand Up @@ -703,6 +706,41 @@ def shallow_tags(self) -> Set[str]:
"""
return self._get_tags(inher=False)

@staticmethod
def _get_text(text, format='plain'):
    """
    Render ``text`` in the requested ``format``.

    ``'raw'`` returns the text untouched, ``'plain'`` passes it through
    ``to_plain_text`` and ``'rich'`` through ``to_rich_text``.

    :raises ValueError: for any other ``format`` value.
    """
    if format == 'raw':
        return text
    if format == 'plain':
        return to_plain_text(text)
    if format == 'rich':
        return to_rich_text(text)
    raise ValueError('format={0} is not supported.'.format(format))

def get_body(self, format='plain') -> str:
    """
    Return the body text of this node, rendered via ``_get_text``.

    The body lines are joined with newlines before rendering; a node
    without any lines yields an empty string whatever the format.

    See also: :meth:`get_heading`.

    """
    # NOTE(review): annotated as ``str``, but the result is whatever
    # ``_get_text`` returns for the given format.
    if not self._lines:
        return ''
    joined = '\n'.join(self._body_lines)
    return self._get_text(joined, format)

@property
def body(self) -> str:
    """
    Body text of this node in the default format.

    Alias of ``.get_body(format='plain')``.
    """
    return self.get_body()

@property
def body_rich(self) -> Iterator[Rich]:
    """Alias of ``.get_body(format='rich')``, typed as an iterator of rich elements."""
    r = self.get_body(format='rich')
    # get_body is annotated as returning str, so the result is cast for the
    # benefit of type checkers; cast() does nothing at runtime.
    return cast(Iterator[Rich], r)  # meh..

@property
def heading(self) -> str:
    """
    Heading text of the node.

    Not implemented at this level: accessing it raises NotImplementedError.
    """
    raise NotImplementedError

def is_root(self):
"""
Return ``True`` when it is a root node.
Expand Down Expand Up @@ -757,7 +795,15 @@ class OrgRootNode(OrgBaseNode):

"""

# getter
@property
def _body_lines(self) -> List[str]:  # type: ignore[override]
    """Body lines of the root node: all of its own lines."""
    # todo hacky..
    # for root node, the body is whatever is before the first node
    return self._lines

@property
def heading(self) -> str:
    """Heading of the root node: always the empty string."""
    return ''

@property
def level(self):
Expand All @@ -766,8 +812,6 @@ def level(self):
def get_parent(self, max_level=None):
return None

# misc

def is_root(self):
return True

Expand Down Expand Up @@ -909,17 +953,6 @@ def _iparse_repeated_tasks(self, ilines: Iterator[str]) -> Iterator[str]:
\[ (?P<date> [^\]]+) \]''',
re.VERBOSE)

# getter

@staticmethod
def _get_text(text, format='plain'):
if format == 'plain':
return to_plain_text(text)
elif format == 'raw':
return text
else:
raise ValueError('format={0} is not supported.'.format(format))

def get_heading(self, format='plain'):
"""
Return a string of head text without tags and TODO keywords.
Expand All @@ -942,26 +975,11 @@ def get_heading(self, format='plain'):
"""
return self._get_text(self._heading, format)

def get_body(self, format='plain'):
"""
Return a string of body text.

See also: :meth:`get_heading`.

"""
return self._get_text(
'\n'.join(self._body_lines), format) if self._lines else ''

@property
def heading(self):
def heading(self) -> str:
"""Alias of ``.get_heading(format='plain')``."""
return self.get_heading()

@property
def body(self):
"""Alias of ``.get_body(format='plain')``."""
return self.get_body()

@property
def level(self):
return self._level
Expand Down Expand Up @@ -1022,7 +1040,7 @@ def todo(self) -> Optional[str]:
"""
return self._todo

def get_property(self, key, val=None):
def get_property(self, key, val=None) -> Optional[PropertyValue]:
"""
Return property named ``key`` if exists or ``val`` otherwise.

Expand All @@ -1036,7 +1054,7 @@ def get_property(self, key, val=None):
return self._properties.get(key, val)

@property
def properties(self):
def properties(self) -> Dict[str, PropertyValue]:
"""
Node properties as a dictionary.

Expand Down
12 changes: 12 additions & 0 deletions orgparse/tests/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,18 @@ def test_empty_heading() -> None:
assert h.tags == {'sometag'}


def test_root() -> None:
    """Text placed before the first heading ends up in the root node's body."""
    root = loads('''
#+STARTUP: hidestars
Whatever
# comment
* heading 1
'''.strip())
    # Only the single heading becomes a child of the root.
    assert len(root.children) == 1
    # todo not sure if should strip special comments??
    assert root.body.endswith('Whatever\n# comment')


def test_stars():
# https://github.com/karlicoss/orgparse/issues/7#issuecomment-533732660
root = loads("""
Expand Down
89 changes: 89 additions & 0 deletions orgparse/tests/test_rich.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
'''
Tests for rich formatting: tables etc.
'''
from .. import load, loads
from ..extra import Table

import pytest # type: ignore


def test_table() -> None:
    """Check table detection in the root body and the Table block/row/dict views."""
    root = loads('''
| | | |
| | "heading" | |
| | | |
|-------+-----------+-----|
| reiwf | fef | |
|-------+-----------+-----|
|-------+-----------+-----|
| aba | caba | 123 |
| yeah | | X |

|------------------------+-------|
| when | count |
| datetime | int |
|------------------------+-------|
| | -1 |
| [2020-11-05 Thu 23:44] | |
| [2020-11-06 Fri 01:00] | 1 |
|------------------------+-------|

some irrelevant text

| simple |
|--------|
| value1 |
| value2 |
''')

    # The unpacking only succeeds if the rich body has exactly seven items.
    [gap1, t1, gap2, t2, gap3, t3, gap4] = root.body_rich

    # NOTE(review): the tables unpacked above are immediately replaced by
    # tables built from explicit slices of the raw root lines, so the
    # assertions below exercise Table directly rather than the body_rich items.
    t1 = Table(root._lines[1:10])
    t2 = Table(root._lines[11:19])
    t3 = Table(root._lines[22:26])

    assert ilen(t1.blocks) == 4
    # Two consecutive separator lines produce an empty block between them.
    assert list(t1.blocks)[2] == []
    assert ilen(t1.rows) == 6

    with pytest.raises(RuntimeError):
        list(t1.as_dicts)  # not sure what should it be

    # The separators opening and closing the second table add no blocks.
    assert ilen(t2.blocks) == 2
    assert ilen(t2.rows) == 5
    assert list(t2.rows)[3] == ['[2020-11-05 Thu 23:44]', '']


    # Single heading row plus data rows: the only shape as_dicts accepts.
    assert ilen(t3.blocks) == 2
    assert list(t3.rows) == [['simple'], ['value1'], ['value2']]
    assert t3.as_dicts.columns == ['simple']
    assert list(t3.as_dicts) == [{'simple': 'value1'}, {'simple': 'value2'}]


def test_table_2() -> None:
    """A table inside a heading's body, surrounded by other text, is still found."""
    root = loads('''
* item

#+tblname: something
| date | value | comment |
|----------------------+-------+-------------------------------|
| 14.04.17 | 11 | aaaa |
| May 26 2017 08:00 | 12 | what + about + pluses? |
| May 26 09:00 - 10:00 | 13 | time is |

some comment

#+BEGIN_SRC python :var fname="plot.png" :var table=something :results file
fig.savefig(fname)
return fname
#+END_SRC

#+RESULTS:
[[file:plot.png]]
''')
    # Exactly three rich items are expected; only the middle one is inspected.
    [_, t, _] = root.children[0].body_rich
    # One dictionary per data row below the heading row.
    assert ilen(t.as_dicts) == 3


def ilen(x) -> int:
    """Return the number of items produced by iterating over ``x`` (consumes iterators)."""
    return sum(1 for _ in x)
Loading