Skip to content

Commit 6142560

Browse files
committed
Add text_rich()
1 parent eb8a14d commit 6142560

File tree

3 files changed

+52
-10
lines changed

3 files changed

+52
-10
lines changed

README.md

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ Python binding to the rust [rust-html2text](https://github.com/jugglerchris/rust
99
- [Installation](#installation)
1010
- [Usage](#usage)
1111
- [text_markdown()](#1-text_markdown)
12-
- [text_plain()](#2-text_plain)
12+
- [text_plain()](#2-text_plain)
13+
- [text_rich()](#3-text_rich)
1314

1415
## Installation
1516

@@ -29,12 +30,12 @@ def text_markdown(html: str, width: int = 100):
2930
3031
"""
3132
```
32-
*example*
33+
example:
3334
```python
3435
import html2text_rs
3536
import requests
3637

37-
resp = requests.get("https://tmz.com")
38+
resp = requests.get("https://en.wikipedia.org/wiki/AGM-88_HARM")
3839

3940
text_markdown = html2text_rs.text_markdown(resp.text)
4041
print(text_markdown)
@@ -50,13 +51,34 @@ def text_plain(html: str, width: int = 100):
5051
5152
"""
5253
```
53-
*example*
54+
example:
5455
```python
5556
import html2text_rs
5657
import requests
5758

58-
resp = requests.get("https://tmz.com")
59+
resp = requests.get("https://en.wikipedia.org/wiki/AGM-88_HARM")
5960

6061
text_plain = html2text_rs.text_plain(resp.text)
6162
print(text_plain)
6263
```
64+
### 3. text_rich()
65+
```python
66+
def text_rich(html: str, width: int = 100):
67+
"""Convert HTML to rich text.
68+
69+
Args:
70+
html (str): input html text.
71+
width (int): wrap text to width columns. Default is 100.
72+
73+
"""
74+
```
75+
example:
76+
```python
77+
import html2text_rs
78+
import requests
79+
80+
resp = requests.get("https://en.wikipedia.org/wiki/AGM-88_HARM")
81+
82+
text_rich = html2text_rs.text_rich(resp.text)
83+
print(text_rich)
84+
```

src/lib.rs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
use html2text::{from_read, from_read_with_decorator, render::text_renderer::TrivialDecorator};
1+
use html2text::{
2+
from_read, from_read_with_decorator,
3+
render::text_renderer::{RichDecorator, TrivialDecorator},
4+
};
25
use pyo3::prelude::*;
36

47
/// Convert HTML to markdown text
@@ -19,9 +22,19 @@ fn text_plain(html: String, width: usize, py: Python) -> PyResult<String> {
1922
Ok(text)
2023
}
2124

25+
/// Convert HTML to rich text
26+
#[pyfunction]
27+
#[pyo3(signature=(html, width=100))]
28+
fn text_rich(html: String, width: usize, py: Python) -> PyResult<String> {
29+
let text =
30+
py.allow_threads(|| from_read_with_decorator(html.as_bytes(), width, RichDecorator::new()));
31+
Ok(text)
32+
}
33+
2234
#[pymodule]
2335
fn html2text_rs(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
24-
m.add_function(wrap_pyfunction!(text_plain, m)?)?;
2536
m.add_function(wrap_pyfunction!(text_markdown, m)?)?;
37+
m.add_function(wrap_pyfunction!(text_plain, m)?)?;
38+
m.add_function(wrap_pyfunction!(text_rich, m)?)?;
2639
Ok(())
2740
}

tests/test_html2text_rs.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
import pytest
2-
3-
import html2text_rs
1+
import html2text_rs # type: ignore
42

53

64
def test_text_markdown():
@@ -19,3 +17,12 @@ def test_text_plain():
1917
assert (
2018
result == expected_output
2119
), f"\nExpected:\n {expected_output} \nGot:\n {result}"
20+
21+
22+
def test_text_rich():
23+
html = "<h1>Hello World</h1><p>This is a test.</p>"
24+
expected_output = "# Hello World\n\nThis is a test.\n"
25+
result = html2text_rs.text_rich(html, width=80)
26+
assert (
27+
result == expected_output
28+
), f"\nExpected:\n {expected_output} \nGot:\n {result}"

0 commit comments

Comments
 (0)