Skip to content

Commit 3351d5b

Browse files
committed
Relax paragraph pattern
Fix #1298. Not all paragraphs in documentation start with a capital letter, as ordinary English text does.
1 parent 0293887 commit 3351d5b

File tree

2 files changed

+36
-3
lines changed

2 files changed

+36
-3
lines changed

lib/rdoc/generator/darkfish.rb

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -700,7 +700,11 @@ def template_for file, page = true, klass = ERB
700700
template
701701
end
702702

703-
ParagraphExcerptRegexp = /[A-Z][^\.:\/]+\./
703+
# :stopdoc:
704+
ParagraphExcerptRegexpOther = %r[\b\w[^./:]++\.]
705+
# use \p/\P{letter} instead of \w/\W in Unicode
706+
ParagraphExcerptRegexpUnicode = %r[\b\p{letter}[^./:]++\.]
707+
# :startdoc:
704708

705709
# Returns an excerpt of the comment for usage in meta description tags
706710
def excerpt(comment)
@@ -713,11 +717,19 @@ def excerpt(comment)
713717

714718
# Match from the start of a word to the first period, discarding any links, so
715719
# that we don't end up matching badges in the README
716-
first_paragraph_match = text.match(ParagraphExcerptRegexp)
720+
pattern = ParagraphExcerptRegexpUnicode
721+
begin
722+
first_paragraph_match = text.match(pattern)
723+
rescue Encoding::CompatibilityError
724+
# The doc is non-ASCII text in an encoding that is not Unicode-based.
725+
# Re-raise if the fallback pattern was already in use, so that retry runs at most once.
raise unless pattern.equal?(ParagraphExcerptRegexpUnicode)
726+
pattern = ParagraphExcerptRegexpOther
727+
retry
728+
end
717729
return text[0...150].tr_s("\n", " ").squeeze(" ") unless first_paragraph_match
718730

719731
extracted_text = first_paragraph_match[0]
720-
second_paragraph = first_paragraph_match.post_match.match(ParagraphExcerptRegexp)
732+
second_paragraph = text.match(pattern, first_paragraph_match.end(0))
721733
extracted_text << " " << second_paragraph[0] if second_paragraph
722734

723735
extracted_text[0...150].tr_s("\n", " ").squeeze(" ")

test/rdoc/test_rdoc_generator_darkfish.rb

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,27 @@ def test_meta_tags_for_rdoc_files
449449
)
450450
end
451451

452+
def test_meta_tags_for_markdwon_files_paragraph
453+
top_level = @store.add_file("README.md", parser: RDoc::Parser::Simple)
454+
top_level.comment = <<~MARKDOWN
455+
# Distributed Ruby: dRuby
456+
457+
dRuby is a distributed object system for Ruby. It allows an object in one
458+
Ruby process to invoke methods on an object in another Ruby process.
459+
MARKDOWN
460+
461+
@g.generate
462+
463+
content = File.binread("README_md.html")
464+
assert_include(
465+
content,
466+
"<meta name=\"description\" content=\"" \
467+
"README: dRuby " \
468+
"dRuby is a distributed object system for Ruby. " \
469+
"It allows an object in one Ruby process to invoke methods on an object"
470+
)
471+
end
472+
452473
def test_meta_tags_for_markdown_files
453474
top_level = @store.add_file("MyPage.md", parser: RDoc::Parser::Markdown)
454475
top_level.comment = <<~MARKDOWN

0 commit comments

Comments
 (0)