diff --git a/lib/rdoc/generator/darkfish.rb b/lib/rdoc/generator/darkfish.rb index e4e20831f3..558e58c53b 100644 --- a/lib/rdoc/generator/darkfish.rb +++ b/lib/rdoc/generator/darkfish.rb @@ -700,6 +700,12 @@ def template_for file, page = true, klass = ERB template end + # :stopdoc: + ParagraphExcerptRegexpOther = %r[\b\w[^./:]++\.] + # use \p/\P{letter} instead of \w/\W in Unicode + ParagraphExcerptRegexpUnicode = %r[\b\p{letter}[^./:]++\.] + # :startdoc: + # Returns an excerpt of the comment for usage in meta description tags def excerpt(comment) text = case comment @@ -711,14 +717,22 @@ def excerpt(comment) # Match from a capital letter to the first period, discarding any links, so # that we don't end up matching badges in the README - first_paragraph_match = text.match(/[A-Z][^\.:\/]+\./) - return text[0...150].gsub(/\n/, " ").squeeze(" ") unless first_paragraph_match + pattern = ParagraphExcerptRegexpUnicode + begin + first_paragraph_match = text.match(pattern) + rescue Encoding::CompatibilityError + # The doc is non-ASCII text and encoded in other than Unicode base encodings. + raise if pattern == ParagraphExcerptRegexpOther + pattern = ParagraphExcerptRegexpOther + retry + end + return text[0...150].tr_s("\n", " ").squeeze(" ") unless first_paragraph_match extracted_text = first_paragraph_match[0] - second_paragraph = first_paragraph_match.post_match.match(/[A-Z][^\.:\/]+\./) + second_paragraph = text.match(pattern, first_paragraph_match.end(0)) extracted_text << " " << second_paragraph[0] if second_paragraph - extracted_text[0...150].gsub(/\n/, " ").squeeze(" ") + extracted_text[0...150].tr_s("\n", " ").squeeze(" ") end def generate_ancestor_list(ancestors, klass) diff --git a/test/rdoc/test_rdoc_generator_darkfish.rb b/test/rdoc/test_rdoc_generator_darkfish.rb index ed84543ee1..680c663287 100644 --- a/test/rdoc/test_rdoc_generator_darkfish.rb +++ b/test/rdoc/test_rdoc_generator_darkfish.rb @@ -449,6 +449,26 @@ def test_meta_tags_for_rdoc_files ) end + def test_meta_tags_for_markdwon_files_paragraph + top_level = @store.add_file("README.md", parser: RDoc::Parser::Simple) + top_level.comment = <<~MARKDOWN + # Distributed Ruby: dRuby + + dRuby is a distributed object system for Ruby. It allows an object. + MARKDOWN + + @g.generate + + content = File.binread("README_md.html") + assert_include( + content, + "