ruby · nobu · Mar 8, 2025 · Feb 22, 2025 · Feb 27, 2025 · Mar 5, 2025
diff --git a/lib/rdoc/generator/darkfish.rb b/lib/rdoc/generator/darkfish.rb
@@ -700,6 +700,12 @@ def template_for file, page = true, klass = ERB
     template
   end
 
+  # :stopdoc:
+  ParagraphExcerptRegexpOther = %r[\b\w[^./:]++\.]
+  # Unicode variant of the pattern above: begins at \p{letter} instead of \w.
+  ParagraphExcerptRegexpUnicode = %r[\b\p{letter}[^./:]++\.]
+  # :startdoc:
+
   # Returns an excerpt of the comment for usage in meta description tags
   def excerpt(comment)
     text = case comment
@@ -711,14 +717,22 @@ def excerpt(comment)
 
     # Match from a capital letter to the first period, discarding any links, so
     # that we don't end up matching badges in the README
-    first_paragraph_match = text.match(/[A-Z][^\.:\/]+\./)
-    return text[0...150].gsub(/\n/, " ").squeeze(" ") unless first_paragraph_match
+    pattern = ParagraphExcerptRegexpUnicode
+    begin
+      first_paragraph_match = text.match(pattern)
+    rescue Encoding::CompatibilityError
+      # The doc contains non-ASCII text in an encoding that is not Unicode-based.
+      raise if pattern == ParagraphExcerptRegexpOther
+      pattern = ParagraphExcerptRegexpOther
+      retry
+    end
+    return text[0...150].tr_s("\n", " ").squeeze(" ") unless first_paragraph_match
 
     extracted_text = first_paragraph_match[0]
-    second_paragraph = first_paragraph_match.post_match.match(/[A-Z][^\.:\/]+\./)
+    second_paragraph = text.match(pattern, first_paragraph_match.end(0))
     extracted_text << " " << second_paragraph[0] if second_paragraph
 
-    extracted_text[0...150].gsub(/\n/, " ").squeeze(" ")
+    extracted_text[0...150].tr_s("\n", " ").squeeze(" ")
   end
 
   def generate_ancestor_list(ancestors, klass)

diff --git a/test/rdoc/test_rdoc_generator_darkfish.rb b/test/rdoc/test_rdoc_generator_darkfish.rb
@@ -449,6 +449,26 @@ def test_meta_tags_for_rdoc_files
     )
   end
 
+  def test_meta_tags_for_markdown_files_paragraph # first sentence begins with a lowercase letter
+    top_level = @store.add_file("README.md", parser: RDoc::Parser::Simple)
+    top_level.comment = <<~MARKDOWN
+      # Distributed Ruby: dRuby
+
+      dRuby is a distributed object system for Ruby.  It allows an object.
+    MARKDOWN
+
+    @g.generate
+
+    content = File.binread("README_md.html")
+    assert_include( # description should carry both sentences of the paragraph
+      content,
+      "<meta name=\"description\" content=\"" \
+      "README: dRuby " \
+      "dRuby is a distributed object system for Ruby. " \
+      "It allows an object."
+    )
+  end
+
   def test_meta_tags_for_markdown_files
     top_level = @store.add_file("MyPage.md", parser: RDoc::Parser::Markdown)
     top_level.comment = <<~MARKDOWN