module RDoc::Text

Methods for manipulating comment text



MARKUP_FORMAT: Maps markup formats to classes that can parse them. If the format is unknown, “rdoc” format is used.


TO_HTML_CHARACTERS: Maps an encoding to a Hash of characters properly transcoded for that encoding.

See also encode_fallback.

Public Class Methods

encode_fallback(character, encoding, fallback) click to toggle source

Transcodes character to encoding with a fallback character.

# File lib/rdoc/text.rb, line 49
# Transcodes +character+ to +encoding+, substituting +fallback+ when
# +character+ cannot be represented in the target encoding.
#
# Returns the transcoded String.
def self.encode_fallback character, encoding, fallback
  # :fallback covers the exact character; :undef/:replace cover anything else
  # that has no mapping in +encoding+.
  character.encode(encoding, :fallback => { character => fallback },
                   :undef => :replace, :replace => fallback)
end

Public Instance Methods

expand_tabs(text) click to toggle source

Expands tab characters in text to spaces, padding each tab out to the next eight-column tab stop

# File lib/rdoc/text.rb, line 57
# Expands each tab character in +text+ to spaces, padding out to the next
# eight-column tab stop.
#
# Returns the expanded lines joined into a single String.
def expand_tabs text
  expanded = []

  text.each_line do |line|
    # gsub! returns nil once no tab is left, which ends the loop.
    nil while line.gsub!(/(?:\G|\r)((?:.{8})*?)([^\t\r\n]{0,7})\t/) do
      # $1 is the run of complete 8-column groups, $2 the partial group
      # before the tab; pad the partial group up to 8 columns.
      r = "#{$1}#{$2}#{' ' * (8 - $2.size)}"
      r = RDoc::Encoding.change_encoding r, text.encoding
      r
    end

    expanded << line
  end

  expanded.join
end

flush_left(text) click to toggle source

Flushes text left by removing the smallest leading indentation found on any non-blank line

# File lib/rdoc/text.rb, line 76
# Flushes +text+ left by removing up to N leading spaces from every line,
# where N is the smallest offset of the first non-whitespace character on
# any line.
#
# Returns a new String.
def flush_left text
  indent = 9999

  text.each_line do |line|
    # Lines with no non-whitespace character do not constrain the indent.
    line_indent = line =~ /\S/ || 9999
    indent = line_indent if indent > line_indent
  end

  empty = ''
  empty = RDoc::Encoding.change_encoding empty, text.encoding

  text.gsub(/^ {0,#{indent}}/, empty)
end
markup(text) click to toggle source

Convert a string in markup format into HTML.

Requires the including class to implement formatter

# File lib/rdoc/text.rb, line 95
# Parses +text+ and has the resulting document accept +formatter+, returning
# whatever that produces.
#
# When <tt>@store.rdoc.options</tt> is set and supplies a locale, +text+ is
# translated to that locale first.
#
# Requires the including class to implement +formatter+.
def markup text
  if @store.rdoc.options
    locale = @store.rdoc.options.locale
  else
    locale = nil
  end
  if locale
    # NOTE(review): RDoc::I18n::Text is reconstructed from the dangling
    # `i18n_text =` assignment -- confirm against lib/rdoc/text.rb.
    i18n_text = RDoc::I18n::Text.new(text)
    text = i18n_text.translate(locale)
  end
  parse(text).accept formatter
end
normalize_comment(text) click to toggle source

Strips stars and hashes, expands tabs, flushes text to the left, then strips leading and trailing newlines

# File lib/rdoc/text.rb, line 111
# Normalizes comment +text+ by applying, in order: strip_stars,
# strip_hashes, expand_tabs, flush_left and strip_newlines.
#
# Returns +text+ itself when it is empty, otherwise the normalized String.
def normalize_comment text
  return text if text.empty?

  text = strip_stars    text
  text = strip_hashes   text
  text = expand_tabs    text
  text = flush_left     text
  text = strip_newlines text
  text
end
parse(text, format = 'rdoc') click to toggle source

Normalizes text then builds a RDoc::Markup::Document from it

# File lib/rdoc/text.rb, line 125
# Normalizes +text+ and parses it with the parser registered for +format+
# in MARKUP_FORMAT.
#
# +text+ is returned unchanged when it is already an RDoc::Markup::Document,
# and an RDoc::Comment is asked to parse itself. Text that is empty or only
# newlines after normalization yields an empty RDoc::Markup::Document, so
# callers such as markup can always call +accept+ on the result.
def parse text, format = 'rdoc'
  return text if RDoc::Markup::Document === text
  return text.parse if RDoc::Comment === text

  text = normalize_comment text # TODO remove, should not be necessary

  return RDoc::Markup::Document.new if text =~ /\A\n*\z/

  MARKUP_FORMAT[format].parse text
end
snippet(text, limit = 100) click to toggle source

The first limit characters of text as HTML

# File lib/rdoc/text.rb, line 139
# Returns the first +limit+ characters of +text+ converted to HTML.
#
# NOTE(review): the converter call is reconstructed from the garbled
# fragment `, limit).convert document`; it presumably requires the
# including class to implement +options+ -- confirm against
# lib/rdoc/text.rb.
def snippet text, limit = 100
  document = parse text

  RDoc::Markup::ToHtmlSnippet.new(options, limit).convert document
end
strip_hashes(text) click to toggle source

Strips leading # characters from text

# File lib/rdoc/text.rb, line 148
# Replaces the leading run of # characters on each line of +text+ with the
# same number of spaces, then empties lines left with only whitespace.
#
# Returns +text+ unchanged when any line's first non-whitespace character is
# not a #.
def strip_hashes text
  # The atomic group stops \s* from backtracking, so this only matches a
  # line whose first non-whitespace character is not '#'.
  return text if text =~ /^(?>\s*)[^\#]/

  empty = ''
  empty = RDoc::Encoding.change_encoding empty, text.encoding

  # Hashes become spaces rather than being deleted so columns are preserved.
  text.gsub(/^\s*(#+)/) { $1.tr '#', ' ' }.gsub(/^\s+$/, empty)
end
strip_newlines(text) click to toggle source

Strips leading and trailing \n characters from text

# File lib/rdoc/text.rb, line 160
# Strips leading and trailing "\n" characters from +text+.
#
# Returns a new String; interior newlines are kept.
def strip_newlines text
  text.gsub(/\A\n*(.*?)\n*\z/m) do $1 end # block preserves String encoding
end
strip_stars(text) click to toggle source

Strips /* */ style comments

# File lib/rdoc/text.rb, line 167
# Blanks out the markers of a /* */ style comment in +text+: the opening
# "/*", the closing "*/" and the leading "*" of each line are replaced with
# spaces of equal length, and any "Document-method: name" directive is
# removed. Lines left with only whitespace are emptied.
#
# Returns +text+ unchanged when it contains no /* ... */ comment.
def strip_stars text
  return text unless text =~ %r%/\*.*\*/%m

  encoding = text.encoding

  text = text.gsub %r%Document-method:\s+[\w:.#=!?|^&<>~+\-/*\%@`\[\]]+%, ''

  space = ' '
  space = RDoc::Encoding.change_encoding space, encoding if encoding

  # Markers are overwritten with spaces, not deleted, so the remaining text
  # keeps its column positions.
  text.sub!  %r%/\*+%       do space * $&.length end
  text.sub!  %r%\*+/%       do space * $&.length end
  text.gsub! %r%^[ \t]*\*%m do space * $&.length end

  empty = ''
  empty = RDoc::Encoding.change_encoding empty, encoding if encoding
  text.gsub(/^\s+$/, empty)
end

# Converts ampersand, dashes, ellipsis, quotes, copyright and registered
# trademark symbols in +text+ to properly encoded characters.

def to_html text
  html = (''.encode text.encoding).dup

  encoded = RDoc::Text::TO_HTML_CHARACTERS[text.encoding]

  s = text
  insquotes = false
  indquotes = false
  after_word = nil

  until s.eos? do
    when s.scan(/<(tt|code)>.*?<\/\1>/) then # skip contents of tt
      html << s.matched.gsub('\\\\', '\\')
    when s.scan(/<(tt|code)>.*?/) then
      warn "mismatched <#{s[1]}> tag" # TODO signal file/line
      html << s.matched
    when s.scan(/<[^>]+\/?s*>/) then # skip HTML tags
      html << s.matched
    when s.scan(/\\(\S)/) then # unhandled suppressed crossref
      html << s[1]
      after_word = nil
    when s.scan(/\.\.\.(\.?)/) then
      html << s[1] << encoded[:ellipsis]
      after_word = nil
    when s.scan(/\(c\)/) then
      html << encoded[:copyright]
      after_word = nil
    when s.scan(/\(r\)/) then
      html << encoded[:trademark]
      after_word = nil
    when s.scan(/---/) then
      html << encoded[:em_dash]
      after_word = nil
    when s.scan(/--/) then
      html << encoded[:en_dash]
      after_word = nil
    when s.scan(/&quot;|"/) then
      html << encoded[indquotes ? :close_dquote : :open_dquote]
      indquotes = !indquotes
      after_word = nil
    when s.scan(/`%x`/) then # backtick double quote
      html << encoded[:open_dquote]
      after_word = nil
    when s.scan(/''/) then # tick double quote
      html << encoded[:close_dquote]
      after_word = nil
    when s.scan(/'/) then # single quote
      if insquotes
        html << encoded[:close_squote]
        insquotes = false
      elsif after_word
        # Mary's dog, my parents' house: do not start paired quotes
        html << encoded[:close_squote]
        html << encoded[:open_squote]
        insquotes = true

      after_word = nil
    else # advance to the next potentially significant character
      match = s.scan(/.+?(?=[<\\.("'`&-])/) #"

      if match then
        html << match
        after_word = match =~ /\w$/
        html <<