MarkupText


1   package hudson;
2   
3   import java.util.ArrayList  ;
4   import java.util.Collections  ;
5   import java.util.List  ;
6   import java.util.regex.Matcher  ;
7   import java.util.regex.Pattern  ;
8   
/**
 * Mutable representation of a string with HTML mark up.
 *
 * <p>
 * This class is used to put mark up on plain text: the plain text is fixed at
 * construction time, mark up is accumulated through {@link #addMarkup(int, int, String, String)}
 * (directly or via {@link SubText#surroundWith(String, String)}), and
 * {@link #toString()} renders the combined result.
 * See <a href="https://hudson.dev.java.net/source/browse/hudson/hudson/main/core/src/test/java/hudson/MarkupTextTest.java?view=markup">
 * the test code</a> for a typical usage and its result.
 *
 * <p>
 * Instances are not synchronized.
 *
 * @author Kohsuke Kawaguchi
 * @since 1.70
 */
public class MarkupText {
    /** The plain text that mark up is applied to; never modified. */
    private final String text;

    /**
     * Added mark up tags, two per {@link #addMarkup(int, int, String, String)} call,
     * kept in insertion order. {@link #toString()} sorts a copy of this list.
     */
    private final List<Tag> tags = new ArrayList<Tag>();

    /** Sequence number handed to the next pair of tags; records insertion order. */
    private int nextSeq;

    /**
     * Represents one mark up inserted into text.
     *
     * <p>
     * Besides its own position, a tag remembers the position of its partner
     * (the end tag for a start tag and vice versa) and the order in which its
     * range was added. {@link #compareTo(Tag)} uses those to decide how tags
     * that share a position are laid out so that ranges nest properly.
     */
    private static final class Tag implements Comparable<Tag> {
        /** Offset in the plain text in front of which the markup is inserted. */
        private final int pos;
        /** Offset of the other tag of the same range. */
        private final int partnerPos;
        /** Insertion order of the range this tag belongs to. */
        private final int seq;
        /** True for the start tag of a range, false for its end tag. */
        private final boolean opening;
        /** The literal mark up text. */
        private final String markup;

        Tag(int pos, int partnerPos, int seq, boolean opening, String markup) {
            this.pos = pos;
            this.partnerPos = partnerPos;
            this.seq = seq;
            this.opening = opening;
            this.markup = markup;
        }

        /**
         * Ordering class among tags at the same position:
         * 0 = end tag of a non-empty range, 1 = start tag of an empty range,
         * 2 = end tag of an empty range, 3 = start tag of a non-empty range.
         */
        private int rank() {
            if (pos == partnerPos) {
                return opening ? 1 : 2;
            }
            return opening ? 3 : 0;
        }

        /**
         * Orders tags by position. At the same position, ranges that end there
         * are closed first, then empty ranges are emitted, then ranges that
         * start there are opened. Among start tags the range that extends
         * further opens first; among end tags the range that started later
         * closes first. For identical ranges the one added last is outermost.
         */
        public int compareTo(Tag that) {
            // explicit comparisons instead of subtraction: cannot overflow
            if (this.pos != that.pos) {
                return this.pos < that.pos ? -1 : 1;
            }
            int thisRank = this.rank();
            int thatRank = that.rank();
            if (thisRank != thatRank) {
                return thisRank < thatRank ? -1 : 1;
            }
            // same position and same rank: both are start tags or both are end tags.
            // A larger partner position means "outer" for start tags and "inner"
            // for end tags; in both cases that tag has to come first.
            if (this.partnerPos != that.partnerPos) {
                return this.partnerPos > that.partnerPos ? -1 : 1;
            }
            if (this.seq == that.seq) {
                return 0;
            }
            // identical ranges: start tags in reverse insertion order,
            // end tags in insertion order, so the last range added wraps the others.
            boolean thisAddedFirst = this.seq < that.seq;
            return (thisAddedFirst == opening) ? 1 : -1;
        }
    }

    /**
     * Represents a substring of a {@link MarkupText}, together with the
     * positions of the groups captured by the regular expression that found it.
     */
    public final class SubText {
        private final int start, end;
        /**
         * Start and end offsets of capturing groups 1..n, stored as
         * {@code [start1, end1, start2, end2, ...]}. A group that did not
         * participate in the match has -1 for both entries.
         */
        private final int[] groups;

        /**
         * Captures the current match of the given matcher.
         *
         * @param m
         *      a matcher positioned on a successful match; {@link Matcher#start()}
         *      throws {@link IllegalStateException} otherwise.
         */
        public SubText(Matcher m) {
            start = m.start();
            end   = m.end();

            int cnt = m.groupCount();
            groups = new int[cnt * 2];
            for (int i = 0; i < cnt; i++) {
                groups[i * 2    ] = m.start(i + 1);
                groups[i * 2 + 1] = m.end(i + 1);
            }
        }

        /**
         * Surrounds this subtext with the specified start tag and the end tag.
         *
         * <p>
         * Start/end tag text can contain special tokens "$0", "$1", ... "$9"
         * and they will be replaced by their {@link #group(int) group match}
         * (or by nothing if that group did not participate in the match).
         * A backslash escapes the character that follows it, so {@code \$}
         * produces a literal dollar sign. A backslash or dollar sign at the
         * very end, and a dollar sign not followed by a digit, are kept as is.
         *
         * @throws IndexOutOfBoundsException
         *      if a token refers to a group the pattern does not have.
         */
        public void surroundWith(String startTag, String endTag) {
            addMarkup(start, end, replace(startTag), replace(endTag));
        }

        /**
         * Gets the start index of the captured group within {@link MarkupText#getText()}.
         *
         * @param groupIndex
         *      0 means the start of the whole subtext. 1, 2, ... are
         *      groups captured by '(...)' in the regexp.
         * @return
         *      the start offset, or -1 if the group did not participate in the match.
         * @throws IndexOutOfBoundsException
         *      if there is no group with the given index.
         */
        public int start(int groupIndex) {
            if (groupIndex == 0) {
                return start;
            }
            return groups[groupIndex * 2 - 2];
        }

        /**
         * Gets the start index of this subtext within {@link MarkupText#getText()}.
         */
        public int start() {
            return start;
        }

        /**
         * Gets the end index of the captured group within {@link MarkupText#getText()}.
         *
         * @param groupIndex
         *      0 means the end of the whole subtext. 1, 2, ... are
         *      groups captured by '(...)' in the regexp.
         * @return
         *      the end offset (exclusive), or -1 if the group did not participate in the match.
         * @throws IndexOutOfBoundsException
         *      if there is no group with the given index.
         */
        public int end(int groupIndex) {
            if (groupIndex == 0) {
                return end;
            }
            return groups[groupIndex * 2 - 1];
        }

        /**
         * Gets the end index of this subtext within {@link MarkupText#getText()}.
         */
        public int end() {
            return end;
        }

        /**
         * Gets the text that represents the captured group.
         *
         * @return
         *      the captured text, or null if the group did not participate in the match.
         * @throws IndexOutOfBoundsException
         *      if there is no group with the given index.
         */
        public String group(int groupIndex) {
            int from = start(groupIndex);
            if (from == -1) {
                return null;
            }
            return text.substring(from, end(groupIndex));
        }

        /**
         * Replaces the group tokens like "$0", "$1", and etc with their actual matches,
         * and resolves backslash escapes.
         */
        private String replace(String s) {
            int len = s.length();
            StringBuilder buf = new StringBuilder(len);

            for (int i = 0; i < len; i++) {
                char ch = s.charAt(i);
                boolean hasNext = i + 1 < len;

                if (ch == '\\' && hasNext) {
                    // escape char: copy the next character verbatim
                    buf.append(s.charAt(++i));
                } else if (ch == '$' && hasNext && isAsciiDigit(s.charAt(i + 1))) {
                    // replace by group; a non-participating group contributes nothing
                    String group = group(s.charAt(++i) - '0');
                    if (group != null) {
                        buf.append(group);
                    }
                } else {
                    // other chars, including a dangling '\' or a '$' that is not a token
                    buf.append(ch);
                }
            }

            return buf.toString();
        }

        /** Tells whether the character is one of '0'..'9'. */
        private boolean isAsciiDigit(char ch) {
            return '0' <= ch && ch <= '9';
        }
    }

    /**
     * Creates a marked-up view of the given plain text with no mark up yet.
     */
    public MarkupText(String text) {
        this.text = text;
    }

    /**
     * Returns the plain text portion of this {@link MarkupText} without
     * any markup.
     */
    public String getText() {
        return text;
    }

    /**
     * Adds a start tag and end tag at the specified position.
     *
     * <p>
     * For example, if the text was "abc", then {@code addMarkup(1,2,"<b>","</b>")}
     * would generate {@code "a<b>b</b>c"}.
     *
     * <p>
     * When several ranges touch the same position, tags are arranged so that
     * ranges nest: a range ending at a position is closed before another one
     * starting there is opened, an enclosing range is opened before and closed
     * after the ranges it contains, and of two identical ranges the one added
     * last is outermost.
     *
     * @param startPos
     *      offset in {@link #getText()} in front of which the start tag goes.
     * @param endPos
     *      offset in {@link #getText()} in front of which the end tag goes;
     *      must not be smaller than {@code startPos}.
     * @throws IndexOutOfBoundsException
     *      if either position is outside {@code [0, getText().length()]}
     *      or {@code startPos > endPos}.
     * @throws NullPointerException
     *      if either tag is null.
     */
    public void addMarkup(int startPos, int endPos, String startTag, String endTag) {
        rangeCheck(startPos);
        rangeCheck(endPos);
        if (startPos > endPos) {
            throw new IndexOutOfBoundsException(
                "startPos " + startPos + " is greater than endPos " + endPos);
        }
        // fail here rather than later inside toString()
        if (startTag == null) {
            throw new NullPointerException("startTag");
        }
        if (endTag == null) {
            throw new NullPointerException("endTag");
        }

        int seq = nextSeq++;
        tags.add(new Tag(startPos, endPos, seq, true, startTag));
        tags.add(new Tag(endPos, startPos, seq, false, endTag));
    }

    /** Verifies that the position is an offset into the text or its end. */
    private void rangeCheck(int pos) {
        if (pos < 0 || pos > text.length()) {
            throw new IndexOutOfBoundsException(
                "position " + pos + " is outside [0," + text.length() + "]");
        }
    }

    /**
     * Returns the fully marked-up text.
     */
    @Override
    public String toString() {
        if (tags.isEmpty()) {
            return text;    // the most common case
        }

        // sort a copy so that rendering does not modify this object
        List<Tag> ordered = new ArrayList<Tag>(tags);
        Collections.sort(ordered);

        // single pass: copy the plain text up to each tag, then the tag itself
        StringBuilder buf = new StringBuilder(text.length() + 16 * ordered.size());
        int copied = 0;     // # of chars of the plain text already written
        for (Tag tag : ordered) {
            buf.append(text, copied, tag.pos);
            buf.append(tag.markup);
            copied = tag.pos;
        }
        buf.append(text, copied, text.length());

        return buf.toString();
    }

    /**
     * Find all "tokens" that match the given pattern in this text.
     *
     * <p>
     * A token is like a substring, except that it's aware of word boundaries.
     * For example, while "bc" is a string of "abc", calling {@code findTokens}
     * with "bc" as a pattern on string "abc" won't match anything.
     * A match is rejected when the character right before it or right after it
     * is a letter or a digit.
     *
     * <p>
     * This method is convenient for finding keywords that follow a certain syntax
     * from natural text. You can then use {@link SubText#surroundWith(String,String)}
     * to put mark up around such text.
     *
     * @return
     *      the accepted matches in the order they occur; possibly empty, never null.
     */
    public List<SubText> findTokens(Pattern pattern) {
        Matcher m = pattern.matcher(text);
        List<SubText> r = new ArrayList<SubText>();

        while (m.find()) {
            int before = m.start() - 1;
            if (before >= 0 && Character.isLetterOrDigit(text.charAt(before))) {
                continue;   // not at a word boundary
            }
            int after = m.end();
            if (after < text.length() && Character.isLetterOrDigit(text.charAt(after))) {
                continue;   // not at a word boundary
            }
            r.add(new SubText(m));
        }

        return r;
    }
}
242
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags