View Javadoc

1   package fit;
2   
3   // Copyright (c) 2002 Cunningham & Cunningham, Inc.
4   // Released under the terms of the GNU General Public License version 2 or later.
5   
6   import java.io.*;
7   import java.text.ParseException;
8   
/**
 * A node in a tree of parsed HTML elements.
 *
 * <p>Each node keeps the text that preceded its start tag ({@link #leader}),
 * the start tag itself ({@link #tag}), the text between start and end tag
 * ({@link #body}), the end tag ({@link #end}) and whatever followed it
 * ({@link #trailer}). {@link #parts} points at the first child element and
 * {@link #more} at the next sibling element, so printing the tree with
 * {@link #print(PrintWriter)} reproduces the parsed text.
 */
public class Parse {

    /** Text that appeared before the start tag. */
    public String leader;
    /** The start tag, including angle brackets and any attributes. */
    public String tag;
    /** Text between start and end tag; set to null by the parsing constructor when child tags were parsed into {@link #parts}. */
    public String body;
    /** The end tag, including angle brackets. */
    public String end;
    /** Text after the end tag; set to null by the parsing constructor when a sibling was parsed into {@link #more}. */
    public String trailer;

    /** Next sibling element, or null when this is the last one. */
    public Parse more;
    /** First child element, or null when this node has none. */
    public Parse parts;

    /**
     * Builds a node directly from its pieces, without parsing.
     *
     * @param tag   tag name without angle brackets, e.g. {@code "td"}
     * @param body  text between the generated start and end tags
     * @param parts first child node, may be null
     * @param more  next sibling node, may be null
     */
    public Parse (String tag, String body, Parse parts, Parse more) {
        this.leader = "\n";
        this.tag = "<"+tag+">";
        this.body = body;
        this.end = "</"+tag+">";
        this.trailer = "";
        this.parts = parts;
        this.more = more;
    }

    /** Default tag hierarchy used by {@link #Parse(String)}, outermost first. */
    public static String tags[] = {"table", "tr", "td"};

    /**
     * Parses {@code text} using the default {@link #tags} hierarchy.
     *
     * @throws ParseException if an expected tag cannot be found
     */
    public Parse (String text) throws ParseException {
        this (text, tags, 0, 0);
    }

    /**
     * Parses {@code text} using the given tag hierarchy, outermost first.
     *
     * @throws ParseException if an expected tag cannot be found
     */
    public Parse (String text, String tags[]) throws ParseException {
        this (text, tags, 0, 0);
    }

    /**
     * Parses the first {@code tags[level]} element found in {@code text},
     * then recursively its children (next level) and its following siblings
     * (same level).
     *
     * @param text   the text to parse
     * @param tags   tag names, outermost first; compared against an
     *               ASCII-lower-cased copy of the text
     * @param level  index into {@code tags} of the element expected here
     * @param offset position reported in any {@link ParseException}
     * @throws ParseException if the start tag, its matching end tag, or the
     *                        closing {@code >} of either is missing
     */
    public Parse (String text, String tags[], int level, int offset) throws ParseException {
        // ASCII-only folding keeps lc the same length as text, so every index
        // found in lc is valid in text regardless of locale or content.
        String lc = asciiLowerCase(text);
        String name = tags[level];

        int startTag = lc.indexOf("<"+name);
        if (startTag < 0) {
            throw new ParseException ("Can't find tag: "+name, offset);
        }
        int endTag = indexAfterTag(lc, startTag, name, offset);
        int startEnd = findMatchingEndTag(lc, endTag, name, offset);
        int endEnd = indexAfterTag(lc, startEnd, name, offset);
        int startMore = lc.indexOf("<"+name, endEnd);

        leader = text.substring(0,startTag);
        tag = text.substring(startTag, endTag);
        body = text.substring(endTag, startEnd);
        end = text.substring(startEnd,endEnd);
        trailer = text.substring(endEnd);

        if (level+1 < tags.length) {
            parts = new Parse (body, tags, level+1, offset+endTag);
            body = null;
        }
        else { // Check for nested table
            // Search the lower-cased text so that <TABLE> is found as well.
            int nested = lc.indexOf("<" + tags[0], endTag);
            if (nested >= 0 && nested < startEnd) {
                parts = new Parse(body, tags, 0, offset + endTag);
                body = "";
            }
        }

        if (startMore>=0) {
            more = new Parse (trailer, tags, level, offset+endEnd);
            trailer = null;
        }
    }

    /**
     * Finds the end tag that balances an already-consumed start tag,
     * skipping over nested start/end pairs of the same tag.
     * (Added by Rick Mugridge, Feb 2005.)
     *
     * @param lc            lower-cased text being scanned
     * @param matchFromHere index just past the start tag to be matched
     * @param tag           tag name without angle brackets
     * @param offset        position reported in any {@link ParseException}
     * @return index in {@code lc} of the {@code <} of the matching end tag
     * @throws ParseException if no balancing end tag exists, or a tag met
     *                        while scanning has no closing {@code >}
     */
    protected static int findMatchingEndTag(String lc, int matchFromHere, String tag, int offset) throws ParseException {
        String open = "<" + tag;
        String close = "</" + tag;
        int fromHere = matchFromHere;
        int depth = 1;
        while (true) {
            int nextOpen = lc.indexOf(open, fromHere);
            int nextClose = lc.indexOf(close, fromHere);
            // Without any further end tag the nesting can never be balanced.
            if (nextClose < 0) {
                throw new ParseException("Can't find tag: " + tag, offset);
            }
            if (nextOpen >= 0 && nextOpen < nextClose) {
                depth++;
                // A missing '>' used to reset the scan position to 0 and loop forever.
                fromHere = indexAfterTag(lc, nextOpen, tag, offset);
            }
            else {
                depth--;
                if (depth == 0) {
                    return nextClose;
                }
                fromHere = indexAfterTag(lc, nextClose, tag, offset);
            }
        }
    }

    /** Returns the index just past the first {@code >} at or after {@code tagStart}, or throws if there is none. */
    private static int indexAfterTag(String lc, int tagStart, String tag, int offset) throws ParseException {
        int close = lc.indexOf(">", tagStart);
        if (close < 0) {
            throw new ParseException("Can't find tag: " + tag, offset);
        }
        return close + 1;
    }

    /** Lower-cases only A-Z, leaving all other characters and the length unchanged. */
    private static String asciiLowerCase(String s) {
        char[] chars = s.toCharArray();
        for (int i = 0; i < chars.length; i++) {
            char c = chars[i];
            if (c >= 'A' && c <= 'Z') {
                chars[i] = (char) (c + ('a' - 'A'));
            }
        }
        return new String(chars);
    }

    /** Returns the number of nodes in the sibling chain starting at this node. */
    public int size() {
        return more==null ? 1 : more.size()+1;
    }

    /** Returns the last node in the sibling chain starting at this node. */
    public Parse last() {
        return more==null ? this : more.last();
    }

    /** Returns the node reached by following {@link #parts} until a node without children. */
    public Parse leaf() {
        return parts==null ? this : parts.leaf();
    }

    /**
     * Returns the i-th sibling counting from this node; when the chain is
     * shorter than that, the last sibling is returned.
     */
    public Parse at(int i) {
        return i==0 || more==null ? this : more.at(i-1);
    }

    /** Returns the j-th child of the i-th sibling. */
    public Parse at(int i, int j) {
        return at(i).parts.at(j);
    }

    /** Returns the k-th grandchild reached through sibling i and child j. */
    public Parse at(int i, int j, int k) {
        return at(i,j).parts.at(k);
    }

    /** Returns {@link #body} converted to plain text by {@link #htmlToText(String)}; body must not be null. */
    public String text() {
        return htmlToText(body);
    }

    /**
     * Converts an HTML fragment to plain text: line breaks are normalized,
     * all other tags are removed, whitespace is condensed, and finally
     * breaks, entities and smart quotes are unescaped.
     */
    public static String htmlToText(String s)
    {
        s = normalizeLineBreaks(s);
        s = removeNonBreakTags(s);
        s = condenseWhitespace(s);
        s = unescape(s);
        return s;
    }

    /** Removes every {@code <...>} span except the exact text {@code <br />}. */
    private static String removeNonBreakTags(String s) {
        int i=0, j;
        while ((i=s.indexOf('<',i))>=0) {
            if ((j=s.indexOf('>',i+1))>0) {
                if (!(s.substring(i, j+1).equals("<br />"))) {
                    s = s.substring(0,i) + s.substring(j+1);
                } else {
                    i++; // keep the break and continue scanning after its '<'
                }
            } else {
                break; // a '<' without a closing '>' is left as is
            }
        }
        return s;
    }

    /** Turns {@code <br />} into newlines, then unescapes entities and smart quotes. */
    public static String unescape(String s) {
        s = s.replaceAll("<br />", "\n");
        s = unescapeEntities(s);
        s = unescapeSmartQuotes(s);
        return s;
    }

    /** Replaces curly double and single quotes with their straight ASCII forms. */
    private static String unescapeSmartQuotes(String s) {
        s = s.replace('\u201c', '"');
        s = s.replace('\u201d', '"');
        s = s.replace('\u2018', '\'');
        s = s.replace('\u2019', '\'');
        return s;
    }

    /** Replaces the entities lt, gt, nbsp, quot and amp; amp goes last so its output is not unescaped a second time. */
    private static String unescapeEntities(String s) {
        s = s.replaceAll("&lt;", "<");
        s = s.replaceAll("&gt;", ">");
        s = s.replaceAll("&nbsp;", " ");
        s = s.replaceAll("&quot;", "\"");
        s = s.replaceAll("&amp;", "&");
        return s;
    }

    /** Rewrites br tag variants, and a closing p followed by an opening p, as {@code <br />}. */
    private static String normalizeLineBreaks(String s) {
        s = s.replaceAll("<\\s*br\\s*/?\\s*>", "<br />");
        s = s.replaceAll("<\\s*/\\s*p\\s*>\\s*<\\s*p( .*?)?>", "<br />");
        return s;
    }

    /** Collapses whitespace runs to one space, turns non-breaking spaces into plain spaces, and trims. */
    public static String condenseWhitespace(String s) {
        final char NON_BREAKING_SPACE = (char)160;

        s = s.replaceAll("\\s+", " ");
        s = s.replace(NON_BREAKING_SPACE, ' ');
        s = s.replaceAll("&nbsp;", " ");
        s = s.trim();
        return s;
    }

    /** Inserts {@code text} just before the closing {@code >} of the start tag. */
    public void addToTag(String text) {
        int last = tag.length()-1;
        tag = tag.substring(0,last) + text + ">";
    }

    /** Appends {@code text} to the body. */
    public void addToBody(String text) {
        body = body + text;
    }

    /**
     * Writes this node, its children and its following siblings to
     * {@code out}: children replace the body and a sibling replaces the
     * trailer.
     */
    public void print(PrintWriter out) {
        out.print(leader);
        out.print(tag);
        if (parts != null) {
            parts.print(out);
        } else {
            out.print(body);
        }
        out.print(end);
        if (more != null) {
            more.print(out);
        } else {
            out.print(trailer);
        }
    }

    /** Number of footnote files handed out so far; no more are written once this reaches 25. */
    public static int footnoteFiles=0;

    /**
     * Prints this node to the next numbered file under
     * {@code Reports/footnotes/} and returns an HTML link to it.
     *
     * @return the link markup, {@code "[-]"} once the footnote limit has
     *         been reached, or {@code "[!]"} when an IOException is caught
     */
    public String footnote () {
        if (footnoteFiles>=25) {
            return "[-]";
        } else {
            try {
                int thisFootnote = ++footnoteFiles;
                String html = "footnotes/" + thisFootnote + ".html";
                File file = new File("Reports/" + html);
                file.delete();
                PrintWriter output = new PrintWriter(new BufferedWriter(new FileWriter(file)));
                try {
                    print(output);
                } finally {
                    // Release the file handle even if printing fails part-way.
                    output.close();
                }
                return "<a href=/fit/Release/Reports/" + html + "> [" + thisFootnote + "]</a>";
            } catch (IOException e) {
                return "[!]";
            }
        }
    }
}