1 package fit;
2
3
4
5
6 import java.io.*;
7 import java.text.ParseException;
8
/**
 * A node in a tree built from nested HTML tags (by default
 * {@code table} / {@code tr} / {@code td}).
 *
 * <p>Each node keeps the exact source text of one element split into five
 * pieces ({@link #leader}, {@link #tag}, {@link #body}, {@link #end},
 * {@link #trailer}) so that {@link #print(PrintWriter)} can reproduce the
 * input. Siblings are chained through {@link #more}; children hang off
 * {@link #parts}.
 */
public class Parse {

    /** Text that precedes the opening tag. */
    public String leader;
    /** The opening tag, from {@code <} through {@code >}. */
    public String tag;
    /**
     * Text between the opening and closing tags. The parsing constructor sets
     * this to {@code null} when the content was parsed into {@link #parts} at a
     * non-innermost level, and to {@code ""} when an innermost element
     * contained a nested outermost tag.
     */
    public String body;
    /** The closing tag, from {@code </} through {@code >}. */
    public String end;
    /**
     * Text that follows the closing tag. The parsing constructor sets this to
     * {@code null} once the text has been parsed into {@link #more}.
     */
    public String trailer;

    /** Next sibling at the same level, or {@code null} if this is the last. */
    public Parse more;
    /** First child one level down, or {@code null} if there are none. */
    public Parse parts;

    /**
     * Builds a node directly from its pieces.
     *
     * @param tag   bare tag name; it is wrapped into {@code <tag>} and {@code </tag>}
     * @param body  text content of the element
     * @param parts first child, or {@code null}
     * @param more  next sibling, or {@code null}
     */
    public Parse(String tag, String body, Parse parts, Parse more) {
        this.leader = "\n";
        this.tag = "<" + tag + ">";
        this.body = body;
        this.end = "</" + tag + ">";
        this.trailer = "";
        this.parts = parts;
        this.more = more;
    }

    /** Default tag hierarchy, outermost first. */
    public static String[] tags = {"table", "tr", "td"};

    /**
     * Parses {@code text} using the default {@link #tags} hierarchy.
     *
     * @param text HTML to parse
     * @throws ParseException if a required tag cannot be found or is not terminated
     */
    public Parse(String text) throws ParseException {
        this(text, tags, 0, 0);
    }

    /**
     * Parses {@code text} using the given tag hierarchy.
     *
     * @param text HTML to parse
     * @param tags tag names, outermost first
     * @throws ParseException if a required tag cannot be found or is not terminated
     */
    public Parse(String text, String[] tags) throws ParseException {
        this(text, tags, 0, 0);
    }

    /**
     * Parses the first {@code tags[level]} element in {@code text}, then
     * recursively its children and its following siblings.
     *
     * @param text   HTML to parse
     * @param tags   tag names, outermost first
     * @param level  index into {@code tags} of the tag to look for
     * @param offset value reported as the error offset of any {@link ParseException}
     * @throws ParseException if the opening tag, its matching closing tag, or
     *                        the {@code >} that terminates either one is missing
     */
    public Parse(String text, String[] tags, int level, int offset) throws ParseException {
        // Lower-case ASCII only: the result must stay index-aligned with text.
        String lc = toAsciiLowerCase(text);
        String name = tags[level];

        int startTag = lc.indexOf("<" + name);
        if (startTag < 0) {
            throw cantFindTag(name, offset);
        }
        int endTag = indexAfterClose(lc, startTag, name, offset);

        int startEnd = findMatchingEndTag(lc, endTag, name, offset);
        int endEnd = indexAfterClose(lc, startEnd, name, offset);
        int startMore = lc.indexOf("<" + name, endEnd);

        leader = text.substring(0, startTag);
        tag = text.substring(startTag, endTag);
        body = text.substring(endTag, startEnd);
        end = text.substring(startEnd, endEnd);
        trailer = text.substring(endEnd);

        if (level + 1 < tags.length) {
            // Not the innermost level: the content becomes child nodes.
            parts = new Parse(body, tags, level + 1, offset + endTag);
            body = null;
        } else {
            // Innermost level: a nested outermost tag starts a whole new tree.
            int index = body.indexOf("<" + tags[0]);
            if (index >= 0) {
                parts = new Parse(body, tags, 0, offset + endTag);
                body = "";
            }
        }

        if (startMore >= 0) {
            more = new Parse(trailer, tags, level, offset + endEnd);
            trailer = null;
        }
    }

    /** Creates the exception used for every "tag missing or malformed" failure. */
    private static ParseException cantFindTag(String tag, int offset) {
        return new ParseException("Can't find tag: " + tag, offset);
    }

    /**
     * Returns the index just past the first {@code >} at or after {@code from},
     * or throws if the tag is never terminated.
     */
    private static int indexAfterClose(String lc, int from, String tag, int offset)
            throws ParseException {
        int close = lc.indexOf(">", from);
        if (close < 0) {
            throw cantFindTag(tag, offset);
        }
        return close + 1;
    }

    /**
     * Lower-cases only the letters A-Z, so the result always has the same
     * length as the input and does not depend on the default locale.
     */
    private static String toAsciiLowerCase(String s) {
        char[] chars = s.toCharArray();
        for (int i = 0; i < chars.length; i++) {
            char c = chars[i];
            if (c >= 'A' && c <= 'Z') {
                chars[i] = (char) (c + ('a' - 'A'));
            }
        }
        return new String(chars);
    }

    /**
     * Finds the closing tag that balances an already-opened {@code tag},
     * counting nested opening tags of the same name along the way.
     *
     * @param lc            lower-cased text to search
     * @param matchFromHere index just past the opening tag
     * @param tag           tag name to balance
     * @param offset        value reported as the error offset of any {@link ParseException}
     * @return index of the {@code <} of the matching closing tag
     * @throws ParseException if no balancing closing tag exists, or a tag met
     *                        during the scan has no terminating {@code >}
     */
    protected static int findMatchingEndTag(String lc, int matchFromHere, String tag, int offset)
            throws ParseException {
        int fromHere = matchFromHere;
        int count = 1;
        int startEnd = 0;
        while (count > 0) {
            int embeddedTag = lc.indexOf("<" + tag, fromHere);
            int embeddedTagEnd = lc.indexOf("</" + tag, fromHere);

            if (embeddedTag < 0 && embeddedTagEnd < 0) {
                throw cantFindTag(tag, offset);
            }
            if (embeddedTag < 0) {
                embeddedTag = Integer.MAX_VALUE;
            }
            if (embeddedTagEnd < 0) {
                embeddedTagEnd = Integer.MAX_VALUE;
            }
            if (embeddedTag < embeddedTagEnd) {
                count++;
                startEnd = embeddedTag;
            } else {
                count--;
                startEnd = embeddedTagEnd;
            }
            // Without this check a missing '>' would restart the scan at index 0.
            fromHere = indexAfterClose(lc, startEnd, tag, offset);
        }
        return startEnd;
    }

    /** Returns the number of nodes in the sibling chain starting at this node. */
    public int size() {
        return more == null ? 1 : more.size() + 1;
    }

    /** Returns the last node in the sibling chain starting at this node. */
    public Parse last() {
        return more == null ? this : more.last();
    }

    /** Returns the node reached by following {@link #parts} until it is {@code null}. */
    public Parse leaf() {
        return parts == null ? this : parts.leaf();
    }

    /**
     * Returns the {@code i}-th sibling counting from this node (0 = this).
     * If the chain ends first, the last node of the chain is returned.
     */
    public Parse at(int i) {
        return i == 0 || more == null ? this : more.at(i - 1);
    }

    /**
     * Returns the {@code j}-th child of the {@code i}-th sibling.
     * Throws {@link NullPointerException} if that sibling has no {@link #parts}.
     */
    public Parse at(int i, int j) {
        return at(i).parts.at(j);
    }

    /**
     * Returns the {@code k}-th grandchild under {@code at(i, j)}.
     * Throws {@link NullPointerException} if a node on the way has no {@link #parts}.
     */
    public Parse at(int i, int j, int k) {
        return at(i, j).parts.at(k);
    }

    /**
     * Returns {@link #body} converted to plain text by {@link #htmlToText(String)}.
     * The body must not be {@code null}.
     */
    public String text() {
        return htmlToText(body);
    }

    /**
     * Converts an HTML fragment to plain text: line-break markup is normalized,
     * every other tag is removed, whitespace is condensed, and finally line
     * breaks, entities and smart quotes are unescaped.
     *
     * @param s HTML fragment; must not be {@code null}
     * @return the plain-text rendering
     */
    public static String htmlToText(String s) {
        s = normalizeLineBreaks(s);
        s = removeNonBreakTags(s);
        s = condenseWhitespace(s);
        s = unescape(s);
        return s;
    }

    /** Removes every {@code <...>} span except the normalized break tag. */
    private static String removeNonBreakTags(String s) {
        int i = 0;
        int j;
        while ((i = s.indexOf('<', i)) >= 0) {
            if ((j = s.indexOf('>', i + 1)) > 0) {
                if (!(s.substring(i, j + 1).equals("<br />"))) {
                    s = s.substring(0, i) + s.substring(j + 1);
                } else {
                    i++;
                }
            } else {
                break;
            }
        }
        return s;
    }

    /**
     * Turns normalized break tags into newlines, then decodes the supported
     * entities and replaces smart quotes with plain quotes.
     */
    public static String unescape(String s) {
        s = s.replaceAll("<br />", "\n");
        s = unescapeEntities(s);
        s = unescapeSmartQuotes(s);
        return s;
    }

    /** Replaces curly double and single quotes with their straight forms. */
    private static String unescapeSmartQuotes(String s) {
        s = s.replace('\u201c', '"');
        s = s.replace('\u201d', '"');
        s = s.replace('\u2018', '\'');
        s = s.replace('\u2019', '\'');
        return s;
    }

    /**
     * Decodes the entities lt, gt, nbsp, quot and amp. The ampersand entity is
     * handled last so that an escaped entity is decoded only once.
     */
    private static String unescapeEntities(String s) {
        s = s.replaceAll("&lt;", "<");
        s = s.replaceAll("&gt;", ">");
        s = s.replaceAll("&nbsp;", " ");
        s = s.replaceAll("&quot;", "\"");
        s = s.replaceAll("&amp;", "&");
        return s;
    }

    /**
     * Rewrites break-tag variants, and a closing paragraph tag directly
     * followed by an opening one, to the single form {@code <br />}.
     */
    private static String normalizeLineBreaks(String s) {
        s = s.replaceAll("<\\s*br\\s*/?\\s*>", "<br />");
        s = s.replaceAll("<\\s*/\\s*p\\s*>\\s*<\\s*p( .*?)?>", "<br />");
        return s;
    }

    /**
     * Collapses each run of whitespace to one space, turns non-breaking spaces
     * (character 160 and the nbsp entity) into plain spaces, and trims.
     */
    public static String condenseWhitespace(String s) {
        final char NON_BREAKING_SPACE = (char) 160;

        s = s.replaceAll("\\s+", " ");
        s = s.replace(NON_BREAKING_SPACE, ' ');
        s = s.replaceAll("&nbsp;", " ");
        s = s.trim();
        return s;
    }

    /** Inserts {@code text} just before the closing {@code >} of {@link #tag}. */
    public void addToTag(String text) {
        int last = tag.length() - 1;
        tag = tag.substring(0, last) + text + ">";
    }

    /** Appends {@code text} to {@link #body}. */
    public void addToBody(String text) {
        body = body + text;
    }

    /**
     * Writes this node, its children and its following siblings to {@code out}.
     * Children are printed instead of {@link #body} when {@link #parts} is set,
     * and siblings instead of {@link #trailer} when {@link #more} is set.
     */
    public void print(PrintWriter out) {
        out.print(leader);
        out.print(tag);
        if (parts != null) {
            parts.print(out);
        } else {
            out.print(body);
        }
        out.print(end);
        if (more != null) {
            more.print(out);
        } else {
            out.print(trailer);
        }
    }

    /** Number of footnote files requested so far; footnotes stop being written at 25. */
    public static int footnoteFiles = 0;

    /**
     * Writes this node to {@code Reports/footnotes/<n>.html} and returns an
     * HTML link to it.
     *
     * @return the link markup, {@code "[-]"} once 25 footnotes have been
     *         requested, or {@code "[!]"} if the file could not be opened
     */
    public String footnote() {
        if (footnoteFiles >= 25) {
            return "[-]";
        } else {
            try {
                int thisFootnote = ++footnoteFiles;
                String html = "footnotes/" + thisFootnote + ".html";
                File file = new File("Reports/" + html);
                file.delete();
                PrintWriter output = new PrintWriter(new BufferedWriter(new FileWriter(file)));
                try {
                    print(output);
                } finally {
                    // Release the file handle even if printing fails part-way.
                    output.close();
                }
                return "<a href=/fit/Release/Reports/" + html + "> [" + thisFootnote + "]</a>";
            } catch (IOException e) {
                return "[!]";
            }
        }
    }
}