1 package com.insanityengine.ghia.util;
2
3 import java.io.*;
4 import java.net.*;
5
6 import java.awt.event.*;
7
8 import com.insanityengine.ghia.events.*;
9
10 /***
11 *
12 * <P>
13 * Takes html and pulls out href and anchors which is sends to listeners...
14 * </P>
15 *
16 * @author BrianHammond
17 *
18 * $Header: /usr/local/cvsroot/ghia/src/java/com/insanityengine/ghia/util/Hreferee.java,v 1.3 2005/03/19 17:50:02 brian Exp $
19 *
20 */
21 public class Hreferee extends ActionEventGenerator implements ActionListener {
22
23 public final static void main( String argv[] ) {
24 Hreferee href = new Hreferee();
25 href.addActionListener( href );
26
27 if ( 0 == argv.length ) {
28 href.ref( System.in );
29 } else {
30 for ( int i = 0 ; i < argv.length ; i++ ) href.ref( argv[ i ] );
31 }
32 }
33
34 public Hreferee() {
35 }
36
37 /***
38 *
39 * Chunk up a stream to produce events in the form of HREF>ANCHOR
40 *
41 * @param href to parse
42 *
43 */
44 public void ref( String href ) {
45 try {
46 ref( new URL( href ) );
47 } catch ( Exception e ) {
48 }
49 }
50
51 /***
52 *
53 * Chunk up a stream to produce events in the form of HREF>ANCHOR
54 *
55 * @param url to parse
56 *
57 */
58 public void ref( URL url ) {
59 try {
60 setUrl( url );
61 ref( url.openStream() );
62 } catch ( Exception e ) {
63 }
64 }
65
66 /***
67 *
68 * Chunk up a stream to produce events in the form of HREF>ANCHOR
69 *
70 * @param in stream
71 *
72 */
73 public void ref( InputStream in ) {
74 _init();
75
76 char c;
77 byte [] bytes = new byte[ 1024 ];
78 int i;
79 int len = 0;
80 StringBuffer buf = new StringBuffer();
81
82 try {
83 while ( -1 != ( len = in.read( bytes ) ) ) {
84 for ( i = 0 ; i < len ; ++i ) {
85
86 c = ( char ) bytes[ i ];
87 if ( '\n' != c && '\r' != c ) {
88
89
90 if ( '<' == c ) {
91 ref( buf );
92 buf = new StringBuffer();
93 }
94 buf.append( c );
95 }
96 }
97 }
98 } catch( Exception e ) {
99 }
100 ref( buf );
101 }
102
103 /***
104 *
105 * For ActionListener interface
106 *
107 * @param event to act on
108 *
109 */
110 public void actionPerformed( ActionEvent event ) {
111 System.out.println( event.getActionCommand() );
112 }
113
114 /***
115 *
116 * Set the value
117 *
118 * @param newValue to use
119 *
120 */
121 URL getUrl() { return url; }
122
123 /***
124 *
125 * Set the value
126 *
127 * @param newValue to use
128 *
129 */
130 void setUrl( URL newValue ) { url = newValue; }
131
132 /***
133 *
134 *
135 */
136 private void ref( StringBuffer buf ) {
137 String lower = lowerIt( buf, 10 );
138
139 if ( !inScript ) {
140 inScript = ( 0 != lower.indexOf( "</script" ) );
141 } else {
142 inScript = ( 0 == lower.indexOf( "<script" ) );
143 }
144
145 if ( !inScript && !inStyle ) refA( buf );
146 }
147
148 /***
149 *
150 *
151 */
152 private void refA( StringBuffer buf ) {
153 if ( 0 == lowerIt( buf, 2 ).indexOf( "<a" ) ) {
154 fireActionPerformed(
155 absUrl( href( buf ) ) + ">" + anchor( buf )
156 );
157 }
158 }
159
160 private String absUrl( String href ) {
161 if ( null != getUrl() ) {
162 try {
163 URL url = new URL( getUrl(), href );
164 href = url.toExternalForm();
165 } catch ( Exception e ) {
166 }
167 }
168 return href;
169 }
170
171 /***
172 *
173 *
174 */
175 private static String href( StringBuffer buf ) {
176
177 int hrefIdx = lowerIt( buf ).indexOf( "href" );
178 if ( -1 == hrefIdx ) return "";
179
180 String notIn = " =\"'//>";
181
182 for ( hrefIdx += 4 ; hrefIdx < buf.length() ; hrefIdx++ ) {
183 if ( -1 == notIn.indexOf( buf.charAt( hrefIdx ) ) ) {
184 break;
185 }
186 }
187
188 int space = buf.substring( hrefIdx ).indexOf( " " );
189 int close = buf.indexOf( ">" );
190
191 if ( -1 != space && space < close ) close = space;
192
193 if ( -1 == close ) {
194 close = buf.length();
195 } else {
196 for ( close-- ; close > hrefIdx ; close-- ) {
197 if ( -1 == notIn.indexOf( buf.charAt( close ) ) ) {
198 close ++;
199 break;
200 }
201 }
202 }
203
204 return buf.substring( hrefIdx, close );
205 }
206
207 /***
208 *
209 *
210 */
211 private static String anchor( StringBuffer buf ) {
212 return buf.substring( 1 + buf.indexOf( ">" ) );
213 }
214
215
216 /***
217 *
218 *
219 */
220 private final static String lowerIt( StringBuffer buf, int n ) {
221 int idx = buf.length();
222 if ( idx > n ) idx = n;
223 return buf.substring( 0, idx ).toLowerCase();
224 }
225
226 /***
227 *
228 *
229 */
230 private final static String lowerIt( StringBuffer buf ) {
231 return buf.toString().toLowerCase();
232 }
233 /***
234 *
235 *
236 */
237 private void _init() {
238 inScript = inStyle = false;
239 }
240
241
242
243 private boolean inScript, inStyle;
244 private URL url = null;
245
246 };
247
248 /***
249 *
250 * $Log: Hreferee.java,v $
251 * Revision 1.3 2005/03/19 17:50:02 brian
252 * repackaging
253 *
254 * Revision 1.2 2004/09/23 02:14:02 brian
255 * relative to absolute url fix
256 *
257 * Revision 1.1 2004/09/23 01:03:45 brian
258 * takes html and pulls out href and anchors which is sends to listeners...
259 *
260 *
261 */