View Javadoc

1   package com.insanityengine.ghia.util;
2   
3   import java.io.*;
4   import java.net.*;
5   
6   import java.awt.event.*;
7   
8   import com.insanityengine.ghia.events.*;
9   
10  /***
11   *
12   * <P>
13   * Takes html and pulls out href and anchors which is sends to listeners...
14   * </P>
15   *
16   * @author BrianHammond
17   *
18   * $Header: /usr/local/cvsroot/ghia/src/java/com/insanityengine/ghia/util/Hreferee.java,v 1.3 2005/03/19 17:50:02 brian Exp $
19   *
20   */
21  public class Hreferee extends ActionEventGenerator implements ActionListener {
22  
23  	public final static void main( String argv[] ) {
24  		Hreferee href = new Hreferee();
25  		href.addActionListener( href );
26  
27  		if ( 0 == argv.length ) {
28  			href.ref( System.in );
29  		} else {
30  			for ( int i = 0 ; i < argv.length ; i++ ) href.ref( argv[ i ] );
31  		}
32  	}
33  
34  	public Hreferee() {
35  	}
36  
37  	/***
38  	 *
39  	 * Chunk up a stream to produce events in the form of HREF&gt;ANCHOR
40  	 *
41  	 * @param href to parse
42  	 *
43  	 */
44  	public void ref( String href ) {
45  		try {
46  			ref( new URL( href ) );
47  		} catch ( Exception e ) {
48  		}
49  	}
50  	
51  	/***
52  	 *
53  	 * Chunk up a stream to produce events in the form of HREF&gt;ANCHOR
54  	 *
55  	 * @param url to parse
56  	 *
57  	 */
58  	public void ref( URL url ) {
59  		try {
60  			setUrl( url );
61  			ref( url.openStream() );
62  		} catch ( Exception e ) {
63  		}
64  	}
65  
66  	/***
67  	 *
68  	 * Chunk up a stream to produce events in the form of HREF&gt;ANCHOR
69  	 *
70  	 * @param in stream
71  	 *
72  	 */
73  	public void ref( InputStream in ) {
74  		_init();
75  		
76  		char c;
77  		byte [] bytes = new byte[ 1024 ];
78  		int i;
79  		int len = 0;
80  		StringBuffer buf = new StringBuffer();
81  		
82  		try {
83  			while ( -1 != ( len = in.read( bytes ) ) ) {
84  				for ( i = 0 ; i < len ; ++i ) {
85  					
86  					c = ( char ) bytes[ i ];
87  					if ( '\n' != c && '\r' != c ) {
88  					
89  						
90  						if ( '<' == c ) {
91  							ref( buf );
92  							buf = new StringBuffer();
93  						} 
94  						buf.append( c );
95  					}
96  				}
97  			}
98  		} catch( Exception e ) {
99  		}
100 		ref( buf );
101 	}
102 
103 	/***
104 	 *
105 	 * For ActionListener interface
106 	 *
107 	 * @param event to act on
108 	 *
109 	 */
110 	public void actionPerformed( ActionEvent event ) {
111 		System.out.println( event.getActionCommand() );
112 	}
113 
114 	/***
115 	  *
116 	  * Set the value
117 	  *
118 	  * @param newValue to use
119 	  *
120 	  */
121 	URL getUrl() { return url; }
122 
123 	/***
124 	  *
125 	  * Set the value
126 	  *
127 	  * @param newValue to use
128 	  *
129 	  */
130 	void setUrl( URL newValue ) { url = newValue; }
131 
132 	/***
133 	 *
134 	 *
135 	 */
136 	private void ref( StringBuffer buf ) {
137 		String lower = lowerIt( buf, 10 );
138 
139 		if ( !inScript ) {
140 			inScript = ( 0 != lower.indexOf( "</script" ) );
141 		} else {
142 			inScript = ( 0 == lower.indexOf( "<script" ) );
143 		}
144 		
145 		if ( !inScript && !inStyle ) refA( buf );
146 	}
147 
148 	/***
149 	 *
150 	 *
151 	 */
152 	private void refA( StringBuffer buf ) {
153 		if ( 0 == lowerIt( buf, 2 ).indexOf( "<a" ) ) {
154 			fireActionPerformed( 
155 				absUrl( href( buf ) ) + ">" + anchor( buf )
156 			);
157 		}
158 	}
159 
160 	private String absUrl( String href ) {
161 		if ( null != getUrl() ) {
162 			try {
163 				URL url = new URL( getUrl(), href );
164 				href = url.toExternalForm();
165 			} catch ( Exception e ) {
166 			}
167 		}
168 		return href;
169 	}
170 
171 	/***
172 	 *
173 	 *
174 	 */
175 	private static String href( StringBuffer buf ) {
176 		
177 		int hrefIdx = lowerIt( buf ).indexOf( "href" );
178 		if ( -1 == hrefIdx ) return "";
179 
180 		String notIn = " =\"'//>";
181 
182 		for ( hrefIdx += 4 ; hrefIdx < buf.length() ; hrefIdx++ ) {
183 			if ( -1 == notIn.indexOf( buf.charAt( hrefIdx ) ) ) {
184 				break;	
185 			}
186 		}
187 
188 		int space = buf.substring( hrefIdx ).indexOf( " " );
189 		int close = buf.indexOf( ">" );
190 
191 		if ( -1 != space && space < close ) close = space;
192 		
193 		if ( -1 == close ) {
194 			close = buf.length();
195 		} else {
196 			for ( close-- ; close > hrefIdx ; close-- ) {
197 				if ( -1 == notIn.indexOf( buf.charAt( close ) ) ) {
198 					close ++;
199 					break;
200 				}
201 			}
202 		}
203 		
204 		return buf.substring( hrefIdx, close );
205 	}
206 
207 	/***
208 	 *
209 	 *
210 	 */
211 	private static String anchor( StringBuffer buf ) {
212 		return buf.substring( 1 + buf.indexOf( ">" ) );
213 	}
214 
215 
216 	/***
217 	 *
218 	 *
219 	 */
220 	private final static String lowerIt( StringBuffer buf, int n ) {
221 		int idx = buf.length();
222 		if ( idx > n ) idx = n;
223 		return buf.substring( 0, idx ).toLowerCase();
224 	}
225 
226 	/***
227 	 *
228 	 *
229 	 */
230 	private final static String lowerIt( StringBuffer buf ) {
231 		return buf.toString().toLowerCase();
232 	}
233 	/***
234 	 *
235 	 *
236 	 */
237 	private void _init() {
238 		inScript = inStyle = false;
239 	}
240 
241 	////
242 
243 	private boolean inScript, inStyle;
244 	private URL url = null;
245 	
246 };
247 
248 /***
249  *
250  * $Log: Hreferee.java,v $
251  * Revision 1.3  2005/03/19 17:50:02  brian
252  * repackaging
253  *
254  * Revision 1.2  2004/09/23 02:14:02  brian
255  * relative to absolute url fix
256  *
257  * Revision 1.1  2004/09/23 01:03:45  brian
258  * takes html and pulls out href and anchors which is sends to listeners...
259  *
260  *
261  */