Click here to Skip to main content
15,911,786 members
Home / Discussions / Java
   

Java

 
GeneralRe: How to make Swing GUI stable in GridBagLayout Pin
don Moen2-Dec-09 19:57
don Moen2-Dec-09 19:57 
Questionjava effects Pin
ammu2328-Nov-09 5:23
ammu2328-Nov-09 5:23 
AnswerRe: java effects Pin
Richard MacCutchan28-Nov-09 5:52
mveRichard MacCutchan28-Nov-09 5:52 
AnswerRe: java effects Pin
scottgp28-Nov-09 11:09
professionalscottgp28-Nov-09 11:09 
QuestionEmbedding Pentaho Dashboard into my web application in struts 1 Pin
Vivek Vijayan27-Nov-09 17:43
Vivek Vijayan27-Nov-09 17:43 
AnswerRe: Embedding Pentaho Dashboard into my web application in struts 1 Pin
Richard MacCutchan27-Nov-09 22:03
mveRichard MacCutchan27-Nov-09 22:03 
QuestionWrite XML Attribute Pin
toby3127-Nov-09 13:02
toby3127-Nov-09 13:02 
QuestionA Web Crawler code...help Pin
sangeeta200927-Nov-09 2:23
sangeeta200927-Nov-09 2:23 
i have got a code for Web Crawler in java language....
Following is applet code...

import java.applet.Applet;
import java.text.*;
import java.awt.*;
import java.awt.List;
import java.awt.event.*;
import java.util.*;
import java.util.*;
import java.net.*;
import java.io.*;

/* <applet code=WebCrawler height=400 width=400>
</applet>
*/



public class WebCrawler extends Applet implements ActionListener, Runnable {
    public static final String SEARCH = "Search";
    public static final String STOP = "Stop";
    public static final String DISALLOW = "Disallow:";
    public static final int    SEARCH_LIMIT = 50;

    Panel   panelMain;
    List    listMatches;
    Label   labelStatus;

    // URLs to be searched
    Vector vectorToSearch;
    // URLs already searched
    Vector vectorSearched;
    // URLs which match
    Vector vectorMatches;

    Thread searchThread;

    TextField textURL;
    Choice    choiceType;

    public void init() {

	// set up the main UI panel
	panelMain = new Panel();
	panelMain.setLayout(new BorderLayout(5, 5));

	// text entry components
	Panel panelEntry = new Panel();
	panelEntry.setLayout(new BorderLayout(5, 5));

	Panel panelURL = new Panel();
	panelURL.setLayout(new FlowLayout(FlowLayout.LEFT, 5, 5));
	Label labelURL = new Label("Starting URL: ", Label.RIGHT);
	panelURL.add(labelURL);
	textURL = new TextField("", 40);
	panelURL.add(textURL);
	panelEntry.add("North", panelURL);

	Panel panelType = new Panel();
	panelType.setLayout(new FlowLayout(FlowLayout.LEFT, 5, 5));
	Label labelType = new Label("Content type: ", Label.RIGHT);
	panelType.add(labelType);
	choiceType = new Choice();
	choiceType.addItem("text/html");
	choiceType.addItem("audio/basic");
	choiceType.addItem("audio/au");
	choiceType.addItem("audio/aiff");
	choiceType.addItem("audio/wav");
	choiceType.addItem("video/mpeg");
	choiceType.addItem("video/x-avi");
	panelType.add(choiceType);
	panelEntry.add("South", panelType);

	panelMain.add("North", panelEntry);

	// list of result URLs
	Panel panelListButtons = new Panel();
	panelListButtons.setLayout(new BorderLayout(5, 5));

	Panel panelList = new Panel();
	panelList.setLayout(new BorderLayout(5, 5));
	Label labelResults = new Label("Search results");
	panelList.add("North", labelResults);
	Panel panelListCurrent = new Panel();
	panelListCurrent.setLayout(new BorderLayout(5, 5));
	listMatches = new List(10);
	panelListCurrent.add("North", listMatches);
	labelStatus = new Label("");
	panelListCurrent.add("South", labelStatus);
	panelList.add("South", panelListCurrent);

	panelListButtons.add("North", panelList);

	// control buttons
	Panel panelButtons = new Panel();
	Button buttonSearch = new Button(SEARCH);
	buttonSearch.addActionListener(this);
	panelButtons.add(buttonSearch);
	Button buttonStop = new Button(STOP);
	buttonStop.addActionListener(this);
	panelButtons.add(buttonStop);

	panelListButtons.add("South", panelButtons);

	panelMain.add("South", panelListButtons);

	add(panelMain);
	setVisible(true);

	repaint();

	// initialize search data structures
	vectorToSearch = new Vector();
	vectorSearched = new Vector();
	vectorMatches = new Vector();

	// set default for URL access
	URLConnection.setDefaultAllowUserInteraction(false);
    }

    public void start() {
    }

    public void stop() {
	if (searchThread != null) {
	    setStatus("stopping...");
	    searchThread = null;
	}
    }

    public void destroy() {
    }

    boolean robotSafe(URL url) {
	String strHost = url.getHost();

	// form URL of the robots.txt file
	String strRobot = "http://" + strHost + "/robots.txt";
	URL urlRobot;
	try {
	    urlRobot = new URL(strRobot);
	} catch (MalformedURLException e) {
	    // something weird is happening, so don't trust it
	    return false;
	}

	String strCommands;
	try {
	    InputStream urlRobotStream = urlRobot.openStream();

	    // read in entire file
	    byte b[] = new byte[1000];
	    int numRead = urlRobotStream.read(b);
	    strCommands = new String(b, 0, numRead);
	    while (numRead != -1) {
		if (Thread.currentThread() != searchThread)
		    break;
		numRead = urlRobotStream.read(b);
		if (numRead != -1) {
		    String newCommands = new String(b, 0, numRead);
		    strCommands += newCommands;
		}
	    }
	    urlRobotStream.close();
	} catch (IOException e) {
	    // if there is no robots.txt file, it is OK to search
	    return true;
	}

	// assume that this robots.txt refers to us and
	// search for "Disallow:" commands.
	String strURL = url.getFile();
	int index = 0;
	while ((index = strCommands.indexOf(DISALLOW, index)) != -1) {
	    index += DISALLOW.length();
	    String strPath = strCommands.substring(index);
	    StringTokenizer st = new StringTokenizer(strPath);

	    if (!st.hasMoreTokens())
		break;

	    String strBadPath = st.nextToken();

	    // if the URL starts with a disallowed path, it is not safe
	    if (strURL.indexOf(strBadPath) == 0)
		return false;
	}

	return true;
    }

    public void paint(Graphics g) {
      	//Draw a Rectangle around the applet's display area.
      	g.drawRect(0, 0, getSize().width - 1, getSize().height - 1);

	panelMain.paint(g);
	panelMain.paintComponents(g);
	// update(g);
	// panelMain.update(g);
    }

    public void run() {
	String strURL = textURL.getText();
	String strTargetType = choiceType.getSelectedItem();
	int numberSearched = 0;
	int numberFound = 0;

	if (strURL.length() == 0) {
	    setStatus("ERROR: must enter a starting URL");
	    return;
	}

	// initialize search data structures
	vectorToSearch.removeAllElements();
	vectorSearched.removeAllElements();
	vectorMatches.removeAllElements();
	listMatches.removeAll();

	vectorToSearch.addElement(strURL);

	while ((vectorToSearch.size() > 0)
	  && (Thread.currentThread() == searchThread)) {
	    // get the first element from the to be searched list
	    strURL = (String) vectorToSearch.elementAt(0);

	    setStatus("searching " + strURL);

	    URL url;
	    try {
		url = new URL(strURL);
	    } catch (MalformedURLException e) {
		setStatus("ERROR: invalid URL " + strURL);
		break;
	    }

	    // mark the URL as searched (we want this one way or the other)
	    vectorToSearch.removeElementAt(0);
	    vectorSearched.addElement(strURL);

	    // can only search http: protocol URLs
	    if (url.getProtocol().compareTo("http") != 0)
		break;

	    // test to make sure it is before searching
	    if (!robotSafe(url))
		break;

	    try {
		// try opening the URL
		URLConnection urlConnection = url.openConnection();

		urlConnection.setAllowUserInteraction(false);

		InputStream urlStream = url.openStream();
		String type
		  = urlConnection.guessContentTypeFromStream(urlStream);
		if (type == null)
		    break;
		if (type.compareTo("text/html") != 0)
		    break;

		// search the input stream for links
		// first, read in the entire URL
		byte b[] = new byte[1000];
		int numRead = urlStream.read(b);
		String content = new String(b, 0, numRead);
		while (numRead != -1) {
		    if (Thread.currentThread() != searchThread)
			break;
		    numRead = urlStream.read(b);
		    if (numRead != -1) {
			String newContent = new String(b, 0, numRead);
			content += newContent;
		    }
		}
		urlStream.close();

		if (Thread.currentThread() != searchThread)
		    break;

		String lowerCaseContent = content.toLowerCase();

		int index = 0;
		while ((index = lowerCaseContent.indexOf("<a", index)) != -1)
		{
		    if ((index = lowerCaseContent.indexOf("href", index)) == -1)
			break;
		    if ((index = lowerCaseContent.indexOf("=", index)) == -1)
			break;

		    if (Thread.currentThread() != searchThread)
			break;

		    index++;
		    String remaining = content.substring(index);

		    StringTokenizer st
		      = new StringTokenizer(remaining, "\t\n\r\">#");
		    String strLink = st.nextToken();

		    URL urlLink;
		    try {
			urlLink = new URL(url, strLink);
			strLink = urlLink.toString();
		    } catch (MalformedURLException e) {
			setStatus("ERROR: bad URL " + strLink);
			continue;
		    }

		    // only look at http links
		    if (urlLink.getProtocol().compareTo("http") != 0)
			break;

		    if (Thread.currentThread() != searchThread)
			break;

		    try {
			// try opening the URL
			URLConnection urlLinkConnection
			  = urlLink.openConnection();
			urlLinkConnection.setAllowUserInteraction(false);
			InputStream linkStream = urlLink.openStream();
			String strType
			  = urlLinkConnection.guessContentTypeFromStream(linkStream);
			linkStream.close();

			// if another page, add to the end of search list
			if (strType == null)
			    break;
			if (strType.compareTo("text/html") == 0) {
			    // check to see if this URL has already been
			    // searched or is going to be searched
			    if ((!vectorSearched.contains(strLink))
			      && (!vectorToSearch.contains(strLink))) {

				// test to make sure it is robot-safe!
				if (robotSafe(urlLink))
				    vectorToSearch.addElement(strLink);
			    }
			}

			// if the proper type, add it to the results list
			// unless we have already seen it
			if (strType.compareTo(strTargetType) == 0) {
			    if (vectorMatches.contains(strLink) == false) {
				listMatches.add(strLink);
				vectorMatches.addElement(strLink);
				numberFound++;
				if (numberFound >= SEARCH_LIMIT)
				    break;
			    }
			}
		    } catch (IOException e) {
			setStatus("ERROR: couldn't open URL " + strLink);
			continue;
		    }
		}
	    } catch (IOException e) {
		setStatus("ERROR: couldn't open URL " + strURL);
		break;
	    }

	    numberSearched++;
	    if (numberSearched >= SEARCH_LIMIT)
		break;
	}

	if (numberSearched >= SEARCH_LIMIT || numberFound >= SEARCH_LIMIT)
	    setStatus("reached search limit of " + SEARCH_LIMIT);
	else
	    setStatus("done");
	searchThread = null;
	// searchThread.stop();
    }

    void setStatus(String status) {
	labelStatus.setText(status);
    }

    public void actionPerformed(ActionEvent event) {
	String command = event.getActionCommand();

	if (command.compareTo(SEARCH) == 0) {
	    setStatus("searching...");

	    // launch a thread to do the search
	    if (searchThread == null) {
		searchThread = new Thread(this);
	    }
	    searchThread.start();
	}
	else if (command.compareTo(STOP) == 0) {
	    stop();
	}
    }
        public static void main (String argv[])
        {
                Frame f = new Frame("WebFrame");
                WebCrawler applet = new WebCrawler();
		f.add("Center", applet);

/*		Behind a firewall set your proxy and port here!
*/
                Properties props= new Properties(System.getProperties());
                props.put("http.proxySet", "true");
        	props.put("http.proxyHost", "webcache-cup");
        	props.put("http.proxyPort", "8080");

                Properties newprops = new Properties(props);
                System.setProperties(newprops);
/**/


                applet.init();
                applet.start();
                f.pack();
                f.show();
        }

}





But when i run it, and enter any URL it shows Following error in Cmd



Exception in thread "AWT-EventQueue-1" java.lang.IllegalThreadStateExcepti
        at java.lang.Thread.start(Thread.java:571)
        at WebCrawler.actionPerformed(WebCrawler.java:388)
        at java.awt.Button.processActionEvent(Button.java:388)
        at java.awt.Button.processEvent(Button.java:356)
        at java.awt.Component.dispatchEventImpl(Component.java:3955)
        at java.awt.Component.dispatchEvent(Component.java:3803)
        at java.awt.EventQueue.dispatchEvent(EventQueue.java:463)
        at java.awt.EventDispatchThread.pumpOneEventForHierarchy(EventDisp
        at java.awt.EventDispatchThread.pumpEventsForHierarchy(EventDispat
        at java.awt.EventDispatchThread.pumpEvents(EventDispatchThread.jav
        at java.awt.EventDispatchThread.pumpEvents(EventDispatchThread.jav
        at java.awt.EventDispatchThread.run(EventDispatchThread.java:110)




Please look at it...please tell what i need to do in it....

and what i need to edit in this Code of webcrawler



Properties props= new Properties(System.getProperties());
                props.put("http.proxySet", "true");
        	props.put("http.proxyHost", "webcache-cup");
        	props.put("http.proxyPort", "8080");


Please help me out....
Please help.....
AnswerRe: A Web Crawler code...help Pin
Richard MacCutchan27-Nov-09 2:32
mveRichard MacCutchan27-Nov-09 2:32 
GeneralRe: A Web Crawler code...help Pin
sangeeta200927-Nov-09 2:54
sangeeta200927-Nov-09 2:54 
GeneralRe: A Web Crawler code...help Pin
Richard MacCutchan27-Nov-09 3:55
mveRichard MacCutchan27-Nov-09 3:55 
GeneralRe: A Web Crawler code...help Pin
Nagy Vilmos27-Nov-09 4:12
professionalNagy Vilmos27-Nov-09 4:12 
GeneralRe: A Web Crawler code...help Pin
Richard MacCutchan27-Nov-09 5:13
mveRichard MacCutchan27-Nov-09 5:13 
GeneralRe: A Web Crawler code...help Pin
sangeeta200928-Nov-09 1:29
sangeeta200928-Nov-09 1:29 
GeneralRe: A Web Crawler code...help Pin
Richard MacCutchan28-Nov-09 1:36
mveRichard MacCutchan28-Nov-09 1:36 
GeneralRe: A Web Crawler code...help Pin
sangeeta200928-Nov-09 1:54
sangeeta200928-Nov-09 1:54 
GeneralRe: A Web Crawler code...help Pin
Richard MacCutchan28-Nov-09 2:10
mveRichard MacCutchan28-Nov-09 2:10 
Questionrouting table in java Pin
vidzdas27-Nov-09 2:02
vidzdas27-Nov-09 2:02 
AnswerRe: routing table in java Pin
Richard MacCutchan27-Nov-09 2:27
mveRichard MacCutchan27-Nov-09 2:27 
GeneralRe: routing table in java Pin
vidzdas27-Nov-09 2:54
vidzdas27-Nov-09 2:54 
GeneralRe: routing table in java Pin
Richard MacCutchan27-Nov-09 3:01
mveRichard MacCutchan27-Nov-09 3:01 
GeneralRe: routing table in java Pin
vidzdas27-Nov-09 3:23
vidzdas27-Nov-09 3:23 
QuestionSerialize and Deserialize Excel File Pin
2008.sobha26-Nov-09 14:34
2008.sobha26-Nov-09 14:34 
AnswerRe: Serialize and Deserialize Excel File Pin
Richard MacCutchan26-Nov-09 21:52
mveRichard MacCutchan26-Nov-09 21:52 
GeneralRe: Serialize and Deserialize Excel File Pin
Nagy Vilmos26-Nov-09 21:54
professionalNagy Vilmos26-Nov-09 21:54 

General General    News News    Suggestion Suggestion    Question Question    Bug Bug    Answer Answer    Joke Joke    Praise Praise    Rant Rant    Admin Admin   

Use Ctrl+Left/Right to switch messages, Ctrl+Up/Down to switch threads, Ctrl+Shift+Left/Right to switch pages.