package pSpider;

import java.util.concurrent.TimeUnit;

/**
 * The class that implements the Runnable interface is the one that defines
 * what is to be done by each 'thread' running concurrently. In our case,
 * that means grabbing pages from the 'work' queue and processing them.
 *
 * @author shoop
 */
public class ParallelSpider implements Runnable {

    private String beginningURL = null;

    /**
     * Helps download and parse the web pages.
     */
    private HttpHelper helper = new HttpHelper();

    /**
     * Maximum number of URLs that each spider should scrape.
     */
    private int maxUrls = 3; // you can experiment with this value
    private int urlCount = 0;

    // To continue the 'pattern' from RunThreadedSpider, have this shared data
    // be passed into the constructor from the single place where it was
    // originally created.
    private SharedSpiderData sharedData;

    /**
     * Create a new spider with access to the shared data by passing it a
     * reference. This constructor can be called many times.
     *
     * @param data Thread-safe data structure for this program.
     */
    public ParallelSpider(SharedSpiderData data) {
        sharedData = data;
    }

    /**
     * Create a new spider with access to the shared data by passing it a
     * reference, along with the starting point for scraping. This constructor
     * should be called once.
     *
     * @param data     Thread-safe data structure for this program.
     * @param startURL Initial URL for crawling.
     */
    public ParallelSpider(SharedSpiderData data, String startURL) {
        sharedData = data;
        beginningURL = startURL;
    }

    /**
     * The method that is executed when you 'start()' a thread with this
     * class. Thus, the thread behavior is here in this run method.
     */
    public void run() {
        // To get things started, we need one thread to put the starting point
        // URL onto the work queue.
        if (beginningURL != null) {
            try {
                sharedData.getWork().put(beginningURL);
            } catch (InterruptedException e) {
                // catch errors that can occur from the 'put' to the shared queue
                System.out.println("Error putting data into work queue");
                e.printStackTrace();
            }
        }

        while (urlCount < maxUrls) { // each thread does a certain amount of 'work'
            String url;
            // Grab from the work queue and process the page. A sketch,
            // assuming getWork() returns a java.util.concurrent.BlockingQueue<String>:
            // poll with a timeout so the thread does not block forever once
            // the queue stays empty.
            try {
                url = sharedData.getWork().poll(2, TimeUnit.SECONDS);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                break;
            }
            if (url == null) { // timed out: no more work is arriving
                break;
            }
            processPage(url);
            urlCount++;
        }
        System.err.println("ParallelSpider done with URLs");
    }

    /**
     * Retrieves content from a URL and processes that content.
     *
     * @param url A URL to process.
     */
    public void processPage(String url) {
        String html = helper.retrieve(url);
        for (String url2 : helper.extractLinks(url, html)) {
            System.out.println("next URL on " + url + ": " + url2);
            if (!helper.isImage(url2)) {
                // Process the discovered link. A sketch: feed it back onto
                // the shared work queue so another thread can pick it up.
                try {
                    sharedData.getWork().put(url2);
                } catch (InterruptedException e) {
                    System.out.println("Error putting data into work queue");
                    e.printStackTrace();
                }
            }
        }
    }
}
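
/*
 * For reference, the shape of SharedSpiderData that this file appears to
 * assume, inferred only from the calls above (a sketch, not the actual
 * class, which lives elsewhere in this package): getWork() hands back a
 * java.util.concurrent.BlockingQueue<String>, since put(String) here can
 * throw InterruptedException.
 *
 *     public class SharedSpiderData {
 *         private final BlockingQueue<String> work = new LinkedBlockingQueue<>();
 *         public BlockingQueue<String> getWork() { return work; }
 *     }
 */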
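
/**
 * A minimal driver sketch showing how the two constructors above are meant
 * to work together, following the pattern of RunThreadedSpider: one seeded
 * spider puts the first URL on the work queue, and the other spiders pull
 * from it. The no-argument SharedSpiderData constructor and the starting URL
 * are assumptions; check the real driver class for the actual wiring.
 */
class ParallelSpiderDriverSketch {
    public static void main(String[] args) throws InterruptedException {
        SharedSpiderData data = new SharedSpiderData(); // assumed no-arg constructor
        Thread seeded = new Thread(new ParallelSpider(data, "http://www.example.com"));
        Thread worker = new Thread(new ParallelSpider(data));
        seeded.start();
        worker.start();
        seeded.join();
        worker.join();
    }
}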