import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.firefox.FirefoxDriver;
public class Getallurlsfromwebsite {

    static WebDriver driver;
    static List<WebElement> mainurl, suburl;
    static List<String> uniqueurl;

    public static void main(String[] a) {
        // Initialize Firefox driver
        driver = new FirefoxDriver();
        // Maximize browser window
        driver.manage().window().maximize();
        driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);

        String websiteaddress = "http://www.google.com";
        uniqueurl = new ArrayList<>();

        // Go to website
        driver.get(websiteaddress);

        // Get all links of the home page
        mainurl = driver.findElements(By.tagName("a"));

        // Collect the hrefs as plain strings first: once the driver navigates away,
        // the home page WebElements become stale and can no longer be read
        List<String> pageurls = new ArrayList<>();
        for (int k = 0; k < mainurl.size(); k++) {
            String text = mainurl.get(k).getText();
            String href = mainurl.get(k).getAttribute("href");
            // Keep only links that are not null or empty, belong to the current
            // website and are not mail addresses
            if (text != null && !text.trim().equals("")
                    && href != null && !href.trim().equals("")
                    && href.contains("google.com") && !href.contains("@")) {
                if (!uniqueurl.contains(href.trim())) {
                    uniqueurl.add(href.trim());
                    pageurls.add(href.trim());
                    // Print main page urls
                    System.out.println(href);
                }
            }
        }

        // Visit each collected url and gather the links of the sub pages
        for (String pageurl : pageurls) {
            driver.get(pageurl);
            // Get all sub page links
            suburl = driver.findElements(By.tagName("a"));
            for (int m = 0; m < suburl.size(); m++) {
                String subtext = suburl.get(m).getText();
                String subhref = suburl.get(m).getAttribute("href");
                if (subtext != null && !subtext.trim().equals("")
                        && subhref != null && !subhref.trim().equals("")) {
                    if (!uniqueurl.contains(subhref.trim())) {
                        uniqueurl.add(subhref.trim());
                        // Print sub page urls
                        System.out.println(subhref);
                    }
                }
            }
        }

        driver.quit();
    }
}
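Note: the original listing tried to open each link in a new tab by sending Ctrl+T and Ctrl+W to the body element. WebDriver never switches its focus to a tab opened that way, and most current browsers simply ignore those key chords, so the version above navigates in the same window instead. If a separate tab is really wanted and Selenium 4 is available, window handles do this reliably. A minimal sketch, assuming Selenium 4; the sub-page URL is only an example:

import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WindowType;
import org.openqa.selenium.firefox.FirefoxDriver;

public class OpenLinkInNewTab {
    public static void main(String[] args) {
        WebDriver driver = new FirefoxDriver();
        driver.get("http://www.google.com");

        // Remember the handle of the original tab
        String mainTab = driver.getWindowHandle();

        // Open a real new tab (Selenium 4 API) and load a sub page in it
        driver.switchTo().newWindow(WindowType.TAB);
        driver.get("http://www.google.com/intl/en/about"); // example URL, use a collected href here

        // ... scrape the sub page here, e.g. driver.findElements(By.tagName("a")) ...

        // Close the extra tab and switch back to the original one
        driver.close();
        driver.switchTo().window(mainTab);

        driver.quit();
    }
}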
How to get all valid URLs from a website in Selenium WebDriver using Java.
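Collecting an href only shows that the anchor tag has one. If "valid" should also mean that the URL actually responds, every collected address can be checked with plain java.net.HttpURLConnection once the Selenium part is done. The sketch below is a minimal, stand-alone example; the class and method names are only for illustration:

import java.net.HttpURLConnection;
import java.net.URL;

public class LinkStatusCheck {

    // Returns the HTTP status code of the url, or -1 if the request fails
    static int getStatus(String urlString) {
        try {
            HttpURLConnection connection = (HttpURLConnection) new URL(urlString).openConnection();
            connection.setRequestMethod("HEAD"); // status only, no response body needed
            connection.setConnectTimeout(5000);
            connection.setReadTimeout(5000);
            return connection.getResponseCode();
        } catch (Exception e) {
            return -1;
        }
    }

    public static void main(String[] args) {
        System.out.println("http://www.google.com -> " + getStatus("http://www.google.com"));
    }
}

Calling getStatus for every entry of uniqueurl and treating codes below 400 as valid is one simple policy.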