How to find duplicate contents on website or web page in Selenium Webdriver using java

Duplicate content is content that appears on a website under more than one URL with the same title. When you create more than one page with the same title under the same content type in a popular CMS such as Drupal, Joomla or WordPress, the pages share the title but get different URLs. For example, if I create three basic pages titled 'About QA' in Drupal, the three URLs created are 'http://localhost/duplicatexontent/about-qa', 'http://localhost/duplicatexontent/about-qa-0' and 'http://localhost/duplicatexontent/about-qa-1'. Duplicate content is also sometimes created when a script migrates data such as pages, orders or products from one website to another. This tutorial shows how to find duplicate content using Selenium WebDriver.


Demo Duplicate Content HTML Page Code
 <!DOCTYPE html>  
 <html>  
 <head>  
 <title>duplicte</title>  
 </head>  
 <body>  
 <!-- Demo content listing: one table row per page, with a title link, content type and author. -->  
 <div align="center">  
 <table border="1">  
  <thead>   
   <tr>   
       <th>Title</th>   
       <th>Type</th>  
        <th>Author</th>  
      </tr>  
  </thead>  
  <tbody>   
   <!-- First "About QA" page: its URL has no numeric suffix. -->  
   <tr>  
       <td><a href="http://localhost/duplicatexontent/about-qa">About QA</a> </td>  
       <td>Basic page</td>  
        <td>hiro</td>  
      </tr>   
       <!-- Same title "About QA" again, but the URL ends in "-1". -->  
       <tr>  
        <td><a href="http://localhost/duplicatexontent/about-qa-1">About QA</a> </td>  
        <td>Basic page</td>  
        <td>hiro</td>  
       </tr>        
       <!-- The following rows each have a unique title and a URL without a numeric suffix. -->  
       <tr>  
        <td><a href="http://localhost/duplicatexontent/code-runner">Code Runner</a> </td>  
        <td>Basic page</td>  
        <td>hiro</td>  
       </tr>        
       <tr>  
        <td> <a href="http://localhost/duplicatexontent/access-denied">ACCESS DENIED</a> </td>  
        <td>Basic page</td>  
        <td>hiro</td>  
       </tr>        
       <tr>  
        <td><a href="http://localhost/duplicatexontent/circulation">Circulation</a> </td>  
        <td>Basic page</td>  
        <td>hiro</td>  
       </tr>   
       <tr>  
        <td><a href="http://localhost/duplicatexontent/digital-advertising">Digital Advertising</a> </td>  
        <td>Basic page</td>  
        <td>hiro</td>  
       </tr>        
       <tr>  
        <td><a href="http://localhost/duplicatexontent/summary-body">Summary of Body</a> </td>  
        <td>Basic page</td>  
        <td>hiro</td>  
       </tr>        
       <tr>  
        <td> <a href="http://localhost/duplicatexontent/webinars">Webinars</a></td>  
        <td>Basic page</td>  
        <td>hiro</td>  
       </tr>        
       <tr>  
        <td><a href="http://localhost/duplicatexontent/videos">Videos</a> </td>  
        <td>Basic page</td>  
        <td>hiro</td>  
       </tr>        
       <tr>  
        <td> <a href="http://localhost/duplicatexontent/resources">Resources</a></td>  
        <td>Basic page</td>  
        <td>hiro</td>  
       </tr>        
       <tr>  
        <td> <a href="http://localhost/duplicatexontent/news">News</a></td>  
        <td>Basic page</td>  
        <td>hiro</td>  
       </tr>  
   <!-- Third "About QA" entry: the URL ends in the two-digit suffix "-10". -->  
   <tr>  
        <td><a href="http://localhost/duplicatexontent/about-qa-10">About QA</a> </td>  
        <td>Basic page</td>  
        <td>hiro</td>  
       </tr>         
  </tbody>  
 </table>  
 </div>  
 </body>  
 </html>  

HTML page Output


Demo Selenium Webdriver Code for the Duplicate Contents HTML Page Above
 import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.TimeUnit;
 import java.util.regex.Pattern;
 import org.openqa.selenium.By;
 import org.openqa.selenium.NoSuchElementException;
 import org.openqa.selenium.WebDriver;
 import org.openqa.selenium.WebElement;
 import org.openqa.selenium.firefox.FirefoxDriver;

  
 /**
  * Opens the demo listing page in Firefox, reads every link on it and sorts the
  * links into "duplicate" and "fresh" contents, then prints both groups.
  *
  * <p>A link counts as a duplicate when its URL ends in a hyphen followed by one or
  * more digits (for example {@code about-qa-1} or {@code about-qa-10}).
  * NOTE(review): a page whose own title legitimately ends in a number
  * (e.g. {@code top-10}) would also be reported as a duplicate - confirm this is
  * acceptable for the site under test.
  */
 public class Duplicatecontentshandler {

   /**
    * Whole-URL pattern for a numeric suffix. The digit part is {@code [0-9]+}
    * so that multi-digit suffixes such as "-10" are matched as well as "-1".
    * Compiled once instead of on every loop iteration.
    */
   private static final Pattern DUPLICATE_URL = Pattern.compile("[^\\s]+-[0-9]+");

   /**
    * Runs the duplicate-content scan and prints the result to standard output.
    *
    * @param args not used
    * @throws InterruptedException kept for signature compatibility; not thrown by this body
    */
   public static void main(String[] args) throws InterruptedException {
     List<String> freshcontents = new ArrayList<String>();
     List<String> duplicatecontents = new ArrayList<String>();
     WebDriver driver = null;
     try {
       driver = new FirefoxDriver();
       driver.manage().window().maximize();
       driver.get("file:///C:/Users/Hiro%20Mia/Desktop/Blog%20content/duplicate%20contents.html");
       driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);
       List<WebElement> urllist = driver.findElements(By.tagName("a"));
       for (WebElement url : urllist) {
         // Read the attribute once; an anchor without an href yields null and is skipped.
         String rawHref = url.getAttribute("href");
         if (rawHref == null) {
           continue;
         }
         String href = rawHref.trim();
         String entry = url.getText().trim() + "  " + href;
         // check duplicate content
         if (DUPLICATE_URL.matcher(href).matches()) {
           // store duplicate contents into duplicatecontents List variable
           duplicatecontents.add(entry);
         } else {
           // store Fresh content into freshcontents List variable
           freshcontents.add(entry);
         }
       }
     } catch (NoSuchElementException e) {
       e.printStackTrace();
     } finally {
       // Always close the browser, even when the scan fails part-way through.
       if (driver != null) {
         driver.quit();
       }
     }
     System.out.println("===== Duplicate contents =======");
     System.out.println("Number of duplicate contents =: " + duplicatecontents.size());
     for (String duplicate : duplicatecontents) {
       System.out.println(duplicate);
     }
     System.out.println("\n===== Fresh contents =======");
     System.out.println("Number of Fresh contents =: " + freshcontents.size());
     for (String fresh : freshcontents) {
       System.out.println(fresh);
     }
   }
 }

Output
 ===== Duplicate contents =======  
 Number of duplicate contents =: 2  
 About QA  http://localhost/duplicatexontent/about-qa-1  
 About QA  http://localhost/duplicatexontent/about-qa-10  
 ===== Fresh contents =======  
 Number of Fresh contents =: 10  
 About QA  http://localhost/duplicatexontent/about-qa  
 Code Runner  http://localhost/duplicatexontent/code-runner  
 ACCESS DENIED  http://localhost/duplicatexontent/access-denied  
 Circulation  http://localhost/duplicatexontent/circulation  
 Digital Advertising  http://localhost/duplicatexontent/digital-advertising  
 Summary of Body  http://localhost/duplicatexontent/summary-body  
 Webinars  http://localhost/duplicatexontent/webinars  
 Videos  http://localhost/duplicatexontent/videos  
 Resources  http://localhost/duplicatexontent/resources  
 News  http://localhost/duplicatexontent/news  

How to use String operations such as compare, replace, search and split in Selenium Webdriver

Common programming tasks include comparing two strings, searching for a sequence of characters in a string, removing leading and trailing spaces, getting the length of a string, replacing every occurrence of an old char or CharSequence with a new one, and converting a string to lower or upper case. Java has a special String class that provides several methods to handle these operations, and we can use those String methods in Selenium WebDriver for the same tasks. In this tutorial I will show how to use these String methods in Selenium WebDriver.


Method name: trim
Syntax : public String trim()
Purpose : Return string with omitted leading and trailing spaces

Method name: contains
Syntax : public boolean contains(CharSequence sequence)
Purpose : searches the sequence of characters in this string. It returns true if sequence of char values are found in this string otherwise returns false.

Method name: format
Syntax : public static String format(String format, Object... args)
Purpose : returns a string formatted with the given format string and arguments, using the default locale.

Method name: startsWith
Syntax : public boolean startsWith(String Sequence_of_character)
Purpose : checks if this string starts with given Sequence of character. It returns true if this string starts with given Sequence of character else returns false.

Method name: endsWith
Syntax : public boolean endsWith(String Sequence_of_character)
Purpose : checks if this string ends with given Sequence of character. It returns true if this string ends with given Sequence of character else returns false.

Method name: equals and equalsIgnoreCase
Syntax : public boolean equals(Object anotherObject) or public boolean equalsIgnoreCase(String anotherString)
Purpose : compares the two given strings based on the content of the string. If any character is not matched, it returns false. If all characters are matched, it returns true. equals method is case sensitive and equalsIgnoreCase is not case sensitive.

Method name: isEmpty
Syntax : public boolean isEmpty()
Purpose : checks if this string is empty. It returns true, if length of string is 0 otherwise false.

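Method name: length or size
Syntax : public int length() (String) or public int size() (collections such as List)
Purpose : length() returns the total number of characters in the string. String itself has no size() method; size() returns the number of elements in a collection, for example the List returned by getOptions() in the Selenium example below.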

Method name: replace
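Syntax : public String replace(char oldChar, char newChar) or public String replace(CharSequence target, CharSequence replacement)
Purpose : returns a string in which every occurrence of the old char or CharSequence is replaced with the new char or CharSequence.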

Method name: split
Syntax : public String[] split(String regex)
Purpose : splits this string around matches of the given regular expression and returns a String array.

Method name: toLowerCase
Syntax : public String toLowerCase()
Purpose : returns the string in lowercase letter. In other words, it converts all characters of the string into lower case letter.

Method name: toUpperCase
Syntax : public String toUpperCase()
Purpose : returns the string in uppercase letter. In other words, it converts all characters of the string into upper case letter.

Demo Java Source Code
 /**
  * Console walkthrough of common {@link String} methods: trim, contains,
  * startsWith, endsWith, isEmpty, equals, equalsIgnoreCase, length, replace,
  * format, toLowerCase, toUpperCase and split. Each step prints one line.
  */
 public class Stringmethodshandler {

   /**
    * Runs every demo step in order and writes the results to standard output.
    *
    * @param args not used
    */
   public static void main(String[] args) {
     final String padded = " Welcome to Java Demo Program ";
     final String toolName = "Selenium Webdriver";
     final double amount = 125.58655;
     // Several steps below work on the text without its surrounding spaces.
     final String stripped = padded.trim();

     // trim
     System.out.println("Befor Trim =: " + padded);
     System.out.println("After Trim =: " + stripped);

     // contains
     System.out.println(padded.contains("Demo")
         ? "contains method pass"
         : "contains method fail");

     // startsWith
     System.out.println(stripped.startsWith("Welc")
         ? "startsWith method pass"
         : "startsWith method fail");

     // endsWith
     System.out.println(stripped.endsWith("ogram")
         ? "endsWith method pass"
         : "endsWith method fail");

     // isEmpty (the sample text is not empty, so this reports "fail")
     System.out.println(padded.isEmpty()
         ? "isEmpty method pass"
         : "isEmpty method fail");

     // equals is case sensitive, so the lower-case "w" makes it report "fail"
     System.out.println(toolName.equals("Selenium webdriver")
         ? "equals method pass"
         : "equals method fail");

     // equalsIgnoreCase ignores the difference in letter case
     System.out.println(toolName.equalsIgnoreCase("Selenium webdriver")
         ? "equalsIgnoreCase method pass"
         : "equalsIgnoreCase method fail");

     // length (counts the leading and trailing space too)
     System.out.println("String length =: " + padded.length());

     // replace
     String swapped = stripped.replace("Demo", "Example");
     System.out.println("String replaced =: " + swapped);

     // format: two digits after the decimal point
     String rounded = String.format("%.2f", amount);
     System.out.println("Format method examplet =: " + rounded);

     // toLowerCase / toUpperCase
     System.out.println("toLowerCase method example =: " + toolName.toLowerCase());
     System.out.println("toUpperCase method example =: " + toolName.toUpperCase());

     // split on single spaces and print each piece with its position
     System.out.println("============================================================");
     int position = 0;
     for (String word : stripped.split(" ")) {
       System.out.println("splited string =: " + position + " " + word);
       position++;
     }
   }
 }

Output
 Befor Trim =:  Welcome to Java Demo Program   
 After Trim =: Welcome to Java Demo Program  
 contains method pass  
 startsWith method pass  
 endsWith method pass  
 isEmpty method fail  
 equals method fail  
 equalsIgnoreCase method pass  
 String length =: 30  
 String replaced =: Welcome to Java Example Program  
 Format method examplet =: 125.59  
 toLowerCase method example =: selenium webdriver  
 toUpperCase method example =: SELENIUM WEBDRIVER  
 ============================================================  
 splited string =: 0 Welcome  
 splited string =: 1 to  
 splited string =: 2 Java  
 splited string =: 3 Demo  
 splited string =: 4 Program  


Selenium Webdriver Demo Source Code
 import java.util.concurrent.TimeUnit;  
 import org.openqa.selenium.By;  
 import org.openqa.selenium.WebDriver;  
 import org.openqa.selenium.WebElement;  
 import org.openqa.selenium.firefox.FirefoxDriver;  
 import org.openqa.selenium.support.ui.Select;  
 public class SeleStringmethodshandler {  
   public static void main(String[] args) throws InterruptedException {  
     String selected_country_name, link_signup, strspilt[];  
     // create objects and variables instantiation     
     WebDriver driver = new FirefoxDriver();  
     // maximize the browser window     
     driver.manage().window().maximize();  
     // launch the firefox browser and open the application url    
     driver.get("http://www.gmail.com/");  
     //Set timeout     
     driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);  
     // trim, equalsIgnoreCase method example   
     Select countres = new Select(driver.findElement(By.id("lang-chooser")));  
     selected_country_name = countres.getFirstSelectedOption().getText();  
     System.out.println("Selected country name =: " + selected_country_name);  
     if (!(selected_country_name.trim().equalsIgnoreCase("English (United States)"))) {  
       countres.selectByValue("en");  
     }  
     // size method example  
     for (int i = 0; i < countres.getOptions().size(); i++) {  
       System.out.println("Country name =: " + i + " " + countres.getOptions().get(i).getText().trim());  
     }  
     // contains method example  
     if (driver.getPageSource().contains("Gmail")) {  
       System.out.println("Gmail is contain in www.gmail.com");  
     } else {  
       System.out.println("Gmail is not contain in www.gmail.com");  
     }  
     link_signup = driver.findElement(By.id("link-signup")).getText().trim();  
     // startsWith method example  
     if (link_signup.startsWith("Create")) {  
       System.out.println("startsWith method pass");  
     }  
     // endsWith method example   
     if (link_signup.endsWith("account")) {  
       System.out.println("endsWith method pass");  
     }  
     // isEmpty method example  
     if (!(link_signup.isEmpty())) {  
       System.out.println("isEmpty method pass");  
     }  
     // equals method example  
     if (link_signup.equals("Create account")) {  
       System.out.println("equals method pass");  
     }  
     // toUpperCase method example   
     System.out.println("toUpperCase method example =: " + link_signup.toUpperCase());  
     // toLowerCase method example  
     System.out.println("toLowerCase method example =: " + link_signup.toLowerCase());  
     // split method example  
     strspilt = link_signup.split(" ");  
     for (int i = 0; i < strspilt.length; i++) {  
       System.out.println(strspilt[i]);  
     }  
     // quit Firefox browser    
     driver.quit();  
   }  
 }