Pages

Thursday 24 January 2013

Search (metacharacters)

Searching text with regular-expression metacharacters.
/*
 * API: regular-expression constructs 
 * docs.oracle.com
 */

package pattern_metacharacters;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Console demo of regular-expression metacharacters: runs a series of
 * patterns over fixed sample strings and prints every match.
 *
 * @author Alex
 */
public class Pattern_Metacharacters {

    /** Comma-separated file names that the file-name demos scan. */
    private static final String FILE_NAMES =
            "p.dat,2.exe,pro3.pdf, pro4.doc,pro5.txt,pro6.pdf";

    /** Two spellings of a ten-digit number, with and without spaces. */
    private static final String PHONE_NUMBERS = "012 3456 7890, 0123456789 ,";

    /**
     * Runs every demo search in a fixed order.
     *
     * Metacharacters used by the patterns below:
     * "."   - any character
     * "\\." - a literal dot
     * "\\d" - a digit
     * "\\w" - a word character
     * "\\s" - a whitespace character
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        final String[] fileNamePatterns = {
            "pro\\.",                 // "pro" followed by a literal dot
            "pro.",                   // "pro" followed by any one character
            "pro.?",                  // (.?) once or not at all
            "pro.*",                  // greedy (.*)
            "pro.+",                  // greedy (.+)
            "pro.*?\\.+\\w\\w\\w",    // reluctant (.*?) stops at the nearest dot
            ".+?\\.+\\w\\w\\w",       // reluctant (.+?)
            ".+.txt",                 // greedy (.+)
            "\\w{1,6}.pdf",           // one to six word characters before the suffix
        };
        for (String fileNamePattern : fileNamePatterns) {
            search(fileNamePattern, FILE_NAMES);
        }

        // 3-4-3 digit groups, optional whitespace between them, optional last digit.
        search("\\d\\d\\d\\s?\\d\\d\\d\\d\\s?\\d\\d\\d\\d?", PHONE_NUMBERS);
    }

    /**
     * Prints each match of {@code pattern} in {@code expression} (the pattern,
     * the match start index and the matched text), followed by a summary line.
     *
     * @param pattern    regular expression to compile
     * @param expression text to scan
     * @return number of matches found (int)
     */
    public static int search(String pattern, String expression) {
        final Matcher matcher = Pattern.compile(pattern).matcher(expression);
        int hits = 0;
        for (; matcher.find(); hits++) {
            System.out.println(pattern + " found: " + matcher.start() + " " + matcher.group());
        }
        System.out.println("found: " + hits + " time(s)" + "\n");
        return hits;
    }
}

This produces the following output:

found: 0 time(s)

pro. found: 12 pro3
pro. found: 22 pro4
pro. found: 31 pro5
pro. found: 40 pro6
found: 4 time(s)

pro.? found: 12 pro3
pro.? found: 22 pro4
pro.? found: 31 pro5
pro.? found: 40 pro6
found: 4 time(s)

pro.* found: 12 pro3.pdf, pro4.doc,pro5.txt,pro6.pdf
found: 1 time(s)

pro.+ found: 12 pro3.pdf, pro4.doc,pro5.txt,pro6.pdf
found: 1 time(s)

pro.*?\.+\w\w\w found: 12 pro3.pdf
pro.*?\.+\w\w\w found: 22 pro4.doc
pro.*?\.+\w\w\w found: 31 pro5.txt
pro.*?\.+\w\w\w found: 40 pro6.pdf
found: 4 time(s)

.+?\.+\w\w\w found: 0 p.dat
.+?\.+\w\w\w found: 5 ,2.exe
.+?\.+\w\w\w found: 11 ,pro3.pdf
.+?\.+\w\w\w found: 20 , pro4.doc
.+?\.+\w\w\w found: 30 ,pro5.txt
.+?\.+\w\w\w found: 39 ,pro6.pdf
found: 6 time(s)

.+.txt found: 0 p.dat,2.exe,pro3.pdf, pro4.doc,pro5.txt
found: 1 time(s)

\w{1,6}.pdf found: 12 pro3.pdf
\w{1,6}.pdf found: 40 pro6.pdf
found: 2 time(s)

\d\d\d\s?\d\d\d\d\s?\d\d\d\d? found: 0 012 3456 7890
\d\d\d\s?\d\d\d\d\s?\d\d\d\d? found: 15 0123456789
found: 2 time(s)


No comments:

Post a Comment