|
| 1 | +open Re |
| 2 | +open Printf |
| 3 | + |
| 4 | +(*****************************************************************************) |
| 5 | +(* *) |
| 6 | +(* Regular Expressions (RegExp) *) |
| 7 | +(* *) |
| 8 | +(* Regular expressions are a way to represent a pattern of text. It's *) |
| 9 | +(* specified as a sequence of characters (with rules, and operators) that *) |
| 10 | +(* represent a full or partial match in a block of text. It's made out of *) |
| 11 | +(* 2 components: a) The pattern itself delimited in [/] slashes; and b) *) |
| 12 | +(* a list of flags that change the way the pattern matching behaves. *) |
| 13 | +(* The 3 most common {e RegExp} flags are: *) |
| 14 | +(* *) |
| 15 | +(* 1. [i]: Case {e insensitive} matching, meaning you don't have to *) |
| 16 | +(* manually specify the casing (lower/uppercase) of the characters. *) |
| 17 | +(* It will match both lower and uppercase regardless of what you say. *) |
| 18 | +(* 2. [m]: {e Multiline} matching, makes the [^] (start) and [$] (end) *) |
| 19 | +(* markers match at each line boundary, not only at the string ends. *) |
| 20 | +(* 3. [g]: For {e global} matching, which makes it so that a pattern can *) |
| 21 | +(* be executed multiple times in a string. Useful to extract all the *) |
| 22 | +(* substrings that match a given pattern, or during search and replace. *) |
| 23 | +(* *) |
| 24 | +(* There are many RegExp standards, {i Perl} being one of the first and *) |
| 25 | +(* most used in Linux. OCaml's standard library comes with a subset of *) |
| 26 | +(* special characters in the form of the [Str] module, but if a developer *) |
| 27 | +(* needs actual support for the full experience, all it takes is *) |
| 28 | +(* installing the [re] package with [opam] which comes with the [Re] high *) |
| 29 | +(* level module, with [Re.Pcre], [Re.Perl], [Re.Glob], [Re.Posix], and *) |
| 30 | +(* [Re.Emacs] submodules to compile specific "dialects" of RegExp. *) |
| 31 | +(* *) |
| 32 | +(*****************************************************************************) |
| 33 | + |
| 34 | +(** [text_block] is the sample paragraph the number extractor is run against. *) |
| 35 | +let text_block = |
| 36 | +  String.concat " " |
| 37 | +    [ "The company reported a profit increase of 10 million dollars in the last quarter, surpassing expectations." |
| 38 | +    ; "However, they also experienced a decrease in revenue of -3.2 million dollars due to unexpected market fluctuations." |
| 39 | +    ; "The CEO announced a new investment plan worth 30.5 million dollars aimed at expanding operations globally." |
| 40 | +    ; "Additionally, the company recorded a growth rate of +8.3% in the previous year." ] |
| 41 | +;; |
| 42 | + |
| 43 | +(** [extract_numbers s] lists the substrings of [s] shaped like a number: an |
| 44 | +    optional [+] or [-] sign, digits, then optionally a dot and more digits. *) |
| 45 | +let extract_numbers = |
| 46 | +  let signed_decimal = Pcre.regexp {|[+-]?\d+(?:\.\d+)?|} in matches signed_decimal |
| 47 | + |
| 48 | +(* Demo: print the sample text, then every number found in it, between rules. *) |
| 49 | +let () = |
| 50 | +  let rule = "---------------------------------------------" in |
| 51 | +  let found = extract_numbers text_block |> String.concat ", " in |
| 52 | +  List.iter |
| 53 | +    print_endline |
| 54 | +    [ "Extracting all numbers in the following text:"; text_block; rule; found; rule; "" ] |
| 55 | +;; |
| 56 | + |
| 57 | +(**************************************************************) |
| 58 | +(* *) |
| 59 | +(* Dificultad Extra (Opcional) *) |
| 60 | +(* *) |
| 61 | +(* Crea 3 expresiones regulares (a tu criterio) capaces de: *) |
| 62 | +(* *) |
| 63 | +(* - Validar un email. *) |
| 64 | +(* - Validar un número de teléfono. *) |
| 65 | +(* - Validar una URL. *) |
| 66 | +(* *) |
| 67 | +(**************************************************************) |
| 68 | + |
| 69 | +module Validator : sig |
| 70 | +  val email : string -> bool |
| 71 | +  (** [email s] tells whether the whole of [s] has the shape [name@domain.tld]. *) |
| 72 | + |
| 73 | +  val phone_number : string -> bool |
| 74 | +  (** [phone_number s] tells whether the whole of [s] is a 3-3-4 digit phone number. *) |
| 75 | + |
| 76 | +  val url : string -> bool |
| 77 | +  (** [url s] tells whether the whole of [s] is a URL; [http(s)://] is optional. *) |
| 78 | +end = struct |
| 79 | +  (* [anchored parts] joins [parts] into one pattern that must cover a whole string. *) |
| 80 | +  let anchored parts = "^" ^ String.concat "" parts ^ "$" |
| 81 | + |
| 82 | +  (* Shape: local part, "@", one domain label, then a dot and an alphabetic TLD. *) |
| 83 | +  let email_re = |
| 84 | +    let local_part = {|[-a-z0-9_.]+|} in |
| 85 | +    let host = {|[-a-z0-9]+|} in |
| 86 | +    let top_level = {|\.[a-z]{2,63}|} in |
| 87 | +    Pcre.regexp ~flags:[ `CASELESS ] (anchored [ local_part; "@"; host; top_level ]) |
| 88 | +  ;; |
| 89 | + |
| 90 | +  (* Shape: optional "+", 1-2 digits and a blank; a 3-digit area code, optionally in |
| 91 | +     parentheses; then 3 and 4 digits, each after an optional blank, dot or hyphen. *) |
| 92 | +  let phone_re = |
| 93 | +    let separator = {|[\s.-]?|} in |
| 94 | +    let country_code = {|(?:\+\d{1,2}\s)?|} in |
| 95 | +    let area_code = {|(?:\(\d{3}\)|\d{3})|} in |
| 96 | +    let exchange = {|\d{3}|} in |
| 97 | +    let subscriber = {|\d{4}|} in |
| 98 | +    Pcre.regexp (anchored [ country_code; area_code; separator; exchange; separator; subscriber ]) |
| 99 | +  ;; |
| 100 | + |
| 101 | +  (* Shape: optional scheme, optional subdomain, domain, one or two TLD labels, then |
| 102 | +     optional path segments and an optional [key=value] query joined by [&]. *) |
| 103 | +  let url_re = |
| 104 | +    let scheme = {|(?:https?://)?|} in |
| 105 | +    let sub = {|(?:[-0-9a-z]+\.)?|} in |
| 106 | +    let host = {|[-0-9a-z]+|} in |
| 107 | +    let top_level = {|(?:\.[a-z]{2,63}){1,2}|} in |
| 108 | +    let path = {|(?:/[-\w]*)*|} in |
| 109 | +    let pair = {|[-\w~.]+=[-\w,.%]*|} in |
| 110 | +    let query = {|(?:\?|} ^ pair ^ {|(?:&|} ^ pair ^ {|)*)?|} in |
| 111 | +    Pcre.regexp ~flags:[ `CASELESS ] (anchored [ scheme; sub; host; top_level; path; query ]) |
| 112 | +  ;; |
| 113 | + |
| 114 | +  let email s = execp email_re s |
| 115 | +  let phone_number s = execp phone_re s |
| 116 | +  let url s = execp url_re s |
| 117 | +end |
| 118 | + |
| 119 | +(* Demo: run every validator over a handful of sample inputs and print one |
| 120 | +   verdict row per sample. *) |
| 121 | +let () = |
| 122 | +  let phone_numbers = |
| 123 | +    [ "664 730 9673" |
| 124 | +    ; "+52 (345)-080-1214" |
| 125 | +    ; "055-216-0945" |
| 126 | +    ; "(686).233.7676" |
| 127 | +    ; "(45)-35-17887" |
| 128 | +    ] |
| 129 | +  in |
| 130 | +  let emails = |
| 131 | +    [ "hello@kozmicblog.com" |
| 132 | +    ; "new.year@old-wave.com" |
| 133 | +    ; "just-AN-email@foo.bar" |
| 134 | +    ; "invalid@tld-domain.x" |
| 135 | +    ] |
| 136 | +  in |
| 137 | +  let urls = |
| 138 | +    [ "http://kozmicblog.com" |
| 139 | +    ; "https://regex101.com/" |
| 140 | +    ; "google.com.mx" |
| 141 | +    ; "your-site.net/route?param=val&param2=val2" |
| 142 | +    ; "http://]what[..is.this/" |
| 143 | +    ] |
| 144 | +  in |
| 145 | +  let verdict ok = if ok then "VALID ✅" else "INVALID ❌" in |
| 146 | +  (* Prints one row per sample: the label, the input, and the validator's verdict. *) |
| 147 | +  let report label validate samples = |
| 148 | +    List.iter |
| 149 | +      (fun s -> printf "%s | %s | %s\n" label s (verdict (validate s))) |
| 150 | +      samples |
| 151 | +  in |
| 152 | +  report "Phone # Validation" Validator.phone_number phone_numbers; |
| 153 | +  report "Email Validation" Validator.email emails; |
| 154 | +  report "URL Validation" Validator.url urls |
| 155 | +;; |
| 156 | + |
| 157 | +(* Output of running the program [dune exec reto16]: |
| 158 | +
|
| 159 | + Extracting all numbers in the following text: |
| 160 | + The company reported a profit increase of 10 million dollars in the last quarter, surpassing expectations. However, they also experienced a decrease in revenue of -3.2 million dollars due to unexpected market fluctuations. The CEO announced a new investment plan worth 30.5 million dollars aimed at expanding operations globally. Additionally, the company recorded a growth rate of +8.3% in the previous year. |
| 161 | + --------------------------------------------- |
| 162 | + 10, -3.2, 30.5, +8.3 |
| 163 | + --------------------------------------------- |
| 164 | +
|
| 165 | + Phone # Validation | 664 730 9673 | VALID ✅ |
| 166 | + Phone # Validation | +52 (345)-080-1214 | VALID ✅ |
| 167 | + Phone # Validation | 055-216-0945 | VALID ✅ |
| 168 | + Phone # Validation | (686).233.7676 | VALID ✅ |
| 169 | + Phone # Validation | (45)-35-17887 | INVALID ❌ |
| 170 | + Email Validation | hello@kozmicblog.com | VALID ✅ |
| 171 | + Email Validation | new.year@old-wave.com | VALID ✅ |
| 172 | + Email Validation | just-AN-email@foo.bar | VALID ✅ |
| 173 | + Email Validation | invalid@tld-domain.x | INVALID ❌ |
| 174 | + URL Validation | http://kozmicblog.com | VALID ✅ |
| 175 | + URL Validation | https://regex101.com/ | VALID ✅ |
| 176 | + URL Validation | google.com.mx | VALID ✅ |
| 177 | + URL Validation | your-site.net/route?param=val&param2=val2 | VALID ✅ |
| 178 | + URL Validation | http://]what[..is.this/ | INVALID ❌ |
| 179 | +*) |
0 commit comments