Skip to content

Commit

Permalink
Change Email regex for a more catch all version
Browse files Browse the repository at this point in the history
  • Loading branch information
XavRsl committed Sep 14, 2018
1 parent 2f291fa commit c8c1ec7
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/Support/helpers.php
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ function stringify($data)
*/
function find_emails_in_array($array)
{
$pattern = '/(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))/iD';
$pattern = '/[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,8}/i';

preg_match_all($pattern, implode(' ', $array), $matches);

Expand Down
24 changes: 24 additions & 0 deletions tests/Unit/HelpersTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?php

namespace PubPeerFoundation\PublicationDataExtractor\Test\Unit;

use PubPeerFoundation\PublicationDataExtractor\Test\TestCase;

class HelpersTest extends TestCase
{
    /**
     * Feeds find_emails_in_array() a list of free-text author-affiliation
     * strings and checks how many e-mail addresses come back.
     *
     * The fixture holds 14 address occurrences in total (10 distinct ones;
     * four of them appear twice), surrounded by postal addresses, punctuation
     * and accented characters. Most addresses are immediately followed by a
     * full stop that is not part of the address.
     *
     * @test
     */
    public function it_can_find_emails_in_an_array_of_strings()
    {
        $affiliations = [
            // Two addresses, each introduced by "Electronic address:" and followed by a full stop.
            'Departments of Biological Chemistry and Chemistry and Biochemistry, UCLA-DOE Institute, HHMI, 611 South Charles E. Young Drive, Los Angeles, CA 90095-1570, USA., Department of Obstetrics and Gynecology, David Geffen School of Medicine, University of California Los Angeles, Los Angeles, CA 90095, USA., Molecular, Cell and Developmental Biology, University of California Los Angeles, Los Angeles, CA 90095, USA., Pasarow Mass Spectrometry Laboratory, Semel Institute, 405 Hilgard Avenue, Los Angeles, CA 90095, USA., Department of Obstetrics and Gynecology, David Geffen School of Medicine, University of California Los Angeles, Los Angeles, CA 90095, USA; Eli and Edythe Broad Center of Regenerative Medicine and Stem Cell Research, University of California Los Angles, Los Angeles, CA 90095, USA; The VA Greater Los Angeles Health Care System, Los Angeles, CA 90073, USA. Electronic address: smemarzadeh@mednet.ucla.edu., Departments of Biological Chemistry and Chemistry and Biochemistry, UCLA-DOE Institute, HHMI, 611 South Charles E. Young Drive, Los Angeles, CA 90095-1570, USA. Electronic address: david@mbi.ucla.edu.',
            // One address at the very end, after a semicolon.
            'Laboratory of Neuroimmunology, Department of Symptom Research, University of Texas MD Anderson Cancer Center, Houston, TX 77030;, Laboratory of Neuroimmunology and Developmental Origins of Disease, University Medical Center Utrecht, Utrecht 3584 EA, The Netherlands;, Departamento de Biología Molecular and Centro de Biología Molecular Severo Ochoa, Universidad Autónoma de Madrid-Consejo Superior de Investigaciones Científicas, Madrid 28049, Spain; Instituto de Investigación Sanitaria La Princesa, Madrid 28049, Spain;, Department of Integrative Biology and Pharmacology and Texas Therapeutics Institute, University of Texas Health Science Center, Houston, TX 77030;, Department of Translational Molecular Pathology, University of Texas MD Anderson Cancer Center, Houston, TX 77030., Laboratory of Neuroimmunology, Department of Symptom Research, University of Texas MD Anderson Cancer Center, Houston, TX 77030; akavelaars@mdanderson.org.',
            // Two space-separated addresses at the end.
            'Centro de Biología Molecular Severo Ochoa (CSIC-UAM), Madrid 28049, Spain. Instituto de Investigación Sanitaria La Princesa, Madrid 28006, Spain., Laboratory of Neuroimmunology and Developmental Origins of Disease, University Medical Center Utrecht, Utrecht 3584 EA, Netherlands., Laboratory of Neuroimmunology and Developmental Origins of Disease, University Medical Center Utrecht, Utrecht 3584 EA, Netherlands. Laboratory of Neuroimmunology, Division of Internal Medicine, Department of Symptom Research, The University of Texas MD Anderson Cancer Center, Houston, TX 77030, USA., Centro de Biología Molecular Severo Ochoa (CSIC-UAM), Madrid 28049, Spain. Instituto de Investigación Sanitaria La Princesa, Madrid 28006, Spain. cmurga@cbm.csic.es fmayor@cbm.csic.es.',
            // No address at all.
            'Key Laboratory of Health Risk Appraisal for Trace Toxic Chemicals of Zhejiang Province, Ningbo Municipal Center for Disease Control and Prevention, Ningbo, China, Ningbo Key Laboratory of Poison Research and Control, School of Environmental Science and Engineering, Zhejiang Gongshang University, Hangzhou 310018, Ningbo Institute of Technology, Zhejiang University',
            // The same pair of addresses appears twice (four occurrences).
            'Department of Biochemistry and Molecular Biology II, School of Pharmacy, Complutense University, 28040 Madrid, Spain CIBER de Diabetes y Enfermedades Metabólicas Asociadas (CIBERDEM), 08017 Barcelona, Spain Instituto de Investigaciones Biomédicas Alberto Sols (CSIC-UAM), 28029 Madrid, Spain CIBER de enfermedades neurodegenerativas (CIBERNED), 28049 Madrid, Spain., Departament of Molecular Biology and Centro de Biología Molecular Severo Ochoa (CSIC-UAM), 28049 Madrid, Spain Instituto de Investigación Sanitaria la Princesa, 28006 Madrid, Spain., Centro de Biología Molecular Severo Ochoa (UAM-CSIC), 28049 Madrid, Spain., Department of Biochemistry and Molecular Biology II, School of Pharmacy, Complutense University, 28040 Madrid, Spain CIBER de Diabetes y Enfermedades Metabólicas Asociadas (CIBERDEM), 08017 Barcelona, Spain., CIBER de Diabetes y Enfermedades Metabólicas Asociadas (CIBERDEM), 08017 Barcelona, Spain Hospital Universitari de Tarragona Joan XXIII. IISPV. Universitat Rovira i Virgili, 43007 Tarragona, Spain., Departament of Molecular Biology and Centro de Biología Molecular Severo Ochoa (CSIC-UAM), 28049 Madrid, Spain Instituto de Investigación Sanitaria la Princesa, 28006 Madrid, Spain irianieto@farm.ucm.es cmurga@cbm.uam.es., Department of Biochemistry and Molecular Biology II, School of Pharmacy, Complutense University, 28040 Madrid, Spain CIBER de Diabetes y Enfermedades Metabólicas Asociadas (CIBERDEM), 08017 Barcelona, Spain irianieto@farm.ucm.es cmurga@cbm.uam.es.',
            // Again one pair repeated twice (four occurrences); one domain is purely numeric before the TLD.
            'The State Key Laboratory of Pharmaceutical Biotechnology, School of Life Sciences, Nanjing University, Nanjing, China., The Affiliated Hospital of Nanjing University of Chinese Medicine, Nanjing, China., The Affiliated Hospital of Nanjing University of Chinese Medicine, Nanjing, China. qzhao@nju.edu.cn chenyg666@126.com., The State Key Laboratory of Pharmaceutical Biotechnology, School of Life Sciences, Nanjing University, Nanjing, China. qzhao@nju.edu.cn chenyg666@126.com.',
            // One address embedded mid-sentence near the start of the string.
            "Section of Pulmonology and hjfarber@texaschildrens.org., Information Services, Texas Children's Health Plan, Bellaire, Texas., Department of Pediatrics, Baylor College of Medicine and Texas Children's Hospital, Houston, Texas; and.",
        ];

        $foundEmails = find_emails_in_array($affiliations);

        // 2 + 1 + 2 + 0 + 4 + 4 + 1 occurrences across the seven fixture strings.
        $this->assertCount(14, $foundEmails);
    }
}

0 comments on commit c8c1ec7

Please sign in to comment.