diff --git a/.gitignore b/.gitignore
index a84e64ae..56a1f382 100644
--- a/.gitignore
+++ b/.gitignore
@@ -118,3 +118,4 @@ tests/input/discrepant_snps[12].csv
 tests/input/empty.txt
 tests/input/ftdna.csv.gz
 tests/input/generic.fa.gz
+tests/input/testvcf.vcf.gz
diff --git a/Pipfile b/Pipfile
index 92a4386b..1eb12647 100644
--- a/Pipfile
+++ b/Pipfile
@@ -7,6 +7,9 @@ verify_ssl = true
 pytest = "*"
 pytest-cov = "*"
 pytest-watch = "*"
+sphinx = "*"
+sphinx-rtd-theme = "*"
+black = "==19.10b0"
 
 [packages]
 snps = {editable = true,path = "."}
diff --git a/Pipfile.lock b/Pipfile.lock
index f2124d55..3d7991ea 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "04d6573b6a023a142b251876572417e84e18882553c682c30b6bcb74cbc983bd"
+            "sha256": "63ccb11406dcb107cca759a568ffdef146d3e3dc4a9833552f5d84b41a6f7bf2"
         },
         "pipfile-spec": 6,
         "requires": {
@@ -25,44 +25,54 @@
         },
         "numpy": {
             "hashes": [
-                "sha256:05dbfe72684cc14b92568de1bc1f41e5f62b00f714afc9adee42f6311738091f",
-                "sha256:0d82cb7271a577529d07bbb05cb58675f2deb09772175fab96dc8de025d8ac05",
-                "sha256:10132aa1fef99adc85a905d82e8497a580f83739837d7cbd234649f2e9b9dc58",
-                "sha256:12322df2e21f033a60c80319c25011194cd2a21294cc66fee0908aeae2c27832",
-                "sha256:16f19b3aa775dddc9814e02a46b8e6ae6a54ed8cf143962b4e53f0471dbd7b16",
-                "sha256:3d0b0989dd2d066db006158de7220802899a1e5c8cf622abe2d0bd158fd01c2c",
-                "sha256:438a3f0e7b681642898fd7993d38e2bf140a2d1eafaf3e89bb626db7f50db355",
-                "sha256:5fd214f482ab53f2cea57414c5fb3e58895b17df6e6f5bca5be6a0bb6aea23bb",
-                "sha256:73615d3edc84dd7c4aeb212fa3748fb83217e00d201875a47327f55363cef2df",
-                "sha256:7bd355ad7496f4ce1d235e9814ec81ee3d28308d591c067ce92e49f745ba2c2f",
-                "sha256:7d077f2976b8f3de08a0dcf5d72083f4af5411e8fddacd662aae27baa2601196",
-                "sha256:a4092682778dc48093e8bda8d26ee8360153e2047826f95a3f5eae09f0ae3abf",
-                "sha256:b458de8624c9f6034af492372eb2fee41a8e605f03f4732f43fc099e227858b2",
-                "sha256:e70fc8ff03a961f13363c2c95ef8285e0cf6a720f8271836f852cc0fa64e97c8",
-                "sha256:ee8e9d7cad5fe6dde50ede0d2e978d81eafeaa6233fb0b8719f60214cf226578",
-                "sha256:f4a4f6aba148858a5a5d546a99280f71f5ee6ec8182a7d195af1a914195b21a2"
-            ],
-            "version": "==1.17.2"
+                "sha256:0b0dd8f47fb177d00fa6ef2d58783c4f41ad3126b139c91dd2f7c4b3fdf5e9a5",
+                "sha256:25ffe71f96878e1da7e014467e19e7db90ae7d4e12affbc73101bcf61785214e",
+                "sha256:26efd7f7d755e6ca966a5c0ac5a930a87dbbaab1c51716ac26a38f42ecc9bc4b",
+                "sha256:28b1180c758abf34a5c3fea76fcee66a87def1656724c42bb14a6f9717a5bdf7",
+                "sha256:2e418f0a59473dac424f888dd57e85f77502a593b207809211c76e5396ae4f5c",
+                "sha256:30c84e3a62cfcb9e3066f25226e131451312a044f1fe2040e69ce792cb7de418",
+                "sha256:4650d94bb9c947151737ee022b934b7d9a845a7c76e476f3e460f09a0c8c6f39",
+                "sha256:4dd830a11e8724c9c9379feed1d1be43113f8bcce55f47ea7186d3946769ce26",
+                "sha256:4f2a2b279efde194877aff1f76cf61c68e840db242a5c7169f1ff0fd59a2b1e2",
+                "sha256:62d22566b3e3428dfc9ec972014c38ed9a4db4f8969c78f5414012ccd80a149e",
+                "sha256:669795516d62f38845c7033679c648903200980d68935baaa17ac5c7ae03ae0c",
+                "sha256:75fcd60d682db3e1f8fbe2b8b0c6761937ad56d01c1dc73edf4ef2748d5b6bc4",
+                "sha256:9395b0a41e8b7e9a284e3be7060db9d14ad80273841c952c83a5afc241d2bd98",
+                "sha256:9e37c35fc4e9410093b04a77d11a34c64bf658565e30df7cbe882056088a91c1",
+                "sha256:a0678793096205a4d784bd99f32803ba8100f639cf3b932dc63b21621390ea7e",
+                "sha256:b46554ad4dafb2927f88de5a1d207398c5385edbb5c84d30b3ef187c4a3894d8",
+                "sha256:c867eeccd934920a800f65c6068acdd6b87e80d45cd8c8beefff783b23cdc462",
+                "sha256:dd0667f5be56fb1b570154c2c0516a528e02d50da121bbbb2cbb0b6f87f59bc2",
+                "sha256:de2b1c20494bdf47f0160bd88ed05f5e48ae5dc336b8de7cfade71abcc95c0b9",
+                "sha256:f1df7b2b7740dd777571c732f98adb5aad5450aee32772f1b39249c8a50386f6",
+                "sha256:ffca69e29079f7880c5392bf675eb8b4146479d976ae1924d01cd92b04cccbcc"
+            ],
+            "version": "==1.17.3"
         },
         "pandas": {
             "hashes": [
-                "sha256:18d91a9199d1dfaa01ad645f7540370ba630bdcef09daaf9edf45b4b1bca0232",
-                "sha256:3f26e5da310a0c0b83ea50da1fd397de2640b02b424aa69be7e0784228f656c9",
-                "sha256:4182e32f4456d2c64619e97c58571fa5ca0993d1e8c2d9ca44916185e1726e15",
-                "sha256:426e590e2eb0e60f765271d668a30cf38b582eaae5ec9b31229c8c3c10c5bc21",
-                "sha256:5eb934a8f0dc358f0e0cdf314072286bbac74e4c124b64371395e94644d5d919",
-                "sha256:717928808043d3ea55b9bcde636d4a52d2236c246f6df464163a66ff59980ad8",
-                "sha256:8145f97c5ed71827a6ec98ceaef35afed1377e2d19c4078f324d209ff253ecb5",
-                "sha256:8744c84c914dcc59cbbb2943b32b7664df1039d99e834e1034a3372acb89ea4d",
-                "sha256:c1ac1d9590d0c9314ebf01591bd40d4c03d710bfc84a3889e5263c97d7891dee",
-                "sha256:cb2e197b7b0687becb026b84d3c242482f20cbb29a9981e43604eb67576da9f6",
-                "sha256:d4001b71ad2c9b84ff18b182cea22b7b6cbf624216da3ea06fb7af28d1f93165",
-                "sha256:d8930772adccb2882989ab1493fa74bd87d47c8ac7417f5dd3dd834ba8c24dc9",
-                "sha256:dfbb0173ee2399bc4ed3caf2d236e5c0092f948aafd0a15fbe4a0e77ee61a958",
-                "sha256:eebfbba048f4fa8ac711b22c78516e16ff8117d05a580e7eeef6b0c2be554c18",
-                "sha256:f1b21bc5cf3dbea53d33615d1ead892dfdae9d7052fa8898083bec88be20dcd2"
-            ],
-            "version": "==0.25.1"
+                "sha256:00dff3a8e337f5ed7ad295d98a31821d3d0fe7792da82d78d7fd79b89c03ea9d",
+                "sha256:22361b1597c8c2ffd697aa9bf85423afa9e1fcfa6b1ea821054a244d5f24d75e",
+                "sha256:255920e63850dc512ce356233081098554d641ba99c3767dde9e9f35630f994b",
+                "sha256:26382aab9c119735908d94d2c5c08020a4a0a82969b7e5eefb92f902b3b30ad7",
+                "sha256:33970f4cacdd9a0ddb8f21e151bfb9f178afb7c36eb7c25b9094c02876f385c2",
+                "sha256:4545467a637e0e1393f7d05d61dace89689ad6d6f66f267f86fff737b702cce9",
+                "sha256:52da74df8a9c9a103af0a72c9d5fdc8e0183a90884278db7f386b5692a2220a4",
+                "sha256:61741f5aeb252f39c3031d11405305b6d10ce663c53bc3112705d7ad66c013d0",
+                "sha256:6a3ac2c87e4e32a969921d1428525f09462770c349147aa8e9ab95f88c71ec71",
+                "sha256:7458c48e3d15b8aaa7d575be60e1e4dd70348efcd9376656b72fecd55c59a4c3",
+                "sha256:78bf638993219311377ce9836b3dc05f627a666d0dbc8cec37c0ff3c9ada673b",
+                "sha256:8153705d6545fd9eb6dd2bc79301bff08825d2e2f716d5dced48daafc2d0b81f",
+                "sha256:89f8fdf8c0ff3ed1e2c6f5c8482cf64fcc9645afd49be0a872a22f46d0bee57b",
+                "sha256:975c461accd14e89d71772e89108a050fa824c0b87a67d34cedf245f6681fc17",
+                "sha256:9962957a27bfb70ab64103d0a7b42fa59c642fb4ed4cb75d0227b7bb9228535d",
+                "sha256:adc3d3a3f9e59a38d923e90e20c4922fc62d1e5a03d083440468c6d8f3f1ae0a",
+                "sha256:bbe3eb765a0b1e578833d243e2814b60c825b7fdbf4cdfe8e8aae8a08ed56ecf",
+                "sha256:df8864824b1fe488cf778c3650ee59c3a0d8f42e53707de167ba6b4f7d35f133",
+                "sha256:e45055c30a608076e31a9fcd780a956ed3b1fa20db61561b8d88b79259f526f7",
+                "sha256:ee50c2142cdcf41995655d499a157d0a812fce55c97d9aad13bc1eef837ed36c"
+            ],
+            "version": "==0.25.3"
         },
         "python-dateutil": {
             "hashes": [
@@ -73,16 +83,10 @@
         },
         "pytz": {
             "hashes": [
-                "sha256:26c0b32e437e54a18161324a2fca3c4b9846b74a8dccddd843113109e1116b32",
-                "sha256:c894d57500a4cd2d5c71114aaab77dbab5eabd9022308ce5ac9bb93a60a6f0c7"
+                "sha256:1c557d7d0e871de1f5ccd5833f60fb2550652da6be2693c1e02300743d21500d",
+                "sha256:b02c06db6cf09c12dd25137e563b31700d3b80fcc4ad23abb7a315f2789819be"
             ],
-            "version": "==2019.2"
-        },
-        "pyvcf": {
-            "hashes": [
-                "sha256:e9d872513d179d229ab61da47a33f42726e9613784d1cb2bac3f8e2642f6f9d9"
-            ],
-            "version": "==0.6.8"
+            "version": "==2019.3"
         },
         "six": {
             "hashes": [
@@ -97,6 +101,20 @@
         }
     },
     "develop": {
+        "alabaster": {
+            "hashes": [
+                "sha256:446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359",
+                "sha256:a661d72d58e6ea8a57f7a86e37d86716863ee5e92788398526d58b26a4e4dc02"
+            ],
+            "version": "==0.7.12"
+        },
+        "appdirs": {
+            "hashes": [
+                "sha256:9e5896d1372858f8dd3344faf4e5014d21849c756c8d5701f78f8a103b372d92",
+                "sha256:d8b24664561d0d34ddfaec54636d502d7cea6e29c3eaf68f3df6180863e2166e"
+            ],
+            "version": "==1.4.3"
+        },
         "argh": {
             "hashes": [
                 "sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3",
@@ -113,10 +131,46 @@
         },
         "attrs": {
             "hashes": [
-                "sha256:ec20e7a4825331c1b5ebf261d111e16fa9612c1f7a5e1f884f12bd53a664dfd2",
-                "sha256:f913492e1663d3c36f502e5e9ba6cd13cf19d7fab50aa13239e420fef95e1396"
+                "sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c",
+                "sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72"
             ],
-            "version": "==19.2.0"
+            "version": "==19.3.0"
+        },
+        "babel": {
+            "hashes": [
+                "sha256:af92e6106cb7c55286b25b38ad7695f8b4efb36a90ba483d7f7a6628c46158ab",
+                "sha256:e86135ae101e31e2c8ec20a4e0c5220f4eed12487d5cf3f78be7e98d3a57fc28"
+            ],
+            "version": "==2.7.0"
+        },
+        "black": {
+            "hashes": [
+                "sha256:1b30e59be925fafc1ee4565e5e08abef6b03fe455102883820fe5ee2e4734e0b",
+                "sha256:c2edb73a08e9e0e6f65a0e6af18b059b8b1cdd5bef997d7a0b181df93dc81539"
+            ],
+            "index": "pypi",
+            "version": "==19.10b0"
+        },
+        "certifi": {
+            "hashes": [
+                "sha256:e4f3620cfea4f83eedc95b24abd9cd56f3c4b146dd0177e83a21b4eb49e21e50",
+                "sha256:fd7c7c74727ddcf00e9acd26bba8da604ffec95bf1c2144e67aff7a8b50e6cef"
+            ],
+            "version": "==2019.9.11"
+        },
+        "chardet": {
+            "hashes": [
+                "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
+                "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
+            ],
+            "version": "==3.0.4"
+        },
+        "click": {
+            "hashes": [
+                "sha256:2335065e6395b9e67ca716de5f7526736bfa6ceead690adf616d925bdc622b13",
+                "sha256:5b94b49521f6456670fdb30cd82a4eca9412788a93fa6dd6df72c94d5a8ff2d7"
+            ],
+            "version": "==7.0"
         },
         "colorama": {
             "hashes": [
@@ -168,6 +222,28 @@
             ],
             "version": "==0.6.2"
         },
+        "docutils": {
+            "hashes": [
+                "sha256:6c4f696463b79f1fb8ba0c594b63840ebd41f059e92b31957c46b74a4599b6d0",
+                "sha256:9e4d7ecfc600058e07ba661411a2b7de2fd0fafa17d1a7f7361cd47b1175c827",
+                "sha256:a2aeea129088da402665e92e0b25b04b073c04b2dce4ab65caaa38b7ce2e1a99"
+            ],
+            "version": "==0.15.2"
+        },
+        "idna": {
+            "hashes": [
+                "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407",
+                "sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c"
+            ],
+            "version": "==2.8"
+        },
+        "imagesize": {
+            "hashes": [
+                "sha256:3f349de3eb99145973fefb7dbe38554414e5c30abd0c8e4b970a7c9d09f3a1d8",
+                "sha256:f3832918bc3c66617f92e35f5d70729187676313caa60c187eb0f28b8fe5e3b5"
+            ],
+            "version": "==1.1.0"
+        },
         "importlib-metadata": {
             "hashes": [
                 "sha256:aa18d7378b00b40847790e7c27e11673d7fed219354109d0e7b9e5b25dc3ad26",
@@ -176,6 +252,46 @@
             "markers": "python_version < '3.8'",
             "version": "==0.23"
         },
+        "jinja2": {
+            "hashes": [
+                "sha256:74320bb91f31270f9551d46522e33af46a80c3d619f4a4bf42b3164d30b5911f",
+                "sha256:9fe95f19286cfefaa917656583d020be14e7859c6b0252588391e47db34527de"
+            ],
+            "version": "==2.10.3"
+        },
+        "markupsafe": {
+            "hashes": [
+                "sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473",
+                "sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161",
+                "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235",
+                "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5",
+                "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff",
+                "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b",
+                "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1",
+                "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e",
+                "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183",
+                "sha256:535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66",
+                "sha256:62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1",
+                "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1",
+                "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e",
+                "sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b",
+                "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905",
+                "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735",
+                "sha256:8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d",
+                "sha256:98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e",
+                "sha256:9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d",
+                "sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c",
+                "sha256:ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21",
+                "sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2",
+                "sha256:b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5",
+                "sha256:b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b",
+                "sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6",
+                "sha256:c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f",
+                "sha256:cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f",
+                "sha256:e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7"
+            ],
+            "version": "==1.1.1"
+        },
         "more-itertools": {
             "hashes": [
                 "sha256:409cd48d4db7052af495b09dec721011634af3753ae1ef92d2b32f73a745f832",
@@ -190,6 +306,12 @@
             ],
             "version": "==19.2"
         },
+        "pathspec": {
+            "hashes": [
+                "sha256:e285ccc8b0785beadd4c18e5708b12bb8fcf529a1e61215b3feff1d1e559ea5c"
+            ],
+            "version": "==0.6.0"
+        },
         "pathtools": {
             "hashes": [
                 "sha256:7c35c5421a39bb82e58018febd90e3b6e5db34c5443aaaf742b3f33d4655f1c0"
@@ -210,6 +332,13 @@
             ],
             "version": "==1.8.0"
         },
+        "pygments": {
+            "hashes": [
+                "sha256:71e430bc85c88a430f000ac1d9b331d2407f681d6f6aec95e8bcfbc3df5b0127",
+                "sha256:881c4c157e45f30af185c1ffe8d549d48ac9127433f2c380c24b84572ad66297"
+            ],
+            "version": "==2.4.2"
+        },
         "pyparsing": {
             "hashes": [
                 "sha256:6f98a7b9397e206d78cc01df10131398f1c8b8510a2f4d97d9abd82e1aacdd80",
@@ -219,11 +348,11 @@
         },
         "pytest": {
             "hashes": [
-                "sha256:13c1c9b22127a77fc684eee24791efafcef343335d855e3573791c68588fe1a5",
-                "sha256:d8ba7be9466f55ef96ba203fc0f90d0cf212f2f927e69186e1353e30bc7f62e5"
+                "sha256:27abc3fef618a01bebb1f0d6d303d2816a99aa87a5968ebc32fe971be91eb1e6",
+                "sha256:58cee9e09242937e136dbb3dab466116ba20d6b7828c7620f23947f37eb4dae4"
             ],
             "index": "pypi",
-            "version": "==5.2.0"
+            "version": "==5.2.2"
         },
         "pytest-cov": {
             "hashes": [
@@ -240,6 +369,13 @@
             "index": "pypi",
             "version": "==4.2.0"
         },
+        "pytz": {
+            "hashes": [
+                "sha256:1c557d7d0e871de1f5ccd5833f60fb2550652da6be2693c1e02300743d21500d",
+                "sha256:b02c06db6cf09c12dd25137e563b31700d3b80fcc4ad23abb7a315f2789819be"
+            ],
+            "version": "==2019.3"
+        },
         "pyyaml": {
             "hashes": [
                 "sha256:0113bc0ec2ad727182326b61326afa3d1d8280ae1122493553fd6f4397f33df9",
@@ -258,6 +394,31 @@
             ],
             "version": "==5.1.2"
         },
+        "regex": {
+            "hashes": [
+                "sha256:15454b37c5a278f46f7aa2d9339bda450c300617ca2fca6558d05d870245edc7",
+                "sha256:1ad40708c255943a227e778b022c6497c129ad614bb7a2a2f916e12e8a359ee7",
+                "sha256:5e00f65cc507d13ab4dfa92c1232d004fa202c1d43a32a13940ab8a5afe2fb96",
+                "sha256:604dc563a02a74d70ae1f55208ddc9bfb6d9f470f6d1a5054c4bd5ae58744ab1",
+                "sha256:720e34a539a76a1fedcebe4397290604cc2bdf6f81eca44adb9fb2ea071c0c69",
+                "sha256:7caf47e4a9ac6ef08cabd3442cc4ca3386db141fb3c8b2a7e202d0470028e910",
+                "sha256:7faf534c1841c09d8fefa60ccde7b9903c9b528853ecf41628689793290ca143",
+                "sha256:b4e0406d822aa4993ac45072a584d57aa4931cf8288b5455bbf30c1d59dbad59",
+                "sha256:c31eaf28c6fe75ea329add0022efeed249e37861c19681960f99bbc7db981fb2",
+                "sha256:c7393597191fc2043c744db021643549061e12abe0b3ff5c429d806de7b93b66",
+                "sha256:d2b302f8cdd82c8f48e9de749d1d17f85ce9a0f082880b9a4859f66b07037dc6",
+                "sha256:e3d8dd0ec0ea280cf89026b0898971f5750a7bd92cb62c51af5a52abd020054a",
+                "sha256:ec032cbfed59bd5a4b8eab943c310acfaaa81394e14f44454ad5c9eba4f24a74"
+            ],
+            "version": "==2019.11.1"
+        },
+        "requests": {
+            "hashes": [
+                "sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4",
+                "sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31"
+            ],
+            "version": "==2.22.0"
+        },
         "six": {
             "hashes": [
                 "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c",
@@ -265,6 +426,110 @@
             ],
             "version": "==1.12.0"
         },
+        "snowballstemmer": {
+            "hashes": [
+                "sha256:209f257d7533fdb3cb73bdbd24f436239ca3b2fa67d56f6ff88e86be08cc5ef0",
+                "sha256:df3bac3df4c2c01363f3dd2cfa78cce2840a79b9f1c2d2de9ce8d31683992f52"
+            ],
+            "version": "==2.0.0"
+        },
+        "sphinx": {
+            "hashes": [
+                "sha256:31088dfb95359384b1005619827eaee3056243798c62724fd3fa4b84ee4d71bd",
+                "sha256:52286a0b9d7caa31efee301ec4300dbdab23c3b05da1c9024b4e84896fb73d79"
+            ],
+            "index": "pypi",
+            "version": "==2.2.1"
+        },
+        "sphinx-rtd-theme": {
+            "hashes": [
+                "sha256:00cf895504a7895ee433807c62094cf1e95f065843bf3acd17037c3e9a2becd4",
+                "sha256:728607e34d60456d736cc7991fd236afb828b21b82f956c5ea75f94c8414040a"
+            ],
+            "index": "pypi",
+            "version": "==0.4.3"
+        },
+        "sphinxcontrib-applehelp": {
+            "hashes": [
+                "sha256:edaa0ab2b2bc74403149cb0209d6775c96de797dfd5b5e2a71981309efab3897",
+                "sha256:fb8dee85af95e5c30c91f10e7eb3c8967308518e0f7488a2828ef7bc191d0d5d"
+            ],
+            "version": "==1.0.1"
+        },
+        "sphinxcontrib-devhelp": {
+            "hashes": [
+                "sha256:6c64b077937330a9128a4da74586e8c2130262f014689b4b89e2d08ee7294a34",
+                "sha256:9512ecb00a2b0821a146736b39f7aeb90759834b07e81e8cc23a9c70bacb9981"
+            ],
+            "version": "==1.0.1"
+        },
+        "sphinxcontrib-htmlhelp": {
+            "hashes": [
+                "sha256:4670f99f8951bd78cd4ad2ab962f798f5618b17675c35c5ac3b2132a14ea8422",
+                "sha256:d4fd39a65a625c9df86d7fa8a2d9f3cd8299a3a4b15db63b50aac9e161d8eff7"
+            ],
+            "version": "==1.0.2"
+        },
+        "sphinxcontrib-jsmath": {
+            "hashes": [
+                "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178",
+                "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8"
+            ],
+            "version": "==1.0.1"
+        },
+        "sphinxcontrib-qthelp": {
+            "hashes": [
+                "sha256:513049b93031beb1f57d4daea74068a4feb77aa5630f856fcff2e50de14e9a20",
+                "sha256:79465ce11ae5694ff165becda529a600c754f4bc459778778c7017374d4d406f"
+            ],
+            "version": "==1.0.2"
+        },
+        "sphinxcontrib-serializinghtml": {
+            "hashes": [
+                "sha256:c0efb33f8052c04fd7a26c0a07f1678e8512e0faec19f4aa8f2473a8b81d5227",
+                "sha256:db6615af393650bf1151a6cd39120c29abaf93cc60db8c48eb2dddbfdc3a9768"
+            ],
+            "version": "==1.1.3"
+        },
+        "toml": {
+            "hashes": [
+                "sha256:229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c",
+                "sha256:235682dd292d5899d361a811df37e04a8828a5b1da3115886b73cf81ebc9100e"
+            ],
+            "version": "==0.10.0"
+        },
+        "typed-ast": {
+            "hashes": [
+                "sha256:1170afa46a3799e18b4c977777ce137bb53c7485379d9706af8a59f2ea1aa161",
+                "sha256:18511a0b3e7922276346bcb47e2ef9f38fb90fd31cb9223eed42c85d1312344e",
+                "sha256:262c247a82d005e43b5b7f69aff746370538e176131c32dda9cb0f324d27141e",
+                "sha256:2b907eb046d049bcd9892e3076c7a6456c93a25bebfe554e931620c90e6a25b0",
+                "sha256:354c16e5babd09f5cb0ee000d54cfa38401d8b8891eefa878ac772f827181a3c",
+                "sha256:48e5b1e71f25cfdef98b013263a88d7145879fbb2d5185f2a0c79fa7ebbeae47",
+                "sha256:4e0b70c6fc4d010f8107726af5fd37921b666f5b31d9331f0bd24ad9a088e631",
+                "sha256:630968c5cdee51a11c05a30453f8cd65e0cc1d2ad0d9192819df9978984529f4",
+                "sha256:66480f95b8167c9c5c5c87f32cf437d585937970f3fc24386f313a4c97b44e34",
+                "sha256:71211d26ffd12d63a83e079ff258ac9d56a1376a25bc80b1cdcdf601b855b90b",
+                "sha256:7954560051331d003b4e2b3eb822d9dd2e376fa4f6d98fee32f452f52dd6ebb2",
+                "sha256:838997f4310012cf2e1ad3803bce2f3402e9ffb71ded61b5ee22617b3a7f6b6e",
+                "sha256:95bd11af7eafc16e829af2d3df510cecfd4387f6453355188342c3e79a2ec87a",
+                "sha256:bc6c7d3fa1325a0c6613512a093bc2a2a15aeec350451cbdf9e1d4bffe3e3233",
+                "sha256:cc34a6f5b426748a507dd5d1de4c1978f2eb5626d51326e43280941206c209e1",
+                "sha256:d755f03c1e4a51e9b24d899561fec4ccaf51f210d52abdf8c07ee2849b212a36",
+                "sha256:d7c45933b1bdfaf9f36c579671fec15d25b06c8398f113dab64c18ed1adda01d",
+                "sha256:d896919306dd0aa22d0132f62a1b78d11aaf4c9fc5b3410d3c666b818191630a",
+                "sha256:fdc1c9bbf79510b76408840e009ed65958feba92a88833cdceecff93ae8fff66",
+                "sha256:ffde2fbfad571af120fcbfbbc61c72469e72f550d676c3342492a9dfdefb8f12"
+            ],
+            "version": "==1.4.0"
+        },
+        "urllib3": {
+            "hashes": [
+                "sha256:3de946ffbed6e6746608990594d08faac602528ac7015ac28d33cee6a45b7398",
+                "sha256:9a107b99a5393caf59c7aa3c1249c16e6879447533d0887f4336dde834c7be86"
+            ],
+            "version": "==1.25.6"
+        },
         "watchdog": {
             "hashes": [
                 "sha256:965f658d0732de3188211932aeb0bb457587f04f63ab4c1e33eab878e9de961d"
diff --git a/README.rst b/README.rst
index 4619fa9f..dd2f8d4d 100644
--- a/README.rst
+++ b/README.rst
@@ -34,7 +34,6 @@ Dependencies
 - `numpy <http://www.numpy.org>`_
 - `pandas <http://pandas.pydata.org>`_
 - `atomicwrites <https://github.com/untitaker/python-atomicwrites>`_
-- `PyVCF <https://github.com/jamescasbon/PyVCF>`_
 
 Installation
 ------------
@@ -48,7 +47,14 @@ Examples
 --------
 Download Example Data
 `````````````````````
-Let's download some example data from `openSNP <https://opensnp.org>`_:
+First, let's set up logging to get some helpful output:
+
+>>> import logging, sys
+>>> logger = logging.getLogger()
+>>> logger.setLevel(logging.DEBUG)
+>>> logger.addHandler(logging.StreamHandler(sys.stdout))
+
+Now we're ready to download some example data from `openSNP <https://opensnp.org>`_:
 
 >>> from snps.resources import Resources
 >>> r = Resources()
@@ -63,7 +69,8 @@ Load a `23andMe <https://www.23andme.com>`_ raw data file:
 >>> from snps import SNPs
 >>> s = SNPs('resources/662.23andme.340.txt.gz')
 
-The loaded SNPs are available via a ``pandas.DataFrame``:
+The ``SNPs`` class accepts a path to a file or a bytes object. A ``Reader`` class attempts to
+infer the data source and load the SNPs. The loaded SNPs are available via a ``pandas.DataFrame``:
 
 >>> df = s.snps
 >>> df.columns.values
diff --git a/docs/conf.py b/docs/conf.py
index 1197d4e4..1275ee70 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -126,7 +126,7 @@ def __getattr__(cls, name):
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ["_static"]
+html_static_path = []
 
 # Custom sidebar templates, must be a dictionary that maps document names
 # to template names.
diff --git a/docs/index.rst b/docs/index.rst
index 13ae28d2..8d9790f9 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -15,6 +15,7 @@
    README <readme>
    output_files
    snps_banner
+   snps
 
 Indices and tables
 ==================
diff --git a/docs/modules.rst b/docs/modules.rst
deleted file mode 100644
index f459e487..00000000
--- a/docs/modules.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-snps
-====
-
-.. toctree::
-   :maxdepth: 4
-
-   snps
diff --git a/docs/snps.rst b/docs/snps.rst
index 78836ba0..9658411b 100644
--- a/docs/snps.rst
+++ b/docs/snps.rst
@@ -1,11 +1,19 @@
 snps package
 ============
 
+Module
+------
+
+.. automodule:: snps
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 Submodules
 ----------
 
 snps\.ensembl module
---------------------
+~~~~~~~~~~~~~~~~~~~~
 
 .. automodule:: snps.ensembl
     :members:
@@ -13,7 +21,7 @@ snps\.ensembl module
     :show-inheritance:
 
 snps\.io module
----------------
+~~~~~~~~~~~~~~~
 
 .. automodule:: snps.io
     :members:
@@ -21,7 +29,7 @@ snps\.io module
     :show-inheritance:
 
 snps\.resources module
-----------------------
+~~~~~~~~~~~~~~~~~~~~~~
 
 .. automodule:: snps.resources
     :members:
@@ -29,18 +37,9 @@ snps\.resources module
     :show-inheritance:
 
 snps\.utils module
-------------------
+~~~~~~~~~~~~~~~~~~
 
 .. automodule:: snps.utils
     :members:
     :undoc-members:
     :show-inheritance:
-
-
-Module contents
----------------
-
-.. automodule:: snps
-    :members:
-    :undoc-members:
-    :show-inheritance:
diff --git a/setup.cfg b/setup.cfg
index c6fee1a8..fb025f86 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -21,16 +21,8 @@ addopts =
     --tb=short
 
 # http://coverage.readthedocs.io/en/latest/
-[coverage:paths]
-source =
-    src
-    */site-packages
-
 [coverage:run]
 branch = true
-source =
-    src
-    tests
 omit = */snps/_version.py
 
 [coverage:report]
diff --git a/setup.py b/setup.py
index f12864c6..1bd247b7 100644
--- a/setup.py
+++ b/setup.py
@@ -118,7 +118,7 @@
         "Issue Tracker": "https://github.com/apriha/snps/issues",
     },
     keywords="snps dna chromosomes bioinformatics",
-    install_requires=["numpy", "pandas", "atomicwrites", "PyVCF"],
+    install_requires=["numpy", "pandas", "atomicwrites"],
     python_requires=">=3.5",
     platforms=["any"],
 )
diff --git a/src/snps/__init__.py b/src/snps/__init__.py
index f14e8bf4..dcfe84e4 100644
--- a/src/snps/__init__.py
+++ b/src/snps/__init__.py
@@ -1,4 +1,4 @@
-""" snps
+""" `snps`
 
 tools for reading, writing, merging, and remapping SNPs
 
@@ -53,6 +53,10 @@
 # set version string with Versioneer
 from snps._version import get_versions
 
+import logging
+
+logger = logging.getLogger(__name__)
+
 __version__ = get_versions()["version"]
 del get_versions
 
@@ -67,6 +71,7 @@ def __init__(
         resources_dir="resources",
         parallelize=False,
         processes=os.cpu_count(),
+        rsids=(),
     ):
         """ Object used to read and parse genotype / raw data files.
 
@@ -86,6 +91,8 @@ def __init__(
             utilize multiprocessing to speedup calculations
         processes : int
             processes to launch if multiprocessing
+        rsids : tuple, optional
+            rsids to extract if loading a VCF file
         """
         self._file = file
         self._only_detect_source = only_detect_source
@@ -99,7 +106,9 @@ def __init__(
 
         if file:
 
-            self._snps, self._source = self._read_raw_data(file, only_detect_source)
+            self._snps, self._source = self._read_raw_data(
+                file, only_detect_source, rsids
+            )
 
             if not self._snps.empty:
                 self.sort_snps()
@@ -115,7 +124,7 @@ def __init__(
                     self._assign_par_snps()
 
     def __repr__(self):
-        return "SNPs({!r})".format(self._file)
+        return "SNPs({!r})".format(self._file[0:50])
 
     @property
     def source(self):
@@ -210,6 +219,19 @@ def sex(self):
         """
         return self.determine_sex()
 
+    @property
+    def unannotated_vcf(self):
+        """ Indicates if VCF file is unannotated.
+
+        Returns
+        -------
+        bool
+        """
+        if self.snp_count == 0 and self.source == "vcf":
+            return True
+
+        return False
+
     def get_summary(self):
         """ Get summary of ``SNPs``.
 
@@ -269,23 +291,23 @@ def save_snps(self, filename="", vcf=False, atomic=True, **kwargs):
             snps=self, filename=filename, vcf=vcf, atomic=atomic, **kwargs
         )
 
-    def _read_raw_data(self, file, only_detect_source):
-        return Reader.read_file(file, only_detect_source, self._resources)
+    def _read_raw_data(self, file, only_detect_source, rsids):
+        return Reader.read_file(file, only_detect_source, self._resources, rsids)
 
     def _assign_par_snps(self):
         """ Assign PAR SNPs to the X or Y chromosome using SNP position.
 
         References
         -----
-        .. [1] National Center for Biotechnology Information, Variation Services, RefSNP,
+        1. National Center for Biotechnology Information, Variation Services, RefSNP,
            https://api.ncbi.nlm.nih.gov/variation/v0/
-        .. [2] Yates et. al. (doi:10.1093/bioinformatics/btu613),
+        2. Yates et. al. (doi:10.1093/bioinformatics/btu613),
            `<http://europepmc.org/search/?query=DOI:10.1093/bioinformatics/btu613>`_
-        .. [3] Zerbino et. al. (doi.org/10.1093/nar/gkx1098), https://doi.org/10.1093/nar/gkx1098
-        .. [4] Sherry ST, Ward MH, Kholodov M, Baker J, Phan L, Smigielski EM, Sirotkin K.
+        3. Zerbino et. al. (doi.org/10.1093/nar/gkx1098), https://doi.org/10.1093/nar/gkx1098
+        4. Sherry ST, Ward MH, Kholodov M, Baker J, Phan L, Smigielski EM, Sirotkin K.
            dbSNP: the NCBI database of genetic variation. Nucleic Acids Res. 2001 Jan 1;
            29(1):308-11.
-        .. [5] Database of Single Nucleotide Polymorphisms (dbSNP). Bethesda (MD): National Center
+        5. Database of Single Nucleotide Polymorphisms (dbSNP). Bethesda (MD): National Center
            for Biotechnology Information, National Library of Medicine. dbSNP accession:
            rs28736870, rs113313554, and rs758419898 (dbSNP Build ID: 151). Available from:
            http://www.ncbi.nlm.nih.gov/SNP/
@@ -319,7 +341,7 @@ def _assign_par_snps(self):
                                 break
 
                 except Exception as err:
-                    print(err)
+                    logger.warning(err)
 
     def _assign_snp(self, rsid, alleles, chrom):
         # only assign SNP if positions match (i.e., same build)
@@ -359,13 +381,13 @@ def detect_build(self):
 
         References
         ----------
-        .. [1] Yates et. al. (doi:10.1093/bioinformatics/btu613),
+        1. Yates et. al. (doi:10.1093/bioinformatics/btu613),
            `<http://europepmc.org/search/?query=DOI:10.1093/bioinformatics/btu613>`_
-        .. [2] Zerbino et. al. (doi.org/10.1093/nar/gkx1098), https://doi.org/10.1093/nar/gkx1098
-        .. [3] Sherry ST, Ward MH, Kholodov M, Baker J, Phan L, Smigielski EM, Sirotkin K.
+        2. Zerbino et. al. (doi.org/10.1093/nar/gkx1098), https://doi.org/10.1093/nar/gkx1098
+        3. Sherry ST, Ward MH, Kholodov M, Baker J, Phan L, Smigielski EM, Sirotkin K.
            dbSNP: the NCBI database of genetic variation. Nucleic Acids Res. 2001
            Jan 1;29(1):308-11.
-        .. [4] Database of Single Nucleotide Polymorphisms (dbSNP). Bethesda (MD): National Center
+        4. Database of Single Nucleotide Polymorphisms (dbSNP). Bethesda (MD): National Center
            for Biotechnology Information, National Library of Medicine. dbSNP accession: rs3094315,
            rs11928389, rs2500347, rs964481, and rs2341354 (dbSNP Build ID: 151). Available from:
            http://www.ncbi.nlm.nih.gov/SNP/
@@ -598,7 +620,7 @@ def remap_snps(self, target_assembly, complement_bases=True):
 
         References
         ----------
-        .. [1] Ensembl, Assembly Map Endpoint,
+        1. Ensembl, Assembly Map Endpoint,
            http://rest.ensembl.org/documentation/info/assembly_map
         """
         chromosomes_remapped = []
@@ -607,7 +629,7 @@ def remap_snps(self, target_assembly, complement_bases=True):
         snps = self.snps
 
         if snps.empty:
-            print("No SNPs to remap")
+            logger.debug("No SNPs to remap")
             return chromosomes_remapped, chromosomes_not_remapped
         else:
             chromosomes = snps["chrom"].unique()
@@ -616,7 +638,7 @@ def remap_snps(self, target_assembly, complement_bases=True):
         valid_assemblies = ["NCBI36", "GRCh37", "GRCh38", 36, 37, 38]
 
         if target_assembly not in valid_assemblies:
-            print("Invalid target assembly")
+            logger.debug("Invalid target assembly")
             return chromosomes_remapped, chromosomes_not_remapped
 
         if isinstance(target_assembly, int):
@@ -655,7 +677,7 @@ def remap_snps(self, target_assembly, complement_bases=True):
                     }
                 )
             else:
-                print(
+                logger.debug(
                     "Chromosome {} not remapped; "
                     "removing chromosome from SNPs for consistency".format(chrom)
                 )
@@ -712,11 +734,13 @@ def _remapper(self, task):
             mapped_region = mapping["mapped"]["seq_region_name"]
 
             if orig_region != mapped_region:
-                print("discrepant chroms")
+                logger.debug("discrepant chroms")
                 continue
 
             if orig_range_len != mapped_range_len:
-                print("discrepant coords")  # observed when mapping NCBI36 -> GRCh38
+                logger.debug(
+                    "discrepant coords"
+                )  # observed when mapping NCBI36 -> GRCh38
                 continue
 
             # find the SNPs that are being remapped for this mapping
@@ -898,7 +922,7 @@ def _load_snps_helper(
         discrepant_genotypes_threshold,
         save_output,
     ):
-        print("Loading " + os.path.relpath(file))
+        logger.debug("Loading " + os.path.relpath(file))
         discrepant_positions, discrepant_genotypes = self._add_snps(
             SNPs(file),
             discrepant_snp_positions_threshold,
@@ -1044,12 +1068,12 @@ def _add_snps(
         source = [s.strip() for s in snps._source.split(",")]
 
         if not snps._build_detected:
-            print("build not detected, assuming build {}".format(snps._build))
+            logger.debug("build not detected, assuming build {}".format(snps._build))
 
         if not self._build:
             self._build = build
         elif self._build != build:
-            print(
+            logger.debug(
                 "build / assembly mismatch between current build of SNPs and SNPs being loaded"
             )
 
@@ -1073,7 +1097,7 @@ def _add_snps(
                 prefix = "{}_".format(clean_str(self._name))
 
             if 0 < len(discrepant_positions) < discrepant_snp_positions_threshold:
-                print(
+                logger.debug(
                     "{} SNP positions were discrepant; keeping original positions".format(
                         str(len(discrepant_positions))
                     )
@@ -1089,7 +1113,7 @@ def _add_snps(
                         ),
                     )
             elif len(discrepant_positions) >= discrepant_snp_positions_threshold:
-                print(
+                logger.debug(
                     "too many SNPs differ in position; ensure same genome build is being used"
                 )
                 return discrepant_positions, discrepant_genotypes
@@ -1138,7 +1162,7 @@ def _add_snps(
             ]
 
             if 0 < len(discrepant_genotypes) < discrepant_genotypes_threshold:
-                print(
+                logger.debug(
                     "{} SNP genotypes were discrepant; marking those as null".format(
                         str(len(discrepant_genotypes))
                     )
@@ -1154,7 +1178,7 @@ def _add_snps(
                         ),
                     )
             elif len(discrepant_genotypes) >= discrepant_genotypes_threshold:
-                print(
+                logger.debug(
                     "too many SNPs differ in their genotype; ensure file is for same "
                     "individual"
                 )
diff --git a/src/snps/ensembl.py b/src/snps/ensembl.py
index 15e6b763..03bbdb67 100644
--- a/src/snps/ensembl.py
+++ b/src/snps/ensembl.py
@@ -6,9 +6,9 @@
 
 References
 ----------
-.. [1] Yates et. al. (doi:10.1093/bioinformatics/btu613),
+1. Yates et. al. (doi:10.1093/bioinformatics/btu613),
    `<http://europepmc.org/search/?query=DOI:10.1093/bioinformatics/btu613>`_
-.. [2] Zerbino et. al. (doi.org/10.1093/nar/gkx1098), https://doi.org/10.1093/nar/gkx1098
+2. Zerbino et. al. (doi.org/10.1093/nar/gkx1098), https://doi.org/10.1093/nar/gkx1098
 
 """
 
diff --git a/src/snps/io.py b/src/snps/io.py
index 367ee4df..22a35277 100644
--- a/src/snps/io.py
+++ b/src/snps/io.py
@@ -1,3 +1,7 @@
+""" Classes for reading and writing SNPs.
+
+"""
+
 """
 BSD 3-Clause License
 
@@ -41,16 +45,19 @@
 
 import numpy as np
 import pandas as pd
-import vcf
 
 import snps
 from snps.utils import save_df_as_csv, clean_str
 
+import logging
+
+logger = logging.getLogger(__name__)
+
 
 class Reader:
     """ Class for reading and parsing raw data / genotype files. """
 
-    def __init__(self, file="", only_detect_source=False, resources=None):
+    def __init__(self, file="", only_detect_source=False, resources=None, rsids=()):
         """ Initialize a `Reader`.
 
         Parameters
@@ -61,10 +68,14 @@ def __init__(self, file="", only_detect_source=False, resources=None):
             only detect the source of the data
         resources : Resources
             instance of Resources
+        rsids : tuple, optional
+            rsids to extract if loading a VCF file
+
         """
         self._file = file
         self._only_detect_source = only_detect_source
         self._resources = resources
+        self._rsids = rsids
 
     def __call__(self):
         """ Read and parse a raw data / genotype file.
@@ -147,7 +158,7 @@ def __call__(self):
             elif first_line.startswith("rsid"):
                 return self.read_generic_csv(file)
             elif "vcf" in comments.lower():
-                return self.read_vcf(file)
+                return self.read_vcf(file, self._rsids)
             elif ("Genes for Good" in comments) | ("PLINK" in comments):
                 return self.read_genes_for_good(file)
             elif "CODIGO46" in comments:
@@ -155,11 +166,11 @@ def __call__(self):
             else:
                 return pd.DataFrame(), ""
         except Exception as err:
-            print(err)
+            logger.warning(err)
             return pd.DataFrame(), ""
 
     @classmethod
-    def read_file(cls, file, only_detect_source, resources):
+    def read_file(cls, file, only_detect_source, resources, rsids):
         """ Read `file`.
 
         Parameters
@@ -170,13 +181,15 @@ def read_file(cls, file, only_detect_source, resources):
             only detect the source of the data
         resources : Resources
             instance of Resources
+        rsids : tuple
+            rsids to extract if loading a VCF file
 
         Returns
         -------
         tuple : (pandas.DataFrame, str)
             dataframe of parsed SNPs, detected source of SNPs
         """
-        r = cls(file, only_detect_source, resources)
+        r = cls(file, only_detect_source, resources, rsids)
         return r()
 
     def _extract_comments(self, f, decode):
@@ -287,7 +300,6 @@ def read_ftdna(self, file):
         )
 
         # remove incongruous data
-        df = df.drop(df.loc[df["chrom"] == "0"].index)
         df = df.drop(
             df.loc[df.index == "RSID"].index
         )  # second header for concatenated data
@@ -651,19 +663,28 @@ def read_generic_csv(self, file):
 
         return df, "generic"
 
-    def read_vcf(self, file):
+    def read_vcf(self, file, rsids=()):
         """ Read and parse VCF file.
 
         Notes
         -----
-        This function uses the PyVCF python module to parse the genotypes from VCF files:
-        https://pyvcf.readthedocs.io/en/latest/index.html
+        This method attempts to read and parse a VCF file or buffer, optionally
+        compressed with gzip. Some assumptions are made throughout this process:
 
+            * SNPs that are not annotated with an RSID are skipped
+            * If the VCF contains multiple samples, only the first sample is used to
+              look up the genotype
+            * Insertions and deletions are skipped
+            * If a sample allele is not specified, the genotype is reported as NaN
+            * If a sample allele refers to a REF or ALT allele that is not specified,
+              the genotype is reported as NaN
 
         Parameters
         ----------
-        file : str
-            path to file
+        file : str or io.BytesIO
+            path to file or buffer of bytes to load
+        rsids : tuple, optional
+            rsids to extract if loading a VCF file
 
         Returns
         -------
@@ -676,51 +697,83 @@ def read_vcf(self, file):
         if self._only_detect_source:
             return pd.DataFrame(), "vcf"
 
-        df = pd.DataFrame(columns=["rsid", "chrom", "pos", "genotype"])
-        df = df.astype(
-            {"rsid": object, "chrom": object, "pos": np.int64, "genotype": object}
-        )
+        if not isinstance(file, io.BytesIO):
+            with open(file, "rb") as f:
+                return self._parse_vcf(f, rsids)
+        else:
+            return self._parse_vcf(file, rsids)
 
-        with open(file, "r") as f:
-            vcf_reader = vcf.Reader(f)
+    def _parse_vcf(self, buffer, rsids):
+        rows = []
+        first_four_bytes = buffer.read(4)
+        buffer.seek(0)
 
-            # snps does not yet support multi-sample vcf.
-            if len(vcf_reader.samples) > 1:
-                print(
-                    "Multiple samples detected in the vcf file, please use a single sample vcf."
-                )
-                return df, "vcf"
+        if self.is_gzip(first_four_bytes):
+            f = gzip.open(buffer)
+        else:
+            f = buffer
 
-            for i, record in enumerate(vcf_reader):
-                # assign null genotypes if either allele is None
-                # Could capture full genotype, if REF is None, but genotype is 1/1 or
-                # if ALT is None, but genotype is 0/0
-                if record.REF is None or record.ALT[0] is None:
-                    genotype = np.nan
+        with io.TextIOWrapper(io.BufferedReader(f)) as file:
+
+            for line in file:
+
+                line_strip = line.strip("\n")
+                if line_strip.startswith("#"):
+                    continue
+                rsid = line_strip.split("\t")[2]
                 # skip SNPs with missing rsIDs.
-                elif record.ID is None:
+                if rsid == ".":
                     continue
+                if rsids:
+                    if rsid not in rsids:
+                        continue
+
+                line_split = line_strip.split("\t")
+
+                # multi-sample VCF: only the first sample's genotype is used
+                if len(line_split) > 10:
+                    logger.debug("Multiple samples detected in the vcf file")
+
+                ref = line_split[3]
+                alt = line_split[4]
+                zygote = line_split[9]
+                zygote = zygote.split(":")[0]
+
+                ref_alt = [ref] + alt.split(",")
+
                 # skip insertions and deletions
-                elif len(record.REF) > 1 or len(record.ALT[0]) > 1:
+                if sum(map(len, ref_alt)) > len(ref_alt):
                     continue
+
+                zygote1, zygote2 = zygote.replace("|", " ").replace("/", " ").split(" ")
+                if zygote1 == "." or zygote2 == ".":
+                    # assign null genotypes if either allele is None
+                    genotype = np.nan
+                elif (zygote1 == "0" or zygote2 == "0") and ref == ".":
+                    # sample allele specifies REF allele, which is None
+                    genotype = np.nan
+                elif (zygote1 == "1" or zygote2 == "1") and alt == ".":
+                    # sample allele specifies ALT allele, which is None
+                    genotype = np.nan
                 else:
-                    alleles = record.genotype(vcf_reader.samples[0]).gt_bases
-                    a1 = alleles[0]
-                    a2 = alleles[-1]
-                    genotype = "{}{}".format(a1, a2)
-
-                record_info = {
-                    "rsid": record.ID,
-                    "chrom": "{}".format(record.CHROM).strip("chr"),
-                    "pos": record.POS,
-                    "genotype": genotype,
-                }
-                # append the record to the DataFrame
-                df = df.append(
-                    pd.DataFrame([record_info]), ignore_index=True, sort=False
-                )
+                    # full genotype is still captured if REF is None but genotype is
+                    # 1/1, or if ALT is None but genotype is 0/0
+                    genotype = ref_alt[int(zygote1)] + ref_alt[int(zygote2)]
+
+                record_array = [
+                    rsid,
+                    "{}".format(line_split[0]).strip("chr"),
+                    line_split[1],
+                    genotype,
+                ]
+                rows.append(record_array)
+
+            df = pd.DataFrame(rows, columns=["rsid", "chrom", "pos", "genotype"])
+            df = df.astype(
+                {"rsid": object, "chrom": object, "pos": np.int64, "genotype": object}
+            )
 
-        df.set_index("rsid", inplace=True, drop=True)
+            df.set_index("rsid", inplace=True, drop=True)
 
         return df, "vcf"
 
@@ -827,7 +880,7 @@ def _write_vcf(self):
 
         References
         ----------
-        .. [1] The Variant Call Format (VCF) Version 4.2 Specification, 8 Mar 2019,
+        1. The Variant Call Format (VCF) Version 4.2 Specification, 8 Mar 2019,
            https://samtools.github.io/hts-specs/VCFv4.2.pdf
 
         Returns
diff --git a/src/snps/resources.py b/src/snps/resources.py
index e50c5e58..dbdc7ad2 100644
--- a/src/snps/resources.py
+++ b/src/snps/resources.py
@@ -2,14 +2,14 @@
 
 References
 ----------
-.. [1] International Human Genome Sequencing Consortium. Initial sequencing and
+1. International Human Genome Sequencing Consortium. Initial sequencing and
    analysis of the human genome. Nature. 2001 Feb 15;409(6822):860-921.
    http://dx.doi.org/10.1038/35057062
-.. [2] hg19 (GRCh37): Hiram Clawson, Brooke Rhead, Pauline Fujita, Ann Zweig, Katrina
+2. hg19 (GRCh37): Hiram Clawson, Brooke Rhead, Pauline Fujita, Ann Zweig, Katrina
    Learned, Donna Karolchik and Robert Kuhn, https://genome.ucsc.edu/cgi-bin/hgGateway?db=hg19
-.. [3] Yates et. al. (doi:10.1093/bioinformatics/btu613),
+3. Yates et. al. (doi:10.1093/bioinformatics/btu613),
    `<http://europepmc.org/search/?query=DOI:10.1093/bioinformatics/btu613>`_
-.. [4] Zerbino et. al. (doi.org/10.1093/nar/gkx1098), https://doi.org/10.1093/nar/gkx1098
+4. Zerbino et. al. (doi.org/10.1093/nar/gkx1098), https://doi.org/10.1093/nar/gkx1098
 
 """
 
@@ -62,6 +62,10 @@
 from snps.ensembl import EnsemblRestClient
 from snps.utils import create_dir, Singleton
 
+import logging
+
+logger = logging.getLogger(__name__)
+
 
 class Resources(metaclass=Singleton):
     """ Object used to manage resources required by `snps`. """
@@ -131,7 +135,7 @@ def get_reference_sequences(
         valid_assemblies = ["NCBI36", "GRCh37", "GRCh38"]
 
         if assembly not in valid_assemblies:
-            print("Invalid assembly")
+            logger.debug("Invalid assembly")
             return {}
 
         if not self._reference_chroms_available(assembly, chroms):
@@ -182,7 +186,7 @@ def download_example_datasets(self):
 
         References
         ----------
-        .. [1] Greshake B, Bayer PE, Rausch H, Reda J (2014), "openSNP-A Crowdsourced Web Resource
+        1. Greshake B, Bayer PE, Rausch H, Reda J (2014), "openSNP-A Crowdsourced Web Resource
            for Personal Genomics," PLOS ONE, 9(3): e89204,
            https://doi.org/10.1371/journal.pone.0089204
         """
@@ -297,7 +301,7 @@ def _load_assembly_mapping_data(filename):
 
             return assembly_mapping_data
         except Exception as err:
-            print(err)
+            logger.warning(err)
             return {}
 
     def _get_paths_reference_sequences(
@@ -331,7 +335,7 @@ def _get_paths_reference_sequences(
 
         References
         ----------
-        .. [1] Daniel R. Zerbino, Premanand Achuthan, Wasiu Akanni, M. Ridwan Amode,
+        1. Daniel R. Zerbino, Premanand Achuthan, Wasiu Akanni, M. Ridwan Amode,
            Daniel Barrell, Jyothish Bhai, Konstantinos Billis, Carla Cummins, Astrid Gall,
            Carlos García Giro´n, Laurent Gil, Leo Gordon, Leanne Haggerty, Erin Haskell,
            Thibaut Hourlier, Osagie G. Izuogu, Sophie H. Janacek, Thomas Juettemann,
@@ -346,11 +350,11 @@ def _get_paths_reference_sequences(
            Ensembl 2018.
            PubMed PMID: 29155950.
            doi:10.1093/nar/gkx1098
-        .. [2] NCBI 36, Oct 2005, Ensembl release 54, Database version: 54.36p
-        .. [3] GRCh37.p13 (Genome Reference Consortium Human Reference 37),
+        2. NCBI 36, Oct 2005, Ensembl release 54, Database version: 54.36p
+        3. GRCh37.p13 (Genome Reference Consortium Human Reference 37),
            INSDC Assembly GCA_000001405.14, Feb 2009, Ensembl GRCh37 release 96, Database
            version: 96.37
-        .. [4] GRCh38.p12 (Genome Reference Consortium Human Build 38),
+        4. GRCh38.p12 (Genome Reference Consortium Human Build 38),
            INSDC Assembly GCA_000001405.27, Dec 2013, Ensembl release 96, Database
            version: 96.38
         """
@@ -426,9 +430,9 @@ def _get_path_assembly_mapping_data(
 
         References
         ----------
-        .. [1] Ensembl, Assembly Information Endpoint,
+        1. Ensembl, Assembly Information Endpoint,
            https://rest.ensembl.org/documentation/info/assembly_info
-        .. [2] Ensembl, Assembly Map Endpoint,
+        2. Ensembl, Assembly Map Endpoint,
            http://rest.ensembl.org/documentation/info/assembly_map
 
         """
@@ -472,14 +476,14 @@ def _get_path_assembly_mapping_data(
         if not os.path.exists(destination) or not self._all_chroms_in_tar(
             chroms, destination
         ):
-            print("Downloading {}".format(os.path.relpath(destination)))
+            logger.debug("Downloading {}".format(os.path.relpath(destination)))
 
             try:
                 self._download_assembly_mapping_data(
                     destination, chroms, source_assembly, target_assembly, retries
                 )
             except Exception as err:
-                print(err)
+                logger.warning(err)
                 return ""
 
         return destination
@@ -527,7 +531,7 @@ def _all_chroms_in_tar(self, chroms, filename):
                 if chrom + ".json" not in members:
                     return False
         except Exception as err:
-            print(err)
+            logger.warning(err)
             return False
 
         return True
@@ -554,7 +558,7 @@ def _load_codigo46_resources(self, rsid_map, chrpos_map):
 
             return d
         except Exception as err:
-            print(err)
+            logger.warning(err)
             return {}
 
     def _get_path_codigo46_rsid_map(self):
@@ -613,7 +617,7 @@ def _download_file(self, url, filename, compress=False, timeout=30):
                     else:
                         f.write(data)
             except urllib.error.URLError as err:
-                print(err)
+                logger.warning(err)
                 destination = ""
                 # try HTTP if an FTP error occurred
                 if "ftp://" in url:
@@ -624,7 +628,7 @@ def _download_file(self, url, filename, compress=False, timeout=30):
                         timeout=timeout,
                     )
             except Exception as err:
-                print(err)
+                logger.warning(err)
                 return ""
 
         return destination
@@ -638,7 +642,7 @@ def _print_download_msg(path):
         path : str
             path to file being downloaded
         """
-        print("Downloading " + os.path.relpath(path))
+        logger.debug("Downloading " + os.path.relpath(path))
 
 
 class ReferenceSequence:
@@ -664,7 +668,7 @@ def __init__(self, ID="", url="", path="", assembly="", species="", taxonomy="")
 
         References
         ----------
-        .. [1] The Variant Call Format (VCF) Version 4.2 Specification, 8 Mar 2019,
+        1. The Variant Call Format (VCF) Version 4.2 Specification, 8 Mar 2019,
            https://samtools.github.io/hts-specs/VCFv4.2.pdf
         """
         self._ID = ID
diff --git a/src/snps/utils.py b/src/snps/utils.py
index 502d418e..ac8dc0d9 100644
--- a/src/snps/utils.py
+++ b/src/snps/utils.py
@@ -1,3 +1,7 @@
+""" Utility classes and functions.
+
+"""
+
 """
 BSD 3-Clause License
 
@@ -41,6 +45,9 @@
 import pandas as pd
 
 import snps
+import logging
+
+logger = logging.getLogger(__name__)
 
 
 class Parallelizer:
@@ -106,7 +113,7 @@ def create_dir(path):
     try:
         os.makedirs(path, exist_ok=True)
     except Exception as err:
-        print(err)
+        logger.warning(err)
         return False
 
     if os.path.exists(path):
@@ -156,7 +163,7 @@ def save_df_as_csv(
                 destination = filename
             else:
                 destination = os.path.join(path, filename)
-                print("Saving " + os.path.relpath(destination))
+                logger.debug("Saving " + os.path.relpath(destination))
 
             if prepend_info:
                 s = (
@@ -190,10 +197,10 @@ def save_df_as_csv(
 
             return destination
         except Exception as err:
-            print(err)
+            logger.warning(err)
             return ""
     else:
-        print("no data to save...")
+        logger.debug("no data to save...")
         return ""
 
 
diff --git a/tests/input/generic.fa b/tests/input/generic.fa
index d7dc0167..1766fe91 100644
--- a/tests/input/generic.fa
+++ b/tests/input/generic.fa
@@ -1,3 +1,3 @@
->generic test sequence:1:1:110
+>generic test sequence:1:1:117
 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
-NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNAGGCCGGACN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNAGGCCGGACNNNNNNNN
diff --git a/tests/input/testvcf.vcf b/tests/input/testvcf.vcf
index b05e1ed4..d072e89f 100644
--- a/tests/input/testvcf.vcf
+++ b/tests/input/testvcf.vcf
@@ -4,12 +4,20 @@
 ##
 ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
 #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLEID
-1	101	rs1	A	G	.	.	.	GT	0/0
-1	102	rs2	G	C	.	.	.	GT	1/1
-1	103	rs3	G	T	.	.	.	GT	0/0
+1	101	rs1	A	.	.	.	.	GT	0/0
+1	102	rs2	.	C	.	.	.	GT	1/1
+1	103	rs3	G	T	.	.	.	GT	0|0
 1	104	rs4	C	T	.	.	.	GT	1/1
-1   105   rs5 C   .   .   .   .   GT  ./.
+1	105	rs5	C	.	.	.	.	GT	./.
 1	106	rs6	G	C	.	.	.	GT	0/1
 1	107	rs7	G	T,C	.	.	.	GT	1/2
-1   108   rs8 A   T   .   .   .   GT  0|1
-1   109   .   C   T   .   .   .   GT  0/1
+1	108	rs8	A	T	.	.	.	GT	0/1
+1	109	.	C	T	.	.	.	GT	0/1
+1	110	rs10	A	AGC	.	.	.	GT	0/1
+1	111	rs11	AGC	A	.	.	.	GT	0/1
+1	112	rs12	.	A	.	.	.	GT	0/1
+1	113	rs13	.	A	.	.	.	GT	1/0
+1	114	rs14	A	.	.	.	.	GT	0/1
+1	115	rs15	A	.	.	.	.	GT	1/0
+1	116	rs16	A	A	.	.	.	GT	0/.
+1	117	rs17	A	A	.	.	.	GT	./0
diff --git a/tests/input/unannotated_testvcf.vcf b/tests/input/unannotated_testvcf.vcf
new file mode 100644
index 00000000..73fcce47
--- /dev/null
+++ b/tests/input/unannotated_testvcf.vcf
@@ -0,0 +1,15 @@
+##fileformat=VCFv4.1
+##fileDate=20190527
+##
+##
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLEID
+1	101	.	A	G	.	.	.	GT	0/0
+1	102	.	G	C	.	.	.	GT	1/1
+1	103	.	G	T	.	.	.	GT	0/0
+1	104	.	C	T	.	.	.	GT	1/1
+1	105	.	C	.	.	.	.	GT	./.
+1	106	.	G	C	.	.	.	GT	0/1
+1	107	.	G	T,C	.	.	.	GT	1/2
+1	108	.	A	T	.	.	.	GT	0/1
+1	109	.	C	T	.	.	.	GT	0/1
diff --git a/tests/test_resources.py b/tests/test_resources.py
index 7427a7d1..49841965 100644
--- a/tests/test_resources.py
+++ b/tests/test_resources.py
@@ -291,7 +291,7 @@ def test_reference_sequence_generic_load_sequence(self):
             seq.sequence,
             np.array(
                 bytearray(
-                    "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNAGGCCGGACN",
+                    "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNAGGCCGGACNNNNNNNN",
                     encoding="utf-8",
                     errors="strict",
                 ),
@@ -299,7 +299,7 @@ def test_reference_sequence_generic_load_sequence(self):
             ),
         )
         assert list("AGGCCGGAC") == list(map(chr, seq.sequence[100:109]))
-        assert seq.md5 == "dc86fbda2f6febd77622407beae66b9a"
+        assert seq.md5 == "6ac6176535ad0e38aba2d05d786c39b6"
         assert seq.start == 1
-        assert seq.end == 110
-        assert seq.length == 110
+        assert seq.end == 117
+        assert seq.length == 117
diff --git a/tests/test_snps_collection.py b/tests/test_snps_collection.py
index 3fc2ce60..9b25fe8a 100644
--- a/tests/test_snps_collection.py
+++ b/tests/test_snps_collection.py
@@ -46,15 +46,26 @@
 from tests import BaseSNPsTestCase
 
 
-class TestIndividual(BaseSNPsTestCase):
+class TestSNPsCollection(BaseSNPsTestCase):
     def generic_snps(self):
         return self.create_snp_df(
-            rsid=["rs1", "rs2", "rs3", "rs4", "rs5", "rs6", "rs7", "rs8"],
-            chrom=["1", "1", "1", "1", "1", "1", "1", "1"],
-            pos=[101, 102, 103, 104, 105, 106, 107, 108],
+            rsid=["rs" + str(i) for i in range(1, 9)],
+            chrom=["1"] * 8,
+            pos=list(range(101, 109)),
             genotype=["AA", "CC", "GG", "TT", np.nan, "GC", "TC", "AT"],
         )
 
+    def generic_snps_vcf(self):
+        df = self.generic_snps()
+        return df.append(
+            self.create_snp_df(
+                rsid=["rs" + str(i) for i in range(12, 18)],
+                chrom=["1"] * 6,
+                pos=list(range(112, 118)),
+                genotype=[np.nan] * 6,
+            )
+        )
+
     def snps_NCBI36(self):
         return self.create_snp_df(
             rsid=["rs3094315", "rs2500347", "rsIndelTest", "rs11928389"],
@@ -202,7 +213,113 @@ def test_snps_vcf(self):
         # phased snps, and snps with missing rsID
         s = SNPs("tests/input/testvcf.vcf")
         assert s.source == "vcf"
-        pd.testing.assert_frame_equal(s.snps, self.generic_snps())
+        assert not s.unannotated_vcf
+        pd.testing.assert_frame_equal(s.snps, self.generic_snps_vcf())
+
+    def test_snps_vcf_rsids(self):
+        # https://samtools.github.io/hts-specs/VCFv4.2.pdf
+        # this tests that passing rsids limits the loaded snps to the requested
+        # rsids (rs1 and rs2, both homozygous in the expected data)
+        rsids = ["rs1", "rs2"]
+        s = SNPs("tests/input/testvcf.vcf", rsids=rsids)
+        assert s.source == "vcf"
+        assert not s.unannotated_vcf
+        pd.testing.assert_frame_equal(s.snps, self.generic_snps_vcf().loc[rsids])
+
+    def test_snps_vcf_gz(self):
+        # https://samtools.github.io/hts-specs/VCFv4.2.pdf
+        # this tests for homozygous snps, heterozygous snps, multiallelic snps,
+        # phased snps, and snps with missing rsID
+        with open("tests/input/testvcf.vcf", "rb") as f_in:
+            with atomic_write(
+                "tests/input/testvcf.vcf.gz", mode="wb", overwrite=True
+            ) as f_out:
+                with gzip.open(f_out, "wb") as f_gzip:
+                    shutil.copyfileobj(f_in, f_gzip)
+
+        s = SNPs("tests/input/testvcf.vcf.gz")
+        assert s.source == "vcf"
+        pd.testing.assert_frame_equal(s.snps, self.generic_snps_vcf())
+
+    def test_snps_vcf_gz_rsids(self):
+        # https://samtools.github.io/hts-specs/VCFv4.2.pdf
+        # this tests that passing rsids limits the snps loaded from a gzipped VCF
+        # to the requested rsids (rs1 and rs2)
+        with open("tests/input/testvcf.vcf", "rb") as f_in:
+            with atomic_write(
+                "tests/input/testvcf.vcf.gz", mode="wb", overwrite=True
+            ) as f_out:
+                with gzip.open(f_out, "wb") as f_gzip:
+                    shutil.copyfileobj(f_in, f_gzip)
+
+        rsids = ["rs1", "rs2"]
+        s = SNPs("tests/input/testvcf.vcf.gz", rsids=rsids)
+        assert s.source == "vcf"
+        pd.testing.assert_frame_equal(s.snps, self.generic_snps_vcf().loc[rsids])
+
+    def test_snps_unannotated_vcf(self):
+        # https://samtools.github.io/hts-specs/VCFv4.2.pdf
+        # this only tests that the file is detected as a VCF and flagged as
+        # unannotated; the loaded snps themselves are not compared
+        s = SNPs("tests/input/unannotated_testvcf.vcf")
+        assert s.source == "vcf"
+        assert s.unannotated_vcf
+
+    def test_snps_vcf_buffer(self):
+        with open("tests/input/testvcf.vcf", "r") as f:
+            snps_vcf_buffer = SNPs(f.read().encode("utf-8"))
+        # https://samtools.github.io/hts-specs/VCFv4.2.pdf
+        # this tests for homozygous snps, heterozygous snps, multiallelic snps,
+        # phased snps, and snps with missing rsID
+        assert snps_vcf_buffer.source == "vcf"
+        pd.testing.assert_frame_equal(snps_vcf_buffer.snps, self.generic_snps_vcf())
+
+    def test_snps_vcf_buffer_rsids(self):
+        with open("tests/input/testvcf.vcf", "r") as f:
+            rsids = ["rs1", "rs2"]
+            df = SNPs(f.read().encode("utf-8"), rsids=rsids)
+        # https://samtools.github.io/hts-specs/VCFv4.2.pdf
+        # this tests that passing rsids limits the snps loaded from VCF bytes to
+        # the requested rsids (rs1 and rs2)
+        assert df.source == "vcf"
+        pd.testing.assert_frame_equal(df.snps, self.generic_snps_vcf().loc[rsids])
+
+    def test_snps_vcf_buffer_gz(self):
+        with open("tests/input/testvcf.vcf", "rb") as f_in:
+            with atomic_write(
+                "tests/input/testvcf.vcf.gz", mode="wb", overwrite=True
+            ) as f_out:
+                with gzip.open(f_out, "wb") as f_gzip:
+                    shutil.copyfileobj(f_in, f_gzip)
+
+        with open("tests/input/testvcf.vcf.gz", "rb") as f:
+            data = f.read()
+            s = SNPs(data)
+        os.remove("tests/input/testvcf.vcf.gz")
+        # https://samtools.github.io/hts-specs/VCFv4.2.pdf
+        # this tests for homozygous snps, heterozygous snps, multiallelic snps,
+        # phased snps, and snps with missing rsID
+        assert s.source == "vcf"
+        pd.testing.assert_frame_equal(s.snps, self.generic_snps_vcf())
+
+    def test_snps_vcf_buffer_gz_rsids(self):
+        with open("tests/input/testvcf.vcf", "rb") as f_in:
+            with atomic_write(
+                "tests/input/testvcf.vcf.gz", mode="wb", overwrite=True
+            ) as f_out:
+                with gzip.open(f_out, "wb") as f_gzip:
+                    shutil.copyfileobj(f_in, f_gzip)
+
+        with open("tests/input/testvcf.vcf.gz", "rb") as f:
+            rsids = ["rs1", "rs2"]
+            data = f.read()
+            s = SNPs(data, rsids=rsids)
+        os.remove("tests/input/testvcf.vcf.gz")
+        # https://samtools.github.io/hts-specs/VCFv4.2.pdf
+        # this tests that passing rsids limits the snps loaded from gzipped VCF
+        # bytes to the requested rsids (rs1 and rs2)
+        assert s.source == "vcf"
+        pd.testing.assert_frame_equal(s.snps, self.generic_snps_vcf().loc[rsids])
 
     def test_source_lineage_file(self):
         sc = SNPsCollection("tests/input/GRCh37.csv")
@@ -475,7 +592,7 @@ def test_save_snps_vcf(self):
 
         assert os.path.relpath(s.save_snps(vcf=True)) == "output/vcf_GRCh37.vcf"
         s = SNPs("output/vcf_GRCh37.vcf")
-        pd.testing.assert_frame_equal(s.snps, self.generic_snps())
+        pd.testing.assert_frame_equal(s.snps, self.generic_snps_vcf())
 
     def test_save_snps_specify_file(self):
         s = SNPs("tests/input/GRCh37.csv")