-
-
Notifications
You must be signed in to change notification settings - Fork 183
/
Copy path04_06.sql
28 lines (28 loc) · 901 Bytes
/
04_06.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#standardSQL
# 04_06: Pages with source[sizes]
SELECT
client,
COUNTIF(hasSizes) AS hasSizes,
COUNTIF(hasSrcSet) AS hasSrcSet,
COUNTIF(hasPicture) AS hasPicture,
COUNT(0) AS total,
ROUND(COUNTIF(hasSizes) * 100 / COUNT(0), 2) AS pctSizes,
ROUND(COUNTIF(hasSrcSet) * 100 / COUNT(0), 2) AS pctSrcSet,
ROUND(COUNTIF(hasPicture) * 100 / COUNT(0), 2) AS pctPicture,
ROUND(COUNTIF(hasPicture OR hasSrcSet OR hasSizes) * 100 / COUNT(0), 2) AS anyRespImg
FROM (
SELECT
client,
REGEXP_CONTAINS(body, r'(?is)<(?:img|source)[^>]*sizes=[\'"]?([^\'"]*)') AS hasSizes,
REGEXP_CONTAINS(body, r'(?is)<(?:img|source)[^>]*srcset=[\'"]?([^\'"]*)') AS hasSrcSet,
REGEXP_CONTAINS(body, r'(?si)<picture.*?<img.*?/picture>') AS hasPicture
FROM
`httparchive.almanac.summary_response_bodies`
WHERE
date = '2019-07-01' AND
firstHtml
)
GROUP BY
client
ORDER BY
client DESC