Skip to content

Commit

Permalink
✨ Added web-based API for getting scraped data
Browse files Browse the repository at this point in the history
  • Loading branch information
lukecarr committed Dec 17, 2021
1 parent 1692b35 commit e079691
Show file tree
Hide file tree
Showing 12 changed files with 3,461 additions and 0 deletions.
34 changes: 34 additions & 0 deletions web/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/node_modules
/.pnp
.pnp.js

# testing
/coverage

# next.js
/.next/
/out/

# production
/build

# misc
.DS_Store
*.pem

# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# local env files
.env.local
.env.development.local
.env.test.local
.env.production.local

# vercel
.vercel
34 changes: 34 additions & 0 deletions web/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
This is a [Next.js](https://nextjs.org/) project bootstrapped with [`create-next-app`](https://github.com/vercel/next.js/tree/canary/packages/create-next-app).

## Getting Started

First, run the development server:

```bash
npm run dev
# or
yarn dev
```

Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.

You can start editing the page by modifying `pages/index.tsx`. The page auto-updates as you edit the file.

[API routes](https://nextjs.org/docs/api-routes/introduction) can be accessed on [http://localhost:3000/api/daily](http://localhost:3000/api/daily). This endpoint can be edited in `pages/api/daily.ts`.

The `pages/api` directory is mapped to `/api/*`. Files in this directory are treated as [API routes](https://nextjs.org/docs/api-routes/introduction) instead of React pages.

## Learn More

To learn more about Next.js, take a look at the following resources:

- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.

You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js/) - your feedback and contributions are welcome!

## Deploy on Vercel

The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.

Check out our [Next.js deployment documentation](https://nextjs.org/docs/deployment) for more details.
5 changes: 5 additions & 0 deletions web/next-env.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/// <reference types="next" />
/// <reference types="next/image-types/global" />

// NOTE: This file should not be edited
// see https://nextjs.org/docs/basic-features/typescript for more information.
3 changes: 3 additions & 0 deletions web/next.config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Next.js configuration for the web app.
module.exports = {
// Turns on React's strict mode for the application.
reactStrictMode: true,
}
31 changes: 31 additions & 0 deletions web/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"name": "dfe-attendance",
"private": true,
"scripts": {
"dev": "next dev",
"build": "next build",
"start": "next start",
"lint": "next lint"
},
"dependencies": {
"@keyv/redis": "^2.2.1",
"adm-zip": "^0.5.9",
"axios": "^0.24.0",
"cheerio": "^1.0.0-rc.10",
"keyv": "^4.0.4",
"next": "12.0.7",
"react": "17.0.2",
"react-dom": "17.0.2"
},
"devDependencies": {
"@types/adm-zip": "^0.4.34",
"@types/keyv": "^3.1.3",
"@types/react": "^17.0.37",
"eslint": "8.4.1",
"eslint-config-next": "12.0.7",
"typescript": "^4.5.4"
},
"eslintConfig": {
"extends": "next/core-web-vitals"
}
}
20 changes: 20 additions & 0 deletions web/pages/_app.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import type { AppProps } from 'next/app'
import Head from 'next/head'

import 'styles/globals.css'

/**
 * Custom Next.js App wrapper.
 *
 * Renders the active page and sets the document title. A page component may
 * expose an optional static `title` string (e.g. `Home.title = 'Home'`); when
 * present and non-empty it is prefixed to the site name, otherwise the site
 * name alone is used.
 */
const App = ({ Component, pageProps }: AppProps) => {
  // Narrow cast: only the optional static `title` is added to the component
  // type, so the rest of the component stays type-checked (no `any`).
  const pageTitle = (Component as AppProps['Component'] & { title?: string }).title
  const title = pageTitle ? `${pageTitle} :: DfE Attendance Scraper` : 'DfE Attendance Scraper'

  return (
    <>
      <Head>
        <title>{title}</title>
      </Head>

      <Component {...pageProps} />
    </>
  )
}

export default App
32 changes: 32 additions & 0 deletions web/pages/api/daily.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import type { NextApiRequest, NextApiResponse } from 'next'
import axios from 'axios'
import * as cheerio from 'cheerio'
import AdmZip from 'adm-zip'
import Keyv from 'keyv'

// Key-value cache for scraped data: an in-memory Map when REDIS_URI is not
// set, otherwise a Keyv instance created from the REDIS_URI connection string.
const cache = process.env.REDIS_URI === undefined
  ? new Keyv({ store: new Map() })
  : new Keyv(process.env.REDIS_URI)

/**
 * Scrapes the DfE statistics page for the "Download all data" archive,
 * extracts the daily attendance CSV from it, stores the CSV text in the cache
 * for 24 hours and returns it.
 *
 * @returns The text of the daily attendance CSV.
 * @throws Error if the download link cannot be found on the page, or if the
 *         archive does not yield any CSV text. Network/HTTP errors from axios
 *         propagate unchanged.
 */
const getCsvText = async (): Promise<string> => {
  const pageUrl = 'https://explore-education-statistics.service.gov.uk/find-statistics/attendance-in-education-and-early-years-settings-during-the-coronavirus-covid-19-outbreak'
  const csvEntryName = 'data/table_1b_daily_attendance_in_state_schools_during_covid_19_.csv'

  const { data } = await axios.get(pageUrl)

  const $ = cheerio.load(data)

  // Match on the visible link text; trim so surrounding whitespace in the
  // markup does not prevent a match.
  const href = $('a[href]')
    .filter((_, el) => $(el).text().trim() === 'Download all data')
    .first()
    .attr('href')

  if (!href) {
    // Fail with a clear message instead of passing `undefined` to axios.
    throw new Error('Could not find the "Download all data" link on the statistics page')
  }

  // Resolve against the page URL so a relative href still works; an absolute
  // href is returned as-is (normalised).
  const downloadUrl = new URL(href, pageUrl).toString()

  const { data: zipData } = await axios.get(downloadUrl, { responseType: 'arraybuffer' })
  const zip = new AdmZip(zipData)
  const csvText = zip.readAsText(csvEntryName, 'utf8')

  if (!csvText) {
    // NOTE(review): adm-zip's readAsText appears to return '' when the entry is
    // missing — confirm against the adm-zip docs. Either way, never cache an
    // empty file for a full day.
    throw new Error(`CSV entry "${csvEntryName}" was missing or empty in the downloaded archive`)
  }

  // Cache for 24 hours (TTL is in milliseconds).
  await cache.set('daily_attendance', csvText, 1000 * 60 * 60 * 24)

  return csvText
}

/**
 * API route: responds with the daily attendance CSV as a file download.
 *
 * Serves the cached CSV when one is available, otherwise scrapes a fresh copy
 * via `getCsvText()`. If the cache lookup or the scrape fails, the error is
 * logged and a 502 JSON error response is sent instead of letting the
 * rejection escape the handler.
 *
 * @param _ Incoming request (unused).
 * @param res Response used to send the CSV or the error payload.
 */
export default async function handler(_: NextApiRequest, res: NextApiResponse) {
  let csvText: string

  try {
    const cached = await cache.get('daily_attendance')
    // `||` rather than `??`: an empty cached string is treated as a miss so an
    // empty file is never served from the cache.
    csvText = cached || await getCsvText()
  } catch (err) {
    console.error('Failed to obtain daily attendance data', err)
    res.status(502).json({ error: 'Unable to retrieve daily attendance data' })
    return
  }

  res.status(200)
    .setHeader('Content-Type', 'text/csv')
    .setHeader('Content-Disposition', 'attachment;filename=daily_attendance.csv')
    .send(csvText)
}
30 changes: 30 additions & 0 deletions web/pages/index.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import styles from 'styles/Home.module.css'

/**
 * Landing page: shows the site heading, a short description with a
 * non-affiliation notice, and a card linking to the `/api/daily` endpoint.
 */
function Home() {
  return (
    <div className={styles.container}>
      <main className={styles.main}>
        <h1 className={styles.title}>DfE Attendance Scraper</h1>

        <p className={styles.description}>
          API endpoints for obtaining attendance data scraped from the DfE&apos;s statistics website!
          <br /><br />
          <strong>This website is not affiliated or associated with the DfE!</strong>
        </p>

        <div className={styles.grid}>
          <a href="/api/daily" className={styles.card}>
            <h2>Daily Attendance &rarr;</h2>
            <p>Daily attendance data for schools from Sep 2020 to present!</p>
          </a>
        </div>
      </main>
    </div>
  )
}

// Static page title, read by the App wrapper to build the document <title>.
Home.title = 'Home'

export default Home
75 changes: 75 additions & 0 deletions web/styles/Home.module.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/* Styles for the home page (CSS module; class names are referenced from pages/index.tsx). */

/* Outer wrapper: horizontal page padding only. */
.container {
padding: 0 2rem;
}

/* Main area: at least full viewport height, content stacked in a column and centred on both axes. */
/* NOTE(review): `flex: 1` only takes effect if the parent is a flex container; .container is not one in this file. */
.main {
min-height: 100vh;
padding: 4rem 0;
flex: 1;
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
}

/* Page heading. */
.title {
margin: 0;
line-height: 1.15;
font-size: 4rem;
}

/* Heading and description are both centre-aligned. */
.title,
.description {
text-align: center;
}

/* Intro paragraph below the heading. */
.description {
margin: 4rem 0;
line-height: 1.5;
font-size: 1.5rem;
}

/* Card container: wrapping, centred flex row capped at 800px wide. */
.grid {
display: flex;
align-items: center;
justify-content: center;
flex-wrap: wrap;
max-width: 800px;
}

/* Link card: bordered, rounded box with animated colour/border transitions. */
.card {
margin: 1rem;
padding: 1.5rem;
text-align: left;
color: inherit;
text-decoration: none;
border: 1px solid #eaeaea;
border-radius: 10px;
transition: color 0.15s ease, border-color 0.15s ease;
max-width: 300px;
}

/* Highlight the card text and border on hover, keyboard focus and press. */
.card:hover,
.card:focus,
.card:active {
color: #0070f3;
border-color: #0070f3;
}

/* Card heading. */
.card h2 {
margin: 0 0 1rem 0;
font-size: 1.5rem;
}

/* Card body text. */
.card p {
margin: 0;
font-size: 1.25rem;
line-height: 1.5;
}

/* Viewports up to 600px wide: stack cards vertically at full width. */
@media (max-width: 600px) {
.grid {
width: 100%;
flex-direction: column;
}
}
16 changes: 16 additions & 0 deletions web/styles/globals.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/* Global styles (imported once from pages/_app.tsx). */

/* Remove default page spacing and set a system-font stack. */
html,
body {
padding: 0;
margin: 0;
font-family: -apple-system, BlinkMacSystemFont, Segoe UI, Roboto, Oxygen,
Ubuntu, Cantarell, Fira Sans, Droid Sans, Helvetica Neue, sans-serif;
}

/* Links inherit the surrounding text colour and are not underlined. */
a {
color: inherit;
text-decoration: none;
}

/* Include padding and border in every element's width/height. */
* {
box-sizing: border-box;
}
32 changes: 32 additions & 0 deletions web/tsconfig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"compilerOptions": {
"target": "es5",
"lib": [
"dom",
"dom.iterable",
"esnext"
],
"allowJs": true,
"skipLibCheck": true,
"strict": false,
"forceConsistentCasingInFileNames": true,
"noEmit": true,
"incremental": true,
"esModuleInterop": true,
"allowSyntheticDefaultImports": true,
"module": "esnext",
"moduleResolution": "node",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"baseUrl": "."
},
"include": [
"next-env.d.ts",
"**/*.ts",
"**/*.tsx"
],
"exclude": [
"node_modules"
]
}
Loading

0 comments on commit e079691

Please sign in to comment.