Skip to content

Commit

Permalink
It Works!!!, need test and Terraform scripts updates
Browse files Browse the repository at this point in the history
  • Loading branch information
Dieg0Code committed Aug 9, 2024
1 parent 3599b09 commit a715ace
Show file tree
Hide file tree
Showing 11 changed files with 127 additions and 37 deletions.
1 change: 0 additions & 1 deletion .env

This file was deleted.

5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
/terraform/.terraform
/terraform/.terraform
/.aws-sam
/main
/.env
17 changes: 16 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,2 +1,17 @@
compile_lambda:
GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o main main.go
set GOOS=linux&& set GOARCH=amd64&& set CGO_ENABLED=0&& go build -o main main.go
start_db:
docker run -d --name dynamodb -p 8000:8000 amazon/dynamodb-local
create_table:
aws dynamodb create-table \
--table-name products \
--attribute-definitions \
AttributeName=ProductID,AttributeType=S \
--key-schema \
AttributeName=ProductID,KeyType=HASH \
--provisioned-throughput \
ReadCapacityUnits=5,WriteCapacityUnits=5 \
--endpoint-url http://localhost:8000
stop_db:
docker stop dynamodb
docker rm dynamodb
66 changes: 66 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Serverless API Scraper

A serverless API that scrapes product data from a supermarket website and stores it in a DynamoDB database.

- `[GET] /api/v1/products` - Get all products

```json
{
"code": 200,
"status": "OK",
"message": "Success getting all products",
"data": [
{
"product_id": "uuid",
"name": "Producto 1",
"category": "category 1",
"original_price": 899,
"discounted_price": 0
},
{
"product_id": "uuid",
"name": "Producto 2",
"category": "category 2",
"original_price": 999,
"discounted_price": 0
}
]
}
```

- `[GET] /api/v1/products/{ProductID}` - Get a product by ID

```json
{
"code": 200,
"status": "OK",
"message": "Success getting product",
"data": {
"product_id": "uuid",
"name": "Producto 1",
"category": "category 1",
"original_price": 899,
"discounted_price": 0
}
}
```

- `[POST] /api/v1/products` - Update the stored data (this takes a while, approx. 1 min)

```json
{
"update_data" : true
}
```

```json
{
"code": 200,
"status": "OK",
"message": "Success updating data",
"data": null
}
```

- Use Terraform to deploy the infrastructure.
- Tested with SAM CLI (Serverless Application Model Command Line Interface)
8 changes: 4 additions & 4 deletions api/controller/product_controller_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ type ProductControllerImpl struct {
func (p *ProductControllerImpl) GetAll(ctx *gin.Context) {
productResponse, err := p.ProductService.GetAll()
if err != nil {
logrus.WithError(err).Error("Error getting all products")
logrus.WithError(err).Error("[ProductControllerImpl.GetAll] Error getting all products")
errorResponse := response.BaseResponse{
Code: 500,
Status: "Internal Server Error",
Expand Down Expand Up @@ -55,7 +55,7 @@ func (p *ProductControllerImpl) GetByID(ctx *gin.Context) {

productResponse, err := p.ProductService.GetByID(productId)
if err != nil {
logrus.WithError(err).Error("Error getting product by ID")
logrus.WithError(err).Error("[ProductControllerImpl.GetByID] Error getting product by ID")
errorResponse := response.BaseResponse{
Code: 500,
Status: "Internal Server Error",
Expand All @@ -82,7 +82,7 @@ func (p *ProductControllerImpl) UpdateData(ctx *gin.Context) {
updateReq := request.UpdateDataRequest{}
err := ctx.BindJSON(&updateReq)
if err != nil {
logrus.WithError(err).Error("Error binding request")
logrus.WithError(err).Error("[ProductControllerImpl.UpdateData] Error binding request")
errorResponse := response.BaseResponse{
Code: 400,
Status: "Bad Request",
Expand All @@ -96,7 +96,7 @@ func (p *ProductControllerImpl) UpdateData(ctx *gin.Context) {

success, err := p.ProductService.UpdateData(updateReq)
if err != nil {
logrus.WithError(err).Error("Error updating data")
logrus.WithError(err).Error("[ProductControllerImpl.UpdateData] Error updating data")
errorResponse := response.BaseResponse{
Code: 500,
Status: "Internal Server Error",
Expand Down
6 changes: 1 addition & 5 deletions api/db/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,7 @@ import (
)

func NewDynamoDB(region string) *dynamodb.DynamoDB {
var dynamoEndpoint string
if os.Getenv("AWS_SAM_LOCAL") == "true" {
dynamoEndpoint = "http://localhost:8000"
}

dynamoEndpoint := os.Getenv("DYNAMO_ENDPOINT")
sess := session.Must(session.NewSession(&aws.Config{
Region: aws.String(region),
Endpoint: aws.String(dynamoEndpoint),
Expand Down
14 changes: 7 additions & 7 deletions api/repository/product_repository_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ func (p *ProductRepositoryImpl) DeleteAll() error {

result, err := p.db.Scan(scanInput)
if err != nil {
logrus.WithError(err).Error("error scanning products")
logrus.WithError(err).Error("[ProductRepositoryImpl.DeleteAll] error scanning products")
return errors.New("error scanning products")
}

Expand All @@ -45,7 +45,7 @@ func (p *ProductRepositoryImpl) DeleteAll() error {

_, err := p.db.DeleteItem(deleteInput)
if err != nil {
logrus.WithError(err).Error("error deleting product")
logrus.WithError(err).Error("[ProductRepositoryImpl.DeleteAll] error deleting products")
return errors.New("error deleting products")
}
}
Expand Down Expand Up @@ -78,7 +78,7 @@ func (p *ProductRepositoryImpl) Create(product models.Product) (models.Product,

_, err := p.db.PutItem(input)
if err != nil {
logrus.WithError(err).Error("error creating product")
logrus.WithError(err).Error("[ProductRepositoryImpl.Create] error creating product")
return models.Product{}, errors.New("error creating product")
}

Expand All @@ -93,14 +93,14 @@ func (p *ProductRepositoryImpl) GetAll() ([]models.Product, error) {

result, err := p.db.Scan(input)
if err != nil {
logrus.WithError(err).Error("error scanning products")
logrus.WithError(err).Error("[ProductRepositoryImpl.GetAll] error getting products")
return nil, errors.New("error getting products")
}

var products []models.Product
err = dynamodbattribute.UnmarshalListOfMaps(result.Items, &products)
if err != nil {
logrus.WithError(err).Error("error unmarshalling products")
logrus.WithError(err).Error("[ProductRepositoryImpl.GetAll] error unmarshalling products")
return nil, errors.New("error getting products")
}

Expand All @@ -120,7 +120,7 @@ func (p *ProductRepositoryImpl) GetByID(id string) (models.Product, error) {

result, err := p.db.GetItem(input)
if err != nil {
logrus.WithError(err).Error("error getting product")
logrus.WithError(err).Error("[ProductRepositoryImpl.GetByID] error getting product")
return models.Product{}, errors.New("error getting product")
}

Expand All @@ -131,7 +131,7 @@ func (p *ProductRepositoryImpl) GetByID(id string) (models.Product, error) {
var product models.Product
err = dynamodbattribute.UnmarshalMap(result.Item, &product)
if err != nil {
logrus.WithError(err).Error("error unmarshalling product")
logrus.WithError(err).Error("[ProductRepositoryImpl.GetByID] error unmarshalling product")
return models.Product{}, errors.New("error getting product")
}

Expand Down
8 changes: 4 additions & 4 deletions api/service/product_service_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ type ProductServiceImpl struct {
func (p *ProductServiceImpl) GetAll() ([]response.ProductResponse, error) {
result, err := p.ProductRepository.GetAll()
if err != nil {
logrus.WithError(err).Error("Error getting all products")
logrus.WithError(err).Error("[ProductServiceImpl.GetAll] Error getting all products")
return nil, err
}

Expand All @@ -43,7 +43,7 @@ func (p *ProductServiceImpl) GetAll() ([]response.ProductResponse, error) {
func (p *ProductServiceImpl) GetByID(productID string) (response.ProductResponse, error) {
result, err := p.ProductRepository.GetByID(productID)
if err != nil {
logrus.WithError(err).Error("Error getting product by ID")
logrus.WithError(err).Error("[ProductServiceImpl.GetByID] Error getting product by ID")
return response.ProductResponse{}, err
}

Expand All @@ -65,14 +65,14 @@ func (p *ProductServiceImpl) UpdateData(udateData request.UpdateDataRequest) (bo
if udateData.UpdateData {
err := p.ProductRepository.DeleteAll()
if err != nil {
logrus.WithError(err).Error("Error deleting all products")
logrus.WithError(err).Error("[ProductServiceImpl.UpdateData] Error deleting all products")
return false, err
}

for _, categoryInfo := range utils.Categories {
products, err := p.Scraper.ScrapeData(BaseURL, categoryInfo.MaxPage, categoryInfo.Category)
if err != nil {
logrus.WithError(err).Error("Error scraping data")
logrus.WithError(err).Error("[ProductServiceImpl.UpdateData] Error scraping data")
return false, err
}
for _, product := range products {
Expand Down
2 changes: 2 additions & 0 deletions api/utils/scraper_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (

"github.com/dieg0code/serverles-api-scraper/api/models"
"github.com/gocolly/colly/v2"
"github.com/sirupsen/logrus"
)

type ScraperImpl struct{}
Expand Down Expand Up @@ -53,6 +54,7 @@ func (s *ScraperImpl) ScrapeData(baseURL string, maxPage int, category string) (
})

for i := 1; i <= maxPage; i++ {
logrus.Infof("Scraping page %d", i)
collector.Visit(fmt.Sprintf("https://%s/%s/page/%d/", baseURL, category, i))
}

Expand Down
22 changes: 15 additions & 7 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,13 @@ import (
"github.com/dieg0code/serverles-api-scraper/api/router"
"github.com/dieg0code/serverles-api-scraper/api/service"
"github.com/dieg0code/serverles-api-scraper/api/utils"
"github.com/joho/godotenv"
"github.com/sirupsen/logrus"
)

var r *router.Router

func init() {
// load env vars
err := godotenv.Load()
if err != nil {
logrus.WithError(err).Error("Error loading .env file")
}
logrus.Info("Initializing serverless API scraper")

db := db.NewDynamoDB("sa-east-1")
productRepo := repository.NewProductRepositoryImpl(db, "products")
Expand All @@ -31,12 +26,25 @@ func init() {
productController := controller.NewProductControllerImpl(productService)
r = router.NewRouter(productController)
r.InitRoutes()

logrus.Info("Serverless API scraper initialized Successfully")
}

// Handler receives an API Gateway proxy request and delegates it to the
// package-level router r, returning the router's proxy response.
func Handler(ctx context.Context, req events.APIGatewayProxyRequest) (events.APIGatewayProxyResponse, error) {
return r.Handler(ctx, req)
logrus.Info("Handling request:", req)
response, err := r.Handler(ctx, req)
if err != nil {
logrus.Error("Error handling request:", err)
return events.APIGatewayProxyResponse{
StatusCode: 500,
Body: `{"error": "Internal Server Error"}`,
}, err
}
logrus.Info("Request handled successfully")
return response, nil
}

// main logs a startup message and hands Handler to lambda.Start.
func main() {
logrus.Info("Starting serverless API scraper")
lambda.Start(Handler)
}
15 changes: 8 additions & 7 deletions template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ AWSTemplateFormatVersion: '2010-09-09'
Transform: AWS::Serverless-2016-10-31

Resources:
MyGoLambdaFunction:
ServerlessApiScraper:
Type: AWS::Serverless::Function
Properties:
Handler: main
Expand All @@ -12,13 +12,14 @@ Resources:
Architectures:
- x86_64
MemorySize: 128
Timeout: 30
Timeout: 150
Environment:
Variables:
TABLE_NAME: "products" # Aquí puedes definir tus variables de entorno
TABLE_NAME: "products"
DYNAMO_ENDPOINT: "http://host.docker.internal:8000"
Policies:
- AWSLambdaBasicExecutionRole # Políticas básicas para ejecutar la función Lambda
- AWSLambdaDynamoDBExecutionRole # Si necesitas acceso a DynamoDB
- AWSLambdaBasicExecutionRole
- AWSLambdaDynamoDBExecutionRole
Events:
ApiGateway:
Type: Api
Expand All @@ -27,6 +28,6 @@ Resources:
Method: ANY

Outputs:
MyLambdaFunctionArn:
ServerlessApiScraperArn:
Description: "ARN de la función Lambda desplegada"
Value: !GetAtt MyGoLambdaFunction.Arn
Value: !GetAtt ServerlessApiScraper.Arn

0 comments on commit a715ace

Please sign in to comment.