Skip to content

Commit 389ad5d

Browse files
committed
DevOps changes for the k8s migration:
RPC endpoints; building a CD system for the CI
1 parent 2e00907 commit 389ad5d

26 files changed

+1379
-117
lines changed

Makefile

+6-6
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
# Start docker containers
2-
start:
3-
docker-compose up -d
1+
PROTO_SRC_DIR := protos
2+
PROTO_OUT_DIR := pkg/generated
43

5-
# Stop docker containers
6-
stop:
7-
docker-compose down
4+
buildSpider:
5+
docker buildx build --platform linux/amd64 -f cmd/spider/Dockerfile -t spider .
6+
docker tag spider:latest 967991486854.dkr.ecr.eu-west-1.amazonaws.com/spider:latest
7+
docker push 967991486854.dkr.ecr.eu-west-1.amazonaws.com/spider:latest

README.md

+6-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
![Example of a graph](/assets/example.png "Example of a graph")
55

66
## Overview 🌐
7-
![Overview of architecture](/assets/overview.png "Overview")
87

98
### 1. Spider
109

@@ -30,7 +29,12 @@ This front end which allows for the Database to be searched
3029
## Design considerations
3130

3231
- Why not use OpenSearch/Elasticsearch
33-
-
32+
33+
## Domain specific language
34+
35+
For this project I have decided to create a domain-specific language for creating indexes and for querying the database.
36+
Overall, this was a good opportunity to learn more about compiler design and to create a tool that provides a
37+
massive amount of flexibility.
3438

3539

3640

cmd/spider/Dockerfile

+28-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,28 @@
1-
FROM go.1.22
2-
LABEL authors="alexcollie"
3-
# Go lang installation
4-
ENTRYPOINT ["top", "-b"]
1+
FROM golang:latest
2+
3+
# Set working directory
4+
WORKDIR /app
5+
6+
# Copy go.mod and go.sum
7+
COPY go.mod go.sum ./
8+
9+
# Download dependencies
10+
RUN go mod download
11+
12+
# Copy the entire project
13+
COPY . .
14+
15+
# Build the application
16+
#RUN go build -o spider cmd/spider/main.go
17+
RUN GOOS=linux GOARCH=amd64 go build -o spider cmd/spider/main.go
18+
19+
20+
# Expose ports
21+
EXPOSE 9090 9090
22+
# Used for health check
23+
EXPOSE 8080 8080
24+
# Used for metrics
25+
EXPOSE 80 80
26+
27+
# Run the application
28+
CMD ["./spider"]

cmd/spider/deployment.yaml

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
name: spider-deployment
5+
annotations:
6+
iam.amazonaws.com/role: ${IAM_ROLE}
7+
labels:
8+
app: spider
9+
spec:
10+
replicas: 2
11+
selector:
12+
matchLabels:
13+
app: spider
14+
template:
15+
metadata:
16+
labels:
17+
app: spider
18+
annotations:
19+
prometheus.io/scrape: "true"
20+
prometheus.io/path: /metrics
21+
prometheus.io/port: "8080"
22+
spec:
23+
imagePullSecrets:
24+
- name: regcred
25+
containers:
26+
- name: spider
27+
image: ${SPIDER_IMAGE}
28+
ports:
29+
- name: grpc
30+
containerPort: 9090
31+
- name: metrics
32+
containerPort: 8080
33+
livenessProbe:
34+
httpGet:
35+
path: /health
36+
port: 8080
37+
initialDelaySeconds: 10
38+
periodSeconds: 20
39+
resources:
40+
requests:
41+
cpu: "100m"
42+
memory: "128Mi"
43+
limits:
44+
cpu: "500m"
45+
memory: "128Mi"

cmd/spider/handler/flow_test.go

-39
This file was deleted.

cmd/spider/handler/rpc.go

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
package handler
2+
3+
import (
4+
"context"
5+
"webcrawler/cmd/spider/pkg/db"
6+
"webcrawler/pkg/generated/service/spider"
7+
)
8+
9+
type RpcServer struct {
10+
spider.UnimplementedSpiderServer
11+
12+
db db.Db
13+
}
14+
15+
func (c *RpcServer) mustEmbedUnimplementedSpiderServer() {
16+
//TODO implement me
17+
panic("implement me")
18+
}
19+
20+
func NewRPCServer(db db.Db) *RpcServer {
21+
return &RpcServer{
22+
db: db,
23+
}
24+
}
25+
26+
func (c *RpcServer) GetSeenList(ctx context.Context, request *spider.SeenListRequest) (*spider.SeenListResponse, error) {
27+
pages, err := c.db.Page.GetAllPages(ctx)
28+
if err != nil {
29+
return nil, err
30+
}
31+
var response []*spider.Page
32+
for _, page := range pages {
33+
response = append(response, &spider.Page{
34+
Url: page.Url,
35+
Body: page.Body,
36+
Links: page.Links,
37+
})
38+
}
39+
40+
return &spider.SeenListResponse{
41+
SeenSites: response,
42+
}, nil
43+
}

cmd/spider/main.go

+34-5
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,20 @@ import (
44
"context"
55
"fmt"
66
"github.com/joho/godotenv"
7+
"github.com/prometheus/client_golang/prometheus/promhttp"
8+
"go.opentelemetry.io/otel"
9+
"go.opentelemetry.io/otel/exporters/prometheus"
10+
"go.opentelemetry.io/otel/sdk/metric"
11+
"google.golang.org/grpc"
12+
"google.golang.org/grpc/reflection"
713
"log"
14+
"net"
815
"net/http"
916
"webcrawler/cmd/spider/handler"
1017
"webcrawler/cmd/spider/pkg/db"
1118
"webcrawler/pkg/config"
19+
"webcrawler/pkg/generated/service/spider"
20+
"webcrawler/pkg/health"
1221
)
1322

1423
func main() {
@@ -66,15 +75,35 @@ func main() {
6675
if err != nil {
6776
panic(err)
6877
}
78+
// Initialize OpenTelemetry
79+
exporter, err := prometheus.New()
80+
if err != nil {
81+
log.Fatalf("Failed to create Prometheus exporter: %v", err)
82+
}
83+
meterProvider := metric.NewMeterProvider(metric.WithReader(exporter))
84+
otel.SetMeterProvider(meterProvider)
6985

86+
// K8s health check
87+
http.HandleFunc("/health", health.Ok)
88+
http.Handle("/metrics", promhttp.Handler())
89+
90+
// Start HTTP server in a separate goroutine
91+
go func() {
92+
log.Fatal(http.ListenAndServe("0.0.0.0:8080", nil))
93+
}()
7094
go func() {
7195
server.Scan(ctx)
7296
}()
73-
http.HandleFunc("/seen", func(writer http.ResponseWriter, request *http.Request) {
7497

75-
})
98+
// GRPC server
99+
var opts []grpc.ServerOption
100+
lis, err := net.Listen("tcp", fmt.Sprintf("0.0.0.0:%d", 9090))
101+
if err != nil {
102+
panic(err)
103+
}
104+
grpcServer := grpc.NewServer(opts...)
105+
spider.RegisterSpiderServer(grpcServer, handler.NewRPCServer(server.Db))
106+
reflection.Register(grpcServer)
107+
grpcServer.Serve(lis)
76108

77-
http.HandleFunc("/health", func(writer http.ResponseWriter, request *http.Request) {
78-
fmt.Fprintf(writer, "OK")
79-
})
80109
}

cmd/spider/pkg/db/integration_test.go

+130
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
package db
2+
3+
import (
4+
"context"
5+
"database/sql"
6+
"fmt"
7+
"github.com/go-faker/faker/v4"
8+
"github.com/stretchr/testify/require"
9+
"testing"
10+
"webcrawler/cmd/spider/pkg/db/page"
11+
"webcrawler/cmd/spider/pkg/db/queue"
12+
"webcrawler/cmd/spider/pkg/site"
13+
dbx "webcrawler/pkg/db"
14+
"webcrawler/pkg/sqlx"
15+
test_containers "webcrawler/pkg/test-containers"
16+
)
17+
18+
type testDb struct {
19+
conn *sql.DB
20+
name string
21+
}
22+
23+
func Test_DbInteraction(t *testing.T) {
24+
ctx := context.Background()
25+
sqliteConn, err := dbx.NewSqlite()
26+
require.NoError(t, err)
27+
defer sqliteConn.Close()
28+
sqliteTestDb := testDb{
29+
conn: sqliteConn,
30+
name: "sqlite",
31+
}
32+
defer func() {
33+
_, err := sqliteConn.Exec(sqlx.DropSeenPages)
34+
require.NoError(t, err)
35+
_, err = sqliteConn.Exec(sqlx.DropQueue)
36+
require.NoError(t, err)
37+
}()
38+
39+
mariaConn, testContainer, err := test_containers.NewMarina(ctx)
40+
require.NoError(t, err)
41+
defer testContainer.Terminate(ctx)
42+
mariaTestDb := testDb{
43+
conn: mariaConn,
44+
name: "maria",
45+
}
46+
47+
for _, connType := range []testDb{sqliteTestDb, mariaTestDb} {
48+
conn := connType.conn
49+
t.Run(fmt.Sprintf("conn name:%s basic testing Seen pages", connType.name), func(t *testing.T) {
50+
sitePage := site.Page{
51+
Url: faker.URL(),
52+
Title: faker.Sentence(),
53+
Body: faker.Paragraph(),
54+
}
55+
_, err = conn.Exec(sqlx.CreateSeenTable)
56+
require.NoError(t, err)
57+
58+
_, err = conn.Exec(sqlx.CreateSeenTableIndex)
59+
require.NoError(t, err)
60+
61+
require.NoError(t, err)
62+
if conn == nil {
63+
t.Errorf("Expected connection to be created")
64+
}
65+
db := page.Db{Sql: conn}
66+
err = db.SavePage(ctx, sitePage)
67+
require.NoError(t, err)
68+
69+
page, err := db.GetPage(ctx, sitePage.Url)
70+
require.NoError(t, err)
71+
require.Equal(t, &sitePage, page)
72+
73+
//Page which does not exist
74+
page, err = db.GetPage(ctx, faker.URL())
75+
require.NoError(t, err)
76+
require.Nil(t, page)
77+
78+
// Update page
79+
sitePage.Title = faker.Sentence()
80+
sitePage.Body = faker.Paragraph()
81+
err = db.UpdatePage(ctx, sitePage)
82+
require.NoError(t, err)
83+
page, err = db.GetPage(ctx, sitePage.Url)
84+
require.NoError(t, err)
85+
require.Equal(t, &sitePage, page)
86+
87+
// Remove a page
88+
err = db.DeletePage(ctx, sitePage.Url)
89+
require.NoError(t, err)
90+
// Check if the page is removed
91+
page, err = db.GetPage(ctx, sitePage.Url)
92+
require.NoError(t, err)
93+
})
94+
t.Run("queue testing", func(t *testing.T) {
95+
96+
_, err = conn.Exec(sqlx.CreateQueueTable)
97+
require.NoError(t, err)
98+
99+
db := queue.Db{Sql: conn}
100+
101+
err = db.AddLink(ctx, "https://example.com")
102+
require.NoError(t, err)
103+
err = db.AddLink(ctx, "https://example.com/2")
104+
require.NoError(t, err)
105+
106+
links, err := db.GetExplore(ctx)
107+
require.NoError(t, err)
108+
require.Len(t, links, 2)
109+
require.Equal(t, "https://example.com", links[0])
110+
require.Equal(t, "https://example.com/2", links[1])
111+
err = db.RemoveLink(ctx, "https://example.com")
112+
require.NoError(t, err)
113+
links, err = db.GetExplore(ctx)
114+
require.NoError(t, err)
115+
require.Len(t, links, 1)
116+
require.Equal(t, "https://example.com/2", links[0])
117+
118+
err = db.RemoveLink(ctx, "https://example.com/2")
119+
require.NoError(t, err)
120+
links, err = db.GetExplore(ctx)
121+
require.NoError(t, err)
122+
require.Len(t, links, 0)
123+
124+
links = []string{"https://example.com", "https://example.com/2", "https://example.com/3"}
125+
err = db.AddLinks(ctx, links)
126+
require.NoError(t, err)
127+
links, err = db.GetExplore(ctx)
128+
})
129+
}
130+
}

0 commit comments

Comments
 (0)