Skip to content

Commit 55c9058

Browse files
authored
Replace the etcdctl proc call with etcd client. (#1970)
Fixes #1945
Signed-off-by: Ye Cao <[email protected]>
1 parent e1e1b26 commit 55c9058

18 files changed

+320
-410
lines changed

docker/Dockerfile.vineyardd

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ RUN export arch="$PLATFORM" && \
2929
curl -LO https://github.com/etcd-io/etcd/releases/download/v3.5.9/etcd-v3.5.9-linux-$arch.tar.gz && \
3030
tar zxf etcd-v3.5.9-linux-$arch.tar.gz && \
3131
mv /tmp/etcd-v3.5.9-linux-$arch/etcd /usr/bin/etcd && \
32-
mv /tmp/etcd-v3.5.9-linux-$arch/etcdctl /usr/bin/etcdctl && \
3332
curl -LO https://dl.k8s.io/release/v1.24.0/bin/linux/$arch/kubectl && \
3433
chmod +x kubectl && \
3534
mv /tmp/kubectl /usr/bin/kubectl
@@ -86,7 +85,6 @@ SHELL ["/bin/bash", "-c"]
8685
COPY --from=builder /usr/bin/bash-linux /bin/bash
8786
COPY --from=builder /usr/bin/dumb-init /usr/bin/dumb-init
8887
COPY --from=builder /usr/bin/etcd /usr/bin/etcd
89-
COPY --from=builder /usr/bin/etcdctl /usr/bin/etcdctl
9088
COPY --from=builder /usr/bin/kubectl /usr/bin/kubectl
9189
COPY --from=builder /work/v6d/build/bin/vineyardd /usr/local/bin/vineyardd
9290
RUN ln -s /busybox/env /usr/bin/env

k8s/test/e2e/Makefile

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ load-vineyardd-image:
2929
@docker push localhost:5001/vineyardd:latest
3030
.PHONY: load-vineyardd-image
3131

32+
load-vineyard-python-dev-image:
33+
@docker tag ghcr.io/v6d-io/v6d/vineyard-python-dev:latest localhost:5001/vineyard-python-dev:latest
34+
@docker push localhost:5001/vineyard-python-dev:latest
35+
3236
load-vineyard-operator-image:
3337
@docker tag vineyardcloudnative/vineyard-operator:latest localhost:5001/vineyard-operator:latest
3438
@docker push localhost:5001/vineyard-operator:latest
@@ -248,13 +252,13 @@ e2e-tests-failover: prepare-e2e-test install-vineyard-cluster
248252

249253
############# etcd failover testing #############################################
250254

251-
e2e-tests-three-etcd-nodes-failover: prepare-e2e-test build-local-cluster load-vineyardd-image
255+
e2e-tests-three-etcd-nodes-failover: prepare-e2e-test build-local-cluster load-vineyardd-image load-vineyard-python-dev-image
252256
@echo "Running three etcd nodes failover e2e test..."
253257
@cd ${ROOT_DIR} && ${GOBIN}/e2e run --config=${E2E_DIR}/etcd-failover/three-etcd-nodes-failover-e2e.yaml
254258
@echo "three etcd nodes failover e2e test passed."
255259
@make delete-local-cluster
256260

257-
e2e-tests-five-etcd-nodes-failover: prepare-e2e-test build-local-cluster load-vineyardd-image
261+
e2e-tests-five-etcd-nodes-failover: prepare-e2e-test build-local-cluster load-vineyardd-image load-vineyard-python-dev-image
258262
@echo "Running five etcd nodes failover e2e test..."
259263
@cd ${ROOT_DIR} && ${GOBIN}/e2e run --config=${E2E_DIR}/etcd-failover/five-etcd-nodes-failover-e2e.yaml
260264
@echo "five etcd nodes failover e2e test passed."

k8s/test/e2e/etcd-failover/consumer.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,12 @@ spec:
1212
restartPolicy: Never
1313
containers:
1414
- name: consumer
15-
image: python:3.10
15+
image: localhost:5001/vineyard-python-dev:latest
16+
imagePullPolicy: IfNotPresent
1617
command:
1718
- bash
1819
- -c
1920
- |
20-
pip install vineyard numpy pandas --index-url https://pypi.tuna.tsinghua.edu.cn/simple;
2121
cat << EOF >> consumer.py
2222
import vineyard
2323
client = vineyard.connect(host="vineyardd-svc.default.svc.cluster.local",port=9600)

k8s/test/e2e/etcd-failover/five-etcd-nodes-failover-e2e.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,9 @@ setup:
3636
done
3737
kubectl delete pod "vineyardd-$num1" -n default --force
3838
kubectl delete pod "vineyardd-$num2" -n default --force
39+
kubectl rollout status statefulset/vineyardd
3940
# wait for the instance quit messages to be propagated
40-
sleep 240
41+
sleep 360
4142
kubectl rollout status statefulset/vineyardd
4243
done
4344
- name: install consumer

k8s/test/e2e/etcd-failover/producer.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,12 @@ spec:
2727
restartPolicy: Never
2828
containers:
2929
- name: producer
30-
image: python:3.10
30+
image: localhost:5001/vineyard-python-dev:latest
31+
imagePullPolicy: IfNotPresent
3132
command:
3233
- bash
3334
- -c
3435
- |
35-
pip install vineyard numpy pandas --index-url https://pypi.tuna.tsinghua.edu.cn/simple;
3636
cat << EOF >> producer.py
3737
import vineyard
3838
import numpy as np
@@ -42,4 +42,4 @@ spec:
4242
client.put(data, persist=True, name="test_data");
4343
client.close()
4444
EOF
45-
python producer.py;
45+
python producer.py;

k8s/test/e2e/etcd-failover/three-etcd-nodes-failover-e2e.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ setup:
3232
kubectl delete pod vineyardd-$(shuf -i 0-2 -n 1) -n default --force
3333
kubectl rollout status statefulset/vineyardd
3434
# wait for the instance quit messages to be propagated
35-
sleep 60
35+
sleep 120
3636
kubectl rollout status statefulset/vineyardd
3737
done
3838
- name: install consumer

src/server/services/etcd_meta_service.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -435,8 +435,8 @@ Status EtcdMetaService::preStart(const bool create_new_instance) {
435435
return etcd_launcher_->LaunchEtcdServer(etcd_, meta_sync_lock_);
436436
}
437437

438-
Status EtcdMetaService::RemoveMember(const std::string member_id) {
439-
auto status = etcd_launcher_->RemoveMember(member_id);
438+
Status EtcdMetaService::RemoveMember(const uint64_t& member_id) {
439+
auto status = etcd_launcher_->RemoveMember(etcd_, member_id);
440440
if (!status.ok()) {
441441
LOG(ERROR) << "Failed to remove member " << member_id
442442
<< " from etcd: " << status.ToString();
@@ -449,7 +449,7 @@ Status EtcdMetaService::UpdateEndpoint() {
449449
if (etcd_launcher_ == nullptr) {
450450
return Status::Invalid("etcd launcher is not initialized");
451451
}
452-
return etcd_launcher_->UpdateEndpoint();
452+
return etcd_launcher_->UpdateEndpoint(etcd_);
453453
}
454454

455455
} // namespace vineyard

src/server/services/etcd_meta_service.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,9 +131,9 @@ class EtcdMetaService : public IMetaService {
131131

132132
void TryReleaseLock(std::string key, callback_t<bool>) override;
133133

134-
Status RemoveMember(std::string member_id);
134+
Status RemoveMember(const uint64_t& member_id);
135135

136-
std::string GetMemberID() { return etcd_launcher_->GetMemberID(); }
136+
const uint64_t GetMemberID() { return etcd_launcher_->GetMemberID(); }
137137

138138
Status UpdateEndpoint();
139139

src/server/services/meta_service.cc

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -555,11 +555,12 @@ void IMetaService::registerToEtcd() {
555555
self->meta_["my_nodename"] = nodename;
556556

557557
self->instances_list_.emplace(rank);
558-
auto etcd_member_id = self->GetEtcdMemberID();
558+
uint64_t etcd_member_id = self->GetEtcdMemberID();
559559
std::string key = "/instances/" + self->server_ptr_->instance_name();
560560
ops.emplace_back(op_t::Put(key + "/hostid", self_host_id));
561-
if (etcd_member_id != "") {
562-
ops.emplace_back(op_t::Put(key + "/member_id", etcd_member_id));
561+
if (etcd_member_id != 0) {
562+
ops.emplace_back(
563+
op_t::Put(key + "/member_id", std::to_string(etcd_member_id)));
563564
}
564565
ops.emplace_back(op_t::Put(key + "/hostname", hostname));
565566
ops.emplace_back(op_t::Put(key + "/nodename", nodename));
@@ -1218,7 +1219,8 @@ void IMetaService::instanceUpdate(const op_t& op, const bool from_remote) {
12181219
}
12191220
// reset the etcd client
12201221
VINEYARD_CHECK_OK(this->probe());
1221-
instance_to_member_id_[instance_id] = member_id;
1222+
uint64_t member_id_ = std::stoull(member_id);
1223+
instance_to_member_id_[instance_id] = member_id_;
12221224
} else if (op.op != op_t::op_type_t::kDel) {
12231225
if (from_remote) {
12241226
LOG(ERROR) << "Unknown op type: " << op.ToString();
@@ -1265,20 +1267,20 @@ Status IMetaService::daemonWatchHandler(
12651267
return callback_after_update(Status::OK(), rev);
12661268
}
12671269

1268-
Status IMetaService::RemoveEtcdMember(const std::string& member_id) {
1270+
Status IMetaService::RemoveEtcdMember(const uint64_t& member_id) {
12691271
return callIfEtcdMetaService(
12701272
[&member_id](std::shared_ptr<EtcdMetaService> etcd_meta_service) {
12711273
return etcd_meta_service->RemoveMember(member_id);
12721274
},
12731275
Status::OK());
12741276
}
12751277

1276-
std::string IMetaService::GetEtcdMemberID() {
1278+
const uint64_t IMetaService::GetEtcdMemberID() {
12771279
return callIfEtcdMetaService(
12781280
[](std::shared_ptr<EtcdMetaService> etcd_meta_service) {
12791281
return etcd_meta_service->GetMemberID();
12801282
},
1281-
std::string());
1283+
(uint64_t) 0);
12821284
}
12831285

12841286
Status IMetaService::UpdateEtcdEndpoint() {

src/server/services/meta_service.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -149,9 +149,9 @@ class IMetaService : public std::enable_shared_from_this<IMetaService> {
149149

150150
virtual void TryReleaseLock(std::string key, callback_t<bool> callback) = 0;
151151

152-
Status RemoveEtcdMember(const std::string& member_id);
152+
Status RemoveEtcdMember(const uint64_t& member_id);
153153

154-
std::string GetEtcdMemberID();
154+
const uint64_t GetEtcdMemberID();
155155

156156
Status UpdateEtcdEndpoint();
157157

@@ -262,7 +262,7 @@ class IMetaService : public std::enable_shared_from_this<IMetaService> {
262262

263263
std::unique_ptr<asio::steady_timer> heartbeat_timer_;
264264
std::set<InstanceID> instances_list_;
265-
std::map<InstanceID, std::string> instance_to_member_id_;
265+
std::map<InstanceID, uint64_t> instance_to_member_id_;
266266
int64_t target_latest_time_ = 0;
267267
size_t timeout_count_ = 0;
268268

src/server/util/etcd_launcher.cc

Lines changed: 23 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -114,25 +114,6 @@ Status checkEtcdCmd(const std::string& etcd_cmd) {
114114
return Status::OK();
115115
}
116116

117-
Status checkEtcdctlCommand(const std::string& etcdctl_cmd) {
118-
if (etcdctl_cmd.empty()) {
119-
std::string error_message =
120-
"Failed to find etcdctl binary, please specify its path using the "
121-
"`--etcdctl_cmd` argument and try again.";
122-
LOG(WARNING) << error_message;
123-
return Status::EtcdError("Failed to find etcdctl binary");
124-
}
125-
if (!ghc::filesystem::exists(ghc::filesystem::path(etcdctl_cmd))) {
126-
std::string error_message =
127-
"The etcd binary '" + etcdctl_cmd +
128-
"' does not exist, please specify the correct path using "
129-
"the `--etcdctl_cmd` argument and try again.";
130-
LOG(WARNING) << error_message;
131-
return Status::EtcdError("The etcdctl binary does not exist");
132-
}
133-
return Status::OK();
134-
}
135-
136117
EtcdLauncher::EtcdLauncher(const json& etcd_spec,
137118
const uint32_t& rpc_socket_port,
138119
const bool create_new_instance)
@@ -166,15 +147,6 @@ Status EtcdLauncher::LaunchEtcdServer(
166147
return Status::OK();
167148
}
168149

169-
// resolve etcdctl binary
170-
std::string etcdctl_cmd = etcd_spec_.value("etcdctl_cmd", "");
171-
if (etcdctl_cmd.empty()) {
172-
etcdctl_cmd = lookupCommand(etcd_spec_, "etcdctl");
173-
}
174-
RETURN_ON_ERROR(checkEtcdctlCommand(etcdctl_cmd));
175-
etcdctl_ = std::make_shared<Etcdctl>(etcdctl_cmd);
176-
LOG(INFO) << "Found etcdctl at: " << etcdctl_cmd;
177-
178150
bool skip_launch_etcd = etcd_spec_.value("skip_launch_etcd", true);
179151
bool etcd_cluster_existing = false;
180152
// create_new_instance_ is a flag to indicate whether we should launch an etcd
@@ -267,25 +239,25 @@ Status EtcdLauncher::LaunchEtcdServer(
267239
if (etcd_cluster_existing) {
268240
std::string cluster_name;
269241

270-
std::vector<json> all_members = etcdctl_->listMembers(etcd_endpoint);
271-
std::vector<json> members = etcdctl_->listHealthyMembers(all_members);
242+
std::vector<json> all_members = listMembers(etcd_client);
243+
std::vector<json> members = listHealthyMembers(all_members);
272244
if (members.size() == 0) {
273245
return Status::EtcdError("No healthy members found via etcdctl");
274246
}
275247

276-
existing_members = etcdctl_->listMembersName(members);
248+
existing_members = listMembersName(members);
277249
new_member_name = generateMemberName(existing_members);
278-
peer_urls = etcdctl_->listPeerURLs(members);
250+
peer_urls = listPeerURLs(members);
279251
if (peer_urls.size() == 0) {
280252
return Status::EtcdError("No peer urls found via etcdctl");
281253
}
282-
std::vector<std::string> client_urls = etcdctl_->listClientURLs(members);
254+
std::vector<std::string> client_urls = listClientURLs(members);
283255
if (peer_urls.size() == 0) {
284256
return Status::EtcdError("No client urls found via etcdctl");
285257
}
286258

287259
endpoint = boost::algorithm::join(client_urls, ",");
288-
if (!etcdctl_->addMember(new_member_name, peer_endpoint, endpoint).ok()) {
260+
if (!addMember(etcd_client, peer_endpoint).ok()) {
289261
return Status::EtcdError("Failed to add new member to the etcd cluster");
290262
}
291263

@@ -378,8 +350,7 @@ Status EtcdLauncher::LaunchEtcdServer(
378350
retries < max_probe_retries) {
379351
etcd_client.reset(new etcd::Client(etcd_endpoints_));
380352
if (probeEtcdServer(etcd_client, sync_lock)) {
381-
etcd_member_id_ =
382-
etcdctl_->findMemberID(peer_endpoint, etcd_endpoints_);
353+
etcd_member_id_ = findMemberID(etcd_client, peer_endpoint);
383354
// reset the etcd watcher
384355
break;
385356
}
@@ -388,25 +359,28 @@ Status EtcdLauncher::LaunchEtcdServer(
388359
}
389360
if (!etcd_proc_) {
390361
return handleEtcdFailure(
391-
peer_endpoint,
362+
etcd_client, peer_endpoint,
392363
"Failed to wait until etcd ready: operation has been interrupted");
393364
} else if (err) {
394365
return handleEtcdFailure(
395-
peer_endpoint, "Failed to wait until etcd ready: " + err.message());
366+
etcd_client, peer_endpoint,
367+
"Failed to wait until etcd ready: " + err.message());
396368
} else if (retries >= max_probe_retries) {
397369
return handleEtcdFailure(
398-
peer_endpoint, "Etcd has been launched but failed to connect to it");
370+
etcd_client, peer_endpoint,
371+
"Etcd has been launched but failed to connect to it");
399372
} else {
400373
return Status::OK();
401374
}
402375
}
403376
}
404377

405-
Status EtcdLauncher::handleEtcdFailure(const std::string& peer_urls,
406-
const std::string& errMessage) {
407-
auto member_id = etcdctl_->findMemberID(peer_urls, etcd_endpoints_);
408-
RETURN_ON_ERROR(etcdctl_->removeMember(etcd_member_id_, etcd_endpoints_));
409-
etcd_member_id_.clear();
378+
Status EtcdLauncher::handleEtcdFailure(
379+
std::unique_ptr<etcd::Client>& etcd_client, const std::string& peer_urls,
380+
const std::string& errMessage) {
381+
auto member_id = findMemberID(etcd_client, peer_urls);
382+
RETURN_ON_ERROR(removeMember(etcd_client, member_id));
383+
etcd_member_id_ = 0;
410384
return Status::IOError(errMessage);
411385
}
412386

@@ -488,10 +462,11 @@ bool EtcdLauncher::probeEtcdServer(std::unique_ptr<etcd::Client>& etcd_client,
488462
return etcd_client && response.is_ok();
489463
}
490464

491-
Status EtcdLauncher::UpdateEndpoint() {
492-
auto all_members = etcdctl_->listMembers(etcd_endpoints_);
493-
auto members = etcdctl_->listHealthyMembers(all_members);
494-
auto client_urls = etcdctl_->listClientURLs(members);
465+
Status EtcdLauncher::UpdateEndpoint(
466+
std::unique_ptr<etcd::Client>& etcd_client) {
467+
auto all_members = listMembers(etcd_client);
468+
auto members = listHealthyMembers(all_members);
469+
auto client_urls = listClientURLs(members);
495470
etcd_endpoints_ = boost::algorithm::join(client_urls, ",");
496471
return Status::OK();
497472
}

src/server/util/etcd_launcher.h

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ limitations under the License.
2929
#include "etcd/Client.hpp"
3030

3131
#include "common/util/status.h"
32-
#include "server/util/etcdctl.h"
32+
#include "server/util/etcd_member.h"
3333

3434
namespace vineyard {
3535

@@ -48,21 +48,23 @@ class EtcdLauncher {
4848
std::string const& key);
4949

5050
private:
51-
Status handleEtcdFailure(const std::string& member_name,
51+
Status handleEtcdFailure(std::unique_ptr<etcd::Client>& etcd_client,
52+
const std::string& member_name,
5253
const std::string& errMessage);
5354

5455
Status parseEndpoint();
5556

5657
std::string generateMemberName(
5758
const std::vector<std::string>& existing_members_name);
5859

59-
std::string GetMemberID() { return etcd_member_id_; }
60+
const uint64_t GetMemberID() { return etcd_member_id_; }
6061

61-
Status RemoveMember(const std::string member_id) {
62-
return etcdctl_->removeMember(member_id, etcd_endpoints_);
62+
Status RemoveMember(std::unique_ptr<etcd::Client>& etcd_client,
63+
const uint64_t& member_id) {
64+
return removeMember(etcd_client, member_id);
6365
}
6466

65-
Status UpdateEndpoint();
67+
Status UpdateEndpoint(std::unique_ptr<etcd::Client>& etcd_client);
6668

6769
Status initHostInfo();
6870

@@ -75,11 +77,9 @@ class EtcdLauncher {
7577
std::set<std::string> local_hostnames_;
7678
std::set<std::string> local_ip_addresses_;
7779

78-
std::string etcd_member_id_;
80+
uint64_t etcd_member_id_;
7981
std::string etcd_endpoints_;
8082

81-
std::shared_ptr<Etcdctl> etcdctl_;
82-
8383
std::unique_ptr<boost::process::child> etcd_proc_;
8484

8585
friend class EtcdMetaService;

0 commit comments

Comments
 (0)