|
| 1 | +package cmd |
| 2 | + |
| 3 | +import ( |
| 4 | + "context" |
| 5 | + "fmt" |
| 6 | + "net/http" |
| 7 | + "os" |
| 8 | + "regexp" |
| 9 | + "time" |
| 10 | + |
| 11 | + "github.com/schollz/progressbar/v3" |
| 12 | + "github.com/spf13/cobra" |
| 13 | + "github.com/treeverse/lakefs/pkg/api" |
| 14 | +) |
| 15 | + |
| 16 | +const importSummaryTemplate = `Import of {{ .Objects | yellow }} object(s) into "{{.Branch}}" completed. |
| 17 | +MetaRange ID: {{.MetaRangeID|yellow}} |
| 18 | +Commit ID: {{.Commit.Id|yellow}} |
| 19 | +Message: {{.Commit.Message}} |
| 20 | +Timestamp: {{.Commit.CreationDate|date}} |
| 21 | +Parents: {{.Commit.Parents|join ", "}} |
| 22 | +` |
| 23 | + |
| 24 | +var importCmd = &cobra.Command{ |
| 25 | + Use: "import --from <object store URI> --to <lakeFS path URI> [--merge]", |
| 26 | + Short: "Import data from external source to an imported branch (with optional merge)", |
| 27 | + Run: func(cmd *cobra.Command, args []string) { |
| 28 | + flags := cmd.Flags() |
| 29 | + merge := MustBool(flags.GetBool("merge")) |
| 30 | + noProgress := MustBool(flags.GetBool("no-progress")) |
| 31 | + from := MustString(flags.GetString("from")) |
| 32 | + to := MustString(flags.GetString("to")) |
| 33 | + toURI := MustParsePathURI("to", to) |
| 34 | + message := MustString(flags.GetString("message")) |
| 35 | + metadata, err := getKV(cmd, "meta") |
| 36 | + if err != nil { |
| 37 | + DieErr(err) |
| 38 | + } |
| 39 | + |
| 40 | + ctx := cmd.Context() |
| 41 | + client := getClient() |
| 42 | + verifySourceMatchConfiguredStorage(ctx, client, from) |
| 43 | + |
| 44 | + // verify target branch exists before we try to create and import into the associated imported branch |
| 45 | + if err, ok := branchExists(ctx, client, toURI.Repository, toURI.Ref); err != nil { |
| 46 | + DieErr(err) |
| 47 | + } else if !ok { |
| 48 | + DieFmt("Target branch '%s', does not exists!", toURI.Ref) |
| 49 | + } |
| 50 | + |
| 51 | + // setup progress bar - based on `progressbar.Default` defaults + control visibility |
| 52 | + bar := newImportProgressBar(!noProgress) |
| 53 | + var ( |
| 54 | + sum int |
| 55 | + continuationToken *string |
| 56 | + after string |
| 57 | + ranges = make([]api.RangeMetadata, 0) |
| 58 | + ) |
| 59 | + for { |
| 60 | + rangeResp, err := client.IngestRangeWithResponse(ctx, toURI.Repository, api.IngestRangeJSONRequestBody{ |
| 61 | + After: after, |
| 62 | + ContinuationToken: continuationToken, |
| 63 | + FromSourceURI: from, |
| 64 | + Prepend: api.StringValue(toURI.Path), |
| 65 | + }) |
| 66 | + DieOnErrorOrUnexpectedStatusCode(rangeResp, err, http.StatusCreated) |
| 67 | + if rangeResp.JSON201 == nil { |
| 68 | + Die("Bad response from server", 1) |
| 69 | + } |
| 70 | + if rangeResp.JSON201.Range != nil { |
| 71 | + rangeInfo := *rangeResp.JSON201.Range |
| 72 | + ranges = append(ranges, rangeInfo) |
| 73 | + sum += rangeInfo.Count |
| 74 | + _ = bar.Add(rangeInfo.Count) |
| 75 | + } |
| 76 | + |
| 77 | + continuationToken = rangeResp.JSON201.Pagination.ContinuationToken |
| 78 | + after = rangeResp.JSON201.Pagination.LastKey |
| 79 | + if !rangeResp.JSON201.Pagination.HasMore { |
| 80 | + break |
| 81 | + } |
| 82 | + } |
| 83 | + _ = bar.Clear() |
| 84 | + |
| 85 | + // create metarange with all the ranges we created |
| 86 | + metaRangeResp, err := client.CreateMetaRangeWithResponse(ctx, toURI.Repository, api.CreateMetaRangeJSONRequestBody{ |
| 87 | + Ranges: ranges, |
| 88 | + }) |
| 89 | + DieOnErrorOrUnexpectedStatusCode(metaRangeResp, err, http.StatusCreated) |
| 90 | + if metaRangeResp.JSON201 == nil { |
| 91 | + Die("Bad response from server", 1) |
| 92 | + } |
| 93 | + |
| 94 | + importedBranchID := formatImportedBranchID(toURI.Ref) |
| 95 | + ensureBranchExists(ctx, client, toURI.Repository, importedBranchID, toURI.Ref) |
| 96 | + |
| 97 | + // commit metarange to the imported branch |
| 98 | + commitResp, err := client.CommitWithResponse(ctx, toURI.Repository, importedBranchID, &api.CommitParams{ |
| 99 | + SourceMetarange: metaRangeResp.JSON201.Id, |
| 100 | + }, api.CommitJSONRequestBody{ |
| 101 | + Message: message, |
| 102 | + Metadata: &api.CommitCreation_Metadata{ |
| 103 | + AdditionalProperties: metadata, |
| 104 | + }, |
| 105 | + }) |
| 106 | + DieOnErrorOrUnexpectedStatusCode(commitResp, err, http.StatusCreated) |
| 107 | + if commitResp.JSON201 == nil { |
| 108 | + Die("Bad response from server", 1) |
| 109 | + } |
| 110 | + Write(importSummaryTemplate, struct { |
| 111 | + Objects int |
| 112 | + MetaRangeID string |
| 113 | + Branch string |
| 114 | + Commit *api.Commit |
| 115 | + }{ |
| 116 | + Objects: sum, |
| 117 | + MetaRangeID: api.StringValue(metaRangeResp.JSON201.Id), |
| 118 | + Branch: importedBranchID, |
| 119 | + Commit: commitResp.JSON201, |
| 120 | + }) |
| 121 | + |
| 122 | + // merge to target branch if needed |
| 123 | + if merge { |
| 124 | + mergeImportedBranch(ctx, client, toURI.Repository, importedBranchID, toURI.Ref) |
| 125 | + } |
| 126 | + }, |
| 127 | +} |
| 128 | + |
| 129 | +func newImportProgressBar(visible bool) *progressbar.ProgressBar { |
| 130 | + const ( |
| 131 | + barSpinnerType = 14 |
| 132 | + barWidth = 10 |
| 133 | + barThrottle = 65 * time.Millisecond |
| 134 | + ) |
| 135 | + bar := progressbar.NewOptions64( |
| 136 | + -1, |
| 137 | + progressbar.OptionSetDescription("Importing"), |
| 138 | + progressbar.OptionSetWriter(os.Stderr), |
| 139 | + progressbar.OptionSetWidth(barWidth), |
| 140 | + progressbar.OptionThrottle(barThrottle), |
| 141 | + progressbar.OptionShowCount(), |
| 142 | + progressbar.OptionShowIts(), |
| 143 | + progressbar.OptionSetItsString("object"), |
| 144 | + progressbar.OptionOnCompletion(func() { |
| 145 | + _, _ = fmt.Fprint(os.Stderr, "\n") |
| 146 | + }), |
| 147 | + progressbar.OptionSpinnerType(barSpinnerType), |
| 148 | + progressbar.OptionFullWidth(), |
| 149 | + progressbar.OptionSetVisibility(visible), |
| 150 | + ) |
| 151 | + _ = bar.RenderBlank() |
| 152 | + return bar |
| 153 | +} |
| 154 | + |
| 155 | +func verifySourceMatchConfiguredStorage(ctx context.Context, client *api.ClientWithResponses, source string) { |
| 156 | + storageConfResp, err := client.GetStorageConfigWithResponse(ctx) |
| 157 | + DieOnErrorOrUnexpectedStatusCode(storageConfResp, err, http.StatusOK) |
| 158 | + storageConfig := storageConfResp.JSON200 |
| 159 | + if storageConfig == nil { |
| 160 | + Die("Bad response from server", 1) |
| 161 | + } |
| 162 | + if storageConfig.BlockstoreNamespaceValidityRegex == "" { |
| 163 | + return |
| 164 | + } |
| 165 | + matched, err := regexp.MatchString(storageConfig.BlockstoreNamespaceValidityRegex, source) |
| 166 | + if err != nil { |
| 167 | + DieErr(err) |
| 168 | + } |
| 169 | + if !matched { |
| 170 | + DieFmt("import source '%s' doesn't match current configured storage '%s'", source, storageConfig.BlockstoreType) |
| 171 | + } |
| 172 | +} |
| 173 | + |
| 174 | +func mergeImportedBranch(ctx context.Context, client *api.ClientWithResponses, repository, fromBranch, toBranch string) { |
| 175 | + mergeResp, err := client.MergeIntoBranchWithResponse(ctx, repository, fromBranch, toBranch, api.MergeIntoBranchJSONRequestBody{}) |
| 176 | + DieOnErrorOrUnexpectedStatusCode(mergeResp, err, http.StatusOK) |
| 177 | + if mergeResp.JSON200 == nil { |
| 178 | + Die("Bad response from server", 1) |
| 179 | + } |
| 180 | + Write(mergeCreateTemplate, struct { |
| 181 | + Merge FromTo |
| 182 | + Result *api.MergeResult |
| 183 | + }{ |
| 184 | + Merge: FromTo{ |
| 185 | + FromRef: fromBranch, |
| 186 | + ToRef: toBranch, |
| 187 | + }, |
| 188 | + Result: mergeResp.JSON200, |
| 189 | + }) |
| 190 | +} |
| 191 | + |
| 192 | +func branchExists(ctx context.Context, client *api.ClientWithResponses, repository string, branch string) (error, bool) { |
| 193 | + resp, err := client.GetBranchWithResponse(ctx, repository, branch) |
| 194 | + if err != nil { |
| 195 | + return err, false |
| 196 | + } |
| 197 | + if resp.JSON200 != nil { |
| 198 | + return nil, true |
| 199 | + } |
| 200 | + if resp.JSON404 != nil { |
| 201 | + return nil, false |
| 202 | + } |
| 203 | + return RetrieveError(resp, err), false |
| 204 | +} |
| 205 | + |
| 206 | +func ensureBranchExists(ctx context.Context, client *api.ClientWithResponses, repository, branch, sourceBranch string) { |
| 207 | + if err, ok := branchExists(ctx, client, repository, branch); err != nil { |
| 208 | + DieErr(err) |
| 209 | + } else if ok { |
| 210 | + return |
| 211 | + } |
| 212 | + createBranchResp, err := client.CreateBranchWithResponse(ctx, repository, api.CreateBranchJSONRequestBody{ |
| 213 | + Name: branch, |
| 214 | + Source: sourceBranch, |
| 215 | + }) |
| 216 | + DieOnErrorOrUnexpectedStatusCode(createBranchResp, err, http.StatusCreated) |
| 217 | +} |
| 218 | + |
| 219 | +func formatImportedBranchID(branch string) string { |
| 220 | + return "_" + branch + "_imported" |
| 221 | +} |
| 222 | + |
| 223 | +//nolint:gochecknoinits,gomnd |
| 224 | +func init() { |
| 225 | + importCmd.Flags().String("from", "", "prefix to read from (e.g. \"s3://bucket/sub/path/\"). must not be in a storage namespace") |
| 226 | + _ = importCmd.MarkFlagRequired("from") |
| 227 | + importCmd.Flags().String("to", "", "lakeFS path to load objects into (e.g. \"lakefs://repo/branch/sub/path/\")") |
| 228 | + _ = importCmd.MarkFlagRequired("to") |
| 229 | + importCmd.Flags().Bool("merge", false, "merge imported branch into target branch") |
| 230 | + importCmd.Flags().Bool("no-progress", false, "switch off the progress output") |
| 231 | + importCmd.Flags().StringP("message", "m", "Import objects", "commit message") |
| 232 | + importCmd.Flags().StringSlice("meta", []string{}, "key value pair in the form of key=value") |
| 233 | + rootCmd.AddCommand(importCmd) |
| 234 | +} |
0 commit comments