|
| 1 | +(* |
| 2 | + * Copyright (c) 2006-2009 Citrix Systems Inc. |
| 3 | + * Copyright (c) 2010 Thomas Gazagnaire <[email protected]> |
| 4 | + * Copyright (c) 2014-2016 Anil Madhavapeddy <[email protected]> |
| 5 | + * Copyright (c) 2016 David Kaloper Meršinjak |
| 6 | + * Copyright (c) 2018 Romain Calascibetta <[email protected]> |
| 7 | + * Copyright (c) 2021 pukkamustard <[email protected]> |
| 8 | + * |
| 9 | + * Permission to use, copy, modify, and distribute this software for any |
| 10 | + * purpose with or without fee is hereby granted, provided that the above |
| 11 | + * copyright notice and this permission notice appear in all copies. |
| 12 | + * |
| 13 | + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
| 14 | + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
| 15 | + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
| 16 | + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| 17 | + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
| 18 | + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
| 19 | + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| 20 | + * |
| 21 | + *) |
| 22 | + |
| 23 | +type alphabet = { emap : int array; dmap : int array } |
| 24 | +type sub = string * int * int |
| 25 | + |
| 26 | +let ( // ) x y = |
| 27 | + if y < 1 then raise Division_by_zero; |
| 28 | + if x > 0 then 1 + ((x - 1) / y) else 0 |
| 29 | +[@@inline] |
| 30 | + |
| 31 | +let unsafe_get_uint8 input off = String.unsafe_get input off |> Char.code |
| 32 | +let unsafe_set_uint8 input off v = v |> Char.chr |> Bytes.unsafe_set input off |
| 33 | +let none = -1 |
| 34 | + |
| 35 | +(* We mostly want to have an optional array for [dmap] (e.g. [int option |
| 36 | + array]). So we consider the [none] value as [-1]. *) |
| 37 | + |
| 38 | +let make_alphabet alphabet = |
| 39 | + if String.length alphabet <> 32 then |
| 40 | + invalid_arg "Length of alphabet must be 32"; |
| 41 | + if String.contains alphabet '=' then |
| 42 | + invalid_arg "Alphabet can not contain padding character"; |
| 43 | + let emap = |
| 44 | + Array.init (String.length alphabet) (fun i -> Char.code alphabet.[i]) |
| 45 | + in |
| 46 | + let dmap = Array.make 256 none in |
| 47 | + String.iteri (fun idx chr -> dmap.(Char.code chr) <- idx) alphabet; |
| 48 | + { emap; dmap } |
| 49 | + |
| 50 | +let length_alphabet { emap; _ } = Array.length emap |
| 51 | +let alphabet { emap; _ } = emap |
| 52 | +let default_alphabet = make_alphabet "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567" |
| 53 | +let padding = int_of_char '=' |
| 54 | +let error_msgf fmt = Format.ksprintf (fun err -> Error (`Msg err)) fmt |
| 55 | + |
| 56 | +let encode_sub pad { emap; _ } ?(off = 0) ?len input = |
| 57 | + let len = |
| 58 | + match len with Some len -> len | None -> String.length input - off |
| 59 | + in |
| 60 | + |
| 61 | + if len < 0 || off < 0 || off > String.length input - len then |
| 62 | + error_msgf "Invalid bounds" |
| 63 | + else |
| 64 | + let n = len in |
| 65 | + let n' = n // 5 * 8 in |
| 66 | + let res = Bytes.make n' (Char.chr 0) in |
| 67 | + |
| 68 | + let emap i = Array.unsafe_get emap i in |
| 69 | + |
| 70 | + (* the bit magic - takes 5 bytes and reads 5-bits at a time *) |
| 71 | + let emit b1 b2 b3 b4 b5 i = |
| 72 | + unsafe_set_uint8 res i (emap ((0b11111000 land b1) lsr 3)); |
| 73 | + unsafe_set_uint8 res (i + 1) |
| 74 | + (emap (((0b00000111 land b1) lsl 2) lor ((0b11000000 land b2) lsr 6))); |
| 75 | + unsafe_set_uint8 res (i + 2) (emap ((0b00111110 land b2) lsr 1)); |
| 76 | + unsafe_set_uint8 res (i + 3) |
| 77 | + (emap (((0b00000001 land b2) lsl 4) lor ((0b11110000 land b3) lsr 4))); |
| 78 | + unsafe_set_uint8 res (i + 4) |
| 79 | + (emap (((0b00001111 land b3) lsl 1) lor ((0b10000000 land b4) lsr 7))); |
| 80 | + unsafe_set_uint8 res (i + 5) (emap ((0b01111100 land b4) lsr 2)); |
| 81 | + unsafe_set_uint8 res (i + 6) |
| 82 | + (emap (((0b00000011 land b4) lsl 3) lor ((0b11100000 land b5) lsr 5))); |
| 83 | + unsafe_set_uint8 res (i + 7) (emap (0b00011111 land b5)) |
| 84 | + in |
| 85 | + |
| 86 | + let rec enc j i = |
| 87 | + if i = len then () |
| 88 | + else if i = n - 1 then emit (unsafe_get_uint8 input (off + i)) 0 0 0 0 j |
| 89 | + else if i = n - 2 then |
| 90 | + emit |
| 91 | + (unsafe_get_uint8 input (off + i)) |
| 92 | + (unsafe_get_uint8 input (off + i + 1)) |
| 93 | + 0 0 0 j |
| 94 | + else if i = n - 3 then |
| 95 | + emit |
| 96 | + (unsafe_get_uint8 input (off + i)) |
| 97 | + (unsafe_get_uint8 input (off + i + 1)) |
| 98 | + (unsafe_get_uint8 input (off + i + 2)) |
| 99 | + 0 0 j |
| 100 | + else if i = n - 4 then |
| 101 | + emit |
| 102 | + (unsafe_get_uint8 input (off + i)) |
| 103 | + (unsafe_get_uint8 input (off + i + 1)) |
| 104 | + (unsafe_get_uint8 input (off + i + 2)) |
| 105 | + (unsafe_get_uint8 input (off + i + 3)) |
| 106 | + 0 j |
| 107 | + else ( |
| 108 | + emit |
| 109 | + (unsafe_get_uint8 input (off + i)) |
| 110 | + (unsafe_get_uint8 input (off + i + 1)) |
| 111 | + (unsafe_get_uint8 input (off + i + 2)) |
| 112 | + (unsafe_get_uint8 input (off + i + 3)) |
| 113 | + (unsafe_get_uint8 input (off + i + 4)) |
| 114 | + j; |
| 115 | + enc (j + 8) (i + 5)) |
| 116 | + in |
| 117 | + |
| 118 | + let rec unsafe_fix = function |
| 119 | + | 0 -> () |
| 120 | + | i -> |
| 121 | + unsafe_set_uint8 res (n' - i) padding; |
| 122 | + unsafe_fix (i - 1) |
| 123 | + in |
| 124 | + |
| 125 | + enc 0 0; |
| 126 | + |
| 127 | + (* amount of padding required *) |
| 128 | + let pad_to_write = |
| 129 | + match n mod 5 with 0 -> 0 | 1 -> 6 | 2 -> 4 | 3 -> 3 | 4 -> 1 | _ -> 0 |
| 130 | + in |
| 131 | + |
| 132 | + if pad then ( |
| 133 | + unsafe_fix pad_to_write; |
| 134 | + Ok (Bytes.unsafe_to_string res, 0, n')) |
| 135 | + else Ok (Bytes.unsafe_to_string res, 0, n' - pad_to_write) |
| 136 | + |
| 137 | +let encode ?(pad = true) ?(alphabet = default_alphabet) ?off ?len input = |
| 138 | + match encode_sub pad alphabet ?off ?len input with |
| 139 | + | Ok (res, off, len) -> Ok (String.sub res off len) |
| 140 | + | Error _ as err -> err |
| 141 | + |
| 142 | +let encode_string ?pad ?alphabet input = |
| 143 | + match encode ?pad ?alphabet input with |
| 144 | + | Ok res -> res |
| 145 | + | Error _ -> assert false |
| 146 | + |
| 147 | +let encode_sub ?(pad = true) ?(alphabet = default_alphabet) ?off ?len input = |
| 148 | + encode_sub pad alphabet ?off ?len input |
| 149 | + |
| 150 | +let encode_exn ?pad ?alphabet ?off ?len input = |
| 151 | + match encode ?pad ?alphabet ?off ?len input with |
| 152 | + | Ok v -> v |
| 153 | + | Error (`Msg err) -> invalid_arg err |
| 154 | + |
| 155 | +let decode_sub { dmap; _ } ?(off = 0) ?len input = |
| 156 | + let len = |
| 157 | + match len with Some len -> len | None -> String.length input - off |
| 158 | + in |
| 159 | + |
| 160 | + if len < 0 || off < 0 || off > String.length input - len then |
| 161 | + error_msgf "Invalid bounds" |
| 162 | + else |
| 163 | + let n = len // 8 * 8 in |
| 164 | + let n' = n // 8 * 5 in |
| 165 | + let res = Bytes.create n' in |
| 166 | + |
| 167 | + let get_uint8 t i = |
| 168 | + if i < len then Char.code (String.unsafe_get t (off + i)) else padding |
| 169 | + in |
| 170 | + |
| 171 | + let set_uint8 t off v = |
| 172 | + (* Format.printf "set_uint8 %d\n" (v land 0xff); *) |
| 173 | + if off < 0 || off >= Bytes.length t then () |
| 174 | + else unsafe_set_uint8 t off (v land 0xff) |
| 175 | + in |
| 176 | + |
| 177 | + let emit b0 b1 b2 b3 b4 b5 b6 b7 j = |
| 178 | + set_uint8 res j ((b0 lsl 3) lor (b1 lsr 2)); |
| 179 | + set_uint8 res (j + 1) ((b1 lsl 6) lor (b2 lsl 1) lor (b3 lsr 4)); |
| 180 | + set_uint8 res (j + 2) ((b3 lsl 4) lor (b4 lsr 1)); |
| 181 | + set_uint8 res (j + 3) ((b4 lsl 7) lor (b5 lsl 2) lor (b6 lsr 3)); |
| 182 | + set_uint8 res (j + 4) ((b6 lsl 5) lor b7) |
| 183 | + in |
| 184 | + |
| 185 | + let dmap i = Array.unsafe_get dmap i in |
| 186 | + |
| 187 | + let get_uint8_with_padding t i padding = |
| 188 | + let x = get_uint8 t i in |
| 189 | + if x = 61 then (0, padding) |
| 190 | + else |
| 191 | + let v = dmap x in |
| 192 | + if v >= 0 then (v, 0) else raise Not_found |
| 193 | + in |
| 194 | + |
| 195 | + let rec dec j i = |
| 196 | + if i = n then 0 |
| 197 | + else |
| 198 | + let b0, pad0 = get_uint8_with_padding input i 5 in |
| 199 | + let b1, pad1 = get_uint8_with_padding input (i + 1) 5 in |
| 200 | + let b2, pad2 = get_uint8_with_padding input (i + 2) 4 in |
| 201 | + let b3, pad3 = get_uint8_with_padding input (i + 3) 4 in |
| 202 | + let b4, pad4 = get_uint8_with_padding input (i + 4) 3 in |
| 203 | + let b5, pad5 = get_uint8_with_padding input (i + 5) 2 in |
| 204 | + let b6, pad6 = get_uint8_with_padding input (i + 6) 2 in |
| 205 | + let b7, pad7 = get_uint8_with_padding input (i + 7) 1 in |
| 206 | + let pad = |
| 207 | + List.fold_left max 0 |
| 208 | + [ pad0; pad1; pad2; pad3; pad4; pad5; pad6; pad7 ] |
| 209 | + in |
| 210 | + |
| 211 | + (* Format.printf "emit %d %d %d %d %d %d %d %d\n" b0 b1 b2 b3 b4 b5 b6 b7; *) |
| 212 | + emit b0 b1 b2 b3 b4 b5 b6 b7 j; |
| 213 | + if pad == 0 then dec (j + 5) (i + 8) else pad |
| 214 | + in |
| 215 | + |
| 216 | + match dec 0 0 with |
| 217 | + | pad -> Ok (Bytes.unsafe_to_string res, 0, n' - pad) |
| 218 | + | exception Not_found -> error_msgf "Malformed input" |
| 219 | + |
| 220 | +let decode ?(alphabet = default_alphabet) ?off ?len input = |
| 221 | + match decode_sub alphabet ?off ?len input with |
| 222 | + | Ok (res, off, len) -> Ok (String.sub res off len) |
| 223 | + | Error _ as err -> err |
| 224 | + |
| 225 | +let decode_sub ?(alphabet = default_alphabet) ?off ?len input = |
| 226 | + decode_sub alphabet ?off ?len input |
| 227 | + |
| 228 | +let decode_exn ?alphabet ?off ?len input = |
| 229 | + match decode ?alphabet ?off ?len input with |
| 230 | + | Ok res -> res |
| 231 | + | Error (`Msg err) -> invalid_arg err |
0 commit comments