|
| 1 | +/** |
| 2 | + * Licensed to the Apache Software Foundation (ASF) under one |
| 3 | + * or more contributor license agreements. See the NOTICE file |
| 4 | + * distributed with this work for additional information |
| 5 | + * regarding copyright ownership. The ASF licenses this file |
| 6 | + * to you under the Apache License, Version 2.0 (the |
| 7 | + * "License"); you may not use this file except in compliance |
| 8 | + * with the License. You may obtain a copy of the License at |
| 9 | + * |
| 10 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | + * |
| 12 | + * Unless required by applicable law or agreed to in writing, |
| 13 | + * software distributed under the License is distributed on an |
| 14 | + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 15 | + * KIND, either express or implied. See the License for the |
| 16 | + * specific language governing permissions and limitations |
| 17 | + * under the License. |
| 18 | + */ |
| 19 | +package org.apache.heron.streamlet.impl; |
| 20 | + |
| 21 | +import java.util.LinkedList; |
| 22 | +import java.util.List; |
| 23 | +import java.util.Set; |
| 24 | +import java.util.logging.Logger; |
| 25 | + |
| 26 | +import org.apache.heron.api.topology.TopologyBuilder; |
| 27 | +import org.apache.heron.streamlet.StreamletBase; |
| 28 | + |
| 29 | +import static org.apache.heron.streamlet.impl.utils.StreamletUtils.checkNotBlank; |
| 30 | +import static org.apache.heron.streamlet.impl.utils.StreamletUtils.require; |
| 31 | + |
| 32 | +/** |
| 33 | + * A Streamlet is a (potentially unbounded) ordered collection of tuples. |
| 34 | + * Streamlets originate from pub/sub systems(such Pulsar/Kafka), or from |
| 35 | + * static data(such as csv files, HDFS files), or for that matter any other |
| 36 | + * source. They are also created by transforming existing Streamlets using |
| 37 | + * operations such as map/flatMap, etc. |
| 38 | + * Besides the tuples, a Streamlet has the following properties associated with it |
| 39 | + * a) name. User assigned or system generated name to refer the streamlet |
| 40 | + * b) nPartitions. Number of partitions that the streamlet is composed of. Thus the |
| 41 | + * ordering of the tuples in a Streamlet is wrt the tuples within a partition. |
| 42 | + * This allows the system to distribute each partition to different nodes across the cluster. |
| 43 | + * A bunch of transformations can be done on Streamlets(like map/flatMap, etc.). Each |
| 44 | + * of these transformations operate on every tuple of the Streamlet and produce a new |
| 45 | + * Streamlet. One can think of a transformation attaching itself to the stream and processing |
| 46 | + * each tuple as they go by. Thus the parallelism of any operator is implicitly determined |
| 47 | + * by the number of partitions of the stream that it is operating on. If a particular |
| 48 | + * transformation wants to operate at a different parallelism, one can repartition the |
| 49 | + * Streamlet before doing the transformation. |
| 50 | + */ |
| 51 | +public abstract class StreamletBaseImpl<R> implements StreamletBase<R> { |
| 52 | + private static final Logger LOG = Logger.getLogger(StreamletBaseImpl.class.getName()); |
| 53 | + protected String name; |
| 54 | + protected int nPartitions; |
| 55 | + private List<StreamletBaseImpl<?>> children; |
| 56 | + private boolean built; |
| 57 | + |
| 58 | + /** |
| 59 | + * Only used by the implementors |
| 60 | + */ |
| 61 | + protected StreamletBaseImpl() { |
| 62 | + this.name = null; |
| 63 | + this.nPartitions = -1; |
| 64 | + this.children = new LinkedList<>(); |
| 65 | + this.built = false; |
| 66 | + } |
| 67 | + |
| 68 | + protected enum StreamletNamePrefix { |
| 69 | + CONSUMER("consumer"), |
| 70 | + COUNT("count"), |
| 71 | + CUSTOM("custom"), |
| 72 | + CUSTOM_BASIC("customBasic"), |
| 73 | + CUSTOM_WINDOW("customWindow"), |
| 74 | + FILTER("filter"), |
| 75 | + FLATMAP("flatmap"), |
| 76 | + JOIN("join"), |
| 77 | + KEYBY("keyBy"), |
| 78 | + LOGGER("logger"), |
| 79 | + MAP("map"), |
| 80 | + SOURCE("generator"), |
| 81 | + REDUCE("reduce"), |
| 82 | + REMAP("remap"), |
| 83 | + SINK("sink"), |
| 84 | + SPLIT("split"), |
| 85 | + SPOUT("spout"), |
| 86 | + SUPPLIER("supplier"), |
| 87 | + TRANSFORM("transform"), |
| 88 | + UNION("union"); |
| 89 | + |
| 90 | + private final String prefix; |
| 91 | + |
| 92 | + StreamletNamePrefix(final String prefix) { |
| 93 | + this.prefix = prefix; |
| 94 | + } |
| 95 | + |
| 96 | + @Override |
| 97 | + public String toString() { |
| 98 | + return prefix; |
| 99 | + } |
| 100 | + } |
| 101 | + |
| 102 | + /** |
| 103 | + * Sets the name of the Streamlet. |
| 104 | + * @param sName The name given by the user for this streamlet |
| 105 | + * @return Returns back the Streamlet with changed name |
| 106 | + */ |
| 107 | + @Override |
| 108 | + public StreamletBase<R> setName(String sName) { |
| 109 | + checkNotBlank(sName, "Streamlet name cannot be null/blank"); |
| 110 | + |
| 111 | + this.name = sName; |
| 112 | + return this; |
| 113 | + } |
| 114 | + |
| 115 | + /** |
| 116 | + * Gets the name of the Streamlet. |
| 117 | + * @return Returns the name of the Streamlet |
| 118 | + */ |
| 119 | + @Override |
| 120 | + public String getName() { |
| 121 | + return name; |
| 122 | + } |
| 123 | + |
| 124 | + private String defaultNameCalculator(StreamletNamePrefix prefix, Set<String> stageNames) { |
| 125 | + int index = 1; |
| 126 | + String calculatedName; |
| 127 | + while (true) { |
| 128 | + calculatedName = new StringBuilder(prefix.toString()).append(index).toString(); |
| 129 | + if (!stageNames.contains(calculatedName)) { |
| 130 | + break; |
| 131 | + } |
| 132 | + index++; |
| 133 | + } |
| 134 | + LOG.info("Calculated stage Name as " + calculatedName); |
| 135 | + return calculatedName; |
| 136 | + } |
| 137 | + |
| 138 | + /** |
| 139 | + * Sets a default unique name to the Streamlet by type if it is not set. |
| 140 | + * Otherwise, just checks its uniqueness. |
| 141 | + * @param prefix The name prefix of this streamlet |
| 142 | + * @param stageNames The collections of created streamlet/stage names |
| 143 | + */ |
| 144 | + protected void setDefaultNameIfNone(StreamletNamePrefix prefix, Set<String> stageNames) { |
| 145 | + if (getName() == null) { |
| 146 | + setName(defaultNameCalculator(prefix, stageNames)); |
| 147 | + } |
| 148 | + if (stageNames.contains(getName())) { |
| 149 | + throw new RuntimeException(String.format( |
| 150 | + "The stage name %s is used multiple times in the same topology", getName())); |
| 151 | + } |
| 152 | + stageNames.add(getName()); |
| 153 | + } |
| 154 | + |
| 155 | + /** |
| 156 | + * Sets the number of partitions of the streamlet |
| 157 | + * @param numPartitions The user assigned number of partitions |
| 158 | + * @return Returns back the Streamlet with changed number of partitions |
| 159 | + */ |
| 160 | + @Override |
| 161 | + public StreamletBase<R> setNumPartitions(int numPartitions) { |
| 162 | + require(numPartitions > 0, "Streamlet's partitions number should be > 0"); |
| 163 | + |
| 164 | + this.nPartitions = numPartitions; |
| 165 | + return this; |
| 166 | + } |
| 167 | + |
| 168 | + /** |
| 169 | + * Gets the number of partitions of this Streamlet. |
| 170 | + * @return the number of partitions of this Streamlet |
| 171 | + */ |
| 172 | + @Override |
| 173 | + public int getNumPartitions() { |
| 174 | + return nPartitions; |
| 175 | + } |
| 176 | + |
| 177 | + public <T> void addChild(StreamletBaseImpl<T> child) { |
| 178 | + children.add(child); |
| 179 | + } |
| 180 | + |
| 181 | + /** |
| 182 | + * Gets all the children of this streamlet. |
| 183 | + * Children of a streamlet are streamlets that are resulting from transformations of elements of |
| 184 | + * this and potentially other streamlets. |
| 185 | + * @return The kid streamlets |
| 186 | + */ |
| 187 | + public List<StreamletBaseImpl<?>> getChildren() { |
| 188 | + return children; |
| 189 | + } |
| 190 | + |
| 191 | + public void build(TopologyBuilder bldr, Set<String> stageNames) { |
| 192 | + if (built) { |
| 193 | + throw new RuntimeException("Logic Error While building " + getName()); |
| 194 | + } |
| 195 | + |
| 196 | + if (doBuild(bldr, stageNames)) { |
| 197 | + built = true; |
| 198 | + for (StreamletBaseImpl<?> streamlet : getChildren()) { |
| 199 | + streamlet.build(bldr, stageNames); |
| 200 | + } |
| 201 | + } |
| 202 | + } |
| 203 | + |
| 204 | + public boolean isBuilt() { |
| 205 | + return built; |
| 206 | + } |
| 207 | + |
| 208 | + public boolean isFullyBuilt() { |
| 209 | + if (!isBuilt()) { |
| 210 | + return false; |
| 211 | + } |
| 212 | + for (StreamletBaseImpl<?> child : children) { |
| 213 | + if (!child.isFullyBuilt()) { |
| 214 | + return false; |
| 215 | + } |
| 216 | + } |
| 217 | + return true; |
| 218 | + } |
| 219 | + |
| 220 | + // This is the main interface that every Streamlet implementation should implement |
| 221 | + // The main tasks are generally to make sure that appropriate names/partitions are |
| 222 | + // computed and add a spout/bolt to the TopologyBuilder |
| 223 | + protected abstract boolean doBuild(TopologyBuilder bldr, Set<String> stageNames); |
| 224 | +} |
0 commit comments