|
26 | 26 | from typing import List
|
27 | 27 | from typing import Mapping
|
28 | 28 | from typing import Union
|
| 29 | +from urllib.parse import urlparse |
29 | 30 |
|
30 | 31 | try:
|
31 | 32 | import vineyard
|
@@ -103,9 +104,6 @@ def save_to(self, path, **kwargs):
|
103 | 104 | def load_from(cls, path, sess, **kwargs):
|
104 | 105 | raise NotImplementedError
|
105 | 106 |
|
106 |
| - def archive(self, path, **kwargs): |
107 |
| - raise NotImplementedError |
108 |
| - |
109 | 107 | @abstractmethod
|
110 | 108 | def project(self, vertices, edges):
|
111 | 109 | raise NotImplementedError
|
@@ -433,16 +431,6 @@ def to_dataframe(self, selector, vertex_range=None):
|
433 | 431 | op = dag_utils.graph_to_dataframe(self, selector, vertex_range)
|
434 | 432 | return ResultDAGNode(self, op)
|
435 | 433 |
|
436 |
| - def archive(self, path): |
437 |
| - """Archive the graph to gar format with graph yaml file path. |
438 |
| -
|
439 |
| - Args: |
440 |
| - path (str): The graph yaml file path describe how to archive the graph. |
441 |
| - """ |
442 |
| - check_argument(self.graph_type == graph_def_pb2.ARROW_PROPERTY) |
443 |
| - op = dag_utils.archive_graph(self, path) |
444 |
| - return ArchivedGraph(self._session, op) |
445 |
| - |
446 | 434 | def to_directed(self):
|
447 | 435 | op = dag_utils.to_directed(self)
|
448 | 436 | graph_dag_node = GraphDAGNode(self._session, op)
|
@@ -1082,49 +1070,101 @@ def _check_unmodified(self):
|
1082 | 1070 | self.signature == self._saved_signature, "Graph has been modified!"
|
1083 | 1071 | )
|
1084 | 1072 |
|
1085 |
| - def save_to(self, path, **kwargs): |
1086 |
| - """Serialize graph to a location. |
1087 |
| - The meta and data of graph is dumped to specified location, |
1088 |
| - and can be restored by `Graph.load_from` in other sessions. |
1089 |
| -
|
1090 |
| - Each worker will write a `path_{worker_id}.meta` file and |
1091 |
| - a `path_{worker_id}` file to storage. |
1092 |
| - Args: |
1093 |
| - path (str): supported storages are local, hdfs, oss, s3 |
1094 |
| - """ |
1095 |
| - |
1096 |
| - op = dag_utils.save_graph_to(self, path, self._vineyard_id, **kwargs) |
1097 |
| - self._session.dag.add_op(op) |
1098 |
| - return self._session._wrapper(op) |
| 1073 | + @staticmethod |
| 1074 | + def _load_from_graphar(path, sess, **kwargs): |
| 1075 | + # graphar now only support global vertex map. |
| 1076 | + vertex_map = utils.vertex_map_type_to_enum("global") |
| 1077 | + config = { |
| 1078 | + types_pb2.OID_TYPE: utils.s_to_attr( |
| 1079 | + "int64_t" |
| 1080 | + ), # grahar use vertex index as oid, so it always be int64_t |
| 1081 | + types_pb2.VID_TYPE: utils.s_to_attr("uint64_t"), |
| 1082 | + types_pb2.IS_FROM_VINEYARD_ID: utils.b_to_attr(False), |
| 1083 | + types_pb2.IS_FROM_GAR: utils.b_to_attr(True), |
| 1084 | + types_pb2.VERTEX_MAP_TYPE: utils.i_to_attr(vertex_map), |
| 1085 | + types_pb2.COMPACT_EDGES: utils.b_to_attr(False), |
| 1086 | + types_pb2.GRAPH_INFO_PATH: utils.s_to_attr(path), |
| 1087 | + } |
| 1088 | + op = dag_utils.create_graph( |
| 1089 | + sess.session_id, graph_def_pb2.ARROW_PROPERTY, inputs=[], attrs=config |
| 1090 | + ) |
| 1091 | + return sess._wrapper(GraphDAGNode(sess, op)) |
1099 | 1092 |
|
1100 | 1093 | @classmethod
|
1101 |
| - def load_from(cls, path, sess, **kwargs): |
1102 |
| - """Construct a `Graph` by deserialize from `path`. |
1103 |
| - It will read all serialization files, which is dumped by |
1104 |
| - `Graph.serialize`. |
1105 |
| - If any serialize file doesn't exists or broken, will error out. |
| 1094 | + def load_from(cls, uristring, sess=None, **kwargs): |
| 1095 | + """Load a ArrowProperty graph from with a certain data source. The data source |
| 1096 | + can be vineyard serialized files or graphar files. |
1106 | 1097 |
|
1107 | 1098 | Args:
|
1108 |
| - path (str): Path contains the serialization files. |
1109 |
| - sess (`graphscope.Session`): The target session |
1110 |
| - that the graph will be construct in |
1111 |
| -
|
| 1099 | + uristring (str): URI contains the description of the data source or |
| 1100 | + path contains the serialization files, |
| 1101 | + example: "graphar+file:///tmp/graphar/xxx" |
| 1102 | + sess (`graphscope.Session`): The target session that the graph |
| 1103 | + will be construct, if None, use the default session. |
| 1104 | + kwargs: Other arguments that will be passed to the data source loader. |
1112 | 1105 | Returns:
|
1113 |
| - `Graph`: A new graph object. Schema and data is supposed to be |
1114 |
| - identical with the one that called serialized method. |
| 1106 | + `Graph`: A new graph object. |
1115 | 1107 | """
|
1116 |
| - op = dag_utils.load_graph_from(path, sess, **kwargs) |
1117 |
| - return sess._wrapper(GraphDAGNode(sess, op)) |
| 1108 | + from graphscope.client.session import get_default_session |
| 1109 | + |
| 1110 | + if sess is None: |
| 1111 | + sess = get_default_session() |
| 1112 | + uri = urlparse(uristring) |
| 1113 | + if uri.scheme and "+" in uri.scheme: |
| 1114 | + source = uri.scheme.split("+")[0] |
| 1115 | + path = uri.scheme.split("+")[-1] + "://" + uri.netloc + uri.path |
| 1116 | + if source == "graphar": |
| 1117 | + return cls._load_from_graphar(path, sess) |
| 1118 | + else: |
| 1119 | + raise ValueError("Unknown source: %s" % source) |
| 1120 | + else: |
| 1121 | + # not a uri string, assume it is a path for deserialization |
| 1122 | + storage_options = kwargs.pop("storage_options", {}) |
| 1123 | + deserialization_options = kwargs.pop("deserialization_options", {}) |
| 1124 | + op = dag_utils.deserialize_graph( |
| 1125 | + uristring, sess, storage_options, deserialization_options |
| 1126 | + ) |
| 1127 | + return sess._wrapper(GraphDAGNode(sess, op)) |
1118 | 1128 |
|
1119 |
| - def archive(self, path): |
1120 |
| - """Archive graph gar format files base on the graph info. |
1121 |
| - The meta and data of graph is dumped to specified location, |
1122 |
| - and can be restored by `Graph.deserialize` in other sessions. |
| 1129 | + def save_to( |
| 1130 | + self, |
| 1131 | + path, |
| 1132 | + format="serialization", |
| 1133 | + **kwargs, |
| 1134 | + ): |
| 1135 | + """Save graph to specified location with specified format. |
1123 | 1136 |
|
1124 | 1137 | Args:
|
1125 |
| - path (str): the graph info file path. |
| 1138 | + path (str): the directory path to write graph. |
| 1139 | + format (str): the format to write graph, default is "serialization". |
| 1140 | + kwargs: Other arguments that will be passed to the data source |
| 1141 | + saver. |
| 1142 | +
|
| 1143 | + Return (dict): A dict contains the type and uri string of output data. |
1126 | 1144 | """
|
1127 |
| - return self._session._wrapper(self._graph_node.archive(path)) |
| 1145 | + if format == "graphar": |
| 1146 | + graphar_options = kwargs.pop("graphar_options", {}) |
| 1147 | + graph_info_path = utils.generate_graphar_info_from_schema( |
| 1148 | + path, |
| 1149 | + self._schema, |
| 1150 | + graphar_options, |
| 1151 | + ) |
| 1152 | + op = dag_utils.save_to_graphar(self, graph_info_path) |
| 1153 | + self._session.dag.add_op(op) |
| 1154 | + self._session._wrapper(op) |
| 1155 | + return {"type": format, "uri": "graphar+" + graph_info_path} |
| 1156 | + elif format == "serialization": |
| 1157 | + # serialize graph |
| 1158 | + storage_options = kwargs.pop("storage_options", {}) |
| 1159 | + serialization_options = kwargs.pop("serialization_options", {}) |
| 1160 | + op = dag_utils.serialize_graph( |
| 1161 | + self, path, storage_options, serialization_options |
| 1162 | + ) |
| 1163 | + self._session.dag.add_op(op) |
| 1164 | + self._session._wrapper(op) |
| 1165 | + return {"type": format, "uri": path} |
| 1166 | + else: |
| 1167 | + raise ValueError("Unknown format: %s" % format) |
1128 | 1168 |
|
1129 | 1169 | def add_vertices(
|
1130 | 1170 | self, vertices, label="_", properties=None, vid_field: Union[int, str] = 0
|
@@ -1201,14 +1241,3 @@ def __init__(self, session, op):
|
1201 | 1241 | self._op = op
|
1202 | 1242 | # add op to dag
|
1203 | 1243 | self._session.dag.add_op(self._op)
|
1204 |
| - |
1205 |
| - |
1206 |
| -class ArchivedGraph(DAGNode): |
1207 |
| - """Archived graph node in a DAG""" |
1208 |
| - |
1209 |
| - def __init__(self, session, op): |
1210 |
| - super().__init__() |
1211 |
| - self._session = session |
1212 |
| - self._op = op |
1213 |
| - # add op to dag |
1214 |
| - self._session.dag.add_op(self._op) |
|
0 commit comments