Skip to content

Commit 3b84cfd

Browse files
liuzhenyingaiyin.lzy
and
aiyin.lzy
authored
[桑基图] 优化桑基图 cutoffcircle 逻辑,降低时间复杂度 (#2571)
* feat: 优化桑基图 cutoffcircle 函数性能 * feat: remove unused import * feat: 增加 removeData log 信息 Co-authored-by: aiyin.lzy <[email protected]>
1 parent 85bbcb2 commit 3b84cfd

File tree

2 files changed

+97
-34
lines changed

2 files changed

+97
-34
lines changed

__tests__/unit/plots/sankey/circle-spec.ts

+12
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,18 @@ describe('sankey ', () => {
8383
{ source: 'b', target: 'c' },
8484
]);
8585

86+
// 自定义 sourceField targetField
87+
const customData = [
88+
{ from: 'a', to: 'c' },
89+
{ from: 'b', to: 'c' },
90+
{ from: 'c', to: 'a' },
91+
];
92+
93+
expect(cutoffCircle(customData, 'from', 'to')).toEqual([
94+
{ from: 'a', to: 'c' },
95+
{ from: 'b', to: 'c' },
96+
]);
97+
8698
// 稍微正式一点的数据
8799
expect(cutoffCircle(ENERGY_RELATIONS, 'source', 'target')).toEqual(ENERGY_RELATIONS);
88100
expect(cutoffCircle(ENERGY_RELATIONS, 'source', 'target')).not.toBe(ENERGY_RELATIONS);

src/plots/sankey/circle.ts

+85-34
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,108 @@
1-
import { each, size } from '@antv/util';
2-
import { Data, Datum } from '../../types';
1+
import { isArray } from '@antv/util';
2+
import { Data } from '../../types';
33

44
/**
5-
* 是否有环的判断依据是,当前 source 对应的 target 是 source 的父节点
6-
* @param circleCache
7-
* @param source
8-
* @param target
5+
* 根据 edges 获取对应的 node 结构
96
*/
10-
function hasCircle(circleCache: Map<string, string[]>, source: string[], target: string): boolean {
11-
// 父元素为空,则表示已经到头了!
12-
if (size(source) === 0) return false;
13-
// target 在父元素路径上,所以形成环
14-
if (source.includes(target)) return true;
15-
16-
// 递归
17-
return source.some((s: string) => hasCircle(circleCache, circleCache.get(s), target));
7+
/**
 * Collects the distinct node values referenced by a list of edges.
 *
 * Nodes are returned in order of first appearance; for each edge the source
 * is considered before the target.
 *
 * @param edges - edge records to scan
 * @param sourceField - key under which each record stores its source node
 * @param targetField - key under which each record stores its target node
 * @returns the unique node values, in first-appearance order
 */
export function getNodes(edges: Data, sourceField: string, targetField: string): string[] {
  // A Set keeps insertion order and gives constant-time membership checks,
  // so the scan is linear instead of rescanning an array for every endpoint.
  const seen = new Set<string>();
  edges.forEach((e) => {
    seen.add(e[sourceField] as string);
    seen.add(e[targetField] as string);
  });
  return Array.from(seen);
}
21+
22+
/**
23+
* 根据 edges 获取对应的 dfs 邻接矩阵
24+
*/
25+
/**
 * Builds the adjacency matrix of a directed graph.
 *
 * `matrix[source][target]` is `1` when at least one edge goes from `source`
 * to `target`, and `0` for every other pair of names listed in `nodes`.
 *
 * @param edges - edge records
 * @param nodes - node names that index the rows and columns of the matrix
 * @param sourceField - key under which each record stores its source node
 * @param targetField - key under which each record stores its target node
 * @returns the adjacency matrix, keyed by node name on both levels
 */
export function getMatrix(
  edges: Data,
  nodes: string[],
  sourceField: string,
  targetField: string
): Record<string, Record<string, number>> {
  // Prototype-less objects: node names such as "__proto__" become ordinary
  // own keys instead of touching (or reading from) Object.prototype.
  const graphMatrix: Record<string, Record<string, number>> = Object.create(null);

  // Returns the row for `name`, creating a zero-filled one when it is missing.
  const ensureRow = (name: string): Record<string, number> => {
    let row = graphMatrix[name];
    if (row === undefined) {
      row = Object.create(null) as Record<string, number>;
      for (const next of nodes) {
        row[next] = 0;
      }
      graphMatrix[name] = row;
    }
    return row;
  };

  nodes.forEach((name) => ensureRow(name));

  edges.forEach((edge) => {
    // A source that is not listed in `nodes` gets its own row rather than
    // raising a TypeError, mirroring how unlisted targets are accepted.
    ensureRow(edge[sourceField])[edge[targetField]] = 1;
  });

  return graphMatrix;
}
1946

2047
/**
 * Breaks every cycle in sankey edge data with a depth-first search, dropping
 * the edges that close a cycle (data is lost) while keeping the remaining
 * records in their original order.
 *
 * Nodes are visited in order of first appearance in `edges` (source before
 * target), and the outgoing edges of a node are followed in that same node
 * order. An edge is dropped when it points at a node that is still on the
 * current DFS path. Every record sharing the source/target of a dropped edge
 * is removed.
 *
 * @param edges - edge records; a non-array value yields an empty array
 * @param sourceField - key under which each record stores its source node
 * @param targetField - key under which each record stores its target node
 * @returns a new array holding the records that do not close a cycle
 */
export function cutoffCircle(edges: Data, sourceField: string, targetField: string): Data {
  if (!Array.isArray(edges)) return [];

  // Human-readable labels of the dropped edges; used only for the warning.
  const removedData: string[] = [];

  // Node names are interned to dense indices in first-appearance order.
  const indexByName = new Map<string, number>();
  const names: string[] = [];
  const outgoing: Array<Set<number>> = [];
  const intern = (value: unknown): number => {
    const name = String(value);
    let index = indexByName.get(name);
    if (index === undefined) {
      index = names.length;
      indexByName.set(name, index);
      names.push(name);
      outgoing.push(new Set<number>());
    }
    return index;
  };

  // Endpoints of each record as node indices, aligned with `edges`.
  const endpoints = edges.map((edge): [number, number] => {
    const source = intern(edge[sourceField]);
    const target = intern(edge[targetField]);
    outgoing[source].add(target);
    return [source, target];
  });

  const nodeCount = names.length;
  // Sorting by node index keeps the traversal order of a row-by-row scan of
  // an adjacency matrix, so the same edges are cut, without the V×V storage.
  const neighbours = outgoing.map((targets) => Array.from(targets).sort((a, b) => a - b));

  // Visit state per node: 0 = not visited, 1 = on the current path, -1 = finished.
  const UNVISITED = 0;
  const VISITING = 1;
  const DONE = -1;
  const visited = new Array<number>(nodeCount).fill(UNVISITED);

  // Dropped edges keyed by `source * nodeCount + target`. Unlike a joined
  // string such as "a_b", an index pair cannot collide for different edges.
  const removedKeys = new Set<number>();

  // Explicit stack instead of recursion, so long chains cannot overflow the call stack.
  const stack: Array<{ node: number; cursor: number }> = [];

  for (let root = 0; root < nodeCount; root++) {
    if (visited[root] !== UNVISITED) continue;

    visited[root] = VISITING;
    stack.push({ node: root, cursor: 0 });

    while (stack.length > 0) {
      const frame = stack[stack.length - 1];
      const targets = neighbours[frame.node];

      if (frame.cursor >= targets.length) {
        // All outgoing edges handled: the node leaves the current path.
        visited[frame.node] = DONE;
        stack.pop();
        continue;
      }

      const target = targets[frame.cursor++];
      if (visited[target] === VISITING) {
        // The target is on the current path, so this edge closes a cycle.
        removedKeys.add(frame.node * nodeCount + target);
        removedData.push(`${names[frame.node]}_${names[target]}`);
      } else if (visited[target] === UNVISITED) {
        visited[target] = VISITING;
        stack.push({ node: target, cursor: 0 });
      }
      // A finished target needs no further work.
    }
  }

  if (removedData.length !== 0) {
    console.warn(`sankey data contains circle, ${removedData.length} records removed.`, removedData);
  }

  // Keep every record whose (source, target) pair was not cut.
  return edges.filter((_, i) => !removedKeys.has(endpoints[i][0] * nodeCount + endpoints[i][1]));
}

0 commit comments

Comments
 (0)