Skip to content

[桑基图] 优化桑基图 cutoffcircle 逻辑,降低时间复杂度 #2571

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 24, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions __tests__/unit/plots/sankey/circle-spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,18 @@ describe('sankey ', () => {
{ source: 'b', target: 'c' },
]);

// 自定义 sourceField targetField
const customData = [
{ from: 'a', to: 'c' },
{ from: 'b', to: 'c' },
{ from: 'c', to: 'a' },
];

expect(cutoffCircle(customData, 'from', 'to')).toEqual([
{ from: 'a', to: 'c' },
{ from: 'b', to: 'c' },
]);

// 稍微正式一点的数据
expect(cutoffCircle(ENERGY_RELATIONS, 'source', 'target')).toEqual(ENERGY_RELATIONS);
expect(cutoffCircle(ENERGY_RELATIONS, 'source', 'target')).not.toBe(ENERGY_RELATIONS);
Expand Down
123 changes: 85 additions & 38 deletions src/plots/sankey/circle.ts
Original file line number Diff line number Diff line change
@@ -1,57 +1,104 @@
import { each, size } from '@antv/util';
import { Data, Datum } from '../../types';
import { isArray } from '@antv/util';
import { Data } from '../../types';

/**
* 是否有环的判断依据是,当前 source 对应的 target 是 source 的父节点
* @param circleCache
* @param source
* @param target
* 根据 edges 获取对应的 node 结构
*/
function hasCircle(circleCache: Map<string, string[]>, source: string[], target: string): boolean {
// 父元素为空,则表示已经到头了!
if (size(source) === 0) return false;
// target 在父元素路径上,所以形成环
if (source.includes(target)) return true;

// 递归
return source.some((s: string) => hasCircle(circleCache, circleCache.get(s), target));
/**
 * Collects the distinct node ids referenced by a list of edges.
 *
 * Ids are returned in first-appearance order (within one edge the source comes
 * before the target). A Set performs the de-duplication, so the scan is linear
 * in the number of edges instead of doing an array lookup for every endpoint.
 *
 * @param edges records describing the edges
 * @param sourceField key under which an edge stores its source node id
 * @param targetField key under which an edge stores its target node id
 * @returns the distinct node ids, in first-appearance order
 */
export function getNodes(edges: Data, sourceField: string, targetField: string): string[] {
  // A Set iterates in insertion order, which preserves the first-appearance ordering.
  const seen = new Set<string>();
  for (const edge of edges) {
    seen.add(edge[sourceField] as string);
    seen.add(edge[targetField] as string);
  }
  return Array.from(seen);
}

/**
 * Builds a dense adjacency matrix for the graph described by `edges`.
 *
 * Every ordered pair taken from `nodes` gets a cell initialised to 0; a cell
 * becomes 1 when at least one edge runs from the row node to the column node.
 *
 * @param edges records describing the edges
 * @param nodes node ids used as both row and column keys
 * @param sourceField key under which an edge stores its source node id
 * @param targetField key under which an edge stores its target node id
 * @returns nested lookup addressed as `matrix[source][target]`
 */
export function getMatrix(
  edges: Data,
  nodes: string[],
  sourceField: string,
  targetField: string
): Record<string, Record<string, number>> {
  const matrix: Record<string, Record<string, number>> = {};

  // Start from an all-zero square matrix keyed by node id.
  for (const rowId of nodes) {
    const row: Record<string, number> = {};
    for (const columnId of nodes) {
      row[columnId] = 0;
    }
    matrix[rowId] = row;
  }

  // Flag every source -> target pair that occurs in the data.
  for (const edge of edges) {
    const from = edge[sourceField];
    const to = edge[targetField];
    matrix[from][to] = 1;
  }

  return matrix;
}

/**
 * Removes the records that close a cycle in Sankey data, keeping the surviving
 * records in their original order (the removed data is lost).
 *
 * The graph is walked depth-first. Nodes are started in first-appearance order
 * and a node's successors are visited in that same order. Every edge that
 * points back to a node still on the current DFS path is cut, and all records
 * sharing that source/target pair are dropped.
 *
 * Cut edges are tracked per (source, target) pair rather than as a concatenated
 * string, so distinct edges such as 'bc' -> 'a' and 'b' -> 'ca' cannot be
 * mistaken for each other.
 *
 * @param edges records describing the edges
 * @param sourceField key under which an edge stores its source node id
 * @param targetField key under which an edge stores its target node id
 * @returns a new array without the cycle-closing records; `[]` when `edges` is not an array
 */
export function cutoffCircle(edges: Data, sourceField: string, targetField: string): Data {
  if (!Array.isArray(edges)) return [];

  // DFS colouring: not reached yet / on the current path / completely explored.
  const UNVISITED = 0;
  const VISITING = 1;
  const DONE = -1;

  // Node ids are interned to dense indexes in first-appearance order.
  const indexById = new Map<string, number>();
  // successors[i] holds the indexes of the distinct targets of node i.
  const successors: Set<number>[] = [];
  // cut[i] holds the indexes of the targets whose edge from node i closes a cycle.
  const cut: Set<number>[] = [];

  const intern = (id: string): number => {
    const known = indexById.get(id);
    if (known !== undefined) return known;
    const fresh = successors.length;
    indexById.set(id, fresh);
    successors.push(new Set<number>());
    cut.push(new Set<number>());
    return fresh;
  };

  // endpoints[k] is the [source, target] index pair of edges[k].
  const endpoints: Array<[number, number]> = [];
  for (const edge of edges) {
    // Ids are compared by their string form; source is interned before target.
    const source = intern(String(edge[sourceField]));
    const target = intern(String(edge[targetField]));
    successors[source].add(target);
    endpoints.push([source, target]);
  }

  // Successors are visited in node order (ascending index), not in data order.
  const ordered = successors.map((targets) => Array.from(targets).sort((a, b) => a - b));
  const state: number[] = successors.map(() => UNVISITED);

  const visit = (node: number): void => {
    state[node] = VISITING;
    for (const next of ordered[node]) {
      if (state[next] === VISITING) {
        // `next` is still on the current path, so this edge closes a cycle.
        cut[node].add(next);
      } else if (state[next] === UNVISITED) {
        visit(next);
      }
      // DONE successors were fully explored already; nothing to do.
    }
    state[node] = DONE;
  };

  for (let start = 0; start < state.length; start++) {
    if (state[start] === UNVISITED) visit(start);
  }

  const closesCycle = (_: unknown, position: number): boolean => {
    const [source, target] = endpoints[position];
    return cut[source].has(target);
  };
  const removed = edges.filter((record: unknown, position: number) => closesCycle(record, position));

  if (removed.length !== 0) {
    console.warn(`sankey data contains circle, ${removed.length} records removed.`, removed);
  }

  // Always a fresh array, even when nothing was removed.
  return edges.filter((record: unknown, position: number) => !closesCycle(record, position));
}