543 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			543 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
| /*
 | |
| 	MIT License http://www.opensource.org/licenses/mit-license.php
 | |
| 	Author Tobias Koppers @sokra
 | |
| */
 | |
| 
 | |
| "use strict";
 | |
| 
 | |
| // Simulations show these probabilities for a single change
 | |
| // 93.1% that one group is invalidated
 | |
| // 4.8% that two groups are invalidated
 | |
| // 1.1% that 3 groups are invalidated
 | |
| // 0.1% that 4 or more groups are invalidated
 | |
| //
 | |
| // And these for removing/adding 10 lexically adjacent files
 | |
| // 64.5% that one group is invalidated
 | |
| // 24.8% that two groups are invalidated
 | |
| // 7.8% that 3 groups are invalidated
 | |
| // 2.7% that 4 or more groups are invalidated
 | |
| //
 | |
| // And these for removing/adding 3 random files
 | |
| // 0% that one group is invalidated
 | |
| // 3.7% that two groups are invalidated
 | |
| // 80.8% that 3 groups are invalidated
 | |
| // 12.3% that 4 groups are invalidated
 | |
| // 3.2% that 5 or more groups are invalidated
 | |
| 
 | |
/**
 * Scores how alike two keys are by comparing them position by position.
 *
 * Only the first `min(a.length, b.length)` positions are looked at. Every
 * position contributes between 0 and 10 points: 10 when both char codes are
 * equal, one point less per unit of char code distance, and nothing once the
 * distance is 10 or more.
 * @param {string} a key
 * @param {string} b key
 * @returns {number} the similarity as number (higher means more similar)
 */
const similarity = (a, b) => {
	const sharedLength = Math.min(a.length, b.length);
	let score = 0;
	let index = 0;
	while (index < sharedLength) {
		const distance = Math.abs(a.charCodeAt(index) - b.charCodeAt(index));
		// distances of 10 or more contribute nothing
		if (distance < 10) score += 10 - distance;
		index++;
	}
	return score;
};
 | |
| 
 | |
/**
 * Derives a short name for a range of keys from its first and last key.
 *
 * The candidate starts as the common prefix of `a` and `b` plus the first
 * differing character of `a`, and is extended one character at a time until
 * its lower-cased form is not yet contained in `usedNames`. The accepted
 * candidate is registered (lower-cased) in `usedNames`. When no candidate
 * shorter than the shorter key is free, `a` is returned unchanged and
 * `usedNames` is left untouched.
 * @param {string} a key
 * @param {string} b key
 * @param {Set<string>} usedNames set of already used names
 * @returns {string} the common part and a single char for the difference
 */
const getName = (a, b, usedNames) => {
	const limit = Math.min(a.length, b.length);

	// skip over the common prefix ...
	let end = 0;
	while (end < limit && a.charCodeAt(end) === b.charCodeAt(end)) end++;
	// ... and include the first differing character, if there is one
	if (end < limit) end++;

	for (; end < limit; end++) {
		const candidate = a.slice(0, end);
		const normalized = candidate.toLowerCase();
		if (usedNames.has(normalized)) continue;
		usedNames.add(normalized);
		return candidate;
	}

	// names always contain a hash, so this is always unique
	// we don't need to check usedNames nor add it
	return a;
};
 | |
| 
 | |
/**
 * Adds every size type of `size` onto `total`, mutating `total` in place.
 * Size types that are missing (or falsy) in `total` are treated as 0.
 * @param {Record<string, number>} total total size
 * @param {Record<string, number>} size single size
 * @returns {void}
 */
const addSizeTo = (total, size) => {
	for (const [type, amount] of Object.entries(size)) {
		total[type] = (total[type] || 0) + amount;
	}
};
 | |
| 
 | |
/**
 * Subtracts every size type of `size` from `total`, mutating `total` in place.
 * Unlike `addSizeTo` there is no default: a size type missing in `total`
 * ends up as `NaN`.
 * @param {Record<string, number>} total total size
 * @param {Record<string, number>} size single size
 * @returns {void}
 */
const subtractSizeFrom = (total, size) => {
	for (const [type, amount] of Object.entries(size)) {
		total[type] = total[type] - amount;
	}
};
 | |
| 
 | |
/**
 * Accumulates the sizes of all given nodes into a fresh, prototype-less object.
 * @template T
 * @param {Iterable<Node<T>>} nodes some nodes
 * @returns {Record<string, number>} total size
 */
const sumSize = (nodes) => {
	const total = Object.create(null);
	for (const { size } of nodes) {
		addSizeTo(total, size);
	}
	return total;
};
 | |
| 
 | |
/**
 * Checks whether any size type exceeds its configured maximum.
 * Size types with a value of 0, and size types without a numeric entry in
 * `maxSize`, are ignored.
 * @param {Record<string, number>} size size
 * @param {Record<string, number>} maxSize maximum size
 * @returns {boolean} true, when size is too big
 */
const isTooBig = (size, maxSize) =>
	Object.keys(size).some((type) => {
		const amount = size[type];
		if (amount === 0) return false;
		const limit = maxSize[type];
		return typeof limit === "number" && amount > limit;
	});
 | |
| 
 | |
/**
 * Checks whether any size type falls below its configured minimum.
 * Size types with a value of 0, and size types without a numeric entry in
 * `minSize`, are ignored.
 * @param {Record<string, number>} size size
 * @param {Record<string, number>} minSize minimum size
 * @returns {boolean} true, when size is too small
 */
const isTooSmall = (size, minSize) =>
	Object.keys(size).some((type) => {
		const amount = size[type];
		if (amount === 0) return false;
		const limit = minSize[type];
		return typeof limit === "number" && amount < limit;
	});
 | |
| 
 | |
/**
 * Collects the size types whose non-zero value is below the configured minimum.
 * Size types without a numeric entry in `minSize` are never reported.
 * @param {Record<string, number>} size size
 * @param {Record<string, number>} minSize minimum size
 * @returns {Set<string>} set of types that are too small
 */
const getTooSmallTypes = (size, minSize) =>
	new Set(
		Object.keys(size).filter((type) => {
			const amount = size[type];
			if (amount === 0) return false;
			const limit = minSize[type];
			return typeof limit === "number" && amount < limit;
		})
	);
 | |
| 
 | |
/**
 * Counts the own enumerable keys of `size` that have a value other than 0 and
 * are contained in `types`.
 * @template {object} T
 * @param {T} size size
 * @param {Set<string>} types types
 * @returns {number} number of matching size types
 */
const getNumberOfMatchingSizeTypes = (size, types) =>
	Object.keys(size).filter(
		(key) => size[/** @type {keyof T} */ (key)] !== 0 && types.has(key)
	).length;
 | |
| 
 | |
/**
 * Adds up the values of those size types that are contained in `types`.
 * @param {Record<string, number>} size size
 * @param {Set<string>} types types
 * @returns {number} selective size sum
 */
const selectiveSizeSum = (size, types) =>
	Object.keys(size).reduce((sum, type) => {
		const amount = size[type];
		return amount !== 0 && types.has(type) ? sum + amount : sum;
	}, 0);
 | |
| 
 | |
/**
 * A single item to be grouped, stored together with its key and its size.
 * @template T
 */
class Node {
	/**
	 * @param {T} item item
	 * @param {string} key key
	 * @param {Record<string, number>} size size
	 */
	constructor(item, key, size) {
		/** @type {T} the wrapped item */
		this.item = item;
		/** @type {string} key used for ordering and similarity */
		this.key = key;
		/** @type {Record<string, number>} size per size type */
		this.size = size;
	}
}
 | |
| 
 | |
/**
 * An ordered list of nodes together with the similarities between neighbours
 * and the accumulated size of all contained nodes.
 * @template T
 */
class Group {
	/**
	 * @param {Node<T>[]} nodes nodes
	 * @param {number[] | null} similarities similarities between the nodes (length = nodes.length - 1)
	 * @param {Record<string, number>=} size size of the group (summed up from the nodes when omitted)
	 */
	constructor(nodes, similarities, size) {
		this.nodes = nodes;
		this.similarities = similarities;
		this.size = size || sumSize(nodes);
		/** @type {string | undefined} */
		this.key = undefined;
	}

	/**
	 * Removes all nodes accepted by `filter` from this group and returns them.
	 *
	 * The group keeps the remaining nodes; its similarities and size are rebuilt
	 * to match. When `filter` accepts every node, nothing is changed and
	 * `undefined` is returned.
	 * @param {(node: Node<T>) => boolean} filter filter function
	 * @returns {Node<T>[] | undefined} removed nodes
	 */
	popNodes(filter) {
		const removed = [];
		const kept = [];
		const keptSimilarities = [];
		let previousKept;
		for (const [index, node] of this.nodes.entries()) {
			if (filter(node)) {
				removed.push(node);
				continue;
			}
			if (kept.length > 0) {
				// the stored similarity can be reused while the two nodes were
				// neighbours before, otherwise it has to be calculated
				const wereNeighbours = previousKept === this.nodes[index - 1];
				keptSimilarities.push(
					wereNeighbours
						? /** @type {number[]} */ (this.similarities)[index - 1]
						: similarity(
								/** @type {Node<T>} */ (previousKept).key,
								node.key
							)
				);
			}
			kept.push(node);
			previousKept = node;
		}
		// removing every node is not allowed
		if (removed.length === this.nodes.length) return;
		this.nodes = kept;
		this.similarities = keptSimilarities;
		this.size = sumSize(kept);
		return removed;
	}
}
 | |
| 
 | |
/**
 * Computes the similarity of every pair of consecutive nodes, in iteration
 * order. For n nodes the result has n - 1 entries (none for 0 or 1 nodes).
 * @template T
 * @param {Iterable<Node<T>>} nodes nodes
 * @returns {number[]} similarities
 */
const getSimilarities = (nodes) => {
	/** @type {number[]} */
	const scores = [];
	let previous;
	for (const current of nodes) {
		// the very first node has no predecessor to compare with
		if (previous !== undefined) {
			scores.push(similarity(previous.key, current.key));
		}
		previous = current;
	}
	return scores;
};
 | |
| 
 | |
| /**
 | |
|  * @template T
 | |
|  * @typedef {object} GroupedItems<T>
 | |
|  * @property {string} key
 | |
|  * @property {T[]} items
 | |
|  * @property {Record<string, number>} size
 | |
|  */
 | |
| 
 | |
| /**
 | |
|  * @template T
 | |
|  * @typedef {object} Options
 | |
|  * @property {Record<string, number>} maxSize maximum size of a group
 | |
|  * @property {Record<string, number>} minSize minimum size of a group (preferred over maximum size)
 | |
|  * @property {Iterable<T>} items a list of items
 | |
|  * @property {(item: T) => Record<string, number>} getSize function to get size of an item
 | |
|  * @property {(item: T) => string} getKey function to get the key of an item
 | |
|  */
 | |
| 
 | |
| /**
 | |
|  * @template T
 | |
|  * @param {Options<T>} options options object
 | |
|  * @returns {GroupedItems<T>[]} grouped items
 | |
|  */
 | |
| module.exports = ({ maxSize, minSize, items, getSize, getKey }) => {
 | |
| 	/** @type {Group<T>[]} */
 | |
| 	const result = [];
 | |
| 
 | |
| 	const nodes = Array.from(
 | |
| 		items,
 | |
| 		(item) => new Node(item, getKey(item), getSize(item))
 | |
| 	);
 | |
| 
 | |
| 	/** @type {Node<T>[]} */
 | |
| 	const initialNodes = [];
 | |
| 
 | |
| 	// lexically ordering of keys
 | |
| 	nodes.sort((a, b) => {
 | |
| 		if (a.key < b.key) return -1;
 | |
| 		if (a.key > b.key) return 1;
 | |
| 		return 0;
 | |
| 	});
 | |
| 
 | |
| 	// return nodes bigger than maxSize directly as group
 | |
| 	// But make sure that minSize is not violated
 | |
| 	for (const node of nodes) {
 | |
| 		if (isTooBig(node.size, maxSize) && !isTooSmall(node.size, minSize)) {
 | |
| 			result.push(new Group([node], []));
 | |
| 		} else {
 | |
| 			initialNodes.push(node);
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if (initialNodes.length > 0) {
 | |
| 		const initialGroup = new Group(initialNodes, getSimilarities(initialNodes));
 | |
| 
 | |
| 		/**
 | |
| 		 * @param {Group<T>} group group
 | |
| 		 * @param {Record<string, number>} consideredSize size of the group to consider
 | |
| 		 * @returns {boolean} true, if the group was modified
 | |
| 		 */
 | |
| 		const removeProblematicNodes = (group, consideredSize = group.size) => {
 | |
| 			const problemTypes = getTooSmallTypes(consideredSize, minSize);
 | |
| 			if (problemTypes.size > 0) {
 | |
| 				// We hit an edge case where the working set is already smaller than minSize
 | |
| 				// We merge problematic nodes with the smallest result node to keep minSize intact
 | |
| 				const problemNodes = group.popNodes(
 | |
| 					(n) => getNumberOfMatchingSizeTypes(n.size, problemTypes) > 0
 | |
| 				);
 | |
| 				if (problemNodes === undefined) return false;
 | |
| 				// Only merge it with result nodes that have the problematic size type
 | |
| 				const possibleResultGroups = result.filter(
 | |
| 					(n) => getNumberOfMatchingSizeTypes(n.size, problemTypes) > 0
 | |
| 				);
 | |
| 				if (possibleResultGroups.length > 0) {
 | |
| 					const bestGroup = possibleResultGroups.reduce((min, group) => {
 | |
| 						const minMatches = getNumberOfMatchingSizeTypes(min, problemTypes);
 | |
| 						const groupMatches = getNumberOfMatchingSizeTypes(
 | |
| 							group,
 | |
| 							problemTypes
 | |
| 						);
 | |
| 						if (minMatches !== groupMatches) {
 | |
| 							return minMatches < groupMatches ? group : min;
 | |
| 						}
 | |
| 						if (
 | |
| 							selectiveSizeSum(min.size, problemTypes) >
 | |
| 							selectiveSizeSum(group.size, problemTypes)
 | |
| 						) {
 | |
| 							return group;
 | |
| 						}
 | |
| 						return min;
 | |
| 					});
 | |
| 					for (const node of problemNodes) bestGroup.nodes.push(node);
 | |
| 					bestGroup.nodes.sort((a, b) => {
 | |
| 						if (a.key < b.key) return -1;
 | |
| 						if (a.key > b.key) return 1;
 | |
| 						return 0;
 | |
| 					});
 | |
| 				} else {
 | |
| 					// There are no other nodes with the same size types
 | |
| 					// We create a new group and have to accept that it's smaller than minSize
 | |
| 					result.push(new Group(problemNodes, null));
 | |
| 				}
 | |
| 				return true;
 | |
| 			}
 | |
| 			return false;
 | |
| 		};
 | |
| 
 | |
| 		if (initialGroup.nodes.length > 0) {
 | |
| 			const queue = [initialGroup];
 | |
| 
 | |
| 			while (queue.length) {
 | |
| 				const group = /** @type {Group<T>} */ (queue.pop());
 | |
| 				// only groups bigger than maxSize need to be splitted
 | |
| 				if (!isTooBig(group.size, maxSize)) {
 | |
| 					result.push(group);
 | |
| 					continue;
 | |
| 				}
 | |
| 				// If the group is already too small
 | |
| 				// we try to work only with the unproblematic nodes
 | |
| 				if (removeProblematicNodes(group)) {
 | |
| 					// This changed something, so we try this group again
 | |
| 					queue.push(group);
 | |
| 					continue;
 | |
| 				}
 | |
| 
 | |
| 				// find unsplittable area from left and right
 | |
| 				// going minSize from left and right
 | |
| 				// at least one node need to be included otherwise we get stuck
 | |
| 				let left = 1;
 | |
| 				const leftSize = Object.create(null);
 | |
| 				addSizeTo(leftSize, group.nodes[0].size);
 | |
| 				while (left < group.nodes.length && isTooSmall(leftSize, minSize)) {
 | |
| 					addSizeTo(leftSize, group.nodes[left].size);
 | |
| 					left++;
 | |
| 				}
 | |
| 				let right = group.nodes.length - 2;
 | |
| 				const rightSize = Object.create(null);
 | |
| 				addSizeTo(rightSize, group.nodes[group.nodes.length - 1].size);
 | |
| 				while (right >= 0 && isTooSmall(rightSize, minSize)) {
 | |
| 					addSizeTo(rightSize, group.nodes[right].size);
 | |
| 					right--;
 | |
| 				}
 | |
| 
 | |
| 				//      left v   v right
 | |
| 				// [ O O O ] O O O [ O O O ]
 | |
| 				// ^^^^^^^^^ leftSize
 | |
| 				//       rightSize ^^^^^^^^^
 | |
| 				// leftSize > minSize
 | |
| 				// rightSize > minSize
 | |
| 
 | |
| 				// Perfect split: [ O O O ] [ O O O ]
 | |
| 				//                right === left - 1
 | |
| 
 | |
| 				if (left - 1 > right) {
 | |
| 					// We try to remove some problematic nodes to "fix" that
 | |
| 					let prevSize;
 | |
| 					if (right < group.nodes.length - left) {
 | |
| 						subtractSizeFrom(rightSize, group.nodes[right + 1].size);
 | |
| 						prevSize = rightSize;
 | |
| 					} else {
 | |
| 						subtractSizeFrom(leftSize, group.nodes[left - 1].size);
 | |
| 						prevSize = leftSize;
 | |
| 					}
 | |
| 					if (removeProblematicNodes(group, prevSize)) {
 | |
| 						// This changed something, so we try this group again
 | |
| 						queue.push(group);
 | |
| 						continue;
 | |
| 					}
 | |
| 					// can't split group while holding minSize
 | |
| 					// because minSize is preferred of maxSize we return
 | |
| 					// the problematic nodes as result here even while it's too big
 | |
| 					// To avoid this make sure maxSize > minSize * 3
 | |
| 					result.push(group);
 | |
| 					continue;
 | |
| 				}
 | |
| 				if (left <= right) {
 | |
| 					// when there is a area between left and right
 | |
| 					// we look for best split point
 | |
| 					// we split at the minimum similarity
 | |
| 					// here key space is separated the most
 | |
| 					// But we also need to make sure to not create too small groups
 | |
| 					let best = -1;
 | |
| 					let bestSimilarity = Infinity;
 | |
| 					let pos = left;
 | |
| 					const rightSize = sumSize(group.nodes.slice(pos));
 | |
| 
 | |
| 					//       pos v   v right
 | |
| 					// [ O O O ] O O O [ O O O ]
 | |
| 					// ^^^^^^^^^ leftSize
 | |
| 					// rightSize ^^^^^^^^^^^^^^^
 | |
| 
 | |
| 					while (pos <= right + 1) {
 | |
| 						const similarity = /** @type {number[]} */ (group.similarities)[
 | |
| 							pos - 1
 | |
| 						];
 | |
| 						if (
 | |
| 							similarity < bestSimilarity &&
 | |
| 							!isTooSmall(leftSize, minSize) &&
 | |
| 							!isTooSmall(rightSize, minSize)
 | |
| 						) {
 | |
| 							best = pos;
 | |
| 							bestSimilarity = similarity;
 | |
| 						}
 | |
| 						addSizeTo(leftSize, group.nodes[pos].size);
 | |
| 						subtractSizeFrom(rightSize, group.nodes[pos].size);
 | |
| 						pos++;
 | |
| 					}
 | |
| 					if (best < 0) {
 | |
| 						// This can't happen
 | |
| 						// but if that assumption is wrong
 | |
| 						// fallback to a big group
 | |
| 						result.push(group);
 | |
| 						continue;
 | |
| 					}
 | |
| 					left = best;
 | |
| 					right = best - 1;
 | |
| 				}
 | |
| 
 | |
| 				// create two new groups for left and right area
 | |
| 				// and queue them up
 | |
| 				const rightNodes = [group.nodes[right + 1]];
 | |
| 				/** @type {number[]} */
 | |
| 				const rightSimilarities = [];
 | |
| 				for (let i = right + 2; i < group.nodes.length; i++) {
 | |
| 					rightSimilarities.push(
 | |
| 						/** @type {number[]} */ (group.similarities)[i - 1]
 | |
| 					);
 | |
| 					rightNodes.push(group.nodes[i]);
 | |
| 				}
 | |
| 				queue.push(new Group(rightNodes, rightSimilarities));
 | |
| 
 | |
| 				const leftNodes = [group.nodes[0]];
 | |
| 				/** @type {number[]} */
 | |
| 				const leftSimilarities = [];
 | |
| 				for (let i = 1; i < left; i++) {
 | |
| 					leftSimilarities.push(
 | |
| 						/** @type {number[]} */ (group.similarities)[i - 1]
 | |
| 					);
 | |
| 					leftNodes.push(group.nodes[i]);
 | |
| 				}
 | |
| 				queue.push(new Group(leftNodes, leftSimilarities));
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// lexically ordering
 | |
| 	result.sort((a, b) => {
 | |
| 		if (a.nodes[0].key < b.nodes[0].key) return -1;
 | |
| 		if (a.nodes[0].key > b.nodes[0].key) return 1;
 | |
| 		return 0;
 | |
| 	});
 | |
| 
 | |
| 	// give every group a name
 | |
| 	const usedNames = new Set();
 | |
| 	for (let i = 0; i < result.length; i++) {
 | |
| 		const group = result[i];
 | |
| 		if (group.nodes.length === 1) {
 | |
| 			group.key = group.nodes[0].key;
 | |
| 		} else {
 | |
| 			const first = group.nodes[0];
 | |
| 			const last = group.nodes[group.nodes.length - 1];
 | |
| 			const name = getName(first.key, last.key, usedNames);
 | |
| 			group.key = name;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// return the results
 | |
| 	return result.map(
 | |
| 		(group) =>
 | |
| 			/** @type {GroupedItems<T>} */
 | |
| 			({
 | |
| 				key: group.key,
 | |
| 				items: group.nodes.map((node) => node.item),
 | |
| 				size: group.size
 | |
| 			})
 | |
| 	);
 | |
| };
 |