split fresh packs by time

add comments

fix losing cache

correctly store a lazy
This commit is contained in:
Tobias Koppers 2021-09-08 13:45:27 +02:00
parent 200ce66945
commit 3270275ffe
3 changed files with 182 additions and 58 deletions

View File

@ -76,7 +76,9 @@ makeSerializable(
const MIN_CONTENT_SIZE = 1024 * 1024; // 1 MB
const CONTENT_COUNT_TO_MERGE = 10;
const MIN_ITEMS_IN_FRESH_PACK = 100;
const MAX_ITEMS_IN_FRESH_PACK = 50000;
const MAX_TIME_IN_FRESH_PACK = 1 * 60 * 1000; // 1 min
class PackItemInfo {
/**
@ -99,6 +101,7 @@ class Pack {
this.itemInfo = new Map();
/** @type {string[]} */
this.requests = [];
this.requestsTimeout = undefined;
/** @type {Map<string, PackItemInfo>} */
this.freshContent = new Map();
/** @type {(undefined | PackContent)[]} */
@ -108,6 +111,16 @@ class Pack {
this.maxAge = maxAge;
}
_addRequest(identifier) {
this.requests.push(identifier);
if (this.requestsTimeout === undefined) {
this.requestsTimeout = setTimeout(() => {
this.requests.push(undefined);
this.requestsTimeout = undefined;
}, MAX_TIME_IN_FRESH_PACK);
}
}
/**
* @param {string} identifier unique name for the resource
* @param {string | null} etag etag of the resource
@ -115,7 +128,7 @@ class Pack {
*/
get(identifier, etag) {
const info = this.itemInfo.get(identifier);
this.requests.push(identifier);
this._addRequest(identifier);
if (info === undefined) {
return undefined;
}
@ -147,12 +160,12 @@ class Pack {
if (info === undefined) {
const newInfo = new PackItemInfo(identifier, etag, data);
this.itemInfo.set(identifier, newInfo);
this.requests.push(identifier);
this._addRequest(identifier);
this.freshContent.set(identifier, newInfo);
} else {
const loc = info.location;
if (loc >= 0) {
this.requests.push(identifier);
this._addRequest(identifier);
this.freshContent.set(identifier, info);
const content = this.content[loc];
content.delete(identifier);
@ -220,27 +233,39 @@ class Pack {
}
_persistFreshContent() {
if (this.freshContent.size > 0) {
const packCount = Math.ceil(
this.freshContent.size / MAX_ITEMS_IN_FRESH_PACK
);
const itemsPerPack = Math.ceil(this.freshContent.size / packCount);
this.logger.log(`${this.freshContent.size} fresh items in cache`);
const packs = Array.from({ length: packCount }, () => {
const itemsCount = this.freshContent.size;
if (itemsCount > 0) {
const packCount = Math.ceil(itemsCount / MAX_ITEMS_IN_FRESH_PACK);
const itemsPerPack = Math.ceil(itemsCount / packCount);
const packs = [];
let i = 0;
let ignoreNextTimeTick = false;
const createNextPack = () => {
const loc = this._findLocation();
this.content[loc] = null; // reserve
return {
const pack = {
/** @type {Set<string>} */
items: new Set(),
/** @type {Map<string, any>} */
map: new Map(),
loc
};
});
let i = 0;
let pack = packs[0];
let packIndex = 0;
packs.push(pack);
return pack;
};
let pack = createNextPack();
if (this.requestsTimeout !== undefined)
clearTimeout(this.requestsTimeout);
for (const identifier of this.requests) {
if (identifier === undefined) {
if (ignoreNextTimeTick) {
ignoreNextTimeTick = false;
} else if (pack.items.size >= MIN_ITEMS_IN_FRESH_PACK) {
i = 0;
pack = createNextPack();
}
continue;
}
const info = this.freshContent.get(identifier);
if (info === undefined) continue;
pack.items.add(identifier);
@ -250,9 +275,11 @@ class Pack {
this.freshContent.delete(identifier);
if (++i > itemsPerPack) {
i = 0;
pack = packs[++packIndex];
pack = createNextPack();
ignoreNextTimeTick = true;
}
}
this.requests.length = 0;
for (const pack of packs) {
this.content[pack.loc] = new PackContent(
pack.items,
@ -260,6 +287,15 @@ class Pack {
new PackContentItems(pack.map)
);
}
this.logger.log(
`${itemsCount} fresh items in cache put into pack ${
packs.length > 1
? packs
.map(pack => `${pack.loc} (${pack.items.size} items)`)
.join(", ")
: packs[0].loc
}`
);
}
}
@ -332,7 +368,9 @@ class Pack {
addToMergedMap.push(async map => {
// unpack existing content
// after that values are accessible in .content
await content.unpack();
await content.unpack(
"it should be merged with other small pack contents"
);
for (const [identifier, value] of content.content) {
map.set(identifier, value);
}
@ -392,7 +430,9 @@ class Pack {
usedItems,
new Set(usedItems),
async () => {
await content.unpack();
await content.unpack(
"it should be splitted into used and unused items"
);
const map = new Map();
for (const identifier of usedItems) {
map.set(identifier, content.content.get(identifier));
@ -417,7 +457,9 @@ class Pack {
unusedItems,
usedOfUnusedItems,
async () => {
await content.unpack();
await content.unpack(
"it should be splitted into used and unused items"
);
const map = new Map();
for (const identifier of unusedItems) {
map.set(identifier, content.content.get(identifier));
@ -466,7 +508,9 @@ class Pack {
this.content[loc] =
items.size > 0
? new PackContent(items, usedItems, async () => {
await content.unpack();
await content.unpack(
"it contains old items that should be garbage collected"
);
const map = new Map();
for (const identifier of items) {
map.set(identifier, content.content.get(identifier));
@ -496,7 +540,7 @@ class Pack {
const content = this.content[i];
if (content !== undefined) {
write(content.items);
writeSeparate(content.getLazyContentItems(), { name: `${i}` });
content.writeLazy(lazy => writeSeparate(lazy, { name: `${i}` }));
} else {
write(undefined); // undefined marks an empty content slot
}
@ -669,6 +713,25 @@ makeSerializable(
);
class PackContent {
/*
This class can be in these states:
| this.lazy | this.content | this.outdated | state
A1 | undefined | Map | false | fresh content
A2 | undefined | Map | true | (will not happen)
B1 | lazy () => {} | undefined | false | not deserialized
B2 | lazy () => {} | undefined | true | not deserialized, but some items have been removed
C1 | lazy* () => {} | Map | false | deserialized
C2 | lazy* () => {} | Map | true | deserialized, and some items have been removed
this.used is a subset of this.items.
this.items is a subset of this.content.keys() resp. this.lazy().map.keys()
When this.outdated === false, this.items === this.content.keys() resp. this.lazy().map.keys()
When this.outdated === true, this.items should be used to recreate this.lazy/this.content.
When this.lazy and this.content are both set, they contain the same data.
this.get must only be called with a valid item from this.items.
In state C this.lazy is unMemoized
*/
/**
* @param {Set<string>} items keys
* @param {Set<string>} usedItems used keys
@ -678,7 +741,7 @@ class PackContent {
*/
constructor(items, usedItems, dataOrFn, logger, lazyName) {
this.items = items;
/** @type {function(): Promise<PackContentItems> | PackContentItems } */
/** @type {function(): Promise<PackContentItems> | PackContentItems} */
this.lazy = typeof dataOrFn === "function" ? dataOrFn : undefined;
/** @type {Map<string, any>} */
this.content = typeof dataOrFn === "function" ? undefined : dataOrFn.map;
@ -693,6 +756,8 @@ class PackContent {
if (this.content) {
return this.content.get(identifier);
}
// We are in state B
const { lazyName } = this;
let timeMessage;
if (lazyName) {
@ -715,6 +780,7 @@ class PackContent {
if (timeMessage) {
this.logger.timeEnd(timeMessage);
}
// Move to state C
this.content = map;
this.lazy = SerializerMiddleware.unMemoizeLazy(this.lazy);
return map.get(identifier);
@ -724,6 +790,7 @@ class PackContent {
if (timeMessage) {
this.logger.timeEnd(timeMessage);
}
// Move to state C
this.content = map;
this.lazy = SerializerMiddleware.unMemoizeLazy(this.lazy);
return map.get(identifier);
@ -731,10 +798,13 @@ class PackContent {
}
/**
* @param {string} reason explanation why unpack is necessary
* @returns {void | Promise} maybe a promise if lazy
*/
unpack() {
unpack(reason) {
if (this.content) return;
// Move from state B to C
if (this.lazy) {
const { lazyName } = this;
let timeMessage;
@ -744,6 +814,11 @@ class PackContent {
timeMessage = `unpack cache content ${lazyName} (${formatSize(
this.getSize()
)})`;
this.logger.log(
`starting to unpack cache content ${lazyName} (${formatSize(
this.getSize()
)}) because ${reason}`
);
this.logger.time(timeMessage);
}
const value = this.lazy();
@ -782,48 +857,93 @@ class PackContent {
}
/**
* @returns {function(): PackContentItems | Promise<PackContentItems>} lazy content items
* @template T
* @param {function(any): function(): Promise<PackContentItems> | PackContentItems} write write function
* @returns {void}
*/
getLazyContentItems() {
if (!this.outdated && this.lazy) return this.lazy;
writeLazy(write) {
if (!this.outdated && this.lazy) {
// State B1 or C1
// this.lazy is still the valid deserialized version
write(this.lazy);
return;
}
if (!this.outdated && this.content) {
// State A1
const map = new Map(this.content);
return (this.lazy = memoize(() => new PackContentItems(map)));
// Move to state C1
this.lazy = SerializerMiddleware.unMemoizeLazy(
write(() => new PackContentItems(map))
);
return;
}
this.outdated = false;
if (this.content) {
return (this.lazy = memoize(() => {
/** @type {Map<string, any>} */
const map = new Map();
for (const item of this.items) {
map.set(item, this.content.get(item));
}
return new PackContentItems(map);
}));
// State A2 or C2
/** @type {Map<string, any>} */
const map = new Map();
for (const item of this.items) {
map.set(item, this.content.get(item));
}
// Move to state C1
this.outdated = false;
this.content = map;
this.lazy = SerializerMiddleware.unMemoizeLazy(
write(() => new PackContentItems(map))
);
return;
}
const lazy = this.lazy;
return (this.lazy = () => {
const value = lazy();
if (value instanceof Promise) {
return value.then(data => {
// State B2
const { lazyName } = this;
let timeMessage;
if (lazyName) {
// only log once
this.lazyName = undefined;
timeMessage = `unpack cache content ${lazyName} (${formatSize(
this.getSize()
)})`;
this.logger.log(
`starting to unpack cache content ${lazyName} (${formatSize(
this.getSize()
)}) because it's outdated and need to be serialized`
);
this.logger.time(timeMessage);
}
const value = this.lazy();
this.outdated = false;
if (value instanceof Promise) {
// Move to state B1
this.lazy = write(() =>
value.then(data => {
if (timeMessage) {
this.logger.timeEnd(timeMessage);
}
const oldMap = data.map;
/** @type {Map<string, any>} */
const map = new Map();
for (const item of this.items) {
map.set(item, oldMap.get(item));
}
// Move to state C1 (or maybe C2)
this.content = map;
this.lazy = SerializerMiddleware.unMemoizeLazy(this.lazy);
return new PackContentItems(map);
});
} else {
const oldMap = value.map;
/** @type {Map<string, any>} */
const map = new Map();
for (const item of this.items) {
map.set(item, oldMap.get(item));
}
return new PackContentItems(map);
})
);
} else {
// Move to state C1
if (timeMessage) {
this.logger.timeEnd(timeMessage);
}
});
const oldMap = value.map;
/** @type {Map<string, any>} */
const map = new Map();
for (const item of this.items) {
map.set(item, oldMap.get(item));
}
this.content = map;
this.lazy = write(() => new PackContentItems(map));
}
}
}
@ -1110,10 +1230,10 @@ class PackFileCacheStrategy {
const packPromise = this.packPromise;
if (packPromise === undefined) return Promise.resolve();
const reportProgress = ProgressPlugin.getReporter(this.compiler);
this.packPromise = undefined;
return (this.storePromise = packPromise
.then(pack => {
if (!pack.invalid) return;
this.packPromise = undefined;
this.logger.log(`Storing pack...`);
let promise;
const newBuildDependencies = new Set();

View File

@ -510,11 +510,11 @@ class ObjectMiddleware extends SerializerMiddleware {
} else if (SerializerMiddleware.isLazy(item, this)) {
throw new Error("Not implemented");
} else {
result.push(
SerializerMiddleware.serializeLazy(item, data =>
this.serialize([data], context)
)
const data = SerializerMiddleware.serializeLazy(item, data =>
this.serialize([data], context)
);
SerializerMiddleware.setLazySerializedValue(item, data);
result.push(data);
}
} else if (item === undefined) {
result.push(ESCAPE, ESCAPE_UNDEFINED);

View File

@ -111,9 +111,13 @@ module.exports = {
);
};
context.writeSeparate = (value, options) => {
context.write(
SerializerMiddleware.createLazy(value, fileMiddleware, options)
const lazy = SerializerMiddleware.createLazy(
value,
fileMiddleware,
options
);
context.write(lazy);
return lazy;
};
}
}),