Use custom radix sort for ordering blocks (#1277)

* Use custom radix sort for ordering blocks

* Licenses :P

* Fix ordering, add test for it

* Grant Gradle more memory (thx Fabric)

* Increase (?) / Specify memory for core tests

* Many attempts at fixing OOM

(cherry picked from commit bb013aa89b8457c4649e2546eef0aa94b9fc4020)
This commit is contained in:
Octavia Togami 2020-03-31 21:13:32 -04:00 committed by MattBDev
parent 374ad992a2
commit 735a37ffd0
8 changed files with 389 additions and 13 deletions

View File

@ -2,12 +2,28 @@ import java.util.Properties
plugins {
`kotlin-dsl`
kotlin("jvm") version "1.3.61"
kotlin("jvm") version embeddedKotlinVersion
}
repositories {
jcenter()
gradlePluginPortal()
maven {
name = "Forge Maven"
url = uri("https://files.minecraftforge.net/maven")
}
maven {
name = "Fabric"
url = uri("https://maven.fabricmc.net/")
}
maven {
name = "sponge"
url = uri("https://repo.spongepowered.org/maven")
}
maven {
name = "EngineHub Repository"
url = uri("https://maven.enginehub.org/repo/")
}
}
configurations.all {
@ -31,9 +47,16 @@ val mixinVersion: String = properties.getProperty("mixin.version")
dependencies {
implementation(gradleApi())
implementation("gradle.plugin.net.minecrell:licenser:0.4.1")
implementation("org.ajoberstar.grgit:grgit-gradle:3.1.1")
implementation("com.github.jengelman.gradle.plugins:shadow:5.1.0")
implementation("net.ltgt.apt-eclipse:net.ltgt.apt-eclipse.gradle.plugin:0.21")
implementation("net.ltgt.apt-idea:net.ltgt.apt-idea.gradle.plugin:0.21")
implementation("org.jfrog.buildinfo:build-info-extractor-gradle:4.9.7")
implementation("gradle.plugin.org.spongepowered:spongegradle:0.9.0")
implementation("net.minecraftforge.gradle:ForgeGradle:3.0.168")
implementation("net.fabricmc:fabric-loom:$loomVersion")
implementation("net.fabricmc:sponge-mixin:$mixinVersion")
implementation("gradle.plugin.com.mendhak.gradlecrowdin:plugin:0.1.0")
implementation("org.enginehub.gradle:gradle-codecov-plugin:0.1.0")
}

View File

@ -1,12 +1,12 @@
import org.gradle.api.Project
object Versions {
const val TEXT = "3.0.1"
const val TEXT_EXTRAS = "3.0.2"
const val TEXT = "3.0.3"
const val TEXT_EXTRAS = "3.0.3"
const val PISTON = "0.5.2"
const val AUTO_VALUE = "1.6.5"
const val JUNIT = "5.5.0"
const val MOCKITO = "3.0.0"
const val AUTO_VALUE = "1.7"
const val JUNIT = "5.6.1"
const val MOCKITO = "3.3.3"
const val LOGBACK = "1.2.3"
}

View File

@ -1,11 +1,11 @@
group=com.sk89q.worldedit
#version=7.1.0-SNAPSHOT
version=7.2.0-SNAPSHOT
org.gradle.jvmargs=-Xmx1G
org.gradle.jvmargs=-Xmx1512M
org.gradle.daemon=true
org.gradle.configureondemand=true
org.gradle.parallel=true
org.gradle.caching=true
loom.version=0.2.6-20191213.183106-50
loom.version=0.2.6-20200124.104118-60
mixin.version=0.8+build.17

View File

@ -61,6 +61,10 @@ dependencies {
implementation(kotlin("stdlib-jdk8", "1.3.61"))
}
tasks.named<Test>("test") {
maxHeapSize = "1G"
}
tasks.withType<JavaCompile>().configureEach {
dependsOn(":worldedit-libs:build")
options.compilerArgs.add("-Aarg.name.key.prefix=")

View File

@ -25,15 +25,15 @@ import com.sk89q.worldedit.extent.AbstractBufferingExtent;
import com.sk89q.worldedit.extent.Extent;
import com.sk89q.worldedit.function.operation.Operation;
import com.sk89q.worldedit.function.operation.RunContext;
import com.sk89q.worldedit.internal.util.RegionOptimizedVectorSorter;
import com.sk89q.worldedit.math.BlockVector3;
import com.sk89q.worldedit.math.RegionOptimizedComparator;
import com.sk89q.worldedit.util.collection.BlockMap;
import com.sk89q.worldedit.world.block.BaseBlock;
import com.sk89q.worldedit.world.block.BlockStateHolder;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Optional;
/**
* A special extent that batches changes into Minecraft chunks. This helps
@ -94,7 +94,9 @@ public class ChunkBatchingExtent extends AbstractBufferingExtent {
@Override
public Operation resume(RunContext run) throws WorldEditException {
if (iterator == null) {
iterator = blockMap.keySet().parallelStream().sorted(RegionOptimizedComparator.INSTANCE).iterator();
List<BlockVector3> blockVectors = new ArrayList<>(blockMap.keySet());
RegionOptimizedVectorSorter.sort(blockVectors);
iterator = blockVectors.iterator();
}
while (iterator.hasNext()) {
BlockVector3 position = iterator.next();

View File

@ -0,0 +1,216 @@
/*
* WorldEdit, a Minecraft world manipulation toolkit
* Copyright (C) sk89q <http://www.sk89q.com>
* Copyright (C) WorldEdit team and contributors
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the
* Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package com.sk89q.worldedit.internal.util;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.sk89q.worldedit.math.BlockVector3;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.ListIterator;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import static com.sk89q.worldedit.math.BitMath.mask;
/**
 * Uses a radix sort to order vectors by region, then chunk, then Y value (max -> min).
 *
 * <p>
 * Every vector is reduced to a 64-bit key whose <em>unsigned</em> order is the desired
 * order. The keys are then sorted with a stable least-significant-digit radix sort that
 * consumes 16 bits per pass. The sort is not in-place: it works on array copies and writes
 * the result back into the supplied list.
 * </p>
 */
public final class RegionOptimizedVectorSorter {

    // We need to sort by region file, chunk, and Y (x/z don't really matter)
    // Due to MC having x/z axes of only 60,000,000 blocks, the max value is <=26 bits.
    // We can store the chunk in 4 bits less, 22 bits; and region in 5 bits less than that, 17 bits
    // If we share the region + chunk bits, we can make a radix key that is really 5 parts:
    // [region X (17)][region Z (17)][chunk X (5)][chunk Z (5)][block Y (20)] = 64 bits
    // Even though we only normally need 8 bits for Y, we might as well use it for cubic chunks
    // compatibility in the future, since we have the room in the long value
    private static final int CHUNK_Z_SHIFT = 20;
    private static final int CHUNK_X_SHIFT = 5 + CHUNK_Z_SHIFT;
    private static final int REGION_Z_SHIFT = 5 + CHUNK_X_SHIFT;
    private static final int REGION_X_SHIFT = 17 + REGION_Z_SHIFT;
    private static final long REGION_X_MASK = ((long) mask(17)) << REGION_X_SHIFT;
    private static final long REGION_Z_MASK = ((long) mask(17)) << REGION_Z_SHIFT;
    private static final long CHUNK_X_MASK = ((long) mask(5)) << CHUNK_X_SHIFT;
    private static final long CHUNK_Z_MASK = ((long) mask(5)) << CHUNK_Z_SHIFT;
    // All 20 bits of the Y field set; also used as the mask that confines Y to its field
    private static final int Y_MAX = mask(20);
    // Y is stored as (Y_BIAS - y): the highest supported Y (2^19 - 1) maps to 0 and the
    // lowest supported Y (-2^19) maps to Y_MAX, so larger Y values sort first and
    // negative Y values are ordered correctly as well.
    private static final int Y_BIAS = Y_MAX >>> 1;
    // We flip the region x/z sign to turn signed numbers into unsigned ones
    // this allows us to sort on the raw bits, and not care about signs
    // Essentially it transforms [negative values][positive values]
    // to [positive value][even more positive values], i.e. a shift upwards
    private static final long FLIP_REGION_X_SIGN = 0x1_00_00L << REGION_X_SHIFT;
    private static final long FLIP_REGION_Z_SIGN = 0x1_00_00L << REGION_Z_SHIFT;

    /**
     * Builds the 64-bit radix key for a vector.
     *
     * <p>
     * The key is laid out as
     * {@code [region X (17)][region Z (17)][chunk X (5)][chunk Z (5)][block Y (20)]},
     * so comparing keys as unsigned values orders by region, then chunk within the region,
     * then Y from max to min. X/Z inside a chunk are not part of the key; vectors with
     * equal keys keep their relative input order because every pass is stable.
     * </p>
     *
     * <p>
     * The Y field is computed in {@code long} arithmetic and masked to its 20 bits.
     * Without the mask, a negative Y (or one above {@code Y_MAX}) would overflow the field
     * and overwrite the chunk and region bits, breaking the region/chunk grouping.
     * Y values in {@code [-2^19, 2^19)} are ordered exactly; values outside that range
     * wrap around inside the Y field only.
     * </p>
     *
     * @param elem the vector to build a key for
     * @return the radix key, to be compared as an unsigned 64-bit value
     */
    private static long key(BlockVector3 elem) {
        long x = elem.getX();
        long y = elem.getY();
        long z = elem.getZ();
        // The region index is (coord >> 9) and the chunk-in-region index is
        // ((coord >> 4) & 31); shifting left by (fieldShift - 9 / - 4) and masking
        // lands those bits directly in their field without a separate right shift.
        return (((x << (REGION_X_SHIFT - 9)) & REGION_X_MASK) ^ FLIP_REGION_X_SIGN)
            | (((z << (REGION_Z_SHIFT - 9)) & REGION_Z_MASK) ^ FLIP_REGION_Z_SIGN)
            | ((x << (CHUNK_X_SHIFT - 4)) & CHUNK_X_MASK)
            | ((z << (CHUNK_Z_SHIFT - 4)) & CHUNK_Z_MASK)
            | ((Y_BIAS - y) & Y_MAX);
    }

    private static final int NUMBER_OF_BITS = 64;
    private static final int BITS_PER_SORT = 16;
    // Number of distinct digit values per pass, and the mask that extracts one digit
    private static final int MAX_FOR_BPS = 1 << BITS_PER_SORT;
    private static final int MASK_FOR_BPS = MAX_FOR_BPS - 1;
    private static final int NUMBER_OF_SORTS = NUMBER_OF_BITS / BITS_PER_SORT;
    private static final int NUMBER_OF_CORES = Runtime.getRuntime().availableProcessors();

    // Minimum list size at which sort(List) switches to the parallel implementation.
    // NOTE(review): left non-final and package-private, presumably so tests can tune it.
    static int PARALLELISM_THRESHOLD;

    static {
        if (NUMBER_OF_CORES == 1) {
            // don't even bother
            PARALLELISM_THRESHOLD = Integer.MAX_VALUE;
        } else {
            // Determined via benchmarking serial vs. parallel.
            // Didn't try anything more fine-grained than increments of 100,000.
            PARALLELISM_THRESHOLD = 200000;
        }
    }

    // Sized with NUMBER_OF_CORES so the pool matches the number of slices (and per-slice
    // count arrays) that parallelSort creates. Daemon threads never block JVM shutdown.
    private static final ExecutorService SORT_SVC = Executors.newFixedThreadPool(
        NUMBER_OF_CORES,
        new ThreadFactoryBuilder()
            .setDaemon(true)
            .setNameFormat("worldedit-sort-svc-%d")
            .build()
    );

    /**
     * Sorts the vectors by region, then chunk, then Y (max -> min).
     *
     * <p>
     * The parallel implementation is used once the list holds at least
     * {@code PARALLELISM_THRESHOLD} elements. {@code vectors} must be mutable, and will be
     * sorted after this method returns.
     * </p>
     *
     * @param vectors the vectors to sort
     */
    public static void sort(List<BlockVector3> vectors) {
        sort(vectors.size() >= PARALLELISM_THRESHOLD, vectors);
    }

    /**
     * For test purposes, or if you want to finely control when parallelism occurs.
     *
     * <p>
     * {@code vectors} must be mutable, and will be sorted after this method returns.
     * </p>
     *
     * @param parallel {@code true} to sort in parallel
     * @param vectors the vectors to sort
     */
    public static void sort(boolean parallel, List<BlockVector3> vectors) {
        // Currently we don't do an in-place radix sort, but we could in the future.
        int size = vectors.size();
        // take care of some easy cases
        if (size <= 1) {
            return;
        }
        BlockVector3[] source = vectors.toArray(new BlockVector3[0]);
        BlockVector3[] scratch = new BlockVector3[size];
        BlockVector3[] result = parallel
            ? parallelSort(source, size, scratch)
            : serialSort(source, size, scratch);
        // Write back through a ListIterator so the list is traversed exactly once
        ListIterator<BlockVector3> it = vectors.listIterator();
        for (BlockVector3 vector : result) {
            it.next();
            it.set(vector);
        }
    }

    /**
     * Radix-sorts {@code source}, computing each pass's digits and histogram on the shared
     * sort pool.
     *
     * <p>
     * The input is split into {@code NUMBER_OF_CORES} contiguous slices. For every pass each
     * slice extracts its digits into the shared {@code keys} array and tallies them in its
     * own count array; the tallies are then merged and the elements are redistributed
     * serially by {@link #copyByCounts}.
     * </p>
     *
     * @param source the elements to sort; used as working storage
     * @param size the number of elements
     * @param sorted a scratch array of length {@code size}
     * @return whichever of the two arrays holds the sorted result
     * @throws IllegalStateException if the calling thread is interrupted while waiting
     * @throws RuntimeException if a pool task fails
     */
    private static BlockVector3[] parallelSort(BlockVector3[] source, int size, BlockVector3[] sorted) {
        int[][] counts = new int[NUMBER_OF_CORES][MAX_FOR_BPS];
        int[] finalCounts = new int[MAX_FOR_BPS];
        int[] keys = new int[size];
        List<Future<int[]>> tasks = new ArrayList<>(NUMBER_OF_CORES);
        // Slice length, rounded up so that the slices cover every element
        int kStep = (size + NUMBER_OF_CORES - 1) / NUMBER_OF_CORES;
        for (int p = 0; p < NUMBER_OF_SORTS; p++) {
            // 'source' is reassigned below, so the lambdas need an effectively final copy
            BlockVector3[] currentSource = source;
            int shift = BITS_PER_SORT * p;
            for (int c = 0; c < NUMBER_OF_CORES; c++) {
                int[] localCounts = counts[c];
                int kStart = kStep * c;
                // Trailing slices may be short, or empty when kStart is already past the end
                int kEnd = Math.min(kStart + kStep, size);
                tasks.add(SORT_SVC.submit(() -> {
                    for (int i = kStart; i < kEnd; i++) {
                        int k = ((int) (key(currentSource[i]) >>> shift) & MASK_FOR_BPS);
                        keys[i] = k;
                        localCounts[k]++;
                    }
                    return localCounts;
                }));
            }
            awaitAll(tasks);
            tasks.clear();
            // Merge the per-slice histograms and reset them for the next pass
            for (int c = 0; c < NUMBER_OF_CORES; c++) {
                int[] localCounts = counts[c];
                for (int i = 0; i < MAX_FOR_BPS; i++) {
                    finalCounts[i] += localCounts[i];
                    localCounts[i] = 0;
                }
            }
            copyByCounts(size, source, sorted, keys, finalCounts);
            // The output of this pass is the input of the next one
            BlockVector3[] temp = source;
            source = sorted;
            sorted = temp;
        }
        // after the loop returns, source is now the final sorted array!
        return source;
    }

    /**
     * Blocks until every task has completed.
     *
     * @param tasks the tasks to wait for
     * @throws IllegalStateException if the calling thread is interrupted while waiting;
     *     the interrupt flag is restored first
     * @throws RuntimeException wrapping the {@link ExecutionException} if a task failed
     */
    private static void awaitAll(List<Future<int[]>> tasks) {
        for (Future<int[]> task : tasks) {
            try {
                task.get();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new IllegalStateException(e);
            } catch (ExecutionException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Radix-sorts {@code source} on the calling thread.
     *
     * @param source the elements to sort; used as working storage
     * @param size the number of elements
     * @param sorted a scratch array of length {@code size}
     * @return whichever of the two arrays holds the sorted result
     */
    private static BlockVector3[] serialSort(BlockVector3[] source, int size, BlockVector3[] sorted) {
        int[] counts = new int[MAX_FOR_BPS];
        int[] keys = new int[size];
        for (int p = 0; p < NUMBER_OF_SORTS; p++) {
            int shift = BITS_PER_SORT * p;
            for (int i = 0; i < size; i++) {
                int k = ((int) (key(source[i]) >>> shift) & MASK_FOR_BPS);
                keys[i] = k;
                counts[k]++;
            }
            copyByCounts(size, source, sorted, keys, counts);
            // The output of this pass is the input of the next one
            BlockVector3[] temp = source;
            source = sorted;
            sorted = temp;
        }
        // after the loop returns, source is now the final sorted array!
        return source;
    }

    /**
     * Performs the distribution step of one counting-sort pass.
     *
     * <p>
     * The histogram is turned into running totals, so each entry becomes the exclusive end
     * index of its digit's bucket. Elements are then placed from last to first, which keeps
     * elements with equal digits in their original relative order (a stable pass).
     * {@code finalCounts} is zeroed before returning so it can be reused.
     * </p>
     *
     * @param size the number of elements
     * @param source the elements in their current order
     * @param sorted the array that receives the elements ordered by digit
     * @param keys the digit of each element of {@code source}, by index
     * @param finalCounts the number of occurrences of each digit; cleared on return
     */
    private static void copyByCounts(int size, BlockVector3[] source, BlockVector3[] sorted, int[] keys, int[] finalCounts) {
        int runningTotal = finalCounts[0];
        for (int i = 1; i < MAX_FOR_BPS; i++) {
            runningTotal = (finalCounts[i] += runningTotal);
        }
        for (int i = size - 1; i >= 0; i--) {
            int key = keys[i];
            int target = --finalCounts[key];
            sorted[target] = source[i];
        }
        Arrays.fill(finalCounts, 0);
    }

    private RegionOptimizedVectorSorter() {
    }
}

View File

@ -0,0 +1,131 @@
/*
* WorldEdit, a Minecraft world manipulation toolkit
* Copyright (C) sk89q <http://www.sk89q.com>
* Copyright (C) WorldEdit team and contributors
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the
* Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package com.sk89q.worldedit.internal.util;
import com.google.common.collect.Lists;
import com.sk89q.worldedit.math.BlockVector3;
import com.sk89q.worldedit.regions.CuboidRegion;
import it.unimi.dsi.fastutil.ints.IntRBTreeSet;
import it.unimi.dsi.fastutil.ints.IntSortedSet;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.fail;
/**
 * Verifies that {@link RegionOptimizedVectorSorter} sorts properly.
 */
public class RegionOptimizedVectorSorterTest {

    /**
     * Collects the factors of a number in ascending order.
     *
     * @param num the number to find factors of
     * @return the factors from smallest to biggest
     */
    private static IntSortedSet findFactors(int num) {
        IntSortedSet divisors = new IntRBTreeSet();
        // An odd number has no even factors, so only odd candidates need to be tried
        int step = (num % 2 == 0) ? 1 : 2;
        for (int candidate = 1; candidate <= Math.sqrt(num); candidate += step) {
            if (num % candidate != 0) {
                continue;
            }
            // Each factor at or below the square root has a partner at or above it
            divisors.add(candidate);
            divisors.add(num / candidate);
        }
        return divisors;
    }

    /**
     * Creates {@code size} distinct positions on the Y=0 plane, laid out as a rectangle
     * that is roughly centered on the origin.
     *
     * @param size the number of positions to create
     * @return a mutable list of the positions
     */
    private static List<BlockVector3> createPositions(int size) {
        if (size == 0) {
            return new ArrayList<>();
        }
        IntSortedSet factors = findFactors(size);
        // take the middle factors
        int x = factors.toIntArray()[factors.size() / 2];
        int z = size / x;
        int minX = x / 2;
        int maxX = minX + x % 2;
        int minZ = z / 2;
        int maxZ = minZ + z % 2;
        BlockVector3 lowCorner = BlockVector3.at(-minX, 0, -minZ);
        BlockVector3 highCorner = BlockVector3.at(maxX - 1, 0, maxZ - 1);
        return Lists.newArrayList(new CuboidRegion(lowCorner, highCorner));
    }

    @ParameterizedTest(
        name = "size={0}"
    )
    @ValueSource(ints = {
        0, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000, 10_000_000
    })
    void checkSorted(int size) {
        // Seeded with the size so that every run shuffles identically
        Random rng = new Random(size);
        List<BlockVector3> toSort = createPositions(size);
        assertEquals(size, toSort.size());
        Collections.shuffle(toSort, rng);
        RegionOptimizedVectorSorter.sort(toSort);

        // Check every adjacent pair, from the most significant criterion to the least
        for (int i = 1; i < toSort.size(); i++) {
            BlockVector3 curr = toSort.get(i - 1);
            BlockVector3 next = toSort.get(i);
            int currChunkX = curr.getX() >> 4;
            int nextChunkX = next.getX() >> 4;
            int currChunkZ = curr.getZ() >> 4;
            int nextChunkZ = next.getZ() >> 4;
            int currRegionX = currChunkX >> 5;
            int nextRegionX = nextChunkX >> 5;
            int currRegionZ = currChunkZ >> 5;
            int nextRegionZ = nextChunkZ >> 5;
            String spaceship = "(" + curr + " <=> " + next + ")";

            if (currRegionX != nextRegionX) {
                if (currRegionX > nextRegionX) {
                    fail(spaceship + " "
                        + currRegionX + " region x should be less than or equal to " + nextRegionX);
                }
                continue;
            }
            if (currRegionZ != nextRegionZ) {
                if (currRegionZ > nextRegionZ) {
                    fail(spaceship + " "
                        + currRegionZ + " region z should be less than or equal to " + nextRegionZ);
                }
                continue;
            }
            if (currChunkX != nextChunkX) {
                if (currChunkX > nextChunkX) {
                    fail(spaceship + " "
                        + currChunkX + " chunk x should be less than or equal to " + nextChunkX);
                }
                continue;
            }
            if (currChunkZ != nextChunkZ) {
                if (currChunkZ > nextChunkZ) {
                    fail(spaceship + " "
                        + currChunkZ + " chunk z should be less than or equal to " + nextChunkZ);
                }
                continue;
            }
            // Same chunk: Y must not increase
            if (curr.getY() < next.getY()) {
                fail(spaceship + " "
                    + curr + " y should be greater than or equal to " + next);
            }
        }
    }
}

View File

@ -1,7 +1,7 @@
import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar
import net.minecraftforge.gradle.common.util.RunConfig
import net.minecraftforge.gradle.userdev.UserDevExtension
import net.minecraftforge.gradle.userdev.tasks.GenerateSRG
import net.minecraftforge.gradle.mcp.task.GenerateSRG
import net.minecraftforge.gradle.userdev.tasks.RenameJarInPlace
plugins {