Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/classes/java/util/ArrayPrefixHelpers.java
38829 views
/*1* Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/24package java.util;2526/*27* Written by Doug Lea with assistance from members of JCP JSR-16628* Expert Group and released to the public domain, as explained at29* http://creativecommons.org/publicdomain/zero/1.0/30*/3132import java.util.concurrent.ForkJoinPool;33import java.util.concurrent.CountedCompleter;34import java.util.function.BinaryOperator;35import java.util.function.IntBinaryOperator;36import java.util.function.LongBinaryOperator;37import java.util.function.DoubleBinaryOperator;3839/**40* ForkJoin tasks to perform Arrays.parallelPrefix operations.41*42* @author Doug Lea43* @since 1.844*/45class ArrayPrefixHelpers {46private ArrayPrefixHelpers() {}; // non-instantiable4748/*49* Parallel prefix (aka cumulate, scan) task classes50* are based loosely on Guy Blelloch's original51* algorithm (http://www.cs.cmu.edu/~scandal/alg/scan.html):52* Keep dividing by two to threshold segment size, and then:53* Pass 1: Create tree of partial sums for each segment54* Pass 2: For each segment, cumulate with offset of left sibling55*56* This version improves performance within FJ framework mainly by57* allowing the second pass of ready left-hand sides to proceed58* even if some right-hand side first passes are still executing.59* It also combines first and second pass for leftmost segment,60* and skips the first pass for rightmost segment (whose result is61* not needed for second pass). It similarly manages to avoid62* requiring that users supply an identity basis for accumulations63* by tracking those segments/subtasks for which the first64* existing element is used as base.65*66* Managing this relies on ORing some bits in the pendingCount for67* phases/states: CUMULATE, SUMMED, and FINISHED. CUMULATE is the68* main phase bit. When false, segments compute only their sum.69* When true, they cumulate array elements. CUMULATE is set at70* root at beginning of second pass and then propagated down. But71* it may also be set earlier for subtrees with lo==0 (the left72* spine of tree). SUMMED is a one bit join count. For leafs, it73* is set when summed. For internal nodes, it becomes true when74* one child is summed. When the second child finishes summing,75* we then moves up tree to trigger the cumulate phase. FINISHED76* is also a one bit join count. For leafs, it is set when77* cumulated. For internal nodes, it becomes true when one child78* is cumulated. When the second child finishes cumulating, it79* then moves up tree, completing at the root.80*81* To better exploit locality and reduce overhead, the compute82* method loops starting with the current task, moving if possible83* to one of its subtasks rather than forking.84*85* As usual for this sort of utility, there are 4 versions, that86* are simple copy/paste/adapt variants of each other. (The87* double and int versions differ from long version soley by88* replacing "long" (with case-matching)).89*/9091// see above92static final int CUMULATE = 1;93static final int SUMMED = 2;94static final int FINISHED = 4;9596/** The smallest subtask array partition size to use as threshold */97static final int MIN_PARTITION = 16;9899static final class CumulateTask<T> extends CountedCompleter<Void> {100final T[] array;101final BinaryOperator<T> function;102CumulateTask<T> left, right;103T in, out;104final int lo, hi, origin, fence, threshold;105106/** Root task constructor */107public CumulateTask(CumulateTask<T> parent,108BinaryOperator<T> function,109T[] array, int lo, int hi) {110super(parent);111this.function = function; this.array = array;112this.lo = this.origin = lo; this.hi = this.fence = hi;113int p;114this.threshold =115(p = (hi - lo) / (ForkJoinPool.getCommonPoolParallelism() << 3))116<= MIN_PARTITION ? MIN_PARTITION : p;117}118119/** Subtask constructor */120CumulateTask(CumulateTask<T> parent, BinaryOperator<T> function,121T[] array, int origin, int fence, int threshold,122int lo, int hi) {123super(parent);124this.function = function; this.array = array;125this.origin = origin; this.fence = fence;126this.threshold = threshold;127this.lo = lo; this.hi = hi;128}129130@SuppressWarnings("unchecked")131public final void compute() {132final BinaryOperator<T> fn;133final T[] a;134if ((fn = this.function) == null || (a = this.array) == null)135throw new NullPointerException(); // hoist checks136int th = threshold, org = origin, fnc = fence, l, h;137CumulateTask<T> t = this;138outer: while ((l = t.lo) >= 0 && (h = t.hi) <= a.length) {139if (h - l > th) {140CumulateTask<T> lt = t.left, rt = t.right, f;141if (lt == null) { // first pass142int mid = (l + h) >>> 1;143f = rt = t.right =144new CumulateTask<T>(t, fn, a, org, fnc, th, mid, h);145t = lt = t.left =146new CumulateTask<T>(t, fn, a, org, fnc, th, l, mid);147}148else { // possibly refork149T pin = t.in;150lt.in = pin;151f = t = null;152if (rt != null) {153T lout = lt.out;154rt.in = (l == org ? lout :155fn.apply(pin, lout));156for (int c;;) {157if (((c = rt.getPendingCount()) & CUMULATE) != 0)158break;159if (rt.compareAndSetPendingCount(c, c|CUMULATE)){160t = rt;161break;162}163}164}165for (int c;;) {166if (((c = lt.getPendingCount()) & CUMULATE) != 0)167break;168if (lt.compareAndSetPendingCount(c, c|CUMULATE)) {169if (t != null)170f = t;171t = lt;172break;173}174}175if (t == null)176break;177}178if (f != null)179f.fork();180}181else {182int state; // Transition to sum, cumulate, or both183for (int b;;) {184if (((b = t.getPendingCount()) & FINISHED) != 0)185break outer; // already done186state = ((b & CUMULATE) != 0? FINISHED :187(l > org) ? SUMMED : (SUMMED|FINISHED));188if (t.compareAndSetPendingCount(b, b|state))189break;190}191192T sum;193if (state != SUMMED) {194int first;195if (l == org) { // leftmost; no in196sum = a[org];197first = org + 1;198}199else {200sum = t.in;201first = l;202}203for (int i = first; i < h; ++i) // cumulate204a[i] = sum = fn.apply(sum, a[i]);205}206else if (h < fnc) { // skip rightmost207sum = a[l];208for (int i = l + 1; i < h; ++i) // sum only209sum = fn.apply(sum, a[i]);210}211else212sum = t.in;213t.out = sum;214for (CumulateTask<T> par;;) { // propagate215if ((par = (CumulateTask<T>)t.getCompleter()) == null) {216if ((state & FINISHED) != 0) // enable join217t.quietlyComplete();218break outer;219}220int b = par.getPendingCount();221if ((b & state & FINISHED) != 0)222t = par; // both done223else if ((b & state & SUMMED) != 0) { // both summed224int nextState; CumulateTask<T> lt, rt;225if ((lt = par.left) != null &&226(rt = par.right) != null) {227T lout = lt.out;228par.out = (rt.hi == fnc ? lout :229fn.apply(lout, rt.out));230}231int refork = (((b & CUMULATE) == 0 &&232par.lo == org) ? CUMULATE : 0);233if ((nextState = b|state|refork) == b ||234par.compareAndSetPendingCount(b, nextState)) {235state = SUMMED; // drop finished236t = par;237if (refork != 0)238par.fork();239}240}241else if (par.compareAndSetPendingCount(b, b|state))242break outer; // sib not ready243}244}245}246}247}248249static final class LongCumulateTask extends CountedCompleter<Void> {250final long[] array;251final LongBinaryOperator function;252LongCumulateTask left, right;253long in, out;254final int lo, hi, origin, fence, threshold;255256/** Root task constructor */257public LongCumulateTask(LongCumulateTask parent,258LongBinaryOperator function,259long[] array, int lo, int hi) {260super(parent);261this.function = function; this.array = array;262this.lo = this.origin = lo; this.hi = this.fence = hi;263int p;264this.threshold =265(p = (hi - lo) / (ForkJoinPool.getCommonPoolParallelism() << 3))266<= MIN_PARTITION ? MIN_PARTITION : p;267}268269/** Subtask constructor */270LongCumulateTask(LongCumulateTask parent, LongBinaryOperator function,271long[] array, int origin, int fence, int threshold,272int lo, int hi) {273super(parent);274this.function = function; this.array = array;275this.origin = origin; this.fence = fence;276this.threshold = threshold;277this.lo = lo; this.hi = hi;278}279280public final void compute() {281final LongBinaryOperator fn;282final long[] a;283if ((fn = this.function) == null || (a = this.array) == null)284throw new NullPointerException(); // hoist checks285int th = threshold, org = origin, fnc = fence, l, h;286LongCumulateTask t = this;287outer: while ((l = t.lo) >= 0 && (h = t.hi) <= a.length) {288if (h - l > th) {289LongCumulateTask lt = t.left, rt = t.right, f;290if (lt == null) { // first pass291int mid = (l + h) >>> 1;292f = rt = t.right =293new LongCumulateTask(t, fn, a, org, fnc, th, mid, h);294t = lt = t.left =295new LongCumulateTask(t, fn, a, org, fnc, th, l, mid);296}297else { // possibly refork298long pin = t.in;299lt.in = pin;300f = t = null;301if (rt != null) {302long lout = lt.out;303rt.in = (l == org ? lout :304fn.applyAsLong(pin, lout));305for (int c;;) {306if (((c = rt.getPendingCount()) & CUMULATE) != 0)307break;308if (rt.compareAndSetPendingCount(c, c|CUMULATE)){309t = rt;310break;311}312}313}314for (int c;;) {315if (((c = lt.getPendingCount()) & CUMULATE) != 0)316break;317if (lt.compareAndSetPendingCount(c, c|CUMULATE)) {318if (t != null)319f = t;320t = lt;321break;322}323}324if (t == null)325break;326}327if (f != null)328f.fork();329}330else {331int state; // Transition to sum, cumulate, or both332for (int b;;) {333if (((b = t.getPendingCount()) & FINISHED) != 0)334break outer; // already done335state = ((b & CUMULATE) != 0? FINISHED :336(l > org) ? SUMMED : (SUMMED|FINISHED));337if (t.compareAndSetPendingCount(b, b|state))338break;339}340341long sum;342if (state != SUMMED) {343int first;344if (l == org) { // leftmost; no in345sum = a[org];346first = org + 1;347}348else {349sum = t.in;350first = l;351}352for (int i = first; i < h; ++i) // cumulate353a[i] = sum = fn.applyAsLong(sum, a[i]);354}355else if (h < fnc) { // skip rightmost356sum = a[l];357for (int i = l + 1; i < h; ++i) // sum only358sum = fn.applyAsLong(sum, a[i]);359}360else361sum = t.in;362t.out = sum;363for (LongCumulateTask par;;) { // propagate364if ((par = (LongCumulateTask)t.getCompleter()) == null) {365if ((state & FINISHED) != 0) // enable join366t.quietlyComplete();367break outer;368}369int b = par.getPendingCount();370if ((b & state & FINISHED) != 0)371t = par; // both done372else if ((b & state & SUMMED) != 0) { // both summed373int nextState; LongCumulateTask lt, rt;374if ((lt = par.left) != null &&375(rt = par.right) != null) {376long lout = lt.out;377par.out = (rt.hi == fnc ? lout :378fn.applyAsLong(lout, rt.out));379}380int refork = (((b & CUMULATE) == 0 &&381par.lo == org) ? CUMULATE : 0);382if ((nextState = b|state|refork) == b ||383par.compareAndSetPendingCount(b, nextState)) {384state = SUMMED; // drop finished385t = par;386if (refork != 0)387par.fork();388}389}390else if (par.compareAndSetPendingCount(b, b|state))391break outer; // sib not ready392}393}394}395}396}397398static final class DoubleCumulateTask extends CountedCompleter<Void> {399final double[] array;400final DoubleBinaryOperator function;401DoubleCumulateTask left, right;402double in, out;403final int lo, hi, origin, fence, threshold;404405/** Root task constructor */406public DoubleCumulateTask(DoubleCumulateTask parent,407DoubleBinaryOperator function,408double[] array, int lo, int hi) {409super(parent);410this.function = function; this.array = array;411this.lo = this.origin = lo; this.hi = this.fence = hi;412int p;413this.threshold =414(p = (hi - lo) / (ForkJoinPool.getCommonPoolParallelism() << 3))415<= MIN_PARTITION ? MIN_PARTITION : p;416}417418/** Subtask constructor */419DoubleCumulateTask(DoubleCumulateTask parent, DoubleBinaryOperator function,420double[] array, int origin, int fence, int threshold,421int lo, int hi) {422super(parent);423this.function = function; this.array = array;424this.origin = origin; this.fence = fence;425this.threshold = threshold;426this.lo = lo; this.hi = hi;427}428429public final void compute() {430final DoubleBinaryOperator fn;431final double[] a;432if ((fn = this.function) == null || (a = this.array) == null)433throw new NullPointerException(); // hoist checks434int th = threshold, org = origin, fnc = fence, l, h;435DoubleCumulateTask t = this;436outer: while ((l = t.lo) >= 0 && (h = t.hi) <= a.length) {437if (h - l > th) {438DoubleCumulateTask lt = t.left, rt = t.right, f;439if (lt == null) { // first pass440int mid = (l + h) >>> 1;441f = rt = t.right =442new DoubleCumulateTask(t, fn, a, org, fnc, th, mid, h);443t = lt = t.left =444new DoubleCumulateTask(t, fn, a, org, fnc, th, l, mid);445}446else { // possibly refork447double pin = t.in;448lt.in = pin;449f = t = null;450if (rt != null) {451double lout = lt.out;452rt.in = (l == org ? lout :453fn.applyAsDouble(pin, lout));454for (int c;;) {455if (((c = rt.getPendingCount()) & CUMULATE) != 0)456break;457if (rt.compareAndSetPendingCount(c, c|CUMULATE)){458t = rt;459break;460}461}462}463for (int c;;) {464if (((c = lt.getPendingCount()) & CUMULATE) != 0)465break;466if (lt.compareAndSetPendingCount(c, c|CUMULATE)) {467if (t != null)468f = t;469t = lt;470break;471}472}473if (t == null)474break;475}476if (f != null)477f.fork();478}479else {480int state; // Transition to sum, cumulate, or both481for (int b;;) {482if (((b = t.getPendingCount()) & FINISHED) != 0)483break outer; // already done484state = ((b & CUMULATE) != 0? FINISHED :485(l > org) ? SUMMED : (SUMMED|FINISHED));486if (t.compareAndSetPendingCount(b, b|state))487break;488}489490double sum;491if (state != SUMMED) {492int first;493if (l == org) { // leftmost; no in494sum = a[org];495first = org + 1;496}497else {498sum = t.in;499first = l;500}501for (int i = first; i < h; ++i) // cumulate502a[i] = sum = fn.applyAsDouble(sum, a[i]);503}504else if (h < fnc) { // skip rightmost505sum = a[l];506for (int i = l + 1; i < h; ++i) // sum only507sum = fn.applyAsDouble(sum, a[i]);508}509else510sum = t.in;511t.out = sum;512for (DoubleCumulateTask par;;) { // propagate513if ((par = (DoubleCumulateTask)t.getCompleter()) == null) {514if ((state & FINISHED) != 0) // enable join515t.quietlyComplete();516break outer;517}518int b = par.getPendingCount();519if ((b & state & FINISHED) != 0)520t = par; // both done521else if ((b & state & SUMMED) != 0) { // both summed522int nextState; DoubleCumulateTask lt, rt;523if ((lt = par.left) != null &&524(rt = par.right) != null) {525double lout = lt.out;526par.out = (rt.hi == fnc ? lout :527fn.applyAsDouble(lout, rt.out));528}529int refork = (((b & CUMULATE) == 0 &&530par.lo == org) ? CUMULATE : 0);531if ((nextState = b|state|refork) == b ||532par.compareAndSetPendingCount(b, nextState)) {533state = SUMMED; // drop finished534t = par;535if (refork != 0)536par.fork();537}538}539else if (par.compareAndSetPendingCount(b, b|state))540break outer; // sib not ready541}542}543}544}545}546547static final class IntCumulateTask extends CountedCompleter<Void> {548final int[] array;549final IntBinaryOperator function;550IntCumulateTask left, right;551int in, out;552final int lo, hi, origin, fence, threshold;553554/** Root task constructor */555public IntCumulateTask(IntCumulateTask parent,556IntBinaryOperator function,557int[] array, int lo, int hi) {558super(parent);559this.function = function; this.array = array;560this.lo = this.origin = lo; this.hi = this.fence = hi;561int p;562this.threshold =563(p = (hi - lo) / (ForkJoinPool.getCommonPoolParallelism() << 3))564<= MIN_PARTITION ? MIN_PARTITION : p;565}566567/** Subtask constructor */568IntCumulateTask(IntCumulateTask parent, IntBinaryOperator function,569int[] array, int origin, int fence, int threshold,570int lo, int hi) {571super(parent);572this.function = function; this.array = array;573this.origin = origin; this.fence = fence;574this.threshold = threshold;575this.lo = lo; this.hi = hi;576}577578public final void compute() {579final IntBinaryOperator fn;580final int[] a;581if ((fn = this.function) == null || (a = this.array) == null)582throw new NullPointerException(); // hoist checks583int th = threshold, org = origin, fnc = fence, l, h;584IntCumulateTask t = this;585outer: while ((l = t.lo) >= 0 && (h = t.hi) <= a.length) {586if (h - l > th) {587IntCumulateTask lt = t.left, rt = t.right, f;588if (lt == null) { // first pass589int mid = (l + h) >>> 1;590f = rt = t.right =591new IntCumulateTask(t, fn, a, org, fnc, th, mid, h);592t = lt = t.left =593new IntCumulateTask(t, fn, a, org, fnc, th, l, mid);594}595else { // possibly refork596int pin = t.in;597lt.in = pin;598f = t = null;599if (rt != null) {600int lout = lt.out;601rt.in = (l == org ? lout :602fn.applyAsInt(pin, lout));603for (int c;;) {604if (((c = rt.getPendingCount()) & CUMULATE) != 0)605break;606if (rt.compareAndSetPendingCount(c, c|CUMULATE)){607t = rt;608break;609}610}611}612for (int c;;) {613if (((c = lt.getPendingCount()) & CUMULATE) != 0)614break;615if (lt.compareAndSetPendingCount(c, c|CUMULATE)) {616if (t != null)617f = t;618t = lt;619break;620}621}622if (t == null)623break;624}625if (f != null)626f.fork();627}628else {629int state; // Transition to sum, cumulate, or both630for (int b;;) {631if (((b = t.getPendingCount()) & FINISHED) != 0)632break outer; // already done633state = ((b & CUMULATE) != 0? FINISHED :634(l > org) ? SUMMED : (SUMMED|FINISHED));635if (t.compareAndSetPendingCount(b, b|state))636break;637}638639int sum;640if (state != SUMMED) {641int first;642if (l == org) { // leftmost; no in643sum = a[org];644first = org + 1;645}646else {647sum = t.in;648first = l;649}650for (int i = first; i < h; ++i) // cumulate651a[i] = sum = fn.applyAsInt(sum, a[i]);652}653else if (h < fnc) { // skip rightmost654sum = a[l];655for (int i = l + 1; i < h; ++i) // sum only656sum = fn.applyAsInt(sum, a[i]);657}658else659sum = t.in;660t.out = sum;661for (IntCumulateTask par;;) { // propagate662if ((par = (IntCumulateTask)t.getCompleter()) == null) {663if ((state & FINISHED) != 0) // enable join664t.quietlyComplete();665break outer;666}667int b = par.getPendingCount();668if ((b & state & FINISHED) != 0)669t = par; // both done670else if ((b & state & SUMMED) != 0) { // both summed671int nextState; IntCumulateTask lt, rt;672if ((lt = par.left) != null &&673(rt = par.right) != null) {674int lout = lt.out;675par.out = (rt.hi == fnc ? lout :676fn.applyAsInt(lout, rt.out));677}678int refork = (((b & CUMULATE) == 0 &&679par.lo == org) ? CUMULATE : 0);680if ((nextState = b|state|refork) == b ||681par.compareAndSetPendingCount(b, nextState)) {682state = SUMMED; // drop finished683t = par;684if (refork != 0)685par.fork();686}687}688else if (par.compareAndSetPendingCount(b, b|state))689break outer; // sib not ready690}691}692}693}694}695}696697698