C2 Loop predicate

循环预测说的是,比如一个循环里面有个arr[val],C2会生成range check检查是否越界,这时候循环预测会把这个range check提升到循环入口前,在进入循环的地方插入一个预测(predicate),也就是个if,然后检查条件是否成立,如果成立则进入循环,否则引发uncommon trap退优化。

怎么进来

libjvm.so!PhaseIdealLoop::loop_predication_impl(PhaseIdealLoop * const this, IdealLoopTree * loop) (/home/qingfeng.yy/jdktip/src/hotspot/share/opto/loopPredicate.cpp:1367)
libjvm.so!IdealLoopTree::loop_predication(IdealLoopTree * const this, PhaseIdealLoop * phase) (/home/qingfeng.yy/jdktip/src/hotspot/share/opto/loopPredicate.cpp:1473)
libjvm.so!PhaseIdealLoop::build_and_optimize(PhaseIdealLoop * const this, LoopOptsMode mode) (/home/qingfeng.yy/jdktip/src/hotspot/share/opto/loopnode.cpp:4021)
libjvm.so!PhaseIdealLoop::PhaseIdealLoop(PhaseIdealLoop * const this, PhaseIterGVN & igvn, LoopOptsMode mode) (/home/qingfeng.yy/jdktip/src/hotspot/share/opto/loopnode.hpp:1069)
libjvm.so!PhaseIdealLoop::optimize(PhaseIterGVN & igvn, LoopOptsMode mode) (/home/qingfeng.yy/jdktip/src/hotspot/share/opto/loopnode.hpp:1148)
libjvm.so!Compile::Optimize(Compile * const this) (/home/qingfeng.yy/jdktip/src/hotspot/share/opto/compile.cpp:2180)
libjvm.so!Compile::Compile(Compile * const this, ciEnv * ci_env, ciMethod * target, int osr_bci, bool subsume_loads, bool do_escape_analysis, bool eliminate_boxing, bool install_code, DirectiveSet * directive) (/home/qingfeng.yy/jdktip/src/hotspot/share/opto/compile.cpp:770)
libjvm.so!C2Compiler::compile_method(C2Compiler * const this, ciEnv * env, ciMethod * target, int entry_bci, bool install_code, DirectiveSet * directive) (/home/qingfeng.yy/jdktip/src/hotspot/share/opto/c2compiler.cpp:103)
libjvm.so!CompileBroker::invoke_compiler_on_method(CompileTask * task) (/home/qingfeng.yy/jdktip/src/hotspot/share/compiler/compileBroker.cpp:2312)
libjvm.so!CompileBroker::compiler_thread_loop() (/home/qingfeng.yy/jdktip/src/hotspot/share/compiler/compileBroker.cpp:1985)
libjvm.so!CompilerThread::thread_entry(JavaThread * thread, Thread * __the_thread__) (/home/qingfeng.yy/jdktip/src/hotspot/share/compiler/compilerThread.cpp:59)
libjvm.so!JavaThread::thread_main_inner(JavaThread * const this) (/home/qingfeng.yy/jdktip/src/hotspot/share/runtime/thread.cpp:1335)
libjvm.so!JavaThread::run(JavaThread * const this) (/home/qingfeng.yy/jdktip/src/hotspot/share/runtime/thread.cpp:1318)
libjvm.so!Thread::call_run(Thread * const this) (/home/qingfeng.yy/jdktip/src/hotspot/share/runtime/thread.cpp:393)
libjvm.so!thread_native_entry(Thread * thread) (/home/qingfeng.yy/jdktip/src/hotspot/os/linux/os_linux.cpp:719)
libpthread.so.0!start_thread (Unknown Source:0)
libc.so.6!clone (Unknown Source:0)

PhaseIdealLoop:build_and_optimize是循环相关优化的入口,循环优化可以看这里。

Case1: 循环不变量

    static int[] foo14(int x){
        int[] a = new int[500];
        for(int i=0;i<70;i++){
            int t = x%5;
            a[t] = 12;
        }
        return a;
    }

Before:
image

After(apply loop predicate):
image

实现


//------------------------------ loop_predication_impl--------------------------
// Insert loop predicates for null checks and range checks
bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) {
  if (!UseLoopPredicate) return false;

  if (!loop->_head->is_Loop()) {
    // Could be a simple region when irreducible loops are present.
    return false;
  }
  LoopNode* head = loop->_head->as_Loop();

  if (head->unique_ctrl_out()->Opcode() == Op_NeverBranch) {
    // do nothing for infinite loops
    return false;
  }

  if (head->is_OuterStripMinedLoop()) {
    return false;
  }

  CountedLoopNode *cl = NULL;
  if (head->is_valid_counted_loop(T_INT)) {
    cl = head->as_CountedLoop();
    // do nothing for iteration-splitted loops
    if (!cl->is_normal_loop()) return false;
    // Avoid RCE if Counted loop's test is '!='.
    BoolTest::mask bt = cl->loopexit()->test_trip();
    if (bt != BoolTest::lt && bt != BoolTest::gt)
      cl = NULL;
  }

  Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
  ProjNode *loop_limit_proj = NULL;
  ProjNode *predicate_proj = NULL;
  ProjNode *profile_predicate_proj = NULL;
  // Loop limit check predicate should be near the loop.
  loop_limit_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
  if (loop_limit_proj != NULL) {
    entry = skip_loop_predicates(loop_limit_proj);
  }
  bool has_profile_predicates = false;
  profile_predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
  if (profile_predicate_proj != NULL) {
    Node* n = skip_loop_predicates(entry);
    // Check if predicates were already added to the profile predicate
    // block
    if (n != entry->in(0)->in(0) || n->outcnt() != 1) {
      has_profile_predicates = true;
    }
    entry = n;
  }
  predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);

  float loop_trip_cnt = -1;
  bool follow_branches = loop_predication_should_follow_branches(loop, profile_predicate_proj, loop_trip_cnt);
  assert(!follow_branches || loop_trip_cnt >= 0, "negative trip count?");

  if (predicate_proj == NULL && !follow_branches) {
#ifndef PRODUCT
    if (TraceLoopPredicate) {
      tty->print("missing predicate:");
      loop->dump_head();
      head->dump(1);
    }
#endif
    return false;
  }
  ConNode* zero = _igvn.intcon(0);
  set_ctrl(zero, C->root());

  ResourceArea* area = Thread::current()->resource_area();
  Invariance invar(area, loop);

  // Create list of if-projs such that a newer proj dominates all older
  // projs in the list, and they all dominate loop->tail()
  Node_List if_proj_list;
  Node_List regions;
  Node* current_proj = loop->tail(); // start from tail


  Node_List controls;
  // 循环里面,沿着dominate从尾巴向头开始找,找那些被循环支配,且是If,RangeCheck这些类型的节点
  while (current_proj != head) {
    if (loop == get_loop(current_proj) && // still in the loop ?
        current_proj->is_Proj()        && // is a projection  ?
        (current_proj->in(0)->Opcode() == Op_If ||
         current_proj->in(0)->Opcode() == Op_RangeCheck)) { // is a if projection ?
      if_proj_list.push(current_proj);
    }
    if (follow_branches &&
        current_proj->Opcode() == Op_Region &&
        loop == get_loop(current_proj)) {
      regions.push(current_proj);
    }
    current_proj = idom(current_proj);
  }
  
  bool hoisted = false; // true if at least one proj is promoted

  if (!has_profile_predicates) {
    while (if_proj_list.size() > 0) {
      Node* n = if_proj_list.pop();

      ProjNode* proj = n->as_Proj();
      //刚刚找到的If/RangeCheck(本例是RangeCheck)
      IfNode*   iff  = proj->in(0)->as_If();
      // 确定是uncommon_trap_if这种模式
      CallStaticJavaNode* call = proj->is_uncommon_trap_if_pattern(Deoptimization::Reason_none);
      if (call == NULL) {
        if (loop->is_loop_exit(iff)) {
          // stop processing the remaining projs in the list because the execution of them
          // depends on the condition of "iff" (iff->in(1)).
          break;
        } else {
          // Both arms are inside the loop. There are two cases:
          // (1) there is one backward branch. In this case, any remaining proj
          //     in the if_proj list post-dominates "iff". So, the condition of "iff"
          //     does not determine the execution the remining projs directly, and we
          //     can safely continue.
          // (2) both arms are forwarded, i.e. a diamond shape. In this case, "proj"
          //     does not dominate loop->tail(), so it can not be in the if_proj list.
          continue;
        }
      }
      Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(call->uncommon_trap_request());
      if (reason == Deoptimization::Reason_predicate) {
        break;
      }

      if (predicate_proj != NULL) {
        // 进入loop predicate核心实现
        hoisted = loop_predication_impl_helper(loop, proj, predicate_proj, cl, zero, invar, Deoptimization::Reason_predicate) | hoisted;
      }
    } // end while
  }

  if (follow_branches) {
    PathFrequency pf(loop->_head, this);

    // Some projections were skipped by regular predicates because of
    // an early loop exit. Try them with profile data.
    while (if_proj_list.size() > 0) {
      Node* proj = if_proj_list.pop();
      float f = pf.to(proj);
      if (proj->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none) &&
          f * loop_trip_cnt >= 1) {
        hoisted = loop_predication_impl_helper(loop, proj->as_Proj(), profile_predicate_proj, cl, zero, invar, Deoptimization::Reason_profile_predicate) | hoisted;
      }
    }

    // And look into all branches
    Node_Stack stack(0);
    VectorSet seen;
    Node_List if_proj_list_freq(area);
    while (regions.size() > 0) {
      Node* c = regions.pop();
      loop_predication_follow_branches(c, loop, loop_trip_cnt, pf, stack, seen, if_proj_list_freq);
    }

    for (uint i = 0; i < if_proj_list_freq.size(); i++) {
      ProjNode* proj = if_proj_list_freq.at(i)->as_Proj();
      hoisted = loop_predication_impl_helper(loop, proj, profile_predicate_proj, cl, zero, invar, Deoptimization::Reason_profile_predicate) | hoisted;
    }
  }

#ifndef PRODUCT
  // report that the loop predication has been actually performed
  // for this loop
  if (TraceLoopPredicate && hoisted) {
    tty->print("Loop Predication Performed:");
    loop->dump_head();
  }
#endif

  head->verify_strip_mined(1);

  return hoisted;
}

核心实现如下:

bool PhaseIdealLoop::loop_predication_impl_helper(IdealLoopTree *loop, ProjNode* proj, ProjNode *predicate_proj,
                                                  CountedLoopNode *cl, ConNode* zero, Invariance& invar,
                                                  Deoptimization::DeoptReason reason) {
  // Following are changed to nonnull when a predicate can be hoisted
  ProjNode* new_predicate_proj = NULL;
  IfNode*   iff  = proj->in(0)->as_If();
  // 找到test条件
  Node*     test = iff->in(1);
  if (!test->is_Bool()){ //Conv2B, ...
    return false;
  }
  BoolNode* bol = test->as_Bool();
  // 如果条件是循环不变量
  if (invar.is_invariant(bol)) {
    // Invariant test
    // 在循环外面创建predicate,本例是一个RangeCheck
    new_predicate_proj = create_new_if_for_predicate(predicate_proj, NULL,
                                                     reason,
                                                     iff->Opcode());
    Node* ctrl = new_predicate_proj->in(0)->as_If()->in(0);
    // 克隆老的test条件,创建新的条件
    BoolNode* new_predicate_bol = invar.clone(bol, ctrl)->as_Bool();

    // Negate test if necessary
    bool negated = false;
    if (proj->_con != predicate_proj->_con) {
      new_predicate_bol = new BoolNode(new_predicate_bol->in(1), new_predicate_bol->_test.negate());
      register_new_node(new_predicate_bol, ctrl);
      negated = true;
    }
    // 然后新的test条件链接到新的RangeCheck上面
    IfNode* new_predicate_iff = new_predicate_proj->in(0)->as_If();
    _igvn.hash_delete(new_predicate_iff);
    new_predicate_iff->set_req(1, new_predicate_bol);
    // 完成!
#ifndef PRODUCT
    if (TraceLoopPredicate) {
      tty->print("Predicate invariant if%s: %d ", negated ? " negated" : "", new_predicate_iff->_idx);
      loop->dump_head();
    } else if (TraceLoopOpts) {
      tty->print("Predicate IC ");
      loop->dump_head();
    }
#endif
  } else if (cl != NULL && loop->is_range_check_if(iff, this, invar)) {
    // 本例不会走到这里,见下面的例子。
    // Range check for counted loops
    // 对应107#CmpU
    const Node*    cmp    = bol->in(1)->as_Cmp();
    // 81#Phi
    Node*          idx    = cmp->in(1);
    assert(!invar.is_invariant(idx), "index is variant");
    // 106#LoadRange
    Node* rng = cmp->in(2);
    assert(rng->Opcode() == Op_LoadRange || iff->is_RangeCheck() || _igvn.type(rng)->is_int()->_lo >= 0, "must be");
    assert(invar.is_invariant(rng), "range must be invariant");
    int scale    = 1;
    Node* offset = zero;
    // 然后判断arr[idx]这个idx是不是符合期望的pattern
    bool ok = is_scaled_iv_plus_offset(idx, cl->phi(), &scale, &offset);
    assert(ok, "must be index expression");

    Node* init    = cl->init_trip();
    // Limit is not exact.
    // Calculate exact limit here.
    // Note, counted loop's test is '<' or '>'.
    Node* limit   = exact_limit(loop);
    int  stride   = cl->stride()->get_int();

    // Build if's for the upper and lower bound tests.  The
    // lower_bound test will dominate the upper bound test and all
    // cloned or created nodes will use the lower bound test as
    // their declared control.

    // Perform cloning to keep Invariance state correct since the
    // late schedule will place invariant things in the loop.
    // 克隆这个rangecheck
    Node *ctrl = predicate_proj->in(0)->as_If()->in(0);
    rng = invar.clone(rng, ctrl);
    if (offset && offset != zero) {
      assert(invar.is_invariant(offset), "offset must be loop invariant");
      offset = invar.clone(offset, ctrl);
    }
    // If predicate expressions may overflow in the integer range, longs are used.
    bool overflow = false;

    // Test the lower bound
    BoolNode* lower_bound_bol = rc_predicate(loop, ctrl, scale, offset, init, limit, stride, rng, false, overflow);
    // Negate test if necessary
    bool negated = false;
    if (proj->_con != predicate_proj->_con) {
      lower_bound_bol = new BoolNode(lower_bound_bol->in(1), lower_bound_bol->_test.negate());
      register_new_node(lower_bound_bol, ctrl);
      negated = true;
    }
    ProjNode* lower_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, reason, overflow ? Op_If : iff->Opcode());
    IfNode* lower_bound_iff = lower_bound_proj->in(0)->as_If();
    _igvn.hash_delete(lower_bound_iff);
    lower_bound_iff->set_req(1, lower_bound_bol);
    if (TraceLoopPredicate) tty->print_cr("lower bound check if: %s %d ", negated ? " negated" : "", lower_bound_iff->_idx);

    // Test the upper bound
    BoolNode* upper_bound_bol = rc_predicate(loop, lower_bound_proj, scale, offset, init, limit, stride, rng, true, overflow);
    negated = false;
    if (proj->_con != predicate_proj->_con) {
      upper_bound_bol = new BoolNode(upper_bound_bol->in(1), upper_bound_bol->_test.negate());
      register_new_node(upper_bound_bol, ctrl);
      negated = true;
    }
    ProjNode* upper_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, reason, overflow ? Op_If : iff->Opcode());
    assert(upper_bound_proj->in(0)->as_If()->in(0) == lower_bound_proj, "should dominate");
    IfNode* upper_bound_iff = upper_bound_proj->in(0)->as_If();
    _igvn.hash_delete(upper_bound_iff);
    upper_bound_iff->set_req(1, upper_bound_bol);
    if (TraceLoopPredicate) tty->print_cr("upper bound check if: %s %d ", negated ? " negated" : "", lower_bound_iff->_idx);

    // Fall through into rest of the clean up code which will move
    // any dependent nodes onto the upper bound test.
    new_predicate_proj = upper_bound_proj;

    if (iff->is_RangeCheck()) {
      new_predicate_proj = insert_initial_skeleton_predicate(iff, loop, proj, predicate_proj, upper_bound_proj, scale, offset, init, limit, stride, rng, overflow, reason);
    }

#ifndef PRODUCT
    if (TraceLoopOpts && !TraceLoopPredicate) {
      tty->print("Predicate RC ");
      loop->dump_head();
    }
#endif
  } else {
    // Loop variant check (for example, range check in non-counted loop)
    // with uncommon trap.
    return false;
  }
  assert(new_predicate_proj != NULL, "sanity");
  // Success - attach condition (new_predicate_bol) to predicate if
  invar.map_ctrl(proj, new_predicate_proj); // so that invariance test can be appropriate

  // Eliminate the old If in the loop body
  dominated_by( new_predicate_proj, iff, proj->_con != new_predicate_proj->_con );

  C->set_major_progress();
  return true;
}

上面的例子演示了最简单的loop predicate,下面接着演示上面不会走到的路径。

Case2:带计算的range check

Java程序:

    static int[] arr = new int[100];

    static int foo14(int x){
        int sum=0;
        for(int i=0;i<50;i+=2){
            sum += arr[i];
        }
        return sum;
    }

在loop predicate前,这个rangechecknode是在循环里面,被94#if支配的:
image

loop predicate之后:
image

原文地址:https://www.cnblogs.com/kelthuzadx/p/15718460.html