C2 Loop unswitching

在讨论loop unswitching之前,有必要看看什么是counted loop。

Counted Loop

有界循环

    public static int foo5(long k){
        int sum = 0;

        for(int i=5;i<1005;i++){
            sum+=16;
        }

        return sum;
    }

image
simplified之后显示的counted loop如上图,相当于:

    public static int foo5(long k){
        int sum = 0;
        int i=5;
  
        // OuterStripMinedLoop
        if(i>1005){
        	return sum;
        }
        // CountedLoop
        for(;;){
            sum+=16;
            i++;
            
            // CountedLoopEnd
            if(i<1005){
              continue;
            }else{
              break;
            }
        }
        // OuterStripMinedLoopEnd
        return sum;
    }

Loop unswitching

concept

//================= Loop Unswitching =====================
//
// orig:                       transformed:
//                               if (invariant-test) then
//  predicate                      predicate
//  loop                           loop
//    stmt1                          stmt1
//    if (invariant-test) then       stmt2
//      stmt2                        stmt4
//    else                         endloop
//      stmt3                    else
//    endif                        predicate [clone]
//    stmt4                        loop [clone]
//  endloop                          stmt1 [clone]
//                                   stmt3
//                                   stmt4 [clone]
//                                 endloop
//                               endif

demo

比如下面的程序

public static int foo4(long k){
    int sum = 0;
    for(int i=0;i<1000;i++){
        if (k==1024){
            sum+=1;
        }else {
            sum+=2;
        }
        sum+=3;
    }
    return sum;
}

它的idealgraph如下:
image
77#If是循环里面的那个if。

只关注for循环本身的cfg:

  • 108#bool是lt,表示i<1000这个比较的结果。如果true,那么100#IfTrue继续循环,否则110#IfFalse结束循环。那i<1000结果哪来的呢?
  • 69#Phi是表示i,28#ConI是1,106#AddI即i+1,24#ConI表示循环上限1000,这里比较i+1和1000,结果就是108#Bool了。

回到loop unswitching的主角77#If
before:
image

after:
image
之前77#If的k==1024这个cmp变成了122#If,77#If直接true了,相当于:

 public static int foo4(long k){
        int sum = 0;

        if (k==1024){
            for(int i=0;i<1000;i++){
                if (true){
                    sum+=1;
                }else {
                    sum+=2;
                }
                sum+=3;
            }
        }else{
            for(int i=0;i<1000;i++){
                if (false){
                    sum+=1;
                }else {
                    sum+=2;
                }
                sum+=3;
            }
        }
        
        return sum;
    }

最后,这个代码直接优化成了一条cmove,挺让人惊讶的。
image

impl

void PhaseIdealLoop::do_unswitching(IdealLoopTree *loop, Node_List &old_new) {

  LoopNode *head = loop->_head->as_Loop();
  Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
  if (find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check) != NULL
      || (UseProfiledLoopPredicate && find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate) != NULL)
      || (UseLoopPredicate && find_predicate_insertion_point(entry, Deoptimization::Reason_predicate) != NULL)) {
    assert(entry->is_IfProj(), "sanity - must be ifProj since there is at least one predicate");
    if (entry->outcnt() > 1) {
      // Bailout if there are loop predicates from which there are additional control dependencies (i.e. from
      // loop entry 'entry') to previously partially peeled statements since this case is not handled and can lead
      // to wrong execution. Remove this bailout, once this is fixed.
      return;
    }
  }
  // Find first invariant test that doesn't exit the loop
  // 找到循环里面那个if,也就是要做unswitching的if
  IfNode* unswitch_iff = find_unswitching_candidate((const IdealLoopTree *)loop);
  assert(unswitch_iff != NULL, "should be at least one");

#ifndef PRODUCT
  if (TraceLoopOpts) {
    tty->print("Unswitch   %d ", head->unswitch_count()+1);
    loop->dump_head();
  }
#endif

  // Need to revert back to normal loop
  if (head->is_CountedLoop() && !head->as_CountedLoop()->is_normal_loop()) {
    head->as_CountedLoop()->set_normal_loop();
  }
  // 做了绝大部分事情:把循环里面的if提到外面,循环里面的if变成if(true) If(false)这种
  // 然后clone整个循环,分别放入提出去的if的then和else块里面
  ProjNode* proj_true = create_slow_version_of_loop(loop, old_new, unswitch_iff->Opcode(), CloneIncludesStripMined);

#ifdef ASSERT
  ...
#endif
  // Increment unswitch count
  LoopNode* head_clone = old_new[head->_idx]->as_Loop();
  int nct = head->unswitch_count() + 1;
  head->set_unswitch_count(nct);
  head_clone->set_unswitch_count(nct);

  // Add test to new "if" outside of loop
  IfNode* invar_iff   = proj_true->in(0)->as_If();
  Node* invar_iff_c   = invar_iff->in(0);
  BoolNode* bol       = unswitch_iff->in(1)->as_Bool();
  invar_iff->set_req(1, bol);
  invar_iff->_prob    = unswitch_iff->_prob;

  ProjNode* proj_false = invar_iff->proj_out(0)->as_Proj();

  // Hoist invariant casts out of each loop to the appropriate
  // control projection.

  Node_List worklist;

  for (DUIterator_Fast imax, i = unswitch_iff->fast_outs(imax); i < imax; i++) {
    ProjNode* proj= unswitch_iff->fast_out(i)->as_Proj();
    // Copy to a worklist for easier manipulation
    for (DUIterator_Fast jmax, j = proj->fast_outs(jmax); j < jmax; j++) {
      Node* use = proj->fast_out(j);
      if (use->Opcode() == Op_CheckCastPP && loop->is_invariant(use->in(1))) {
        worklist.push(use);
      }
    }
    ProjNode* invar_proj = invar_iff->proj_out(proj->_con)->as_Proj();
    while (worklist.size() > 0) {
      Node* use = worklist.pop();
      Node* nuse = use->clone();
      nuse->set_req(0, invar_proj);
      _igvn.replace_input_of(use, 1, nuse);
      register_new_node(nuse, invar_proj);
      // Same for the clone
      Node* use_clone = old_new[use->_idx];
      _igvn.replace_input_of(use_clone, 1, nuse);
    }
  }

  // Hardwire the control paths in the loops into if(true) and if(false)
  _igvn.rehash_node_delayed(unswitch_iff);
  dominated_by(proj_true, unswitch_iff, false, false);

  IfNode* unswitch_iff_clone = old_new[unswitch_iff->_idx]->as_If();
  _igvn.rehash_node_delayed(unswitch_iff_clone);
  dominated_by(proj_false, unswitch_iff_clone, false, false);

  // Reoptimize loops
  loop->record_for_igvn();
  for(int i = loop->_body.size() - 1; i >= 0 ; i--) {
    Node *n = loop->_body[i];
    Node *n_clone = old_new[n->_idx];
    _igvn._worklist.push(n_clone);
  }

#ifndef PRODUCT
  if (TraceLoopUnswitching) {
    tty->print_cr("Loop unswitching orig: %d @ %d  new: %d @ %d",
                  head->_idx,                unswitch_iff->_idx,
                  old_new[head->_idx]->_idx, unswitch_iff_clone->_idx);
  }
#endif

  C->set_major_progress();
}
ProjNode* PhaseIdealLoop::create_slow_version_of_loop(IdealLoopTree *loop,
                                                      Node_List &old_new,
                                                      int opcode,
                                                      CloneLoopMode mode) {
  LoopNode* head  = loop->_head->as_Loop();
  bool counted_loop = head->is_CountedLoop();
  Node*     entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
  _igvn.rehash_node_delayed(entry);
  IdealLoopTree* outer_loop = loop->_parent;

  head->verify_strip_mined(1);
  // 创造最外面的大if
  Node *cont      = _igvn.intcon(1);
  set_ctrl(cont, C->root());
  Node* opq       = new Opaque1Node(C, cont);
  register_node(opq, outer_loop, entry, dom_depth(entry));
  Node *bol       = new Conv2BNode(opq);
  register_node(bol, outer_loop, entry, dom_depth(entry));
  IfNode* iff = (opcode == Op_RangeCheck) ? new RangeCheckNode(entry, bol, PROB_MAX, COUNT_UNKNOWN) :
    new IfNode(entry, bol, PROB_MAX, COUNT_UNKNOWN);
  register_node(iff, outer_loop, entry, dom_depth(entry));
  ProjNode* iffast = new IfTrueNode(iff);
  register_node(iffast, outer_loop, iff, dom_depth(iff));
  ProjNode* ifslow = new IfFalseNode(iff);
  register_node(ifslow, outer_loop, iff, dom_depth(iff));

  // Clone the loop body.  The clone becomes the slow loop.  The
  // original pre-header will (illegally) have 3 control users
  // (old & new loops & new if).
  // clone循环体,clone出来的新循环体是slowcase,原来的是fastpath
  clone_loop(loop, old_new, dom_depth(head->skip_strip_mined()), mode, iff);
  assert(old_new[head->_idx]->is_Loop(), "" );

  // Fast (true) and Slow (false) control
  ProjNode* iffast_pred = iffast;
  ProjNode* ifslow_pred = ifslow;
  // 其实最开始的C2没有这个的,后面kvn加到。
  // 为两个循环创建OuterStripMinedLoop和if,然后这两个if又和最外面的if关联起来
  clone_predicates_to_unswitched_loop(loop, old_new, iffast_pred, ifslow_pred);
    
  // 为两个循环体设置支配关系
  Node* l = head->skip_strip_mined();
  _igvn.replace_input_of(l, LoopNode::EntryControl, iffast_pred);
  set_idom(l, iffast_pred, dom_depth(l));
  LoopNode* slow_l = old_new[head->_idx]->as_Loop()->skip_strip_mined();
  _igvn.replace_input_of(slow_l, LoopNode::EntryControl, ifslow_pred);
  set_idom(slow_l, ifslow_pred, dom_depth(l));

  recompute_dom_depth();

  return iffast;
}
原文地址:https://www.cnblogs.com/kelthuzadx/p/15718499.html