PHP获取首字母相关方法

项目开发中经常会遇到各种需求,这次遇到的就是获取首字母的需求。

在网上找了几份相关的方法,细数下来以下方法会稍微好一点。

虽然同样有很多识别不出来,但是比起其他已经好很多

第一个方法是我觉得用的比较好一些的

它是封装的一个类库

直接上代码

<?php
namespace appindexcontroller; 
 /**
* Modified by fuyong @ 2015-09-13
* 修复二分法查找方法
* 汉字拼音首字母工具类
*  注: 英文的字串:不变返回(包括数字)    eg .abc123 => abc123
*      中文字符串:返回拼音首字符        eg. 测试字符串 => CSZFC
*      中英混合串: 返回拼音首字符和英文   eg. 我i我j => WIWJ
*  eg.
*  $py = new Aafirstchar();
*  $result = $py->getInitials('我想和你在一起');
*  $result = $py->getFirstchar('小时候我就想和你在一起');
*/

//下面3行测试代码
// $py = new Aafirstchar();
// $result = $py->getFirstchar('根据中文姓名');
// print_r($result);

/**
 * Pinyin-initial lookup for Chinese text, based on the GB2312 code table.
 *
 * GB2312 level-1 hanzi (0xB0A1-0xD7F9) are stored in pinyin order, so the
 * initial of such a character follows from the table section its code falls
 * into. Anything outside that range (level-2 hanzi, GBK extensions,
 * full-width punctuation) cannot be resolved this way and is skipped.
 *
 * Behaviour:
 *  - pure ASCII input is returned unchanged            abc123 => abc123
 *  - Chinese input yields the upper-case initials      测试字符串 => CSZFC
 *  - mixed input yields initials plus the ASCII part   我i我j => WIWJ
 */
class Aafirstchar
{
    /**
     * First code of every pinyin section, in ascending order.
     * Key = lead byte * 1000 + trail byte of the GB2312 code, value = initial.
     * The table has no sections for I, U and V.
     */
    private $_pinyins = array(
        176161 => 'A',
        176197 => 'B',
        178193 => 'C',
        180238 => 'D',
        182234 => 'E',
        183162 => 'F',
        184193 => 'G',
        185254 => 'H',
        187247 => 'J',
        191166 => 'K',
        192172 => 'L',
        194232 => 'M',
        196195 => 'N',
        197182 => 'O',
        197190 => 'P',
        198218 => 'Q',
        200187 => 'R',
        200246 => 'S',
        203250 => 'T',
        205218 => 'W',
        206244 => 'X',
        209185 => 'Y',
        212209 => 'Z',
    );

    /** Last code of the pinyin-ordered area (0xD7F9 => 215 * 1000 + 249). */
    private $_lastCode = 215249;

    /** Encoding of the strings handed to getInitials()/getFirstchar(). */
    private $_charset = null;

    /**
     * @param string $charset Encoding of the input strings: 'utf-8' (default)
     *                        or 'gb2312'. The name is compared
     *                        case-insensitively; any non-UTF-8 value means the
     *                        input is used as GB2312/GBK bytes without conversion.
     */
    public function __construct( $charset = 'utf-8' )
    {
        $this->_charset = $charset;
    }

    /**
     * Tell whether the configured charset is UTF-8 (accepts 'utf-8', 'UTF-8', 'utf8').
     *
     * @return bool
     */
    private function _isUtf8()
    {
        $name = strtolower( (string) $this->_charset );
        return $name === 'utf-8' || $name === 'utf8';
    }

    /**
     * Split a GBK/GB2312 byte string into characters.
     *
     * A byte in 0x81-0xFE followed by another byte starts a two-byte
     * character; every other byte is returned on its own. The string is
     * walked once, so the cost is linear in its length.
     *
     * @param string $str GBK/GB2312 encoded bytes
     * @return array list of one- or two-byte strings
     */
    private function _cutWord( $str )
    {
        $words  = array();
        $length = strlen($str);
        for ( $i = 0; $i < $length; $i++ ) {
            $byte = ord($str[$i]);
            if ( $byte >= 0x81 && $byte <= 0xFE && $i + 1 < $length ) {
                $words[] = substr($str, $i, 2);
                $i++; /* skip the trail byte */
            } else {
                $words[] = $str[$i];
            }
        }
        return $words;
    }

    /**
     * Find the section key a GB2312 code belongs to (binary search).
     *
     * @param int $code lead byte * 1000 + trail byte
     * @return int key into $_pinyins, or -1 when the code is not a
     *             pinyin-ordered level-1 hanzi
     */
    private function _search( $code )
    {
        $keys  = array_keys($this->_pinyins);
        $trail = $code % 1000;
        /* Reject everything outside 0xB0A1-0xD7F9 and GBK trail bytes below
           0xA1: those codes are not sorted by pinyin, so a section lookup
           would return an arbitrary letter. */
        if ( $code < $keys[0] || $code > $this->_lastCode || $trail < 0xA1 || $trail > 0xFE ) {
            return -1;
        }
        $low  = 0;
        $high = count($keys) - 1;
        /* Invariant: $keys[$low] <= $code; converge on the last such key. */
        while ( $low < $high ) {
            $mid = (int) ( ($low + $high + 1) / 2 );
            if ( $keys[$mid] <= $code ) {
                $low = $mid;
            } else {
                $high = $mid - 1;
            }
        }
        return $keys[$low];
    }

    /**
     * Get the pinyin initials of a string.
     *
     * @param string $str text in the charset given to the constructor
     * @return string the input itself when it is pure ASCII; otherwise the
     *                upper-cased ASCII characters and pinyin initials, with
     *                unresolvable characters left out. Returns '' for empty
     *                input or when the charset conversion fails.
     */
    public function getInitials( $str )
    {
        $str = (string) $str;
        if ( $str === '' ) {
            return '';
        }
        /* Only a string without any high byte is "English" and returned
           as-is; checking just the first bytes would let "ab我" through. */
        if ( !preg_match('/[\x80-\xFF]/', $str) ) {
            return $str;
        }
        if ( $this->_isUtf8() ) {
            $str = iconv( 'utf-8', 'gbk//ignore', $str );
            if ( $str === false ) {
                return '';
            }
        }
        $result = '';
        foreach ( $this->_cutWord($str) as $word ) {
            if ( strlen($word) === 1 ) {
                /* keep ASCII, drop stray high bytes */
                if ( ord($word) < 0x80 ) {
                    $result .= $word;
                }
                continue;
            }
            $key = $this->_search( ord($word[0]) * 1000 + ord($word[1]) );
            if ( $key !== -1 ) {
                $result .= $this->_pinyins[$key];
            }
        }
        return strtoupper($result);
    }

    /**
     * Get only the first character of getInitials().
     *
     * @param string $str
     * @return string one character, or '' when nothing could be resolved
     */
    public function getFirstchar( $str )
    {
        return (string) substr( $this->getInitials($str), 0, 1 );
    }
}
?>

这个是我个人认为比其他的好一点的方法

借鉴于一位大大的博客,地址:https://blog.csdn.net/liiuweii/article/details/52239508

补充一下本人调用方法:

/**
 * Resolve the index letter (A-Z) for a nickname.
 *
 * @param string $nickname
 * @return string an upper-case letter A-Z, or '#' when no letter could be
 *                determined
 */
function getUsersInit($nickname)
{
    // The class is declared under `namespace appindexcontroller;`, so it has
    // to be referenced by its fully-qualified name.
    // NOTE(review): the namespace looks like it lost its separators when the
    // snippet was published (app\index\controller?) - if so, fix the
    // declaration and this reference together.
    $py = new \appindexcontroller\Aafirstchar();
    $res = strtoupper((string) $py->getFirstchar($nickname));

    // Check for '' explicitly: on PHP 8 strstr() treats an empty needle as a
    // match, which would let an empty initial through instead of '#'.
    if ($res === '' || empty(ifUsersInit($res))) {
        return '#';
    }
    return $res;
}

/**
 * Check whether a string occurs in the upper-case alphabet.
 *
 * @param string $str candidate initial
 * @return string|false the part of 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' starting at
 *                      the match, or false when there is no match
 */
function ifUsersInit($str)
{
    $str = (string) $str;
    // An empty needle is not a letter; strstr() would warn on PHP 7 and
    // return the whole alphabet on PHP 8.
    if ($str === '') {
        return false;
    }
    return strstr('ABCDEFGHIJKLMNOPQRSTUVWXYZ', $str);
}

其他方法基本是使用ASCII码表来确定的,这种的获取不如上面的方法好

我选择了两个比较有对比性的存放了一下下

方法一

/**
 * Get the index letter for the first character of a UTF-8 string.
 *
 * - Chinese character: its pinyin initial, derived from the GB2312 code
 *   ranges below; null when the character is outside those ranges or cannot
 *   be converted to GB2312.
 * - Digit: the pinyin initial of the digit's Chinese reading (1 => Y, 2 => E ...).
 * - ASCII letter: the letter in upper case.
 * - Any other ASCII character: the character itself.
 *
 * @param string $s0 UTF-8 text
 * @return string|null
 */
function getfirstchar($s0)
{
    $s0 = (string) $s0;
    if ($s0 === '') {
        return '';
    }

    $lead = ord($s0[0]);

    if ($lead > 127) { // multi-byte (Chinese) first character
        // Convert only the first character, so that an unconvertible
        // character later in the string cannot make the lookup fail.
        if (preg_match('/^./us', $s0, $m) !== 1) {
            return null; // not valid UTF-8
        }
        $gb = iconv('UTF-8', 'gb2312', $m[0]);
        if ($gb === false || strlen($gb) < 2) {
            return null;
        }
        $asc = ord($gb[0]) * 256 + ord($gb[1]) - 65536;

        // First code of each pinyin section; the sections are contiguous and
        // the last one (Z) ends at -10247.
        $sections = array(
            -20319 => 'A', -20283 => 'B', -19775 => 'C', -19218 => 'D',
            -18710 => 'E', -18526 => 'F', -18239 => 'G', -17922 => 'H',
            -17417 => 'J', -16474 => 'K', -16212 => 'L', -15640 => 'M',
            -15165 => 'N', -14922 => 'O', -14914 => 'P', -14630 => 'Q',
            -14149 => 'R', -14090 => 'S', -13318 => 'T', -12838 => 'W',
            -12556 => 'X', -11847 => 'Y', -11055 => 'Z',
        );
        if ($asc > -10247) {
            return null;
        }
        $letter = null;
        foreach ($sections as $lower => $initial) {
            if ($asc < $lower) {
                break;
            }
            $letter = $initial;
        }
        return $letter;
    }

    if ($lead >= 48 && $lead <= 57) { // digit: initial of its Chinese reading
        $digits = array(
            '0' => 'L', '1' => 'Y', '2' => 'E', '3' => 'S', '4' => 'S',
            '5' => 'W', '6' => 'L', '7' => 'Q', '8' => 'B', '9' => 'J',
        );
        return $digits[$s0[0]];
    }

    if ($lead >= 97 && $lead <= 122) { // lower-case letter
        return strtoupper($s0[0]);
    }

    // upper-case letter or any other ASCII character: returned as-is
    return $s0[0];
}

本篇借鉴于一位大大的博客,地址:https://blog.csdn.net/PHP1923880282/article/details/8833192

方法二

/**
 * Get the upper-case pinyin initial of the first character of a UTF-8 string.
 *
 * @param string $str UTF-8 text
 * @return string|null '' for empty input; the upper-cased letter when the
 *                     string starts with an ASCII letter; the pinyin initial
 *                     when it starts with a Chinese character that is either
 *                     inside the GB2312 ranges below or known to
 *                     rare_words(); null in every other case
 */
function getFirstCharters($str)
{
  if (empty($str)) {
    return '';
  }
  $str = (string) $str;
  $lead = ord($str[0]);

  if ($lead <= 127) {
    // Test both letter ranges explicitly: 'A'..'z' as one range would also
    // accept the six punctuation characters between 'Z' and 'a'.
    $isLetter = ($lead >= 65 && $lead <= 90) || ($lead >= 97 && $lead <= 122);
    return $isLetter ? strtoupper($str[0]) : null;
  }

  // Extract the first whole character rather than a fixed 3 bytes, so that
  // 2- and 4-byte UTF-8 characters are not cut in the middle.
  if (preg_match('/^./us', $str, $m) !== 1) {
    return null; // not valid UTF-8
  }
  $gb = iconv('UTF-8', 'gb2312//IGNORE', $m[0]);
  if ($gb === false || strlen($gb) < 2) {
    return null;
  }

  $asc = ord($gb[0]) * 256 + ord($gb[1]) - 65536;

  // First code of each pinyin section; the sections are contiguous and the
  // last one (Z) ends at -10247.
  $sections = array(
    -20319 => 'A', -20283 => 'B', -19775 => 'C', -19218 => 'D',
    -18710 => 'E', -18526 => 'F', -18239 => 'G', -17922 => 'H',
    -17417 => 'J', -16474 => 'K', -16212 => 'L', -15640 => 'M',
    -15165 => 'N', -14922 => 'O', -14914 => 'P', -14630 => 'Q',
    -14149 => 'R', -14090 => 'S', -13318 => 'T', -12838 => 'W',
    -12556 => 'X', -11847 => 'Y', -11055 => 'Z',
  );
  if ($asc >= -20319 && $asc <= -10247) {
    $letter = 'A';
    foreach ($sections as $lower => $initial) {
      if ($asc < $lower) {
        break;
      }
      $letter = $initial;
    }
    return $letter;
  }

  // Outside the pinyin-ordered area: fall back to the surname table.
  return rare_words($asc);
}
/**
 * Look up the pinyin initial of a rare surname character.
 *
 * The table is keyed by the same value getFirstCharters() computes
 * (lead byte * 256 + trail byte - 65536).
 *
 * @param int|string $asc code to look up
 * @return string|null the initial, or null when the code is not in the table
 */
function rare_words($asc=''){
  $table = [
    -3652 => ['word' => "窦", 'first_char' => 'D'],
    -8503 => ['word' => "奚", 'first_char' => 'X'],
    -9286 => ['word' => "酆", 'first_char' => 'F'],
    -7761 => ['word' => "岑", 'first_char' => 'C'],
    -5128 => ['word' => "滕", 'first_char' => 'T'],
    -9479 => ['word' => "邬", 'first_char' => 'W'],
    -5456 => ['word' => "臧", 'first_char' => 'Z'],
    -7223 => ['word' => "闵", 'first_char' => 'M'],
    -2877 => ['word' => "裘", 'first_char' => 'Q'],
    -6191 => ['word' => "缪", 'first_char' => 'M'],
    -5414 => ['word' => "贲", 'first_char' => 'B'],
    -4102 => ['word' => "嵇", 'first_char' => 'J'],
    -8969 => ['word' => "荀", 'first_char' => 'X'],
    -4938 => ['word' => "於", 'first_char' => 'Y'],
    -9017 => ['word' => "芮", 'first_char' => 'R'],
    -2848 => ['word' => "羿", 'first_char' => 'Y'],
    -9477 => ['word' => "邴", 'first_char' => 'B'],
    -9485 => ['word' => "隗", 'first_char' => 'K'],
    -6731 => ['word' => "宓", 'first_char' => 'M'],
    -9299 => ['word' => "郗", 'first_char' => 'X'],
    -5905 => ['word' => "栾", 'first_char' => 'L'],
    -4393 => ['word' => "钭", 'first_char' => 'T'],
    -9300 => ['word' => "郜", 'first_char' => 'G'],
    -8706 => ['word' => "蔺", 'first_char' => 'L'],
    -3613 => ['word' => "胥", 'first_char' => 'X'],
    -8777 => ['word' => "莘", 'first_char' => 'S'],
    -6708 => ['word' => "逄", 'first_char' => 'P'],
    -9302 => ['word' => "郦", 'first_char' => 'L'],
    -5965 => ['word' => "璩", 'first_char' => 'Q'],
    -6745 => ['word' => "濮", 'first_char' => 'P'],
    -4888 => ['word' => "扈", 'first_char' => 'H'],
    -9309 => ['word' => "郏", 'first_char' => 'J'],
    -5428 => ['word' => "晏", 'first_char' => 'Y'],
    -2849 => ['word' => "暨", 'first_char' => 'J'],
    -7206 => ['word' => "阙", 'first_char' => 'Q'],
    -4945 => ['word' => "殳", 'first_char' => 'S'],
    -9753 => ['word' => "夔", 'first_char' => 'K'],
    -10041 => ['word' => "厍", 'first_char' => 'S'],
    -5429 => ['word' => "晁", 'first_char' => 'C'],
    -2396 => ['word' => "訾", 'first_char' => 'Z'],
    -7205 => ['word' => "阚", 'first_char' => 'K'],
    -10049 => ['word' => "乜", 'first_char' => 'N'],
    -10015 => ['word' => "蒯", 'first_char' => 'K'],
    -3133 => ['word' => "竺", 'first_char' => 'Z'],
    -6698 => ['word' => "逯", 'first_char' => 'L'],
    -9799 => ['word' => "俟", 'first_char' => 'Q'],
    -6749 => ['word' => "澹", 'first_char' => 'T'],
    -7220 => ['word' => "闾", 'first_char' => 'L'],
    -10047 => ['word' => "亓", 'first_char' => 'Q'],
    -10005 => ['word' => "仉", 'first_char' => 'Z'],
    -3417 => ['word' => "颛", 'first_char' => 'Z'],
    -6431 => ['word' => "驷", 'first_char' => 'S'],
    -7226 => ['word' => "闫", 'first_char' => 'Y'],
    -9293 => ['word' => "鄢", 'first_char' => 'Y'],
    -6205 => ['word' => "缑", 'first_char' => 'G'],
    -9764 => ['word' => "佘", 'first_char' => 'S'],
    -9818 => ['word' => "佴", 'first_char' => 'N'],
    -9509 => ['word' => "谯", 'first_char' => 'Q'],
    -3122 => ['word' => "笪", 'first_char' => 'D'],
    -9823 => ['word' => "佟", 'first_char' => 'T'],
  ];

  // Unknown code: nothing to report.
  if (!array_key_exists($asc, $table)) {
    return null;
  }

  $initial = $table[$asc]['first_char'];
  return $initial ? $initial : null;
}
// Demo: print the initial resolved for the surname "窦".
print getFirstCharters('窦');

本篇借鉴于学知无涯大大的博客,地址:https://www.cnblogs.com/gyfluck/p/8521259.html

然后就是最基础版本的方法

/**
 * Get the index letter for the first character of a string.
 *
 * @param string $str UTF-8 text; a string that is not valid UTF-8 is treated
 *                    as GB2312 bytes
 * @return string '' for empty input, the upper-cased letter when the string
 *                starts with an ASCII letter, the pinyin initial when it
 *                starts with a Chinese character inside the GB2312 ranges
 *                below, '#' otherwise
 */
function Getzimu($str)
{
    if (empty($str)) {
        return '';
    }
    $str = (string) $str;

    $fchar = ord($str[0]);

    // Test both letter ranges explicitly: 'A'..'z' as one range would also
    // accept the six punctuation characters between 'Z' and 'a'.
    if (($fchar >= 65 && $fchar <= 90) || ($fchar >= 97 && $fchar <= 122)) {
        return strtoupper($str[0]);
    }

    // Any other ASCII character can never fall into a pinyin section.
    if ($fchar < 0x80) {
        return '#';
    }

    if (preg_match('/^./us', $str, $m) === 1) {
        // Convert only the first character, so that an unconvertible
        // character later in the string cannot spoil the lookup.
        $gb = iconv('UTF-8', 'gb2312', $m[0]);
    } else {
        // Not valid UTF-8: assume the bytes are already GB2312.
        $gb = $str;
    }
    if ($gb === false || strlen($gb) < 2) {
        return '#';
    }

    $asc = ord($gb[0]) * 256 + ord($gb[1]) - 65536;

    // First code of each pinyin section; the sections are contiguous and the
    // last one (Z) ends at -10247.
    $sections = array(
        -20319 => 'A', -20283 => 'B', -19775 => 'C', -19218 => 'D',
        -18710 => 'E', -18526 => 'F', -18239 => 'G', -17922 => 'H',
        -17417 => 'J', -16474 => 'K', -16212 => 'L', -15640 => 'M',
        -15165 => 'N', -14922 => 'O', -14914 => 'P', -14630 => 'Q',
        -14149 => 'R', -14090 => 'S', -13318 => 'T', -12838 => 'W',
        -12556 => 'X', -11847 => 'Y', -11055 => 'Z',
    );
    if ($asc < -20319 || $asc > -10247) {
        return "#";
    }
    $letter = 'A';
    foreach ($sections as $lower => $initial) {
        if ($asc < $lower) {
            break;
        }
        $letter = $initial;
    }
    return $letter;
}

本篇借鉴于 PEIZIJUN 大大的博客,地址:https://www.jianshu.com/p/2ac2b9c369b3

最后是个人找到的最新版本,和上一种方法基本一致

/**
 * Get the pinyin initial of the first character of a string.
 *
 * @param string $str UTF-8 text; a string that is not valid UTF-8 is treated
 *                    as GB2312 bytes, so no line needs to be commented out
 *                    for GBK programs
 * @return string|false the upper-cased letter when the string starts with an
 *                      ASCII letter, the pinyin initial when it starts with a
 *                      Chinese character inside the GB2312 ranges below,
 *                      false otherwise
 */
function Getzimu($str)
{
    $str = (string) $str;
    if ($str === '') {
        return false;
    }

    $fchar = ord($str[0]);

    // ASCII letters are handled before the high-byte test; nested inside it
    // this check could never be reached.
    if (($fchar >= 65 && $fchar <= 90) || ($fchar >= 97 && $fchar <= 122)) {
        return strtoupper($str[0]);
    }

    // Only a byte in \x80-\xff can start a Chinese character.
    if ($fchar < 0x80) {
        return false;
    }

    if (preg_match('/^./us', $str, $m) === 1) {
        // Convert only the first character, so that an unconvertible
        // character later in the string cannot spoil the lookup.
        $gb = iconv("UTF-8", "gb2312", $m[0]);
    } else {
        // Not valid UTF-8: assume the bytes are already GB2312.
        $gb = $str;
    }
    if ($gb === false || strlen($gb) < 2) {
        return false;
    }

    $val = ord($gb[0]) * 256 + ord($gb[1]) - 65536;

    // First code of each pinyin section; the sections are contiguous and the
    // last one (Z) ends at -10247.
    $sections = array(
        -20319 => "A", -20283 => "B", -19775 => "C", -19218 => "D",
        -18710 => "E", -18526 => "F", -18239 => "G", -17922 => "H",
        -17417 => "J", -16474 => "K", -16212 => "L", -15640 => "M",
        -15165 => "N", -14922 => "O", -14914 => "P", -14630 => "Q",
        -14149 => "R", -14090 => "S", -13318 => "T", -12838 => "W",
        -12556 => "X", -11847 => "Y", -11055 => "Z",
    );
    if ($val < -20319 || $val > -10247) {
        return false;
    }
    $letter = "A";
    foreach ($sections as $lower => $initial) {
        if ($val < $lower) {
            break;
        }
        $letter = $initial;
    }
    return $letter;
}

以上就是本人找到的获取首字母的相关方法,希望对您有帮助

2020年07月01日

 

原文地址:https://www.cnblogs.com/YFYQ/p/13220526.html