psql: FATAL: index "pg_opclass_oid_index" contains unexpected zero page at block 0 以及错误 rm: cannot remove ‘base/90112/95992_fsm’: Structure needs cleaning

测试环境pg10.5 两节点集群

之前由于主库报错,先把主从切换了,之后再回过头来解决这个问题。

psql: FATAL:  index "pg_opclass_oid_index" contains unexpected zero page at block 0

==备库
[root@pg01 keepalived]# su - postgres
Last login: Fri May  7 17:09:46 CST 2021 on pts/0
-bash-4.2$ psql
psql: FATAL:  index "pg_opclass_oid_index" contains unexpected zero page at block 0
HINT:  Please REINDEX it.
==主库
postgres=# select client_addr,sync_state from pg_stat_replication;
 client_addr | sync_state 
-------------+------------
 10.15.9.13  | async
(1 row)

根据之前的经验 reindex index ;

在主库执行 reindex index pg_opclass_oid_index; 从库还是无法登录。

由于这是系统表的索引,无法通过 drop、create 的方式重新创建,只能重新通过 pg_basebackup 同步

1 停止备库、备份配置文件、删除数据目录

-bash-4.2$ pg_ctl stop  -D /home/pgdata/
-bash-4.2$ cp pg_hba.conf postgresql.conf recovery.conf /opt/pgcnf/
-bash-4.2$ cd /home/pgdata
-bash-4.2$ rm -rf *
rm: cannot remove ‘base/90112/95992_fsm’: Structure needs cleaning ##报错,这个文件夹删不掉
rm: cannot remove ‘base/98794’: Structure needs cleaning
-bash-4.2$ ll
total 0
drwx------ 4 postgres postgres 32 May 10 14:47 base
[root@pg01 pgcnf]# df -i
Filesystem                Inodes IUsed    IFree IUse% Mounted on
/dev/mapper/centos-root 26214400 44141 26170259    1% /
devtmpfs                  998571   361   998210    1% /dev
tmpfs                    1001321     1  1001320    1% /dev/shm
tmpfs                    1001321   467  1000854    1% /run
tmpfs                    1001321    16  1001305    1% /sys/fs/cgroup
/dev/sda1                 524288   328   523960    1% /boot
/dev/mapper/centos-home 73986048  2941 73983107    1% /home
tmpfs                    1001321     1  1001320    1% /run/user/0
[root@pg01 pgcnf]# lsblk
NAME            MAJ:MIN RM   SIZE RO TYPE MOUNTPOINT
sda               8:0    0   200G  0 disk 
├─sda1            8:1    0     1G  0 part /boot
└─sda2            8:2    0   199G  0 part 
  ├─centos-root 253:0    0    50G  0 lvm  /
  ├─centos-swap 253:1    0   7.9G  0 lvm  [SWAP]
  └─centos-home 253:2    0 141.1G  0 lvm  /home
sr0              11:0    1  1024M  0 rom  
[root@pg01 pgcnf]# mount -l
/dev/sda1 on /boot type xfs (rw,relatime,attr2,inode64,noquota)
/dev/mapper/centos-home on /home type xfs (rw,relatime,attr2,inode64,noquota)
-bash-4.2$ cd base/
-bash-4.2$ ll
ls: cannot access 98794: Structure needs cleaning
total 32
drwx------ 2 postgres postgres 16384 Sep 28  2020 90112
d????????? ? ?        ?            ?            ? 98794
[root@pg01 pgcnf]# df -Th
Filesystem              Type      Size  Used Avail Use% Mounted on
/dev/mapper/centos-root xfs        50G  3.0G   47G   6% /
devtmpfs                devtmpfs  3.9G     0  3.9G   0% /dev
tmpfs                   tmpfs     3.9G     0  3.9G   0% /dev/shm
tmpfs                   tmpfs     3.9G  401M  3.5G  11% /run
tmpfs                   tmpfs     3.9G     0  3.9G   0% /sys/fs/cgroup
/dev/sda1               xfs      1014M  143M  872M  15% /boot
/dev/mapper/centos-home xfs       142G  5.9G  136G   5% /home
tmpfs                   tmpfs     783M     0  783M   0% /run/user/0
[root@pg01 pgcnf]# umount /dev/mapper/centos-home 
umount: /home: target is busy.
        (In some cases useful info about processes that use
         the device is found by lsof(8) or fuser(1))
[root@pg01 pgcnf]# lsof |grep /dev/mapper/centos-home 
-bash: lsof: command not found
[root@pg01 yum.repos.d]# yum-config-manager --add-repo=http://10.15.10.199/repo
[root@pg01 yum.repos.d]# yum install lsof -y
[root@pg01 yum.repos.d]# lsof |grep /dev/mapper/centos-home
[root@pg01 yum.repos.d]# lsof |grep /home
bash      32197      postgres  cwd       DIR              253,2        32  134266468 /home/pgdata/base
[root@pg01 yum.repos.d]# ps -ef|grep 32197
root      1705 32361  0 15:09 pts/0    00:00:00 grep --color=auto 32197
postgres 32197 32196  0 14:45 pts/1    00:00:00 -bash
[root@pg01 yum.repos.d]# kill -9 32197
[root@pg01 yum.repos.d]# lsof |grep /home
##操作前备份 /home的所有重要文件
[root@pg01 yum.repos.d]# umount /home
[root@pg01 yum.repos.d]# xfs_repair /dev/mapper/centos-home #如果是ext4:fsck.ext4 /dev/mapper/centos-home
disconnected inode 275099398, moving to lost+found
disconnected inode 275099400, moving to lost+found
Phase 7 - verify and correct link counts...
resetting inode 134266468 nlinks from 4 to 3
done
[root@pg01 yum.repos.d]# reboot
重启后,由于配置了自动挂载,/home 在开机时自动挂载成功
[root@pg01 ~]# cat /etc/fstab
/dev/mapper/centos-home /home                   xfs     defaults        0 0
/dev/mapper/centos-swap swap                    swap    defaults        0 0
[root@pg01 ~]# cd /home/pgdata
[root@pg01 pgdata]# ll
total 0
drwx------ 3 postgres postgres 19 May 10 14:47 base
[root@pg01 pgdata]# rm -rf base/ #删除该目录

2 同步主从、恢复配置文件

-bash-4.2$ pg_basebackup -D /home/pgdata -F p -X stream  -R -v -P -h 10.15.9.150 -p 5432 -U repuser
Password: 
pg_basebackup: initiating base backup, waiting for checkpoint to complete
pg_basebackup: checkpoint completed
pg_basebackup: write-ahead log start point: 4/C000028 on timeline 2
pg_basebackup: starting background WAL receiver
4657982/4657982 kB (100%), 1/1 tablespace                                         
pg_basebackup: write-ahead log end point: 4/C000130
pg_basebackup: waiting for background process to finish streaming ...
pg_basebackup: base backup completed

-bash-4.2$ vim pg_hba.conf 
-bash-4.2$ vim recovery.conf
recovery_target_timeline = 'latest'
standby_mode = 'on'
-bash-4.2$ chmod 700 /home/pgdata
-bash-4.2$ pg_ctl -D /home/pgdata  start
waiting for server to start....2021-05-10 15:22:03.181 CST [1275] LOG:  listening on IPv4 address "0.0.0.0", port 5432
2021-05-10 15:22:03.181 CST [1275] LOG:  listening on IPv6 address "::", port 5432
2021-05-10 15:22:03.185 CST [1275] LOG:  listening on Unix socket "/var/run/postgresql/.s.PGSQL.5432"
2021-05-10 15:22:03.194 CST [1275] LOG:  listening on Unix socket "/tmp/.s.PGSQL.5432"
2021-05-10 15:22:03.319 CST [1275] LOG:  redirecting log output to logging collector process
2021-05-10 15:22:03.319 CST [1275] HINT:  Future log output will appear in directory "log".
 done
server started

3 主库查看同步状态正常

postgres=# select client_addr,sync_state from pg_stat_replication;
 client_addr | sync_state 
-------------+------------
 10.15.9.13  | async
(1 row)
从库正常登录
-bash-4.2$ psql
psql (10.11)
Type "help" for help.
postgres=# \l
                                  List of databases
    Name    |  Owner   | Encoding |   Collate   |    Ctype    |   Access privileges   
------------+----------+----------+-------------+-------------+-----------------------
 postgres   | postgres | UTF8     | en_US.UTF-8 | en_US.UTF-8 | 
 template0  | postgres | UTF8     | en_US.UTF-8 | en_US.UTF-8 | =c/postgres          +
            |          |          |             |             | postgres=CTc/postgres
 template1  | postgres | UTF8     | en_US.UTF-8 | en_US.UTF-8 | =c/postgres          +
            |          |          |             |             | postgres=CTc/postgres
 test1      | yhq      | UTF8     | en_US.UTF-8 | en_US.UTF-8 | =Tc/yhq              +
            |          |          |             |             | yhq=CTc/yhq
 test2      | postgres | UTF8     | en_US.UTF-8 | en_US.UTF-8 | =Tc/postgres         +
            |          |          |             |             | postgres=CTc/postgres+
            |          |          |             |             | yhq=CTc/postgres
(5 rows)

postgres=# \c test1
You are now connected to database "test1" as user "postgres".
                      ^
test1=# \d
          List of relations
 Schema |   Name   | Type  |  Owner   
--------+----------+-------+----------
 public | user_tbl | table | postgres
(1 row)

test1=# select * from user_tbl;
  name   | signup_date 
---------+-------------
 yhqtest | 2019-07-19
(1 row)
原文地址:https://www.cnblogs.com/yhq1314/p/14792180.html