本地代码搜索脚本(Python 实现)
本来用 find + grep 就可以搞定,不过如果要搜索多个路径、同时匹配多个规则,命令和正则就会变得很难写。
下面的 find 语句是在 py 文件中查找 redis 字符串。
不过如果要在多个指定位置查找,可能要拼接好几个管道;而且如果几个关键字之间是“与”的关系(交集),也就是多个关键字必须同时出现在同一个文件中,但不一定在同一行,这样的命令就更不好写了。
所以写了个python脚本,也方便在centos下运行
运行: python main.py --kw=asyncio,gather
find . -type f -name "*.py" | xargs grep "redis"
上面语句是在py文件中查找redis的字符。
不过如果要在多个指定位置查找,可能要拼接好几个管道;而且如果几个关键字之间是“与”的关系(交集),也就是多个关键字必须同时出现在同一个文件中,但不一定在同一行,这样的命令就更不好写了。
所以写了个python脚本,也方便在centos下运行
# -*- coding: utf-8 -*-
# @Time : 2021/4/14 1:46
# @File : search_string_in_folder.py
# @Author : Rocky C@www.30daydo.com
'''
Script for searching source code for keywords.
'''
import fire
import glob
import re
# TODO: rewrite this tool with PyQt
# Root directories to search. A raw string cannot end with a single
# backslash, so each entry ends with two literal backslashes.
PATH_LIST = [r'C:\git\\',r'C:\OtherGit\\',r'C:\OneDrive\viewed_code\\']
POST_FIX = 'py' # file extension to search for
# Keywords (not referenced by the code visible in this listing)
WORDS=[]
# Paths whose files should be skipped during the search.
EXCLUDE_PATH=[r'C:\OtherGit\cpython']
# When True, read/decoding errors are printed to stdout.
DEBUG = True
class FileSearcher:
    """Search files under the configured roots for a set of keywords.

    A file is reported only when every keyword matches at least one of its
    lines; the keywords may appear on different lines. Each keyword is
    applied as a case-insensitive regular expression.
    """

    def __init__(self, kw):
        """Store the search roots, the encodings to try and the keywords.

        Args:
            kw: A single keyword or a tuple/list of keywords (the value the
                command line ``--kw=asyncio,gather`` is expected to yield).
        """
        self.root_path_list = PATH_LIST
        self.default_coding = 'utf-8'
        # Encoding used for a second attempt when the default one fails.
        self.exception_handle_coding = 'gbk'
        self.kw = self._normalize_keywords(kw)

    @staticmethod
    def _normalize_keywords(kw):
        """Return *kw* as a list of stripped, non-empty keyword strings."""
        if not isinstance(kw, (tuple, list)):
            kw = (kw,)
        # str() keeps values that were parsed as numbers usable as keywords.
        stripped = (str(k).strip() for k in kw)
        # An empty pattern would match every line, so it is dropped.
        return [k for k in stripped if k]

    @staticmethod
    def _is_excluded(file, exclude_paths):
        """Return True when *file* contains one of *exclude_paths*.

        Backslashes are removed from both sides before the substring test so
        that doubled path separators do not prevent a match.
        """
        flat_file = file.replace('\\', '')
        return any(ex.replace('\\', '') in flat_file for ex in exclude_paths)

    def search(self, file, encoding):
        """Scan *file* and record which keywords occur and on which lines.

        Args:
            file: Path of the file to scan.
            encoding: Text encoding used to open the file.

        Returns:
            ``(True, match_dict, line_list)`` where ``match_dict`` maps each
            keyword to whether it was found and ``line_list`` holds the
            1-based numbers of the lines that matched at least one keyword.
            If the file cannot be opened or a read fails, the results
            gathered so far are returned. ``(None, None, None)`` is returned
            when the file cannot be decoded with *encoding*.

        Raises:
            re.error: If a keyword is not a valid regular expression.
        """
        # Compile once per file instead of once per line and keyword.
        patterns = [(w, re.compile(w, re.IGNORECASE)) for w in self.kw]
        match_dict = {w: False for w in self.kw}
        line_list = []
        try:
            with open(file, 'r', encoding=encoding) as fp:
                # enumerate() counts every physical line, so the numbers
                # agree with what an editor shows.
                for line_number, raw_line in enumerate(fp, start=1):
                    line = raw_line.strip()
                    if not line:
                        continue
                    hit = False
                    for w, pattern in patterns:
                        if pattern.search(line):
                            match_dict[w] = True
                            hit = True
                    # Record the line once even if several keywords match.
                    if hit:
                        line_list.append(line_number)
        except UnicodeDecodeError as e:
            if DEBUG:
                print(f'Error coding in file {file}')
                print(e)
            return None, None, None
        except OSError as e:
            # Best effort: report the problem and keep the partial result.
            if DEBUG:
                print(f'Error in file {file}')
                print(e)
        return True, match_dict, line_list

    def print_match_result(self, file, line_list, encoding):
        """Print the matching lines of *file*.

        Args:
            file: Path of the file to print from.
            line_list: 1-based line numbers, as returned by ``search``.
            encoding: Text encoding used to open the file.
        """
        wanted = set(line_list)
        try:
            with open(file, 'r', encoding=encoding) as fp:
                for line_number, raw_line in enumerate(fp, start=1):
                    if line_number not in wanted:
                        continue
                    line = raw_line.strip()
                    # Only the first 50 characters of the line are shown.
                    print(f'{file} :: {line_number} ====>\n {line[:50]}\n')
        except (OSError, UnicodeDecodeError) as e:
            if DEBUG:
                print(f'Error in file {file}')
                print(e)

    def run(self):
        """Search every root path and print the files containing all keywords."""
        for path in self.root_path_list:
            search_path = path + '**/*.' + POST_FIX
            for file in glob.iglob(search_path, recursive=True):
                # Skip the file itself; a `continue` inside a loop over the
                # exclude list would only advance that inner loop.
                if self._is_excluded(file, EXCLUDE_PATH):
                    continue
                use_encoding = self.default_coding
                encode_proper, match_dict, line_list = self.search(file, use_encoding)
                if not encode_proper:
                    # Decoding failed: retry once with the fallback encoding.
                    use_encoding = self.exception_handle_coding
                    encode_proper, match_dict, line_list = self.search(file, use_encoding)
                if match_dict and all(match_dict.values()):
                    self.print_match_result(file, line_list, use_encoding)
def test_error_file():
    """Debug helper: print one hard-coded file line by line, read as UTF-8."""
    target = r'C:\git\CodePool\example-code\19-dyn-attr-prop\oscon\schedule2.py'
    with open(target, 'r', encoding='utf8') as handle:
        # readline() returns '' at end of file, which is the sentinel that
        # stops the iteration.
        for text in iter(handle.readline, ''):
            print(text)
def main(kw):
    """Command-line entry point: search the configured paths for *kw*."""
    FileSearcher(kw).run()
if __name__ == '__main__':
    # Hand main() to python-fire so it is callable from the command line.
    fire.Fire(main)
运行: python main.py --kw=asyncio,gather