一、文本查询程序代码整理

1.1 textsearchprogram.h

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#ifndef TEXT_SEARCH_PROGRAM_H
#define TEXT_SEARCH_PROGRAM_H

#include "my_operation.h"
// Declaration of the word-indexing helper defined in programbegin.cpp.
// The parameter list matches that definition: the raw word (modified in place)
// and the line number the word was read from.
void cleanWord(string & word, int & linenum);
// Statistics for one word: how many times it was recorded and the set of line
// numbers it was recorded on.
class WordDate {
    friend class ProgramDate;   // ProgramDate reads and rewrites both fields directly

public:
    // Creates an entry with no occurrences.
    WordDate() {}

    // Creates an entry from an explicit occurrence count and line-number set.
    WordDate(int _count, set<int> num) : count(_count), linenums(num) {}

    // Records one more occurrence of the word on line `linenum`.
    void Update(const int & linenum) {
        linenums.emplace(linenum);
        ++count;
    }

    // Resets the entry to "no occurrences".
    void clear() {
        count = 0;
        linenums.clear();
    }

private:
    int count = 0;        // number of recorded occurrences
    set<int> linenums;    // distinct line numbers, kept in ascending order by std::set
};

// Functor that reads a file and writes its contents into the ProgramDate singleton.
class ProgramBegin {
public:
// Loads the file named `filename`. The definition in programbegin.cpp returns 0
// when the file was read and -1 when it could not be opened or read.
int operator()(const string & filename);
};

// Holds the loaded file and its word index. Singleton: only one file is held
// at a time, and loading a new file replaces the previous one.
class ProgramDate {
private:

    vector<string> _filecontent;            // file content, one entry per line
    // word -> occurrence count and line numbers.
    // Fully qualified: the shared header has no using-declaration for std::map.
    std::map<string, WordDate> _wordmap;
    stack <WordDate> args;                  // result stack used while evaluating a query

    ProgramDate() = default;                // private: instances come from getInstance()
    ProgramDate(const ProgramDate&) = delete;
    ProgramDate& operator=(const ProgramDate&) = delete;
    ~ProgramDate() = default;
    static ProgramDate * programdate;       // the single instance, created on first use

public:

    // Clears the file content, the word index and the result stack.
    void Init ();

    // Returns the single instance, creating it on first use.
    static ProgramDate * getInstance();

    // Reads one command from standard input and splits it.
    // args[0] receives the raw command, args[1..] the tokens in postfix order.
    void splitCommand(vector <string> & args);

    // Appends one line of file content.
    void VectorUpdate (const string & line);

    // Records one occurrence of `word` on line `linenum` in the index.
    void MapUpdate (const string & word, const int & linenum);

    // Pushes the index entry for `word` (or an empty entry) onto the result stack.
    void Search(const string & word);

    // Prints the single remaining result for the command `word` and clears the stack.
    void print (const string & word);

    // Intersects the two topmost results; `op` is the operator token itself.
    void operator & (const string & op);

    // Unites the two topmost results; `op` is the operator token itself.
    void operator | (const string & op);

    // Replaces the topmost result with the lines it does not contain.
    void operator ~ ();

    // NOTE: despite the name, the definition returns true when file content
    // IS loaded (it returns _filecontent.size() converted to bool).
    bool isempty();

};


#endif

1.2 my_operation.h

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#pragma once
#ifndef OPERATION_H
#define OPERATION_H
#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <set>
#include <sstream>
#include <stack>
#include <string>
#include <unordered_map>
#include <vector>
using std::cout;
using std::endl;
using std::string;
using std::vector;
using std::set;
using std::stack;
using std::cin;
using std::ifstream;
using std::istringstream;


// Wraps the precedence value of one operator character so that it can be
// compared against the precedence of another operator.
class CompareOperation {
public:
    // Leaves the precedence at 0.
    CompareOperation() {}

    // Looks up the precedence of `rc` in the shared table.
    // Note: operator[] default-inserts 0 for characters that are not in the table.
    CompareOperation(char rc) : num(mapoperation[rc]) {}

    // True when this operator's precedence is at least that of `rc`.
    bool operator >= (char rc);

private:
    // Shared table mapping an operator character to its precedence.
    static std::unordered_map<char, int> mapoperation;

    // Precedence of the operator this object was built from.
    int num = 0;
};

// Abstract base for one step of a query; concrete steps override execute().
class Operation {
public:
    // Virtual so that derived operations can be destroyed through a base pointer.
    virtual ~Operation() = default;

    // Runs the step. `word` is the token that triggered it.
    virtual void execute(const string & word) = 0;
};

// Step that looks a word up in the index; the definition in operation.cpp
// forwards to ProgramDate::Search.
class SearchOperation : public Operation {
public:
void execute(const string & word) override;
};

// Step for the '&' operator; the definition in operation.cpp forwards to
// ProgramDate::operator&.
class AndOperation : public Operation {
public:
void execute(const string & op) override;
};

// Step for the '|' operator; the definition in operation.cpp forwards to
// ProgramDate::operator|.
class OrOperation : public Operation {
public:
void execute(const string & op) override;
};

// Step for the '~' operator; the definition in operation.cpp forwards to
// ProgramDate::operator~ and does not use its argument.
class NotOperation : public Operation {
public:
void execute(const string & op) override;
};

// Factory class.
// Decides which operation to create for each token (the core of the factory pattern).
class Processing {
public:
// Executes the tokens held in `args` from the back towards the front and then
// prints the result; args[0] is expected to hold the raw command.
void operator()(vector<string> & args);
private:
// Returns a newly allocated operation chosen by `arg` ('&', '|', '~', anything
// else is a search); the caller takes ownership of the pointer.
Operation * createOperation(const char & arg);
};

#endif

1.3 main.cpp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#include "textsearchprogram.h"

// Entry point: prints the help banner and then runs the interactive command loop.
//
// Each iteration reads one command through ProgramDate::splitCommand, which
// fills args[0] with the raw command and args[1..] with the postfix tokens.
// The loop ends on the "$" command or when standard input is exhausted.
int main(int argc, char* argv[]) {
    ProgramDate * programdate = ProgramDate::getInstance();
    Processing process;

    cout << "=== 文本搜索程序 ===" << endl;
    cout << "支持命令:(单词查询显示次数,其余查询仅展示出现行的次数)" << endl;
    cout << " 1. @ <文件名> - 加载文本文件" << endl;
    cout << " 2. <单词> operator ...<单词> - 执行单词操作(不区分大小写)" << endl;
    cout << " 3. 特殊字符'&','|','~'仅支持单个查询,多个查询结果有误" << endl;
    cout << " 4. $ 表示退出" << endl;
    cout << "目前实现单词搜索、逻辑与、逻辑或、逻辑非及三者的任意组合(||双符号输入会看做寻找|)" << endl;
    cout << "===================" << endl;

    while (true) {
        vector <string> args;   // position 0 holds the whole command
        programdate->splitCommand(args);

        // Every args[1] access below is guarded by a size check: a command such
        // as ")" or a failed read produces no token at all.
        if (args.size() >= 2 && args[1] == "$") {
            cout << "欢迎下次光临" << endl;
            break;
        }

        // Stop instead of spinning forever once standard input is closed.
        if (!cin) {
            break;
        }

        if (args.size() < 2) {
            cout << "错误指令" << endl;
            continue;
        }

        // isempty() returns true when a file IS loaded (see its definition), so
        // this branch handles "nothing loaded yet" as well as an explicit "@".
        if (!programdate->isempty() || args[1] == "@") {
            ProgramBegin programbegin;
            if (args.size() == 3) {
                programbegin(args[2]);
            } else {
                cout << "请先打开文件" << endl;
            }
            continue;
        }

        cout << "后缀命令:";
        for (std::size_t i = 1; i < args.size(); ++i) {
            cout << args[i] << " ";
        }
        cout << endl;

        // Processing consumes tokens from the back of the vector, so the postfix
        // tokens are reversed to make the first one come out first.
        std::reverse(std::next(args.begin()), args.end());
        process(args);
    }
    return 0;
}

1.4 operation.cpp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#include "textsearchprogram.h"

// 搜索操作实现
void SearchOperation::execute(const string & word) {

// 获取单例实例并使用
ProgramDate::getInstance()->Search(word);
}

void AndOperation::execute(const string & op){

// 获取单例实例并使用
ProgramDate::getInstance()->operator&(op);
}

void OrOperation::execute(const string & op){

// 获取单例实例并使用
ProgramDate::getInstance()->operator|(op);
}

void NotOperation::execute(const string & op){

// 获取单例实例并使用
ProgramDate::getInstance()->operator~();
}

// Factory helpers.
// Returns true when `op` is one of the logical operator characters '~', '|' or '&'.
bool IsNeedOp (char & op){
    return op == '~' || op == '|' || op == '&';
}
// Executes a prepared command and prints its result.
//
// `args[0]` holds the raw command and `args[1..]` the tokens to execute. Tokens
// are taken from the back of the vector and removed as they are processed, so
// on return only `args[0]` is left. A vector with fewer than two elements
// carries no token: it is reported as an error instead of being executed
// (the previous do-while loop popped the only element and then read args[0]
// from an empty vector).
void Processing::operator()(vector<string>& args) {
    if (args.size() < 2) {
        std::cerr << "操作有误" << endl;
        return;
    }

    while (args.size() > 1) {
        const string & token = args.back();
        // The first character selects the operation; for an empty token this is '\0'.
        const char op = token[0];
        std::unique_ptr <Operation> operation (createOperation(op));
        if (operation) {
            operation->execute(token);
        } else {
            std::cerr << "操作有误" << endl;
        }
        args.pop_back();
    }
    ProgramDate::getInstance()->print(args[0]);
}

// Accepted command shapes:
//   wordA & wordB
//   wordA | wordB
//   ~ wordA
//   wordA
// Chooses the operation for the token whose first character is `arg`.
// The returned object is allocated with new; the caller owns it.
Operation * Processing::createOperation(const char & arg) {
    if (arg == '&') {
        return new AndOperation();
    }
    if (arg == '|') {
        return new OrOperation();
    }
    if (arg == '~') {
        return new NotOperation();
    }
    // Anything else is treated as a word to search for.
    return new SearchOperation();
}

1.5 programbegin.cpp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#include "textsearchprogram.h"

// Lower-cases the ASCII letters 'A'..'Z' of `word` in place; every other
// character is left untouched.
void toLower(string & word) {
    std::transform(word.begin(), word.end(), word.begin(), [](char ch) {
        const bool isUpper = (ch >= 'A' && ch <= 'Z');
        return isUpper ? static_cast<char>(std::tolower(ch)) : ch;
    });
}
// Splits one whitespace-delimited chunk into index entries.
//
// `word` is lower-cased in place and then cut at every non-alphanumeric
// character: each run of alphanumeric characters is recorded as one word and
// each non-alphanumeric character is recorded as a one-character entry of its
// own, all under line `linenum`. For example "hello,world" records "hello",
// "," and "world".
//
// Fixes over the previous version: a punctuation character is no longer
// prepended to the token that follows it, and the final flush records the
// accumulated token rather than the whole original chunk.
void cleanWord(string & word, int & linenum) {
    if (word.empty()) return;
    toLower(word);

    ProgramDate * programdate = ProgramDate::getInstance();
    string token;   // alphanumeric run collected so far
    for (const char ch : word) {
        // The cast keeps std::isalnum defined for bytes >= 0x80 (e.g. UTF-8 text).
        if (std::isalnum(static_cast<unsigned char>(ch))) {
            token += ch;
            continue;
        }
        if (!token.empty()) {
            programdate->MapUpdate(token, linenum);
            token.clear();
        }
        programdate->MapUpdate(string(1, ch), linenum);
    }
    if (!token.empty()) {
        programdate->MapUpdate(token, linenum);
    }
}

// Loads `filename` into the ProgramDate singleton.
//
// Every line is stored verbatim and each whitespace-delimited chunk of it is
// indexed through cleanWord under its 0-based line number.
//
// Returns 0 when the file was read to the end, -1 when it cannot be opened or
// when reading still fails after the retries.
//
// Changes over the previous version:
//  - the existing data is only discarded once the new file has been opened, so
//    a mistyped file name no longer wipes the loaded file;
//  - the retry counter is local, so failures of earlier calls no longer count
//    against later ones, and a failed retry is reported to the caller instead
//    of falling through to the success path;
//  - the open-failure message is spelled correctly.
int ProgramBegin::operator()(const string & filename) {
    const int kMaxRetries = 3;   // re-reads allowed after the first attempt
    ProgramDate * programdate = ProgramDate::getInstance();

    for (int attempt = 0; ; ++attempt) {
        ifstream file (filename);
        if (!file.good()) {
            std::cerr << "Failed to open " << filename << endl;
            return -1;
        }

        programdate->Init();

        string line;
        int linenum = 0;
        for (; getline(file, line); ++linenum) {
            programdate->VectorUpdate(line);
            istringstream iss(line);

            string word;
            while (iss >> word) {
                cleanWord(word, linenum);
            }
        }

        // getline stops either at end of file (success) or on a read error.
        if (file.eof()) {
            cout << filename << "一共" << linenum << "行" << endl;
            return 0;
        }

        if (attempt >= kMaxRetries) {
            std::cerr << "Failed to try " << attempt + 1 << " to read " << filename << endl;
            programdate->Init();   // do not keep a partially built index
            return -1;
        }
        // The ifstream closes itself at the end of the iteration; try again.
    }
}

1.6 programdate.cpp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
#include "textsearchprogram.h"

// Compares precedences: true when this operator binds at least as tightly as `rc`.
// The lookup uses operator[], so an unknown `rc` is inserted with precedence 0.
bool CompareOperation::operator >= (char rc) {
    const int otherPriority = mapoperation[rc];
    return !(num < otherPriority);
}

// 优先级规则:~ > & > |(数字越大优先级越高)
map<char, int> CompareOperation::mapoperation = {
{'~', 3},
{'&', 2},
{'|', 1},
{'(', 0}, // 左括号优先级最低,仅用于控制范围
{')', 0} // 右括号优先级最低
};

void InsertVector (vector <string> & args, vector <char> & operation, char & c){
// 左括号直接入栈
if (c == '(') {
operation.push_back(c);
return;
}

// 右括号:弹出到左括号(不含括号)
if (c == ')') {
while (!operation.empty() && operation.back() != '(') {
args.emplace_back(1, operation.back());
operation.pop_back();
}
if (!operation.empty()) operation.pop_back(); // 弹出左括号
return;
}

// 处理普通运算符:按优先级入栈
CompareOperation cop(c);
while (!operation.empty() && operation.back() != '(' && !(cop >= operation.back())) {
args.emplace_back(1, operation.back());
operation.pop_back();
}
operation.push_back(c);
}

// Reads one command line and splits it, storing the tokens in postfix order.
//
// On return args[0] is the raw command line and args[1..] are the tokens:
// words in input order, with the operators '&', '|', '~' placed after their
// operands according to precedence and parentheses. '$' and '@' are emitted as
// one-character tokens where they occur.
//
// The whole line is read (the previous `cin >> input` stopped at the first
// blank, so "@ file" or "a & b" could never arrive as one command). Blank
// lines are skipped with a fresh prompt. When standard input is exhausted the
// quit command "$" is returned so that the caller's loop terminates.
void ProgramDate::splitCommand(vector <string> & args) {

    args.clear();
    string input;
    do {
        cout << "请输入命令:";
        if (!std::getline(cin, input)) {
            args.emplace_back("$");   // raw command
            args.emplace_back("$");   // its single token
            return;
        }
    } while (input.find_first_not_of(" \t\r") == string::npos);

    args.emplace_back(input);
    vector <char> operation;   // pending operators
    string currentToken;       // word being accumulated

    for (char c : input) {
        switch (c) {
        case '$':
        case '@':
            args.emplace_back(1, c);
            break;
        case '(':
        case ')':
        case '&':
        case '|':
        case '~':
            // An operator ends the current word.
            if (!currentToken.empty()) {
                args.emplace_back(currentToken);
                currentToken.clear();
            }
            InsertVector(args, operation, c);
            break;
        case ' ':
        case '\t':
        case '\r':
            // Whitespace ends the current word; runs of whitespace add nothing.
            if (!currentToken.empty()) {
                args.emplace_back(currentToken);
                currentToken.clear();
            }
            break;
        default:
            currentToken += c;
            break;
        }
    }

    // Flush the last word when the line does not end in whitespace.
    if (!currentToken.empty()) {
        args.push_back(currentToken);
    }
    // Flush the operators that are still pending.
    while (!operation.empty()) {
        args.emplace_back(1, operation.back());
        operation.pop_back();
    }
}


// Storage for the singleton pointer; stays null until getInstance() creates the object.
ProgramDate * ProgramDate::programdate = nullptr;

// Empties the result stack, the word index and the stored file lines.
void ProgramDate::Init (){
    stack<WordDate>().swap(args);
    _wordmap.clear();
    _filecontent.clear();
}

// Returns the single ProgramDate instance, creating it on first use.
//
// The exit handler that destroys the instance is registered only when the
// instance is created. Registering it on every call, as before, adds one
// handler per call, and the standard only guarantees room for 32 of them.
// No synchronization is performed here.
ProgramDate * ProgramDate::getInstance() {
    if (programdate == nullptr) {
        programdate = new ProgramDate();
        std::atexit([](){
            delete programdate;   // deleting a null pointer is a no-op
            programdate = nullptr;
        });
    }
    return programdate;
}

// Looks `word` up in the index and pushes the result onto the result stack;
// an empty result is pushed when the word is not indexed.
//
// Index keys are stored with 'A'..'Z' lower-cased, so the query is folded the
// same way before the lookup; otherwise a query containing capitals could
// never match. The progress message still shows the word as typed.
void ProgramDate::Search(const string & word){
    cout << "放置结果" << word << endl;

    string key(word);
    for (char & ch : key) {
        if (ch >= 'A' && ch <= 'Z') {
            ch = static_cast<char>(std::tolower(ch));
        }
    }

    // A single find() replaces the earlier count() followed by operator[].
    const auto found = _wordmap.find(key);
    if (found != _wordmap.end()) {
        args.push(found->second);
    } else {
        args.push(WordDate());
    }
}

// 打印
void ProgramDate::print(const string &word) {
if(args.size() > 1){
cout << args.size() << endl;
std::cerr << "命令错误" << endl;
} else {
cout << "Executing Query for: " << word << endl;
cout << "occurs " << args.top().count << " times" << endl;
for (auto & linenum : args.top().linenums) {
cout << "(line" << linenum + 1 << ") " << _filecontent[linenum] << endl;
}
}
args = stack <WordDate> ();
}

// Stores `line` as the next line of the loaded file.
void ProgramDate::VectorUpdate (const string & line){
    _filecontent.push_back(line);
}

void ProgramDate::MapUpdate (const string & word, const int & linenum) {
programdate->_wordmap[word].Update(linenum);
}

void ProgramDate::operator & (const string & op){
cout << "逻辑" << op << endl;
if(args.size() < 2){
//std::cerr << "命令错误" << endl;
//args = stack <WordDate> ();
this->Search(op);
return;
}
auto temporaryaid = args.top();
args.pop();
auto lbegin = args.top().linenums.begin();
auto rbegin = temporaryaid.linenums.begin();
while (lbegin != args.top().linenums.end() && rbegin != temporaryaid.linenums.end()) {
if(*lbegin < *rbegin){
lbegin = args.top().linenums.erase(lbegin);
} else if (*lbegin > *rbegin) {
++ rbegin;
} else {
++ lbegin;
++ rbegin;
}
}
if(lbegin != args.top().linenums.end()){
args.top().linenums.erase(lbegin, args.top().linenums.end());
}
args.top().count = args.top().linenums.size();
}

// Logical OR: replaces the two topmost results with the lines present in either.
// With fewer than two results on the stack the token `op` is searched for as
// an ordinary word instead.
//
// The count of a combined result is always its number of lines, as it is for
// operator&. Previously it kept the left operand's occurrence count whenever
// both operands covered the same lines.
void ProgramDate::operator | (const string & op){
    cout << "逻辑" << op << endl;
    if (args.size() < 2) {
        this->Search(op);
        return;
    }

    const WordDate rhs = args.top();
    args.pop();
    WordDate & lhs = args.top();

    lhs.linenums.insert(rhs.linenums.begin(), rhs.linenums.end());
    lhs.count = static_cast<int>(lhs.linenums.size());
}

// Logical NOT: replaces the topmost result with the lines it does not contain.
// With an empty stack the character "~" is searched for as an ordinary word instead.
void ProgramDate::operator ~ (){
    cout << "逻辑~" << endl;
    if (args.empty()) {
        this->Search("~");
        return;
    }

    WordDate & current = args.top();
    set<int> complement;
    // When the result has as many lines as the file, the complement stays empty.
    if (current.linenums.size() != _filecontent.size()) {
        const int total = (int)_filecontent.size();
        for (int line = 0; line < total; ++line) {
            if (current.linenums.count(line) == 0) {
                complement.insert(line);
            }
        }
    }
    current.linenums.swap(complement);
    current.count = current.linenums.size();
}

// NOTE: the name is inverted relative to the result. This returns true when at
// least one line of file content is stored and false when nothing is loaded;
// main() relies on exactly that (it tests `isempty() == 0` for "no file yet").
bool ProgramDate::isempty(){
    return !_filecontent.empty();
}

1.7 CMakeLists.txt

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
cmake_minimum_required(VERSION 3.16)

# Project name (change it to any custom name; avoid non-ASCII or special characters)
set(PROJECT_NAME "project")
project(${PROJECT_NAME} LANGUAGES CXX)

# C++ standard to build with (adjust as needed: 11/14/17/20)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Output directories: keep all build products under the build tree, out of the sources
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) # executables -> <build>/bin
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) # shared libraries -> <build>/lib
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) # static libraries -> <build>/lib

# -------------------------- Automatic file discovery --------------------------
# Recursively collect all C++ source files (.cpp and .cc)
file(GLOB_RECURSE SOURCE_FILES
${PROJECT_SOURCE_DIR}/*.cpp
${PROJECT_SOURCE_DIR}/*.cc
)
# Drop non-source directories (important: keeps CMake's own temporary files out)
list(FILTER SOURCE_FILES EXCLUDE REGEX ".*/CMakeFiles/.*")
list(FILTER SOURCE_FILES EXCLUDE REGEX ".*/build/.*")
list(FILTER SOURCE_FILES EXCLUDE REGEX ".*/.git/.*")

# Recursively collect all header files (.h and .hpp)
file(GLOB_RECURSE HEADER_FILES
${PROJECT_SOURCE_DIR}/*.h
${PROJECT_SOURCE_DIR}/*.hpp
)
# Drop non-source directories
list(FILTER HEADER_FILES EXCLUDE REGEX ".*/CMakeFiles/.*")
list(FILTER HEADER_FILES EXCLUDE REGEX ".*/build/.*")
list(FILTER HEADER_FILES EXCLUDE REGEX ".*/.git/.*")

# Add every directory that contains a header to the include path
# (so include_directories does not have to be maintained by hand)
foreach(HEADER ${HEADER_FILES})
get_filename_component(HEADER_DIR ${HEADER} DIRECTORY)
list(APPEND INCLUDE_DIRS ${HEADER_DIR})
endforeach()
list(REMOVE_DUPLICATES INCLUDE_DIRS)
include_directories(${INCLUDE_DIRS})

# -------------------------- Build configuration --------------------------
if(SOURCE_FILES)
# Build the executable (target name = project name)
add_executable(${PROJECT_NAME} ${SOURCE_FILES} ${HEADER_FILES})

# Compiler warnings: silence unused parameters, keep the important checks
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
target_compile_options(${PROJECT_NAME} PRIVATE
-Wall
-Wextra
-Wpedantic
-Werror=return-type
-Wno-unused-parameter # avoids the warning for main's unused argc/argv
)
endif()

# -------------------------- Library linking --------------------------
# Example 1: link a system shared library (pthread)
# target_link_libraries(${PROJECT_NAME} PRIVATE pthread)
# Example 2: link a custom shared library
# target_link_libraries(${PROJECT_NAME} PRIVATE /path/to/your/lib.so)
# Example 3: link a static library
# target_link_libraries(${PROJECT_NAME} PRIVATE /path/to/your/lib.a)
# -------------------------------------------------------------------

else()
# Warn when the scan found no .cpp or .cc file at all
message(WARNING "⚠️ 未找到任何.cpp或.cc源文件,请检查项目目录")
endif()

# Print the scan results (helps when diagnosing configuration problems).
# The source-file count is taken from the SOURCE_FILES list itself; CMAKE_ARGC
# is the argument count of a `cmake -P` script run and is unrelated to it.
list(LENGTH SOURCE_FILES SOURCE_FILE_COUNT)
message(STATUS "📁 项目目录: ${PROJECT_SOURCE_DIR}")
message(STATUS "🔍 找到源文件数量: ${SOURCE_FILE_COUNT}")
message(STATUS "🔍 找到头文件目录: ${INCLUDE_DIRS}")