Back to Info Hunter. The last time I worked on debugging the program, I originally had two issues:
The scraping was taking too long.
The information text was getting moved to the top of the window because the layout() method was not being called for some reason.
I also found a problem where the application was not being stopped properly.
After running some tests, it looks like the issues have been fixed but I found another issue which I will explain later.
Where the problems are
The problematic part of the code is located in the StartScraping method. Because multiple threads use it, problems might appear. These are the changes I made:
I added conditional statements so that each thread will skip the operation if an issue with the connection appears or it has been canceled.
I removed the threads.clear() call from the locked code and moved it to the end of the method, inside a conditional statement, so that it is executed by the last thread only.
I added a new static bool variable in the Scraper object named hasDisconnected to control the flow of the operation in a similar way to cancelation.
I refactored the code to remove unneeded and duplicated code.
How the method looks now
// Start Scraping function
void MainFrame::StartScraping(int amount, int counter, std::vector<std::string> keywords,
std::vector<std::string> getUrls)
{
m.lock();
if (Scraper::isCanceled || Scraper::hasDisconnected)
{
// if (!threads.empty())
// {
// threads.clear();
// }
if (scrapingInfoText != nullptr) {
scrapingInfoText->Destroy();
scrapingInfoText = nullptr;
}
content->SetFont(wxFontInfo(32).FaceName("Helvetica Neue").Bold());
scrapingInfoText = new wxStaticText(MainFrame::content, wxID_ANY,
"Please wait while stopping.",
wxDefaultPosition, wxDefaultSize);
scrapingInfoSizer = new wxBoxSizer(wxVERTICAL);
scrapingInfoSizer->Add(scrapingInfoText, 0, wxCENTER);
runContentHolder->Add(scrapingInfoSizer, 1, wxEXPAND);
content->SetSizer(runContentHolder);
content->Layout();
operationCounter++;
}
else
{
std::vector<std::string> scraperKeywords;
scraperKeywords.reserve(amount);
for (int j = 0; j < amount; j++) {
scraperKeywords.push_back(keywords[j]);
}
if (scrapingInfoText != nullptr) {
scrapingInfoText->Destroy();
scrapingInfoText = nullptr;
}
content->SetFont(wxFontInfo(32).FaceName("Helvetica Neue").Bold());
Scraper::SetupScraper(scraperKeywords, getUrls[counter]);
scrapingInfoText = new wxStaticText(MainFrame::content, wxID_ANY,
std::string("Currently checking: ") +
std::string(getUrls[counter]),
wxDefaultPosition, wxDefaultSize);
scrapingInfoSizer = new wxBoxSizer(wxVERTICAL);
scrapingInfoSizer->Add(scrapingInfoText, 0, wxCENTER);
runContentHolder->Add(scrapingInfoSizer, 1, wxEXPAND);
content->SetSizer(runContentHolder);
content->Layout();
cpr::Response r = Scraper::request_info(Scraper::baseURL);
std::string convertToLowerCase = boost::locale::to_lower(r.text);
std::vector<std::string> urls = Scraper::ParseContent(convertToLowerCase);
// Iterate through the urls
for (const std::string &item: urls) {
if (!Scraper::CheckForConnection()) {
wxMessageBox("You have been disconnected from the internet", "",
wxOK);
if (scrapingInfoText != nullptr) {
scrapingInfoText->Destroy();
scrapingInfoText = nullptr;
}
Scraper::hasDisconnected = true;
scrapingState = SST_Waiting;
break;
}
if (Scraper::isCanceled) {
break;
}
AnalyzePages::analyzeEntry(item, scraperKeywords, scraper);
}
// if (scrapingInfoText != nullptr) {
// scrapingInfoText->Destroy();
// scrapingInfoText = nullptr;
// }
operationCounter++;
}
m.unlock();
if (Scraper::isCanceled && operationCounter >= operationSize ||
Scraper::hasDisconnected && operationCounter >= operationSize)
{
operationCounter = 0;
operationSize = 0;
if (scrapingInfoText != nullptr) {
scrapingInfoText->Destroy();
scrapingInfoText = nullptr;
}
Scraper::hasDisconnected = false;
Scraper::isCanceled = false;
AnalyzePages::hasStarted = false;
scrapingState = SST_Waiting;
wxMessageBox("Operation has been canceled.", "",wxOK);
return;
}
if (operationCounter >= operationSize)
{
if (!threads.empty())
{
threads.clear();
}
if (scrapingInfoText != nullptr) {
scrapingInfoText->Destroy();
scrapingInfoText = nullptr;
}
operationCounter = 0;
operationSize = 0;
scrapingState = SST_Waiting;
AnalyzePages::hasStarted = false;
wxMessageBox("Operation has been completed.", "", wxOK);
return;
}
if (!threads.empty())
{
threads.clear();
}
}
It keeps getting better and better which is good.
A new bug
In today's test, I noticed that the same keywords were being searched on every website. This is a critical bug, and I need to focus on fixing it tomorrow.