#include "widget.h"
#include "ui_widget.h"
/**
 * Constructs the scraper window.
 *
 * Instantiates the Designer-generated UI, connects the two push buttons to
 * their handlers, sets the window title and resets the scraper state.
 *
 * @param parent Parent widget forwarded to QWidget.
 */
Widget::Widget(QWidget *parent)
    : QWidget(parent)
    , ui(new Ui::Widget)
{
    ui->setupUi(this);

    // Qt::UniqueConnection asks Qt not to create the same connection twice.
    // NOTE(review): slots named on_<object>_<signal> are normally also picked
    // up by QMetaObject::connectSlotsByName() inside setupUi() - confirm these
    // explicit connections are still required.
    connect(ui->id_getWebCode_pushButton, SIGNAL(clicked(bool)),
            this, SLOT(on_id_getWebCode_pushButton_clicked()),
            Qt::UniqueConnection);
    connect(ui->id_divisionFile_pushButton, SIGNAL(clicked(bool)),
            this, SLOT(on_id_divisionFile_pushButton_clicked()),
            Qt::UniqueConnection);

    setWindowTitle(tr("CSDN文章爬虫"));

    // Provisional initial state: no entries collected, no page downloaded.
    webNum = 0;
    hasSourceFile = false;
}
/**
 * Destroys the widget and frees the Designer-generated UI object that the
 * constructor allocated with new.
 */
Widget::~Widget()
{
    delete ui;
}
void Widget::saveURLandTilte()
{
QFile file("URL&Title.csv");
file.open(QIODevice::WriteOnly);
QTextStream out(&file);
for(int i=0;i<webNum;i++){
out<<webURLAndTitle[i][0]+";"+webURLAndTitle[i][1]<<endl;
}
}
void Widget::on_id_getWebCode_pushButton_clicked()
{
const QString URLSTR=ui->id_url_lineEdit->text();
const QString FILE_NAME=ui->id_SaveFileName_lineEdit->text();
QUrl url(URLSTR);
QEventLoop loop;
QNetworkAccessManager manager;
//发出请求
QNetworkReply *reply=manager.get(QNetworkRequest(url));
//请求结束并下载完成后,退出子事件循环
connect(reply,SIGNAL(finished()),&loop,SLOT(quit()),Qt::UniqueConnection);
//开启子事件循环
loop.exec();
//将读取到的信息写入文件
sourceCode=reply->readAll();
QFile file(FILE_NAME);
file.open(QIODevice::WriteOnly);
QTextStream out(&file);
out<<sourceCode<<endl;
if(file.size()>10){
QMessageBox::information(this,tr("提示"),tr("网站数据获取成功!"),QMessageBox::Ok);
hasSourceFile=true;
}
else{
QMessageBox::information(this,tr("提示"),tr("网站数据获取失败!"),QMessageBox::Ok);
hasSourceFile=false;
}
file.close();
if(file.size()<10){
file.remove();
}
}
void Widget::on_id_divisionFile_pushButton_clicked()
{
if(hasSourceFile){
//处理sourceCode
this->GetURLAndTitle(ui->id_URL_division_lineEdit->text(),ui->id_divisionTopTitle_linEdit->text(),ui->id_title_division_lineEdit->text());
}
else{
QMessageBox::information(this,"提示",tr("没有获取到网站数据!"),QMessageBox::Ok);
}
}
void Widget::GetURLAndTitle(QString urlLack, QString topTitleLack, QString titleLack){
QString divisionSource=sourceCode;
if(!urlLack.isEmpty()&&!titleLack.isEmpty()){
divisionSource.remove(QRegExp("\\s"));
//存一下divisionSource
QFile file("divisionSource.txt");
if(file.exists()){
file.remove();
}
file.open(QIODevice::WriteOnly);
QTextStream out(&file);
out<<divisionSource<<endl;
ui->plainTextEdit->setPlainText(divisionSource);
char *divisionSourceCodeChar;
char *divisionURL;
char *divisionTitle;
char *divisionTopTitle;
//源码转char*,urlLack转char*,titleLack转char*
QByteArray divisionStringba=divisionSource.toUtf8();
divisionSourceCodeChar=divisionStringba.data();
/*qDebug()<<"divisionSource:"<<divisionSource.length();
qDebug()<<"divisionSourceCodeChar:"<<strlen(divisionSourceCodeChar);*/
// int divisionCharLength=strlen(divisionSourceCodeChar);
// qDebug()<<"divisionCharLength:"<<divisionCharLength;
QByteArray urlDivisionBa=urlLack.toUtf8();
divisionURL=urlDivisionBa.data();
QByteArray titleDivisionTopBa=topTitleLack.toUtf8();
divisionTopTitle=titleDivisionTopBa.data();
QByteArray titleDivisionBa=titleLack.toUtf8();
divisionTitle=titleDivisionBa.data();
// qDebug()<<divisionURL;
// qDebug()<<divisionTitle;
int urlNumtemp=0; //为了添加webURLAndTitle[128][2]这个
for(int i=0;i<strlen(divisionSourceCodeChar);i++){
if(divisionSourceCodeChar[i]==divisionURL[0]){
for(int k=1;k<strlen(divisionURL);k++){
if(divisionSourceCodeChar[i+k] != divisionURL[k]){
break;
}
if(strlen(divisionURL)-1==k){
QString temp="";
for(int a=1;a<=8;a++){
QChar c=divisionSourceCodeChar[i+k+a];
temp=temp.append(c);
}
//qDebug()<<temp;
if(!temp.isEmpty()){
webURLAndTitle[urlNumtemp][0]=ui->id_URL_division_lineEdit->text()+temp;
qDebug()<<webURLAndTitle[urlNumtemp][0];
urlNumtemp++;
}
}
}
}
}
//下面是提取置顶title
int urlNum=0; //为了添加webURLAndTitle[128][2]这个
for(int i=0;i<divisionSource.length();i++){
if(divisionSource.mid(i,1)==topTitleLack.mid(0,1)){
for(int k=1;k<topTitleLack.length();k++){
if(divisionSource.mid(i+k,1) != topTitleLack.mid(k,1)){
break;
}
if(topTitleLack.length()-1==k){
QString temp="";
int a=1;
while(divisionSource.mid(i+k+a,1)!="<"){
temp=temp.append(divisionSource.mid(i+k+a,1));
a++;
}
QString temp_append="置顶:";
temp_append=temp_append.append(temp);
webURLAndTitle[urlNum][1]=temp_append;
qDebug()<<webURLAndTitle[urlNum][1];
urlNum++;
}
}
}
}
//下面是提取title
for(int i=0;i<divisionSource.length();i++){
if(divisionSource.mid(i,1)==titleLack.mid(0,1)){
for(int k=1;k<titleLack.length();k++){
if(divisionSource.mid(i+k,1) != titleLack.mid(k,1)){
break;
}
if(titleLack.length()-1==k){
QString temp="";
int a=1;
while(divisionSource.mid(i+k+a,1)!="<"){
temp=temp.append(divisionSource.mid(i+k+a,1));
a++;
}
if(!temp.isEmpty()){
webURLAndTitle[urlNum][1]=temp;
qDebug()<<webURLAndTitle[urlNum][1];
urlNum++;
}
}
}
}
}
webNum=urlNum;
saveURLandTilte();
}
else{
QMessageBox::information(this,"提示",tr("两个TextEdit未填写"),QMessageBox::Ok);
}
}