在旧版微博官网的评论页面中运行以下脚本,抽取评论:
// Global accumulator for the scraped output. It starts with a tab-separated
// header row (comment text, verified "big V" badge, time + location, like
// count) terminated by a newline.
var res = "评论内容\t大V标志\t时间+地点\t点赞数\n";
/**
 * Blocks the calling thread by busy-waiting until `delay` milliseconds have
 * elapsed on the wall clock. Nothing else on the thread runs in the meantime.
 *
 * @param {number} delay - How long to spin, in milliseconds.
 * @returns {undefined}
 */
function sleep(delay) {
  const deadline = Date.now() + delay;
  for (;;) {
    if (!(Date.now() < deadline)) {
      return;
    }
  }
}
/**
 * Offers `saveDatas` to the user as a downloadable ".txt" file.
 *
 * The file name is taken from characters 1-9 of the page's <title> text (the
 * first character is skipped, as in the original implementation). If the page
 * has no <title> element, or that slice is empty, the name falls back to
 * "comments".
 *
 * @param {string} saveDatas - Text content to write into the downloaded file.
 * @returns {undefined}
 */
function downLoadDataToLoc(saveDatas) {
  // How long the temporary object URL is kept alive after the click, so the
  // browser has time to start the download before the URL is revoked.
  const REVOKE_DELAY_MS = 60000;

  const titleElement = document.querySelector("title");
  const titleText = titleElement != null ? String(titleElement.text) : "";
  const filename = (titleText.substring(1, 10) || "comments") + ".txt";

  // A Blob built from a string is always UTF-8 encoded; label it as plain text.
  const blob = new Blob([saveDatas], { type: "text/plain;charset=utf-8" });

  if ("msSaveOrOpenBlob" in navigator) {
    // Legacy IE/Edge path: the browser saves the blob directly.
    window.navigator.msSaveOrOpenBlob(blob, filename);
    return;
  }

  // Other browsers: point a temporary <a download> at an object URL and click it.
  const url = window.URL.createObjectURL(blob);
  const link = document.createElement("a");
  link.href = url;
  link.setAttribute("download", filename);
  link.click();

  // Release the object URL later; otherwise the blob stays referenced until
  // the page is unloaded.
  setTimeout(function () {
    window.URL.revokeObjectURL(url);
  }, REVOKE_DELAY_MS);
}
// Leftover counter: nothing in this script reads it (its decrement in the
// polling callback was commented out). Kept so the global stays defined.
var k = 10;

/**
 * Builds one output row for a single root comment node.
 *
 * Row layout (unchanged from the original script):
 *   text + "\t " + badge + time/location + "\t " + likes + "\t " + "\n"
 * where badge is either '微博个人认证 \t ' or 'false \t '.
 *
 * All lookups are scoped to `comment`, and a missing sub-element contributes
 * an empty field instead of throwing, so one odd node cannot leave a
 * half-written row behind or stall the scraper on the same node forever.
 *
 * @param {Element} comment - A `div[node-type='root_comment']` element.
 * @returns {string} The tab-separated row, including its trailing newline.
 */
function buildCommentRow(comment) {
  // Drop the nested (second-level) replies before reading the root comment.
  const replies = comment.querySelector("div.list_box_in");
  if (replies != null) {
    replies.remove();
  }

  const textNode = comment.querySelector("div.WB_text");
  const rawText = textNode != null ? String(textNode.outerText) : "";
  // Presumably the rendered text is "<author>:<comment>" (TODO confirm against
  // the page). Keep everything after the FIRST colon so that comments which
  // themselves contain a colon are not truncated; without any colon, keep the
  // whole text rather than emitting "undefined".
  const colonAt = rawText.indexOf(":");
  const text = colonAt === -1 ? rawText : rawText.slice(colonAt + 1);

  // The verified ("big V") badge is looked up inside the comment text node.
  const isVerified =
    textNode != null &&
    textNode.querySelector("a[suda-data='key=pc_apply_entry&value=feed_icon']") != null;
  const badge = isVerified ? '微博个人认证 \t ' : 'false \t ';

  const fromNode = comment.querySelector("div[class='WB_from S_txt2']");
  const likeNode = comment.querySelector("span[class='line S_line1'] em:last-child");
  const from = fromNode != null ? fromNode.innerText : "";
  const likes = likeNode != null ? likeNode.innerText : "";

  return text + '\t ' + badge + from + "\t " + likes + "\t " + '\n';
}

/**
 * One polling step of the scraper.
 *
 * Scrolls the page, appends a row to the global `res` for every root comment
 * currently in the DOM (removing each node once consumed), then either clicks
 * the "more comments" link or, when that link is absent, downloads the
 * collected data, resets `res`, and stops the polling interval.
 *
 * @returns {undefined}
 */
function scrapeLoadedComments() {
  const rootSelector = ".list_ul > div[node-type='root_comment']";

  window.scrollTo(5000, 5000);
  sleep(1000);

  let comment = document.querySelector(rootSelector);
  while (comment != null) {
    res += buildCommentRow(comment);
    // Remove the consumed comment so the next query returns the following one.
    comment.remove();
    sleep(10);
    comment = document.querySelector(rootSelector);
  }

  const moreLink = document.querySelector("a[action-type='click_more_comment']");
  console.info(moreLink + "return:" + res.length);
  if (moreLink == null) {
    // No "more comments" link left: save the data and stop polling.
    downLoadDataToLoc(res);
    res = "";
    clearInterval(myVar);
    // Return here; previously execution fell through and called click() on null.
    return;
  }
  moreLink.click();
}

// Poll every 3 seconds until scrapeLoadedComments() stops the interval itself.
var myVar = setInterval(scrapeLoadedComments, 3000);
封装数据:用下面的 Java 程序解析抽取结果,并把每条评论封装为 Commons 对象
package com.example.httpcilent;
import java.io.*;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
/**
 * Parses a tab-separated comment dump and writes the comment texts to an
 * output file, one per line.
 *
 * Expected input: a header line followed by one line per comment with the
 * fields {@code text, verified flag, time + location, like count} separated
 * by tabs, plus one trailing field after the like count (hence the
 * {@code length - 2} index for the like count). The comment text itself may
 * contain tabs; the metadata fields are therefore located from the END of
 * the line.
 */
public class Clean {
    private static String filename = "C:\\Users\\18745\\Desktop\\1.txt";

    /**
     * Reads the input file, parses every non-blank data line into a
     * {@link Commons} and writes the texts to {@code output.txt}.
     *
     * @param args unused
     * @throws IOException    if a file cannot be read/written or a like count is malformed
     * @throws ParseException if a "今天 HH:mm" timestamp cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException {
        File file = new File(filename);
        List<Commons> result = new ArrayList<>();
        // The dump is read explicitly as UTF-8 (assumes it was saved as UTF-8,
        // e.g. a browser Blob download) instead of the platform default charset.
        // try-with-resources closes the reader even when parsing throws.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream(file), java.nio.charset.StandardCharsets.UTF_8))) {
            reader.readLine(); // skip the header row
            String s;
            while ((s = reader.readLine()) != null) {
                if (s.trim().isEmpty()) {
                    continue; // tolerate blank lines instead of failing on them
                }
                result.add(parseLine(s));
            }
        }
        File file1 = new File("C:\\Users\\18745\\Desktop\\output.txt");
        printFile(result, file1);
    }

    /**
     * Parses one data line into a {@link Commons}.
     *
     * @param s a single non-blank line of the dump
     * @return the populated record; its date stays {@code null} when the time
     *         is neither "今天 HH:mm" nor "N分钟前"
     * @throws IOException    if the like-count field has neither "赞" nor digits
     * @throws ParseException if the "今天" time part cannot be parsed
     */
    private static Commons parseLine(String s) throws IOException, ParseException {
        String[] split = s.split("\t");
        for (String s1 : split) {
            System.out.println(s1);
        }
        System.out.println("-----------------");

        Commons commons = new Commons();
        // Walk the metadata fields backwards from the like count.
        int tail = split.length - 2;

        // Like count: a field containing "赞" is stored as 0 likes.
        String likeField = split[tail--];
        String number = stringTONums(likeField);
        if (likeField.contains("赞")) {
            commons.setLike(0);
        } else if (!number.isEmpty()) {
            commons.setLike(Integer.parseInt(number));
        } else {
            throw new IOException("点赞数量有误");
        }

        // Time + location, space separated. Element 0 is not used here: the
        // time is read from element 1 and the location from the last element.
        String[] dataCity = split[tail--].split(" ");
        // City: last token with its first two characters stripped.
        commons.setCity(dataCity[dataCity.length - 1].substring(2));
        System.out.println("dateCity" + dataCity.length);
        for (int d = 0; d < dataCity.length; d++) {
            System.out.println("--------" + dataCity[d]);
        }

        // Timestamp.
        Calendar now = Calendar.getInstance();
        if (dataCity[1].contains("今天")) {
            // Calendar.MONTH is 0-based, so add 1 to get the calendar month.
            String date = now.get(Calendar.YEAR) + "年"
                    + (now.get(Calendar.MONTH) + 1) + "月"
                    + now.get(Calendar.DATE) + "日";
            SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy年MM月dd日 HH:mm");
            commons.setDate(simpleDateFormat.parse(date + " " + dataCity[2]));
        } else if (dataCity[1].contains("分钟前")) {
            int minutes = Integer.parseInt(stringTONums(dataCity[1]));
            now.setTimeInMillis(System.currentTimeMillis() - minutes * 60L * 1000L);
            commons.setDate(now.getTime());
        }
        // Any other time format is not handled; the date is left null.

        // Verified flag: the literal "false" (ignoring spaces) means not verified.
        commons.setV(!"false".equals(split[tail--].replace(" ", "")));

        // Text: after the three decrements above, `tail` is the index of the
        // LAST text segment, so the join must include it (k <= tail).
        StringBuilder text = new StringBuilder(split[0]);
        for (int k = 1; k <= tail; k++) {
            text.append(split[k]);
        }
        commons.setText(text.toString());
        return commons;
    }

    /**
     * Writes the text of every record to {@code file1}, one per line, as UTF-8.
     * The writer is closed (and therefore flushed) before returning.
     *
     * @param result records to write
     * @param file1  destination file (overwritten)
     * @throws IOException if the file cannot be written
     */
    private static void printFile(List<Commons> result, File file1) throws IOException {
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(file1), java.nio.charset.StandardCharsets.UTF_8))) {
            for (Commons commons : result) {
                writer.write(commons.getText() + "\n");
            }
        }
    }

    /**
     * Returns the ASCII digits ('0'-'9') of {@code s}, in order, with every
     * other character dropped.
     *
     * @param s input string
     * @return the digits only; empty when {@code s} contains none
     */
    private static String stringTONums(String s) {
        StringBuilder number = new StringBuilder();
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c >= '0' && c <= '9') {
                number.append(c);
            }
        }
        return number.toString();
    }

    /** One parsed comment record. */
    private static class Commons {
        private String text;    // comment text
        private boolean isV;    // whether the author is verified ("big V")
        private Date date;      // publication time; may be null
        private String city;    // city / location
        private Integer like;   // like count

        public String getText() {
            return text;
        }

        public void setText(String text) {
            this.text = text;
        }

        public boolean isV() {
            return isV;
        }

        public void setV(boolean v) {
            isV = v;
        }

        public Date getDate() {
            return date;
        }

        public void setDate(Date date) {
            this.date = date;
        }

        public String getCity() {
            return city;
        }

        public void setCity(String city) {
            this.city = city;
        }

        public Integer getLike() {
            return like;
        }

        public void setLike(Integer like) {
            this.like = like;
        }

        @Override
        public String toString() {
            return "Commons{" +
                    "text='" + text + '\'' +
                    ", isV=" + isV +
                    ", date=" + date +
                    ", city='" + city + '\'' +
                    ", like=" + like +
                    '}';
        }
    }
}