刘一辰的软件工程随笔
今日目标:代码总结
今日总结:
import requests
import pymysql
from bs4 import BeautifulSoup
# Scrape the CVPR 2019 open-access paper index, fetch every paper's abstract
# page, and store title / link / abstract / keywords in the MySQL table
# `lunwen`.  Keywords are simply the underscore-separated words of the paper's
# file name, joined with commas.

# Seconds to wait for any single HTTP response; without a timeout one stalled
# request would hang the whole crawl.
REQUEST_TIMEOUT = 30

# PyMySQL 1.x only accepts connection parameters by keyword, so the host is
# named explicitly instead of being passed positionally.
db = pymysql.connect(host='127.0.0.1',
                     port=3306,
                     user='root',
                     password='123',
                     database='mytest',
                     charset='utf8')

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
}
url = "http://openaccess.thecvf.com/CVPR2019.py"

try:
    # The browser-like headers were defined but never sent; pass them along.
    html = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(html.content, 'html.parser')
    # Every paper on the index page has an <a> whose text is exactly "pdf".
    pdfs = soup.findAll(name="a", text="pdf")

    lis = []
    for i, pdf in enumerate(pdfs):
        # ".../Some_Paper_Title_CVPR_2019_paper.pdf" -> "Some_Paper_Title"
        pdf_name = pdf["href"].split('/')[-1]
        name = pdf_name.split('.')[0].replace("_CVPR_2019_paper", "")
        link = "http://openaccess.thecvf.com/content_CVPR_2019/html/" + name + "_CVPR_2019_paper.html"

        html1 = requests.get(link, headers=headers, timeout=REQUEST_TIMEOUT)
        soup1 = BeautifulSoup(html1.content, 'html.parser')
        weizhi = soup1.find('div', attrs={'id': 'abstract'})
        # Reset per paper: a page without an abstract must not inherit the
        # abstract of the previously processed paper.
        jianjie = weizhi.get_text() if weizhi else ""
        print("这是第"+str(i)+"条数据")

        lis.append({
            'title': name,
            'link': link,
            'abstract': jianjie,
            'keywords': ','.join(name.split('_')),
        })

    cursor = db.cursor()
    try:
        for info in lis:
            cols = ", ".join('`{}`'.format(k) for k in info.keys())
            print(cols)  # e.g. '`title`, `link`, `abstract`, `keywords`'
            val_cols = ', '.join('%({})s'.format(k) for k in info.keys())
            print(val_cols)  # e.g. '%(title)s, %(link)s, ...'
            sql = "insert into lunwen(%s) values(%s)"
            res_sql = sql % (cols, val_cols)
            print(res_sql)
            # Values are bound by the driver from the dict (named parameters).
            cursor.execute(res_sql, info)
        db.commit()
    finally:
        cursor.close()
finally:
    # Release the connection even if scraping or inserting fails.
    db.close()

num = 1
print(num)
print("成功")
二、分析、查找关键词
借助 Map 存储关键词:key 为关键词,value 为该关键词出现的次数。每遍历到一个已存在的关键词,就将其 value 加 1;统计完成后按 value 降序排序。
dao层:
package dao;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.stream.Collectors;
import Bean.copy.*;
import jdbc.Util;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;
import com.sun.xml.internal.ws.policy.privateutil.PolicyUtils.Collections;
public class Dao {
public static Mapgetrc()
{
String sql="select * from lunwen";
Mapmap= new HashMap ();
Mapresults= new LinkedHashMap ();
Connection con=null;
Statement state=null;
ResultSet rs=null;
con=Util.getConn();
try {
state=con.createStatement();
rs=state.executeQuery(sql);
while(rs.next())
{
String keywords=rs.getString("keywords");
String[] split = keywords.split(",");
for(int i=0;i {if(map.get(split[i])==null){map.put(split[i],0);}else{map.replace(split[i], map.get(split[i])+1);}}}} catch (SQLException e) {// TODO Auto-generated catch blocke.printStackTrace();}Util.close(rs, state, con);map.entrySet().stream().sorted((p1, p2) -> p2.getValue().compareTo(p1.getValue())).collect(Collectors.toList()).forEach(ele -> results.put(ele.getKey(), ele.getValue()));return results;}public List list(String keywords) { // 查询所有信息List list = new ArrayList(); // 创建集合Connection conn = Util.getConn();String sql = "select * from lunwen where keywords like "+"'%"+keywords+"%'"; // SQL查询语句try {PreparedStatement pst = conn.prepareStatement(sql);ResultSet rs = pst.executeQuery();Data data = null;while (rs.next()) {String title = rs.getString("title");String link = rs.getString("link");String as= rs.getString("abstract");data = new Data(title,link,as,keywords);list.add(data);}rs.close(); // 关闭pst.close(); // 关闭} catch (SQLException e1) {e1.printStackTrace(); // 抛出异常}return list; // 返回一个集合}}servlet层:
package servlet;import java.io.IOException;import java.util.Map;import javax.servlet.ServletException;import javax.servlet.annotation.WebServlet;import javax.servlet.http.HttpServlet;import javax.servlet.http.HttpServletRequest;import javax.servlet.http.HttpServletResponse;import dao.Dao;import net.sf.json.JSONArray;import net.sf.json.JSONObject;@WebServlet("/RcServlet")public class RcServlet extends HttpServlet {private static final long serialVersionUID = 1L;/*** @see HttpServlet#HttpServlet()*/public RcServlet() {super();// TODO Auto-generated constructor stub}/*** @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)*/protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {this.doPost(request, response);}/*** @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)*/protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {request.setCharacterEncoding("utf-8");MapsortMap=Dao.getrc(); JSONArray json =new JSONArray();int k=0;for (Map.Entryentry : sortMap.entrySet()) {JSONObject ob=new JSONObject();ob.put("name", entry.getKey());ob.put("value", entry.getValue());if(!(entry.getKey().equals("for")||entry.getKey().equals("and")||entry.getKey().equals("With")||entry.getKey().equals("of")||entry.getKey().equals("in")||entry.getKey().equals("From")||entry.getKey().equals("A")||entry.getKey().equals("to")||entry.getKey().equals("a")||entry.getKey().equals("the")||entry.getKey().equals("by"))){json.add(ob);k++;}if(k==10)break;}System.out.println(json.toString());response.getWriter().write(json.toString());}}三、生成热词汇云图
<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<%
    request.setCharacterEncoding("utf-8");
    response.setCharacterEncoding("utf-8");
%>
<%--
    Hot-word cloud page: asks RcServlet for the top keywords and renders them
    as an ECharts word cloud; clicking a word navigates to ClickServlet.

    NOTE(review): the HTML tags of this page were lost when the post was
    extracted. The skeleton below is rebuilt only from what survived (the
    title text, the #main / #table style rules, and getElementById('main')).
    Confirm it against the real page.
--%>
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>热词云</title>
<script src="js/jquery-3.4.1.min.js"></script>
<script src="js/echarts.min.js"></script>
<script src="js/echarts-cloud.js"></script>
<style>
#main {
    width: 30%;
    height: 500px;
    border: 1px solid #ddd;
    float: right;
}

#table {
    overflow-x: auto;
    overflow-y: auto;
    width: 70%;
    height: 500px;
    float: left;
    /* "dp" is not a CSS unit, so both declarations were being ignored. */
    margin-top: 100px;
    padding-top: 100px;
}
</style>
</head>
<body>
<h1>热词云</h1>
<div id="table">
    <%-- NOTE(review): the original table markup (presumably a c:forEach
         over the papers with var="item") is missing; only its text remains. --%>
    论文连接 ${item.title}
</div>
<div id="main"></div>
<script type="text/javascript">
var dt;
$.ajax({
    url: "RcServlet",
    async: true,
    type: "POST",
    data: {},
    dataType: "json",
    success: function (data) {
        dt = data;
        // Keep only the two fields the chart needs.
        var mydata = dt.map(function (item) {
            return { name: item.name, value: item.value };
        });

        var myChart = echarts.init(document.getElementById('main'));
        myChart.setOption({
            title: { text: '' },
            tooltip: {},
            series: [{
                type: 'wordCloud', // word-cloud series
                // NOTE(review): 'smooth' and `size` are not options listed in
                // the echarts-wordcloud README - confirm against the bundled
                // echarts-cloud.js version.
                shape: 'smooth',
                gridSize: 8, // grid cell size
                size: ['50%', '50%'],
                // rotationRange takes [min, max]; the intended angles
                // -45/0/45/90 come from stepping through it by 45 degrees.
                rotationRange: [-45, 90],
                rotationStep: 45,
                textStyle: {
                    normal: {
                        fontFamily: '微软雅黑',
                        // Random RGB colour per word.
                        color: function () {
                            return 'rgb(' + Math.round(Math.random() * 255) +
                                ', ' + Math.round(Math.random() * 255) +
                                ', ' + Math.round(Math.random() * 255) + ')'
                        }
                    },
                    emphasis: {
                        shadowBlur: 5, // shadow blur on hover
                        shadowColor: '#333' // shadow colour on hover
                    }
                },
                left: 'center',
                top: 'center',
                right: null,
                bottom: null,
                width: '100%',
                height: '100%',
                data: mydata
            }]
        });

        // Clicking a word opens the papers for that keyword. The keyword is
        // URL-encoded so characters such as '&', '#' or spaces survive.
        myChart.on('click', function (params) {
            window.location.href = "ClickServlet?keywords=" + encodeURIComponent(params.name);
        });
        alert("成功!");
    },
    error: function () {
        alert("请求失败");
    }
});
</script>
</body>
</html>
结果: