这里只解析一下代码,所需工具jsoup、HttpClient
HttpClient获取html后,用jsoup解析html,再用Java代码提取所需要的信息。
之前写的有点问题,今天改了一下。因为通过hash值拼接的地址是个临时地址,存在数据库后一天就失效了,所以我改了一下。先把爬到的歌曲下载到本地,然后上传到七牛云的对象存储空间,再返回这个地址。最后把七牛云上的地址存到数据库中,这样就是永久的了。

HtmlManage:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
package com.after.demo.spider;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Thin wrapper that turns raw HTML text into a jsoup {@link Document}.
 *
 * @author www.xyjz123.xyz
 * @date 2019/4/19 19:06
 */
public class HtmlManage {

    /**
     * Parses the given HTML source with jsoup.
     *
     * @param html the HTML text to parse
     * @return the document produced by {@code Jsoup.parse(html)}
     */
    public Document manage(String html) {
        return Jsoup.parse(html);
    }
}

HttpGetConnect:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
package com.after.demo.spider;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HttpEntity;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.BasicHttpClientConnectionManager;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
/**
 * Small HTTP GET helper built on Apache HttpClient that downloads a page as text.
 *
 * @author www.xyjz123.xyz
 * @date 2019/4/19 19:05
 */
public class HttpGetConnect {

    private static final Log log = LogFactory.getLog(HttpGetConnect.class);

    /**
     * Fetches {@code url} with a browser-like GET request and returns the body as text.
     *
     * <p>The body is read line by line and every line is terminated with {@code "\n"}.
     * An empty string is returned when the status is not 2xx, when the response has no
     * body, or when the request or the read fails; in the failure case the exception is
     * logged instead of being propagated.
     *
     * @param url         address to request
     * @param charsetName charset used to decode the body, e.g. UTF-8 or GB2312
     * @return the decoded body, or an empty string as described above
     * @throws IOException only if closing the underlying HTTP client fails
     */
    public static String connect(String url, String charsetName) throws IOException {
        BasicHttpClientConnectionManager connManager = new BasicHttpClientConnectionManager();
        String content = "";

        // The client is closed by try-with-resources; a failure while closing is the only
        // IOException that reaches the caller, exactly as with the former finally block.
        try (CloseableHttpClient httpclient = HttpClients.custom()
                .setConnectionManager(connManager)
                .build()) {
            try {
                HttpGet httpget = new HttpGet(url);

                // NOTE(review): the socket timeout is 5 s while both connect timeouts are 50 s;
                // the values are kept as they were - confirm the asymmetry is intended.
                RequestConfig requestConfig = RequestConfig.custom()
                        .setSocketTimeout(5000)
                        .setConnectTimeout(50000)
                        .setConnectionRequestTimeout(50000)
                        .build();
                httpget.setConfig(requestConfig);
                httpget.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
                httpget.setHeader("Accept-Encoding", "gzip,deflate,sdch");
                httpget.setHeader("Accept-Language", "zh-CN,zh;q=0.8");
                httpget.setHeader("Connection", "keep-alive");
                httpget.setHeader("Upgrade-Insecure-Requests", "1");
                httpget.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36");
                httpget.setHeader("cache-control", "max-age=0");

                // Closing the response releases the connection on every path.
                try (CloseableHttpResponse response = httpclient.execute(httpget)) {
                    int status = response.getStatusLine().getStatusCode();
                    HttpEntity entity = response.getEntity();
                    if (status >= 200 && status < 300 && entity != null) {
                        content = readAll(entity, charsetName);
                    }
                }
            } catch (Exception e) {
                // Best effort: callers receive "" and decide how to proceed.
                log.error("GET " + url + " failed", e);
                content = "";
            }
        }
        return content;
    }

    /** Reads the entity body line by line, appending "\n" after each line. */
    private static String readAll(HttpEntity entity, String charsetName) throws IOException {
        StringBuilder body = new StringBuilder();
        try (InputStream instream = entity.getContent();
             BufferedReader reader = new BufferedReader(new InputStreamReader(instream, charsetName))) {
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line).append('\n');
            }
        }
        return body.toString();
    }
}

MusicController:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
package com.after.demo.controller;

import com.after.demo.entity.Music;
import com.after.demo.service.impl.MusicServiceImpl;
import com.after.demo.service.impl.UploadServiceImpl;
import com.after.demo.spider.FileDownload;
import com.after.demo.spider.HtmlManage;
import com.after.demo.spider.HttpGetConnect;
import com.after.demo.utils.GetString;
import com.after.demo.utils.JsonResult;
import com.google.gson.Gson;
import com.qiniu.common.QiniuException;
import com.qiniu.http.Response;
import com.qiniu.storage.model.DefaultPutRet;
import io.swagger.annotations.ApiOperation;
import net.sf.json.JSONObject;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RestController;

import java.io.File;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * REST endpoints that crawl a Kugou ranking list into the database and serve a random song.
 *
 * <p>For every song the crawler resolves the play URL from the song hash, downloads the
 * mp3 into {@link #FILEPATH}, uploads it through {@code UploadServiceImpl} and stores the
 * resulting address together with the title and cover image.
 *
 * @author www.xyjz123.xyz
 * @date 2019/4/19 21:18
 */
@RestController
public class MusicController {

    @Autowired
    MusicServiceImpl musicService;
    @Autowired
    UploadServiceImpl uploadService;

    /** Local directory the mp3 files are downloaded into before being uploaded. */
    public static String FILEPATH = "F:/music/";
    /** Play-data endpoint template; HASH and TIME are substituted per request. */
    public static String mp3 = "https://wwwapi.kugou.com/yy/index.php?r=play/getdata&callback=jQuery191027067069941080546_1546235744250&"
            + "hash=HASH&album_id=0&_=TIME";

    /** Ranking page template; PAGE is substituted with the page number. */
    public static final String LINK = "https://www.kugou.com/yy/rank/home/PAGE-33164.html?from=rank";

    /** Matches the {@code "hash":"..."} pair embedded in a song page. */
    private static final Pattern HASH_PATTERN = Pattern.compile("\"hash\":\"[0-9A-Z]+\"");
    /** Characters that must not appear in a single file-name component. */
    private static final Pattern ILLEGAL_FILE_CHARS = Pattern.compile("[\\\\/:*?\"<>|]");

    /**
     * Crawls ranking pages 1 to 9 and stores every song that can be fetched.
     *
     * @return a success result once all pages have been processed
     * @throws IOException if an HTTP client cannot be closed or the upload reports an I/O error
     */
    @GetMapping("/music/save")
    @ApiOperation("将酷狗歌单爬取存入数据库")
    public JsonResult saveMusic() throws IOException {
        for (int page = 1; page < 10; page++) {
            getTitle(LINK.replace("PAGE", String.valueOf(page)));
        }
        return JsonResult.ok();
    }

    /**
     * Returns the song stored under a randomly chosen id.
     *
     * @return a success result wrapping whatever the service returns for that id
     */
    @PostMapping("/music/getOne")
    @ApiOperation("随机获取一首歌")
    public JsonResult getMusic() {
        int id = GetString.getId();
        Music music = musicService.getMusicById(id);
        return JsonResult.ok(music);
    }

    /**
     * Processes one ranking page: every {@code li} inside the first
     * {@code pc_temp_songlist} element is treated as a song and passed to
     * {@link #download(String, String)}.
     *
     * @param url ranking page address
     * @return always {@code null}
     * @throws IOException propagated from the page fetch or from {@link #download(String, String)}
     */
    public String getTitle(String url) throws IOException {
        String content = HttpGetConnect.connect(url, "utf-8");
        Document doc = new HtmlManage().manage(content);

        // A failed fetch yields an empty document; skip the page instead of failing on get(0).
        Elements songLists = doc.getElementsByClass("pc_temp_songlist");
        if (songLists.isEmpty()) {
            return null;
        }

        for (Element item : songLists.get(0).getElementsByTag("li")) {
            Element anchor = item.getElementsByTag("a").first();
            if (anchor == null) {
                // No link means there is no song page to follow.
                continue;
            }
            download(anchor.attr("href"), item.attr("title").trim());
        }
        return null;
    }

    /**
     * Resolves, downloads, uploads and stores a single song.
     *
     * <p>The song is skipped without touching disk or the upload service when the page
     * has no hash, the play-data response cannot be parsed, or the play URL or cover
     * image is blank (the original author notes this happens for paid songs).
     *
     * @param url  song page address
     * @param name song title; stored as-is and used for the local file name
     * @return the resolved play URL, or {@code null} when it could not be determined
     * @throws IOException propagated from the HTTP helper or the upload
     */
    public String download(String url, String name) throws IOException {
        String content = HttpGetConnect.connect(url, "utf-8");

        Matcher matcher = HASH_PATTERN.matcher(content);
        if (!matcher.find()) {
            // Without a hash the play-data request cannot be built.
            return null;
        }
        String hash = matcher.group().replace("\"hash\":\"", "").replace("\"", "");

        // Cover image of the song.
        Document doc = new HtmlManage().manage(content);
        Elements albumImg = doc.getElementsByClass("albumImg");
        String imgUrl = albumImg.isEmpty()
                ? ""
                : albumImg.get(0).getElementsByTag("img").attr("src");

        // Build the play-data address; TIME is substituted first so that a hash which
        // happens to contain "TIME" cannot be altered by the second replacement.
        String item = mp3.replace("TIME", String.valueOf(System.currentTimeMillis()))
                .replace("HASH", hash);

        String playUrl = extractPlayUrl(HttpGetConnect.connect(item, "utf-8"));
        if (playUrl == null) {
            return null;
        }
        System.out.println(playUrl);

        // Decide before downloading: nothing is stored unless both addresses are present.
        if (StringUtils.isBlank(playUrl) || StringUtils.isBlank(imgUrl)) {
            return playUrl;
        }

        File file = new File(FILEPATH + toFileName(name) + ".mp3");
        new FileDownload().download(playUrl, file.getPath());
        if (!file.isFile() || file.length() == 0) {
            // FileDownload reports failures only by printing them, so check the result here.
            return playUrl;
        }

        String src = null;
        try {
            Response response = uploadService.uploadFile(file);
            if (response != null) {
                // Parse the body returned for a successful upload.
                DefaultPutRet putRet = new Gson().fromJson(response.bodyString(), DefaultPutRet.class);
                if (putRet != null && StringUtils.isNotBlank(putRet.key)) {
                    src = "http://www.jie12366.xyz/" + putRet.key;
                }
            }
        } catch (QiniuException e) {
            e.printStackTrace();
        }

        if (StringUtils.isNotBlank(src)) {
            musicService.saveMusic(name, imgUrl, src);
        }
        return playUrl;
    }

    /**
     * Unwraps the JSONP response {@code callback({...});} and reads {@code data.play_url}.
     *
     * @return the play URL (possibly empty), or {@code null} when the response is not
     *         a JSONP wrapper or has no {@code data} object
     */
    private static String extractPlayUrl(String jsonp) {
        int open = jsonp.indexOf('(');
        int close = jsonp.lastIndexOf(')');
        if (open < 0 || close <= open) {
            return null;
        }
        JSONObject data = JSONObject.fromObject(jsonp.substring(open + 1, close)).optJSONObject("data");
        return data == null ? null : data.optString("play_url");
    }

    /**
     * Replaces characters that cannot be part of a file name with {@code _} so that a
     * scraped title can neither break the write nor point outside {@link #FILEPATH}.
     * NOTE(review): if the upload key is derived from the file name, such titles now get
     * the sanitised key - confirm against UploadServiceImpl.
     */
    private static String toFileName(String title) {
        return ILLEGAL_FILE_CHARS.matcher(title).replaceAll("_");
    }
}

新增FileDownload类:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
package com.after.demo.spider;

import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;

/**
 * Downloads a remote file to local disk with Apache HttpClient.
 *
 * @author www.xyjz123.xyz
 * @date 2019/4/24 16:37
 */
public class FileDownload {

    /** Number of requests made before giving up on a non-200 answer. */
    private static final int MAX_ATTEMPTS = 3;

    /**
     * Downloads {@code url} into the file at {@code path}.
     *
     * <p>The request is repeated up to three times while the server answers with a status
     * other than 200. Failures are best effort: the exception is printed, nothing is
     * thrown, and a partially written file is removed, so callers should check that the
     * target file exists afterwards.
     *
     * @param url  address of the file to download
     * @param path target path including the file name
     */
    public void download(String url, String path) {
        RequestConfig requestConfig = RequestConfig.custom()
                .setSocketTimeout(2000)
                .setConnectTimeout(2000)
                .build();

        // Client and responses are closed on every path so no pooled connection is left leased.
        try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
            for (int attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
                HttpGet get = new HttpGet(url);
                get.setConfig(requestConfig);
                try (CloseableHttpResponse result = httpclient.execute(get)) {
                    if (result.getStatusLine().getStatusCode() == 200) {
                        save(result, new File(path));
                        break;
                    }
                    // Any other status: the response is closed here and the request is retried.
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Copies the response body into {@code target}, removing the file if the copy is cut short. */
    private static void save(CloseableHttpResponse result, File target) throws java.io.IOException {
        if (result.getEntity() == null) {
            return;
        }
        boolean opened = false;
        boolean complete = false;
        try (BufferedInputStream in = new BufferedInputStream(result.getEntity().getContent());
             BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(target))) {
            opened = true;
            byte[] buffer = new byte[8192];
            int len;
            while ((len = in.read(buffer)) != -1) {
                out.write(buffer, 0, len);
            }
            out.flush();
            complete = true;
        } finally {
            // Runs after both streams are closed; a truncated file must not be mistaken
            // for a finished download.
            if (opened && !complete) {
                target.delete();
            }
        }
    }
}

entity:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
package com.after.demo.entity;

import com.gitee.sunchenbin.mybatis.actable.annotation.Column;
import com.gitee.sunchenbin.mybatis.actable.annotation.Table;
import com.gitee.sunchenbin.mybatis.actable.constants.MySqlTypeConstant;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Row of the {@code music} table: one crawled song.
 *
 * <p>Accessors and constructors are generated by Lombok; the {@code @Table} and
 * {@code @Column} annotations describe the table layout for the actable library.
 *
 * @author www.xyjz123.xyz
 * @date 2019/4/19 19:20
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
@Table(name = "music")
public class Music {

    /** Auto-increment primary key. */
    @Column(name = "id", type = MySqlTypeConstant.INT, isKey = true, isAutoIncrement = true, length = 5)
    private int id;

    /** Song title; declared unique. */
    @Column(name = "name", type = MySqlTypeConstant.VARCHAR, isUnique = true)
    private String name;

    /**
     * Cover image address.
     * NOTE(review): the column length is 80 - confirm that cover URLs always fit.
     */
    @Column(name = "imgUrl", type = MySqlTypeConstant.VARCHAR, length = 80)
    private String imgUrl;

    /** Address of the audio file. */
    @Column(name = "src", type = MySqlTypeConstant.VARCHAR)
    private String src;
}

mapper:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
package com.after.demo.mapper;

import com.after.demo.entity.Music;
import org.apache.ibatis.annotations.Insert;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;

import java.util.List;

/**
 * MyBatis mapper for the {@code music} table.
 *
 * @author www.xyjz123.xyz
 * @date 2019/4/19 21:03
 */
@Mapper
public interface MusicMapper {

    /**
     * Inserts one crawled song.
     *
     * <p>The parameters are named explicitly with {@code @Param} so that the
     * {@code #{...}} placeholders resolve regardless of whether the project is compiled
     * with the {@code -parameters} flag.
     *
     * @param name   song title
     * @param imgUrl cover image address
     * @param src    audio file address
     * @return number of inserted rows
     */
    @Insert("insert into music(name,imgUrl,src) values(#{name},#{imgUrl},#{src})")
    int saveMusic(@Param("name") String name, @Param("imgUrl") String imgUrl, @Param("src") String src);

    /**
     * Loads every row of the {@code music} table.
     *
     * @return all stored songs
     */
    @Select("select * from music")
    List<Music> listMusic();

    /**
     * Loads the song with the given id.
     *
     * @param id primary key to look up
     * @return the row selected for that id
     */
    @Select("select * from music where id=#{id}")
    Music getMusicById(int id);
}

service:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
package com.after.demo.service.impl;

import com.after.demo.entity.Music;
import com.after.demo.mapper.MusicMapper;
import com.after.demo.service.MusicService;
import com.after.demo.utils.GetString;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.cache.annotation.CacheConfig;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.stereotype.Service;

import java.util.List;

/**
* @author www.xyjz123.xyz
* @description
* @date 2019/4/19 21:09
*/
@Service
@CacheConfig
public class MusicServiceImpl implements MusicService {

@Autowired
MusicMapper musicMapper;

@Override
public int saveMusic(String name, String imgUrl, String src) {
List<Music> musicList = musicMapper.listMusic();
for (Music music:musicList){
if (music.getName().equals(name)){
return 0;
}
}
return musicMapper.saveMusic(name,imgUrl,src);
}

@Override
@Cacheable(value = "music")
public List<Music> listMusic() {
return musicMapper.listMusic();
}

@Override
public Music getMusicById(int id) {
int maxSize = GetString.MAXSIZE;
if (id <= maxSize){
return musicMapper.getMusicById(id);
}
return null;
}
}

utils:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
package com.after.demo.utils;

/**
 * Helper that picks a random song id.
 *
 * @author 熊义杰
 * @date 2019-3-16
 */
public class GetString {

    /** Largest id that may be returned. */
    public static final int MAXSIZE = 165;

    /**
     * Returns a uniformly distributed id in the range {@code 1..MAXSIZE}, both inclusive.
     *
     * <p>The range starts at 1 because the ids are used as auto-increment primary keys;
     * the previous {@code 0..MAXSIZE-1} range produced an id that matches no row and
     * could never produce {@code MAXSIZE}.
     * NOTE(review): this assumes the table holds ids 1..MAXSIZE without gaps - confirm.
     *
     * @return a random id between 1 and {@link #MAXSIZE}
     */
    public static int getId() {
        return (int) (Math.random() * MAXSIZE) + 1;
    }
}

数据库效果:
在这里插入图片描述

随机获取一首歌:
在这里插入图片描述