Skip to content

Instantly share code, notes, and snippets.

@matsumoto-k4zuki
Created July 5, 2018 11:57
Show Gist options
  • Select an option

  • Save matsumoto-k4zuki/76439737e73104502cc33222ccea47c4 to your computer and use it in GitHub Desktop.

Select an option

Save matsumoto-k4zuki/76439737e73104502cc33222ccea47c4 to your computer and use it in GitHub Desktop.
package main
import (
"fmt"
"github.com/PuerkitoBio/goquery"
"unicode/utf8"
)
// main scrapes the author's article index page, follows every article link
// found there, and prints each article's URL, title, and body length in
// characters (runes).
func main() {
	// Fetch the index page that lists the author's posts; the article links
	// are extracted from it below.
	doc, err := goquery.NewDocument("https://www.fox-hound.tech/author/kazuki-matsumoto/")
	if err != nil {
		// Without the index page there is nothing to process, and using the
		// failed result would panic, so stop here.
		fmt.Println(err)
		return
	}

	// Collect the URL of every article link on the index page.
	var urls []string
	doc.Find(".title a").Each(func(_ int, s *goquery.Selection) {
		href, ok := s.Attr("href")
		if !ok {
			// Anchor without an href attribute: nothing to fetch.
			return
		}
		urls = append(urls, href)
	})

	for _, u := range urls {
		fmt.Println(u)

		page, err := goquery.NewDocument(u)
		if err != nil {
			// Report the failure with its cause and move on to the next
			// article instead of using the failed result.
			fmt.Printf("url scarapping failed: %v\n", err)
			continue
		}

		title := page.Find(".post--item .title h2").Text()
		fmt.Println(title)

		text := page.Find(".post--item > div.post--content").Text()
		// Count characters rather than bytes: the text contains Japanese, so
		// utf8.RuneCountInString is used instead of len.
		fmt.Printf("文字数: %d 字\n\n", utf8.RuneCountInString(text))
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment