[爬虫]ins获取关注列表(正在关注的用户)信息 js爬取信息+go下载

如何使用

1.浏览器登录你的ins

2.打开getUsers.js 修改第一行的 username = "你要爬的用户名"

3.打开浏览器ins界面(需要登录好), 按 F12 打开控制台, 粘贴脚本全部内容后回车

4.会提示你正在爬取, 请耐心等待20s左右(以1000个用户为例)

5.然后浏览器会自动下载一个 <用户名>_followings.json 文件(例如 fafa_motion_followings.json), 将其重命名为 users.json 后放到本文件夹

6.设置代理,在conf.txt里放你的代理链接

7.运行exe文件 Go! (本文不提供, 只提供源码)

js代码

// Instagram handle whose "following" list is exported.
const username = "fafa_motion";

// Accumulates one { username, full_name, profile_pic_url } record per followed
// account across all result pages.
let followings = [];

/**
 * Resolves `username` to a numeric user id, pages through the accounts that
 * user follows (50 per request) and triggers a browser download of the
 * collected list as `<username>_followings.json`.
 *
 * Must be run from the DevTools console of a logged-in instagram.com tab so
 * the requests carry the session cookies. Any failure is logged to the console
 * instead of being thrown.
 */
(async () => {
  try {
    console.log(`Process started! Give it a couple of seconds`);

    // The handle is user-editable, so encode it before putting it in a URL.
    const userQueryRes = await fetch(
      `https://www.instagram.com/web/search/topsearch/?query=${encodeURIComponent(username)}`
    );
    if (!userQueryRes.ok) {
      throw new Error(`User search failed with HTTP ${userQueryRes.status}`);
    }
    const userQueryJson = await userQueryRes.json();

    // The search is fuzzy; keep only the exact handle match.
    const matchedUser = (userQueryJson.users || [])
      .map((u) => u.user)
      .find((u) => u.username === username);
    if (!matchedUser) {
      throw new Error(`User "${username}" not found in search results`);
    }
    const userId = matchedUser.pk;

    let after = null; // pagination cursor; null requests the first page
    let hasNext = true;

    while (hasNext) {
      const variables = {
        id: userId,
        include_reel: true,
        fetch_mutual: true,
        first: 50,
        after: after,
      };

      const pageRes = await fetch(
        `https://www.instagram.com/graphql/query/?query_hash=d04b0a864b4b54837c0d870b0e77e076&variables=` +
          encodeURIComponent(JSON.stringify(variables))
      );
      if (!pageRes.ok) {
        // Typically a rate limit or an expired session; stop with a clear message
        // rather than failing later while parsing a non-JSON body.
        throw new Error(`Following list request failed with HTTP ${pageRes.status}`);
      }
      const pageJson = await pageRes.json();

      const edgeFollow = pageJson.data.user.edge_follow;
      hasNext = edgeFollow.page_info.has_next_page;
      after = edgeFollow.page_info.end_cursor;

      followings = followings.concat(
        edgeFollow.edges.map(({ node }) => ({
          username: node.username,
          full_name: node.full_name,
          profile_pic_url: node.profile_pic_url,
        }))
      );
    }

    console.log({ followings });

    // Serialize the result and hand it to the browser as a file download.
    const blob = new Blob([JSON.stringify(followings, null, 2)], { type: "application/json" });
    const url = URL.createObjectURL(blob);

    // A temporary <a download> element is the portable way to trigger a save.
    const a = document.createElement("a");
    a.href = url;
    a.download = `${username}_followings.json`;
    document.body.appendChild(a);
    a.click();
    document.body.removeChild(a);

    URL.revokeObjectURL(url);
  } catch (err) {
    console.log({ err });
  }
})();

下载器 golang

package main

import (
    "crypto/tls"
    "encoding/json"
    "fmt"
    "io"
    "net/http"
    "net/url"
    "os"
    "path/filepath"
    "sync"
    "time"
)

// User is a single entry of the JSON array that main decodes from users.json.
type User struct {
    // Username is the account handle; main uses it as the output file name.
    Username      string `json:"username"`
    // FullName is the account's display name; it is decoded but not otherwise used here.
    FullName      string `json:"full_name"`
    // ProfilePicURL is the address the profile picture is downloaded from.
    ProfilePicURL string `json:"profile_pic_url"`
}

// main reads the proxy address from conf.txt and the user list from
// users.json, then downloads every user's profile picture through the proxy
// into the "output" directory as <username>.jpg.
//
// Setup errors abort the run; per-user download errors are reported and the
// remaining downloads continue.
func main() {
    // Upper bound on simultaneous downloads. Starting one unthrottled
    // goroutine per user opens as many proxy connections as there are users.
    const maxConcurrentDownloads = 8

    proxyURL, err := readProxyURL("conf.txt")
    if err != nil {
        fmt.Printf("Error reading proxy URL: %v\n", err)
        return
    }

    file, err := os.Open("users.json")
    if err != nil {
        fmt.Printf("Error opening JSON file: %v\n", err)
        return
    }
    defer file.Close()

    var users []User
    if err := json.NewDecoder(file).Decode(&users); err != nil {
        fmt.Printf("Error decoding JSON data: %v\n", err)
        return
    }

    outputDir := "output"
    if err := os.MkdirAll(outputDir, os.ModePerm); err != nil {
        fmt.Printf("Error creating output directory: %v\n", err)
        return
    }

    proxy, err := url.Parse(proxyURL)
    if err != nil {
        fmt.Printf("Error parsing proxy URL: %v\n", err)
        return
    }

    httpClient := &http.Client{
        // Overall per-request deadline (connect, headers and body) so a
        // stalled transfer cannot block the program forever.
        Timeout: 2 * time.Minute,
        Transport: &http.Transport{
            Proxy: http.ProxyURL(proxy),
            // Certificate verification stays enabled: skipping it would let
            // any hop between this program and the server tamper with traffic.
            TLSClientConfig:       &tls.Config{MinVersion: tls.VersionTLS12},
            TLSHandshakeTimeout:   30 * time.Second,
            ResponseHeaderTimeout: 30 * time.Second,
            ExpectContinueTimeout: 1 * time.Second,
        },
    }

    var wg sync.WaitGroup
    // Counting semaphore: a slot is taken before each goroutine starts and
    // released when its download finishes.
    slots := make(chan struct{}, maxConcurrentDownloads)

    for _, user := range users {
        wg.Add(1)
        slots <- struct{}{}
        go func(user User) {
            defer wg.Done()
            defer func() { <-slots }()

            target := filepath.Join(outputDir, user.Username+".jpg")
            if err := downloadProfilePic(httpClient, user.ProfilePicURL, target); err != nil {
                fmt.Printf("Error downloading profile picture for %s: %v\n", user.Username, err)
                return
            }
            fmt.Printf("Downloaded profile picture for %s\n", user.Username)
        }(user)
    }

    wg.Wait()
}

// readProxyURL returns the first whitespace-delimited token found in the file
// named filename, which is expected to hold the proxy address.
//
// Leading blank lines and a leading UTF-8 byte order mark (as written by some
// Windows editors) are skipped. An error is returned when the file cannot be
// read or contains no token.
func readProxyURL(filename string) (string, error) {
    data, err := os.ReadFile(filename)
    if err != nil {
        return "", fmt.Errorf("error opening file: %w", err)
    }

    // Strip a UTF-8 BOM; otherwise it becomes part of the token and the
    // address no longer parses as a URL.
    const utf8BOM = "\xef\xbb\xbf"
    content := string(data)
    if len(content) >= len(utf8BOM) && content[:len(utf8BOM)] == utf8BOM {
        content = content[len(utf8BOM):]
    }

    // Sscan treats newlines as ordinary whitespace, so the token may sit on
    // any line of the file.
    var proxyURL string
    if _, err := fmt.Sscan(content, &proxyURL); err != nil {
        return "", fmt.Errorf("error reading file: %w", err)
    }

    return proxyURL, nil
}

// downloadProfilePic fetches url with client and stores the response body in
// the file named filepath.
//
// The destination file is only created after the server has answered with
// 200 OK, so a failed request leaves nothing behind. If writing the body
// fails, the partially written file is removed. A non-nil error is returned
// for request failures, non-200 statuses and file system errors.
func downloadProfilePic(client *http.Client, url, filepath string) error {
    resp, err := client.Get(url)
    if err != nil {
        return fmt.Errorf("error downloading file: %w", err)
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        return fmt.Errorf("bad status: %s", resp.Status)
    }

    out, err := os.Create(filepath)
    if err != nil {
        return fmt.Errorf("error creating file: %w", err)
    }

    if _, err := io.Copy(out, resp.Body); err != nil {
        // Best-effort cleanup; the copy error is the one worth reporting.
        _ = out.Close()
        _ = os.Remove(filepath)
        return fmt.Errorf("error saving file: %w", err)
    }

    // Close can report a deferred write failure, so it must be checked.
    if err := out.Close(); err != nil {
        _ = os.Remove(filepath)
        return fmt.Errorf("error saving file: %w", err)
    }

    return nil
}

参考链接

https://stackoverflow.com/questions/32407851/instagram-api-how-can-i-retrieve-the-list-of-people-a-user-is-following-on-ins

给TA充电
共{{data.count}}人
人已充电
go编程

[golang] 如何在golang使用tensorflow模型 | Windows

2024-7-5 8:35:32

编程

[npm]自动升级包

2024-7-8 5:30:03

0 条回复 A文章作者 M管理员
    暂无讨论,说说你的看法吧
个人中心
今日签到
搜索