Python Crawler in Practice 3: Collecting All Pending Xuexitong (学习通) Assignments

Background:

The Xuexitong app is not especially well thought out here. Once a teacher assigns homework, many students decide to do it "the next day"; tomorrow follows tomorrow, and they gradually forget the assignment altogether. To find out which assignments are still undone, you then have to open every course one by one, which is extremely tedious.

Demo:

http://api.fm90.cn [Demo added on April 15]

Open the scan feature in Xuexitong and scan the QR code to see all of your pending assignments along with their times and links.

Code implementation:

Stage 1 version: (the design is fairly rough and messy; improve it yourself if you are able)

# -*- coding: utf-8 -*-
#   @Time    : 2020/3/28 21:43
#   @Author  : 南国旧梦i
#   @FileName: homework.py
#   @Software: PyCharm
import requests
import re
import json


session = requests.Session()
headers = {
    "User-Agent": "Mozilla/5.0 (iPad; CPU OS 13_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) "
                  "Mobile/15E148 ChaoXingStudy/ChaoXingStudy_3_4.3.2_ios_phone_201911291130_27 ("
                  "@Kalimdor)_11391565702936108810 "
}

# wrote this for nothing and thought about it for nothing -- unused rotation helper
def rotate(input, d):
    Lfirst = input[0: d]
    Lsecond = input[d:]
    Rfirst = input[0: len(input) - d]
    Rsecond = input[len(input) - d:]
    return Lsecond + Lfirst, Rsecond + Rfirst  # (rotate left by d, rotate right by d)

# Log in with phone number and password
def login(username, password):
    login_url = 'https://passport2-api.chaoxing.com/v11/loginregister'
    data = {
        'uname': username,
        'code': password
    }
    res_login = session.post(login_url, data=data, headers=headers)
    res_login = json.loads(res_login.content.decode('utf-8'))
    # check the status flag returned by the API
    login_status = res_login["status"]
    if login_status:
        print("success")
    else:
        print("failed")
# Get all course URLs -- step 1
def get_course():
    course_url = 'https://mooc1-1.chaoxing.com/visit/courses'
    res_course = session.get(course_url, headers=headers)
    res_course = res_course.content.decode('utf-8')
    a = re.compile(r'target="_blank" title="(.*?)">')
    course_list = a.findall(res_course)  # course names (not used further)
    b = re.compile(r'href=\'(.*?)\' target="_blank"')
    course_urls = b.findall(res_course.replace('/mycourse/studentcourse',
                                               'https://mooc1-1.chaoxing.com/mycourse/studentcourse'))
    return course_urls

# Get the homework-list URL of each course -- step 2
def get_coursework(course_urls):
    course_allwork = []
    for url in course_urls:
        res_work = session.get(url, headers=headers)
        res_work = res_work.content.decode('utf-8').replace('&amp;', '&')
        a = re.compile(r'data="(.*?)">作业')  # the link whose text is "作业" (homework)
        work_list_url = a.findall(res_work.replace('/work/getAllWork',
                                                   'https://mooc1-1.chaoxing.com/work/getAllWork'))
        if work_list_url:
            course_allwork.append(work_list_url[0])
    return course_allwork

# Get the courses with pending homework and their dates -- step 3
def course_info(course_allwork):
    course_allname = []
    course_allstatus = []
    work_start = []
    work_end = []
    for work_url in course_allwork:
        # strip all whitespace so the regexes below can match across line breaks
        res_courseinfo = session.get(work_url, headers=headers).text.replace('\n', '').replace('\t', '').replace(' ', '')
        b = re.findall(r'作业状态:</span><strong>(.*?)</strong>', res_courseinfo, re.S)
        c = re.findall(r'title="(.*?)">.*?</a>', res_courseinfo, re.S)[0]
        d = re.findall(r'<spanclass="fl">开始时间:</span>(.*?)</span>', res_courseinfo)
        e = re.findall(r'<spanclass="fl">截止时间:</span>(.*?)</span>', res_courseinfo)

        course_allname.append(c)
        course_allstatus.append(b)
        work_start.append(d)
        work_end.append(e)

    for i in range(len(course_allname)):
        index_id = 0  # position of the assignment within this course's homework list
        for one in course_allstatus[i]:
            if one == '待做':  # '待做' = pending; matched against the page text
                print("Pending course: " + course_allname[i])
                print("Start time: " + work_start[i][index_id][0:10] + ' ' + work_start[i][index_id][10:16])
                print("Deadline: " + work_end[i][index_id][0:10] + ' ' + work_end[i][index_id][10:16])
                print("--------------------------------------------------------------")
            index_id += 1


if __name__ == '__main__':
    login("phone number", "password")
    course_info(get_coursework(get_course()))

Result:

Stage 2 version: (built on the Flask framework; it needs some tweaking before it will run. This is the core of the Demo above and logs in via QR code. A rough Flask wrapper sketch follows the code below.)

# -*- coding: utf-8 -*-
#   @Time    : 2020/4/5 14:36
#   @Author  : 南国旧梦i
#   @FileName: test1.py
#   @Software: PyCharm
import base64
import json
import time

import requests
from PIL import Image
# import cv2
from bs4 import BeautifulSoup, SoupStrainer
from urllib.parse import unquote

# code_url = 'http://passport2.chaoxing.com/createqr?uuid={
# }&xxtrefer=&type=1&mobiletip=JIA%e6%8e%88%e6%9d%83%e8%af%b7%e6%b1%82'.format(str(uuid.uuid4().hex))


headers = {
    "User-Agent": "Mozilla/5.0 (iPad; CPU OS 13_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) "
                  "Mobile/15E148 ChaoXingStudy/ChaoXingStudy_3_4.3.2_ios_phone_201911291130_27 ("
                  "@Kalimdor)_11391565702936108810 "
}


def login(conv, user_uuid=0, server=False):
    code_html_url = 'https://passport2.chaoxing.com/cloudscanlogin?mobiletip=JIA%e6%8e%88%e6%9d%83%e8%af%b7%e6%b1%82' \
                    '&pcrefer=http://i.chaoxing.com '
    conv.keep_alive = False
    code_html = conv.get(code_html_url, headers=headers)
    code_html_obj = BeautifulSoup(code_html.content, "lxml")
    login_uuid = code_html_obj.find(id='uuid')['value']
    login_enc = code_html_obj.find(id='enc')['value']
    code_pic_url = code_html_obj.find(id='ewm')['src']
    # print(login_enc)
    # print(login_uuid)
    if not server:
        code_pic = conv.get('https://passport2.chaoxing.com' + code_pic_url, headers=headers)
        with open('tmp.png', 'wb') as f:
            f.write(code_pic.content)

        img = Image.open('tmp.png')
        img.show()

        # TODO: a nicer way to display the QR image
        """
        code_pic_show = cv2.imread('tmp.png')
        cv2.imshow('NumCode', code_pic_show)
        cv2.waitKey(0)
        """
        while True:
            login_rec_json = conv.post("https://passport2.chaoxing.com/getauthstatus",
                                       {'uuid': login_uuid, 'enc': login_enc}, headers=headers)
            login_rec_dict = json.loads(login_rec_json.content)
            if login_rec_dict['status']:
                # cv2.destroyAllWindows()
                break
            # print("run once")
            # print(login_rec_dict['status'])
            time.sleep(2)
        return conv
    elif server:
        code_pic = conv.get('https://passport2.chaoxing.com' + code_pic_url, headers=headers)
        pic_content = code_pic.content
        pic_base64 = base64.b64encode(pic_content)
        polling_url = r"https://passport2.chaoxing.com/getauthstatus?uuid=" + login_uuid + r"&enc=" + login_enc
        return pic_base64, conv, polling_url


def get_course_list(conv):
    course_list_url_list = []
    course_list_host_url = 'https://mooc1-1.chaoxing.com'
    course_list_url = 'https://mooc1-1.chaoxing.com/visit/courses'

    conv.keep_alive = False

    course_list_html = conv.get(course_list_url, headers=headers)
    html_obj = BeautifulSoup(course_list_html.content.decode('utf-8'), "lxml")
    course_list_all_tag = html_obj.find_all("h3", class_='clearfix')
    for course_one_tag in course_list_all_tag:
        course_one_tag_info = course_one_tag.find("a")
        # print(course_one_tag_info)
        course_one_tag_url = course_list_host_url + course_one_tag_info.attrs['href'].replace(r"&amp;", r"&")
        # print(course_one_tag_url)
        course_one_tag_name = course_one_tag_info.attrs['title']
        course_one_tag_list = [course_one_tag_name, course_one_tag_url]
        course_list_url_list.append(course_one_tag_list)
    return course_list_url_list


def get_course_work(conv, course_list):
    host_url = 'https://mooc1-1.chaoxing.com'
    work_list = []
    conv.keep_alive = False
    for course in course_list:
        course_name = course[0]  # course name
        html_content = conv.get(course[1], headers=headers)  # fetch the course page (Response object)
        html_obj = BeautifulSoup(str(html_content.content.decode('utf-8').strip()), 'lxml')  # parse into a BeautifulSoup object
        work_obj = html_obj.find("a", string=r"作业  ")  # the link whose text is "作业" (homework), note the trailing spaces
        work_url = work_obj['data']
        work_url = host_url + work_url

        # TODO: add a direct jump link to each assignment
        # html_content_raw = html_content.content
        # course_id = re.search(r'(?<=classId\s=\s)\d*',html_content_raw)
        # class_id = re.search(r'(?<=courseId\s=\s)\d*',html_content_raw)
        # course_enc = re.search(r'(?<=enc=)\w*',html_content_raw)

        work_info_list = [course_name, work_url]
        work_list.append(work_info_list)

    return work_list
    # work_list = [[course name, homework URL], ...]  (courseID / classID not extracted yet)


def get_work_info(conv, info_list):
    # cid refers to the course id

    work_info_obj_list = []

    conv.keep_alive = False

    def is_li_but_has_no_class(tag):
        return tag.name == 'li' and not tag.has_attr('class')

    for work_one_list in info_list:  # process one course
        course_name = work_one_list[0]
        html = conv.get(work_one_list[1], headers=headers)  # request the homework list page
        html_ul_parse_rule = SoupStrainer("ul", class_="clearfix")
        ul_obj = BeautifulSoup(str(html.content.decode('utf-8').strip()), "lxml",
                               parse_only=html_ul_parse_rule)  # parse only the <ul class="clearfix"> block

        undo_work_one_info_list = ul_obj.find_all(is_li_but_has_no_class)

        if undo_work_one_info_list:
            for one in undo_work_one_info_list:
                work_tag_and_name_list = [one, course_name]
                work_info_obj_list.append(work_tag_and_name_list)

    return work_info_obj_list


def parse_work(work_list):
    work_info_list = []

    for one_work in work_list:
        work_info = {}
        work_tag = one_work[0]
        work_course_name = one_work[1]

        work_status = str(work_tag.find('strong').string.strip())

        if work_status == '待做':  # '待做' = pending; matched against the page text
            work_name = work_tag.find('a', class_='inspectTask')['title']

            time_list = work_tag.find_all('span', class_='pt5')

            try:
                start_time = time_list[0].contents[1]
            except IndexError:
                start_time = ''
            try:
                end_time = time_list[1].contents[1]
            except IndexError:
                end_time = ''
            # print(start_time)
            # print(end_time)
            # print(work_name)
            # print(work_course_name)
            # print('\n\n')
            work_info['workname'] = work_name
            work_info['coursename'] = work_course_name
            work_info['start'] = start_time
            work_info['end'] = end_time
            work_info_list.append(work_info)

    return work_info_list


def get_course(conv):
    url = 'https://mooc1-api.chaoxing.com/mycourse/backclazzdata?view=json&mcode='
    course_info_dict_list = []
    course_list_obj = conv.get(url, headers=headers)
    course_list_dict = json.loads(course_list_obj.content)
    for one in course_list_dict['channelList']:
        course_info_dict = {}
        course_info_dict['course_id'] = one['content']['id']
        #try:
        #    course_info_dict['image_url'] = one['content']['course']['data'][0]['imageurl']
        #except KeyError:
        #    course_info_dict['image_url'] = None
        course_info_dict['course_name'] = one['content']['name']
        course_info_dict['class_id'] = one['key']
        course_info_dict_list.append(course_info_dict)
    # print(course_info_dict_list)
    return course_info_dict_list


def get_work(conv, course_info):
    work_info = []
    url = 'https://mobilelearn.chaoxing.com/task/getStuWorkAndExamSkipUrl'
    parser_ul = SoupStrainer('ul')  # only parse the <ul> part of the returned HTML
    for one_course in course_info:
        html_obj = conv.get(url, headers=headers,
                            params={'courseId': one_course['course_id'], 'classId': one_course['class_id']})
        html_bs = BeautifulSoup(str(html_obj.content.decode('utf-8')), 'lxml', parse_only=parser_ul)
        work_list = html_bs.find_all('li')
        for one_task in work_list:
            if one_task.find('span').string == '未交':  # '未交' = not yet submitted
                work_name = str(one_task.find('p').string)
                try:
                    left_time = str(one_task.find('span', class_='fr').string)
                except AttributeError:
                    left_time = '不限时'  # no deadline shown
                course_name = str(one_course['course_name'])
                work_url = unquote(str(one_task['data']))
                work_info.append({'work_name': work_name, 'course_name': course_name,
                                  'left_time': left_time, 'work_url': work_url})
    return work_info


if __name__ == '__main__':
    conv = requests.session()
    conv_login = login(conv=conv)  # log in by scanning the QR code
    course_list = get_course(conv_login)  # list of course info dicts
    print(get_work(conv_login, course_list))
    print("Login successful")

Result (the output is plain text; here I use HTML to display it)
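
For reference, get_work() returns a list of dictionaries shaped like the following; the values below are made-up placeholders, only the keys come from the code above.

# Illustrative shape of get_work()'s return value; all values are placeholders,
# only the keys ('work_name', 'course_name', 'left_time', 'work_url') come from the code.
example_work_info = [
    {
        'work_name': 'Chapter 3 exercises',                     # assignment title
        'course_name': 'Data Structures',                       # course it belongs to
        'left_time': '2 days left',                             # remaining-time text from the page, or '不限时'
        'work_url': 'https://mooc1-api.chaoxing.com/work/...',  # decoded link to the assignment
    },
]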
