只看原创   查看文章

从数据库到JavaBean,配置使用

请看官大老爷看代码

package com.neal.main.domain;

import java.io.*;
import java.sql.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

/**
 * 从数据库到JavaBean
 *
 * @author Neal
 */
public class TableConfiguration {
    /**
     * Line terminator used for every line of the generated source file.
     */
    private static final String EOL = "\r\n";

    /**
     * Classpath-relative location of the configuration file.
     */
    private String propertyName;
    /**
     * Configuration loaded once by the constructor and reused by {@link #start()}.
     */
    private final Properties properties = new Properties();
    /**
     * Package of the generated entity classes (config key {@code package}).
     */
    private String packageOutPath;
    /**
     * Author name written into the generated class comment (config key {@code author}).
     */
    private String authorName;
    /**
     * Single table to reverse-engineer (config key {@code table}).
     */
    private String tableName;
    /**
     * When non-empty, every table of the connected database is reverse-engineered
     * (config key {@code database}).
     */
    private String databaseName;
    /**
     * Names of all tables found through the connection metadata.
     */
    private List<String> tableNames;
    /**
     * Column names of the table currently being processed.
     */
    private List<String> columnNames;
    /**
     * Column type names of the table currently being processed, parallel to {@link #columnNames}.
     */
    private List<String> columnTypeNames;
    /**
     * Whether the generated class needs {@code java.util.Date}.
     */
    private boolean fUtil = false;
    /**
     * Whether the generated class needs {@code java.sql.*}.
     */
    private boolean fSql = false;

    /**
     * Loads the configuration file and caches the generator settings.
     *
     * @param propertyName resource path of the properties file, resolved relative to this class
     */
    private TableConfiguration(String propertyName) {
        this.propertyName = propertyName;
        try (InputStream inputStream = getClass().getResourceAsStream(propertyName)) {
            // getResourceAsStream returns null (it does not throw) when the resource is missing.
            if (inputStream == null) {
                System.out.println("没找到这个配置文件 " + propertyName);
            } else {
                properties.load(inputStream);
            }
        } catch (IOException e) {
            System.out.println("没找到这个配置文件 " + e);
        }
        this.databaseName = properties.getProperty("database");
        this.tableName = properties.getProperty("table");
        this.packageOutPath = properties.getProperty("package");
        this.authorName = properties.getProperty("author");
    }

    /**
     * Generates one entity class per table name.
     *
     * @param tableNames tables to process
     * @param connection open database connection
     */
    private void genEntity(List<String> tableNames, Connection connection) {
        for (String name : tableNames) {
            this.genEntity(name, connection);
        }
    }

    /**
     * Generates the entity class for a single table: reads the column metadata,
     * renders the source text and writes it to disk. Nothing is written when the
     * metadata cannot be read or no target package is configured.
     *
     * @param tableName  table to process
     * @param connection open database connection
     */
    private void genEntity(String tableName, Connection connection) {
        if (!hasText(packageOutPath)) {
            System.out.println("未配置 package,无法生成实体类");
            return;
        }
        if (!readColumns(tableName, connection)) {
            return;
        }
        writeEntityFile(tableName, parse(tableName));
    }

    /**
     * Reads column names and type names of a table into the instance state and
     * recomputes the import flags.
     *
     * @return {@code true} when the metadata was read, {@code false} otherwise
     */
    private boolean readColumns(String tableName, Connection connection) {
        fUtil = false;
        fSql = false;
        List<String> names = new ArrayList<>();
        List<String> types = new ArrayList<>();
        // Identifiers cannot be bound as statement parameters; the name comes from the
        // local configuration file or from the database's own metadata.
        String sql = "SELECT * FROM " + tableName;
        try (PreparedStatement preparedStatement = connection.prepareStatement(sql)) {
            ResultSetMetaData setMetaData = preparedStatement.getMetaData();
            if (setMetaData == null) {
                System.out.println("未拿到字段集" + tableName);
                return false;
            }
            int size = setMetaData.getColumnCount();
            // JDBC column indexes are 1-based.
            for (int i = 1; i <= size; i++) {
                String typeName = setMetaData.getColumnTypeName(i);
                names.add(setMetaData.getColumnName(i));
                types.add(typeName);
                if ("DATETIME".equalsIgnoreCase(typeName)) {
                    fUtil = true;
                }
                if ("IMAGE".equalsIgnoreCase(typeName)
                        || "TEXT".equalsIgnoreCase(typeName)
                        || "TIMESTAMP".equalsIgnoreCase(typeName)) {
                    fSql = true;
                }
            }
        } catch (SQLException e) {
            System.out.println("未拿到字段集" + e);
            return false;
        }
        columnNames = names;
        columnTypeNames = types;
        System.out.println(columnNames);
        System.out.println(columnTypeNames);
        return true;
    }

    /**
     * Writes the rendered source to {@code <working dir>/src/<package path>/<ClassName>.java}.
     */
    private void writeEntityFile(String tableName, String content) {
        File directory = new File("");
        String outputPath = directory.getAbsolutePath() + "/src/"
                + this.packageOutPath.replace(".", "/")
                + "/";
        System.out.println("路径为:" + outputPath);
        File path = new File(outputPath);
        // mkdirs (not mkdir) so that every missing level of the package path is created.
        if (!path.exists() && path.mkdirs()) {
            System.out.println("路径已被创建");
        }
        System.out.println(path.exists());
        File file = new File(outputPath + initSml(initCap(tableName)) + ".java");
        try {
            if (!file.exists() && file.createNewFile()) {
                System.out.println("文件已被创建");
            }
            // try-with-resources closes the writer even when writing fails.
            try (PrintWriter pw = new PrintWriter(new FileWriter(file))) {
                pw.println(content);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Collects the names of the ordinary tables of the connection's current catalog
     * into {@link #tableNames}.
     *
     * @param connection open database connection
     */
    private void getAllEntityTable(Connection connection) {
        try {
            DatabaseMetaData dmd = connection.getMetaData();
            /*
             * Columns of the getTables result used or relevant here:
             * TABLE_CAT   String ==> table catalog (may be null)
             * TABLE_SCHEM String ==> table schema (may be null)
             * TABLE_NAME  String ==> table name
             * TABLE_TYPE  String ==> table type
             */
            // Restrict to the current catalog and to type TABLE so that views and
            // system tables are not turned into entity classes.
            try (ResultSet rs = dmd.getTables(connection.getCatalog(), null, "%",
                    new String[]{"TABLE"})) {
                while (rs.next()) {
                    tableNames.add(rs.getString("TABLE_NAME"));
                }
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Renders the complete source text of the entity class for a table.
     *
     * @param tableName table name
     * @return Java source of the generated class
     */
    private String parse(String tableName) {
        StringBuilder sb = new StringBuilder();
        sb.append("package ").append(this.packageOutPath).append(";").append(EOL);
        sb.append(EOL);
        if (fUtil) {
            // Single-type import: a wildcard here would make Date ambiguous
            // whenever java.sql.* is imported as well.
            sb.append("import java.util.Date;").append(EOL);
        }
        if (fSql) {
            sb.append("import java.sql.*;").append(EOL);
        }
        sb.append(EOL);
        // Class comment
        sb.append("/**").append(EOL);
        sb.append(" * ").append(tableName).append("实体类").append(EOL);
        sb.append(" *").append(EOL);
        sb.append(" * @author ").append(this.authorName).append(" ")
                .append(new Timestamp(System.currentTimeMillis())).append(EOL);
        sb.append(" */").append(EOL);

        // Class body
        sb.append("public class ").append(initSml(initCap(tableName))).append(" {").append(EOL);
        processAllAttrs(sb);
        processAllMethod(sb);
        sb.append("}").append(EOL);
        return sb.toString();
    }

    /**
     * Appends one private field per column.
     *
     * @param sb target buffer
     */
    private void processAllAttrs(StringBuilder sb) {
        for (int i = 0; i < columnNames.size(); i++) {
            sb.append("\tprivate ").append(sqlTypeToJavaType(columnTypeNames.get(i)))
                    .append(" ").append(initSml(columnNames.get(i))).append(";").append(EOL);
        }
    }

    /**
     * Appends a setter and a getter per column.
     *
     * @param sb target buffer
     */
    private void processAllMethod(StringBuilder sb) {
        for (int i = 0; i < columnNames.size(); i++) {
            String javaType = sqlTypeToJavaType(columnTypeNames.get(i));
            String field = initSml(columnNames.get(i));
            String accessorSuffix = initSml(initCap(columnNames.get(i)));

            sb.append(EOL);
            sb.append("\tpublic void set").append(accessorSuffix).append("(")
                    .append(javaType).append(" ").append(field).append(") {").append(EOL);
            sb.append("\t\tthis.").append(field).append(" = ").append(field)
                    .append(";").append(EOL);
            sb.append("\t}").append(EOL);
            sb.append(EOL);
            sb.append("\tpublic ").append(javaType).append(" get").append(accessorSuffix)
                    .append("() {").append(EOL);
            sb.append("\t\treturn ").append(field).append(";").append(EOL);
            sb.append("\t}").append(EOL);
        }
    }

    /**
     * Upper-cases the first character when it is an ASCII lower-case letter.
     *
     * @param str input string; {@code null} and empty strings are returned unchanged
     * @return the string with a capitalised first letter
     */
    private static String initCap(String str) {
        if (str == null || str.isEmpty()) {
            return str;
        }
        char first = str.charAt(0);
        if (first >= 'a' && first <= 'z') {
            return (char) (first - 32) + str.substring(1);
        }
        return str;
    }

    /**
     * Removes every {@code '_'} and upper-cases the character that follows it
     * (snake_case to camelCase). Characters without an upper-case form, such as
     * digits, are copied unchanged.
     *
     * @param str input string
     * @return the converted string
     */
    private static String initSml(String str) {
        StringBuilder out = new StringBuilder(str.length());
        boolean upperNext = false;
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (c == '_') {
                upperNext = true;
                continue;
            }
            out.append(upperNext ? Character.toUpperCase(c) : c);
            upperNext = false;
        }
        return out.toString();
    }

    /**
     * Maps a database column type name to the Java type used in the generated class.
     *
     * @param sqlType column type name as reported by the driver (case-insensitive)
     * @return Java type name; {@code "Object"} for {@code null} or unmapped types so that
     *         the generated source still compiles
     */
    private static String sqlTypeToJavaType(String sqlType) {
        if (sqlType == null) {
            return "Object";
        }
        switch (sqlType.toLowerCase(java.util.Locale.ROOT)) {
            case "bit":
                return "boolean";
            case "tinyint":
                return "byte";
            case "smallint":
                return "short";
            case "int":
            case "bigint":
                return "long";
            case "float":
                return "float";
            case "decimal":
            case "numeric":
            case "real":
            case "money":
                return "double";
            case "varchar":
            case "char":
            case "nvarchar":
            case "nchar":
            case "text":
                return "String";
            case "datetime":
                return "Date";
            case "image":
                return "Blob";
            case "timestamp":
                return "Timestamp";
            default:
                return "Object";
        }
    }

    /**
     * Returns whether the string is neither {@code null} nor empty.
     */
    private static boolean hasText(String value) {
        return value != null && !value.isEmpty();
    }

    /**
     * Runs the generator: validates the configuration, opens the connection and
     * generates either every table of the database or the single configured table.
     */
    private void start() {
        if (hasText(databaseName) && hasText(tableName)) {
            System.out.println("databaseName 和 tableName 不能同时存在");
            return;
        }
        if (!hasText(databaseName) && !hasText(tableName)) {
            System.out.println("table 和 database 至少需要配置一个");
            return;
        }
        String driver = properties.getProperty("driver");
        String user = properties.getProperty("user");
        String url = properties.getProperty("url");
        String pass = properties.getProperty("password");
        try {
            if (hasText(driver)) {
                Class.forName(driver);
            }
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
            return;
        }
        // try-with-resources closes the connection on every path.
        try (Connection conn = DriverManager.getConnection(url, user, pass)) {
            if (hasText(databaseName)) {
                tableNames = new ArrayList<>();
                getAllEntityTable(conn);
                System.out.println(tableNames);
                genEntity(tableNames, conn);
            } else {
                genEntity(tableName, conn);
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Entry point.
     */
    public static void main(String[] args) {
        new TableConfiguration("../resource/reverse.properties").start();
    }
}

当然,你可能需要说明书

#reversetable
将数据库中的表,逆向生成Java实体类(JavaBean)

填充配置文件,名字随意,具体配置如下

`驱动`
driver=com.mysql.jdbc.Driver

`URL地址`
url=jdbc:mysql://localhost:3306/xxx?useUnicode=true&characterEncoding=UTF-8

`数据库用户名`
user=root

`数据库密码`
password=xx

`具体逆向表(目前只能单一导入)`
table=t_article

`用于标识是否将当前连接的数据库下的所有表全部逆向生成实体类;
此处填写的并不是实际连接的数据库名(实际数据库名需要写在 URL 中),并且不能与 table 同时配置`
database=xxx

`需要存入到的包`
package=com.neal.main.entity

`作者注释信息`
author=Neal

然后再new一个TableConfiguration的带配置文件路径的有参对象,调用start()方法启动

目前支持的字段类型有
"bit"
"tinyint"
"smallint"
"int"
"bigint"
"float"
"numeric"
"decimal"
"real"
"money"
"varchar"
"char"
"nvarchar"
"nchar"
"text"
"datetime"
"image"
"Timestamp"
可自行修改源码进行添加,修改sqlTypeToJavaType()方法,即可

配置如下,当然别忘了导入数据库连接的jar包



原创 Feb 8, 2018 8:19:03 PM 86 3

Java面试常考题

问题:A线程输出10次,B线程输出100次,再A线程输出10次,再B线程输出100次,如此循环50次,请写出程序。
public class TraditionalThreadTest {
    /**
     * Monitor shared by the two worker threads; it makes them print in strict
     * alternation (10 "sub" lines, then 100 "main" lines, and so on).
     */
    private static class SugThread {
        /**
         * Turn flag: {@code false} means it is sub's turn, {@code true} means it is main's turn.
         */
        private boolean isFree = false;

        /**
         * Prints the ten "sub" lines of round {@code i} once it is sub's turn,
         * then hands the turn over.
         */
        private synchronized void sub(int i) {
            awaitTurn(false);
            int j = 1;
            while (j <= 10) {
                System.out.println("sub thread " + j + " is " + i);
                j++;
            }
            isFree = true;
            // Wake the other thread waiting on this monitor.
            this.notify();
        }

        /**
         * Prints the hundred "main" lines of round {@code i} once it is main's turn,
         * then hands the turn back.
         */
        private synchronized void main(int i) {
            awaitTurn(true);
            int j = 1;
            while (j <= 100) {
                System.out.println("main thread " + j + " is " + i);
                j++;
            }
            isFree = false;
            // Wake the other thread waiting on this monitor.
            this.notify();
        }

        /**
         * Waits (releasing the monitor) until the turn flag equals {@code expected}.
         * Only called from the synchronized methods above, so the monitor is held.
         */
        private void awaitTurn(boolean expected) {
            while (isFree != expected) {
                try {
                    this.wait();
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    public static void main(String[] args) {
        final SugThread coordinator = new SugThread();
        Runnable subRounds = () -> {
            for (int round = 1; round <= 50; round++) {
                coordinator.sub(round);
            }
        };
        Runnable mainRounds = () -> {
            for (int round = 1; round <= 50; round++) {
                coordinator.main(round);
            }
        };
        new Thread(subRounds).start();
        new Thread(mainRounds).start();
    }
}


原创 Jan 30, 2018 9:54:50 PM 91 1

Java定时器的使用Demo

import java.util.Timer;
import java.util.TimerTask;

/**
 * 定时器的使用
 *
 * @author Neal
 */
public class TimerTaskTest {
    /**
     * Schedules a repeating task that prints a line every two seconds,
     * starting two seconds after launch.
     */
    public static void main(String[] args) {
        Timer timer = new Timer();
        TimerTask boomTask = new TimerTask() {
            @Override
            public void run() {
                System.out.println("boom!!!!");
            }
        };
        final long delayMillis = 2000;
        final long periodMillis = 2000;
        // schedule(task, delay, period): delay before the first run, then the interval between runs
        timer.schedule(boomTask, delayMillis, periodMillis);
    }
}


原创 Jan 30, 2018 9:32:56 PM 70 1

动态代理的实现

功能接口

import java.lang.reflect.Method;

/**
 * 功能
 *
 * @author Neal
 */
public interface Advice {
    /**
     * Hook for the "before" join point.
     */
    void beforeMethod();

    /**
     * Hook for the "after" join point.
     *
     * @param method the reflective method object passed in by the caller
     */
    void afterMethod(Method method);
}

代理demo

import java.lang.reflect.Proxy;
import java.util.ArrayList;
import java.util.Collection;

/**
 * 动态代理测试
 *
 * @author Neal
 */
public class ProxyTest {
    /**
     * Wraps an {@code ArrayList} in an advised proxy, adds three names through the
     * proxy and prints its size and contents.
     */
    public static void main(String[] args) {
        ArrayList<String> target = new ArrayList<>();
        // The proxy implements the interfaces of ArrayList, so it can be used as a Collection.
        @SuppressWarnings("unchecked")
        Collection<String> proxy1 = (Collection<String>) getProxy(target, new MyAdvice());
        proxy1.add("jack");
        proxy1.add("neal");
        proxy1.add("rose");
        System.out.println(proxy1.size());
        System.out.println(proxy1.toString());
    }

    /**
     * Creates a dynamic proxy that runs the advice around every call forwarded to the target.
     * The proxy implements the interfaces returned by {@code target.getClass().getInterfaces()}.
     * An exception thrown by the target is rethrown as-is (not wrapped), and the after-advice
     * is skipped in that case.
     *
     * @param target object that receives the forwarded calls
     * @param advice before/after behaviour
     * @return the proxy instance
     */
    private static Object getProxy(final Object target, final Advice advice) {
        return Proxy.newProxyInstance(
                target.getClass().getClassLoader(),
                target.getClass().getInterfaces(),
                (proxy, method, args) -> {
                    advice.beforeMethod();
                    Object result;
                    try {
                        result = method.invoke(target, args);
                    } catch (java.lang.reflect.InvocationTargetException e) {
                        // Method.invoke wraps whatever the target threw; unwrap it so callers
                        // see the real exception instead of a wrapped one.
                        Throwable cause = e.getCause();
                        throw cause != null ? cause : e;
                    }
                    advice.afterMethod(method);
                    return result;
                });
    }
}


原创 Jan 30, 2018 9:26:15 PM 78 1

Java模拟浏览器发送请求

话不多说,看代码

import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;

/**
 * 伪装浏览器
 *
 * @author neal
 */
public class MaskBrowser {
    /**
     * Sends a form POST with browser-like request headers and prints the response
     * body when the server answers 200.
     *
     * @throws IOException if the URL is invalid or the request fails
     */
    public static void main(String[] args) throws IOException {
        URL readUrl = new URL("需要登录的URL");
        HttpURLConnection conn = (HttpURLConnection) readUrl.openConnection();
        try {
            // Use the POST method
            conn.setRequestMethod("POST");
            // Do not use caches
            conn.setUseCaches(false);
            // Sending a POST body requires the following two settings
            conn.setDoOutput(true);
            conn.setDoInput(true);
            // Read timeout
            conn.setReadTimeout(8000);
            // Connect timeout
            conn.setConnectTimeout(8000);
            // Do not follow 302 redirects automatically
            conn.setInstanceFollowRedirects(false);
            // Request headers
            conn.setRequestProperty("User-Agent"
                    , "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36");
            conn.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
            conn.setRequestProperty("Accept-Encoding", "gzip, deflate");
            conn.setRequestProperty("Accept-Language", "zh-CN,zh;q=0.9");
            conn.setRequestProperty("Cache-Control", "max-age=0");
            conn.setRequestProperty("Connection", "keep-alive");
            conn.setRequestProperty("Cookie", "Hm_lvt_d131e80dbe4e2a6df483fc811164f418=1517996742,1518090993; JSESSIONID=021244182EEC605D1A5F2C6B031EBC51");
            conn.setRequestProperty("Host", "请自行输入");
            conn.setRequestProperty("Referer", "请自行输入");
            conn.setRequestProperty("Upgrade-Insecure-Requests", "1");
            // POST parameters. NOTE(review): real values should be URL-encoded before sending.
            String pars = "username=用户&password=你的密码";
            try (OutputStream out = conn.getOutputStream()) {
                // Encode explicitly instead of relying on the platform default charset.
                out.write(pars.getBytes(java.nio.charset.StandardCharsets.UTF_8));
                out.flush();
            }

            int n = conn.getResponseCode();
            if (n == 200) {
                try (BufferedReader in = new BufferedReader(
                        new InputStreamReader(openDecodedBody(conn), "UTF-8"))) {
                    StringBuilder buffer = new StringBuilder();
                    String line;
                    while ((line = in.readLine()) != null) {
                        buffer.append(line).append("\r\n");
                    }
                    String result = buffer.toString();
                    System.out.println(result);
                }
            }
        } finally {
            conn.disconnect();
        }
    }

    /**
     * Opens the response body and undoes the content encoding that the
     * {@code Accept-Encoding} request header allows the server to apply.
     *
     * @param conn connection whose response is being read
     * @return stream of the decoded response body
     * @throws IOException if the body cannot be opened
     */
    private static InputStream openDecodedBody(HttpURLConnection conn) throws IOException {
        InputStream raw = conn.getInputStream();
        String encoding = conn.getContentEncoding();
        if ("gzip".equalsIgnoreCase(encoding)) {
            return new java.util.zip.GZIPInputStream(raw);
        }
        if ("deflate".equalsIgnoreCase(encoding)) {
            return new java.util.zip.InflaterInputStream(raw);
        }
        return raw;
    }
}

只是可以简单模拟一下登录,拿到登录后的内容,无法绕过验证码......有待提高

原创 Feb 8, 2018 9:27:07 PM 58 0

Python3+selenium+urllib爬取知乎的登录验证码

#!/usr/bin/python3
# coding:utf-8
# Filename:get_image.py
# Author:Neal
# Time:2018.03.13 13:53

"""
    自动点击验证码图片进行验证码的刷新,
    每次的验证码保存到本地

    知乎拥有反爬虫措施:
    每个Ip估计就只能访问多少次登录
    解决方案:
    爬取大量IP地址,进行切换爬取

    爬取到的IP地址,需要进行处理,是否可用于代理
"""
import random
import urllib
from urllib.request import urlopen

from selenium import webdriver
import time

from selenium.common.exceptions import NoSuchElementException


def random_name():
    """
    Build a random image name.

    The result is the repr of the current timestamp followed by six random
    lower-case letters.
    :return: str
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    ticks = time.time()
    suffix = ''.join(alphabet[random.randint(0, 25)] for _ in range(6))
    return str(repr(ticks) + suffix)


def refresh_and_save(web_driver, save_resource_path):
    """
    Save the captcha image currently shown, then click it to request a new one.

    The English captcha container is tried first, then the Chinese one. When
    neither is present the function returns without doing anything.
    :param save_resource_path: directory prefix the image is written to
        (concatenated directly with the file name, so it must end with a separator)
    :param web_driver: webdriver
    :return: None
    """
    # Locate the captcha image
    img_area = None
    for container in ('Captcha-englishContainer', 'Captcha-chineseContainer'):
        try:
            img_area = web_driver.find_element_by_xpath("//div[@class='%s']/img" % container)
            break
        except NoSuchElementException:
            continue
    if img_area is None:
        return

    image_url = img_area.get_attribute("src")
    # Close the HTTP response as soon as the bytes have been read
    with urllib.request.urlopen(image_url) as response:
        result = response.read()
    img_name = random_name()
    with open(save_resource_path + img_name + '.jpg', 'wb') as f:
        f.write(result)
    time.sleep(1)
    # Clicking the image asks the page for a new captcha
    img_area.click()

"""
    1.打开连接
    2.0.分析验证码地址,以及验证码刷新按钮
    2.1.有些网站需要填写账号和密码才能有验证码出现
    3.保存验证码
    
    刷新:
    点击图片区域,进行刷新,得到下一个验证码,然后保存,再进行刷新
"""
driver = webdriver.Chrome()
try:
    driver.get("https://www.zhihu.com/signup?next=%2F")
    # Maximize the browser window
    driver.maximize_window()
    # Switch to the login form
    driver.find_element_by_xpath("//div[@class='SignContainer-switch']/span").click()
    # Wait for the page to load
    time.sleep(1)
    input_user = driver.find_element_by_xpath("//input[contains(@placeholder,'手机号或邮箱')]")
    input_password = driver.find_element_by_xpath("//input[contains(@placeholder,'密码')]")
    # NOTE(review): credentials are hard-coded here; they should come from configuration
    # or the environment rather than from source.
    input_user.send_keys("13037479765")
    input_password.send_keys('99900999')
    # Click the login button
    driver.find_element_by_xpath("//button[@class='Button SignFlow-submitButton Button--primary Button--blue']").click()
    time.sleep(2)
    save_path = "C:\\my\\resource\\img\\"

    for i in range(0, 100, 1):
        refresh_and_save(driver, save_path)
        time.sleep(1)
finally:
    # Always close the browser, even when a step above fails
    driver.quit()

原创 Mar 13, 2018 6:31:11 PM 51 0

Python3自带爬虫库的比较

#!/usr/bin/python3
# coding:utf-8
# Filename:spider_study.py
# Author:Neal
# Time:2018.03.13 9:48
import http.cookiejar
import urllib.request
import urllib.parse

# When passing parameters, data must be of type bytes
"""
    简单打开一个url
"""

data = bytes(urllib.parse.urlencode({'name': 'jack'}), encoding='UTF-8')
response = urllib.request.urlopen("http://www.baidu.com", data=data, timeout=1)

# Second approach: use the more capable Request object
"""
    可填充请求头
"""
fast = urllib.request.Request("http://www.baidu.com", headers={
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/65.0.3325.146 Safari/537.36'}, method="POST")

# This time a Request object is passed to urlopen as the argument
response2 = urllib.request.urlopen(fast)

# Third approach: use the advanced feature, handlers
"""
    可设置代理,cookie,响应的错误
"""
# Handler that supplies a user name and password, used for authentication
# auth_handler = urllib.request.HTTPBasicAuthHandler()
# auth_handler.add_password(realm='PDQ Application',
#                           uri="https://mahler:8092/site-updates.py",
#                           user="klem",
#                           passwd="kadidd!ehopper")

# opener = urllib.request.build_opener(auth_handler)
# urllib.request.install_opener(opener)
# urllib.request.urlopen('http://www.example.com/login.html')

# Proxy handler
# proxy = urllib.request.ProxyHandler({
#     'http': 'http://120.24.2471.104:80',
# })
# opener2 = urllib.request.build_opener(proxy)
# response3 = opener2.open('http://www.baidu.com')

# Capture the cookies; they can be printed to the console or saved as a text file
# cookie = http.cookiejar.CookieJar()

filename = 'cookie.txt'
cookie = http.cookiejar.MozillaCookieJar(filename)
handler = urllib.request.HTTPCookieProcessor(cookie)
opener3 = urllib.request.build_opener(handler)
response3 = opener3.open('http://www.baidu.com')
# Save the cookies to disk
cookie.save(ignore_discard=True, ignore_expires=True)

# Print the three bodies, the headers of the first response and the captured cookies,
# but only when all three requests returned 200
if response.status == 200 and response2.status == 200 and response3.status == 200:
    print("====================Response Content=====================")
    print(response.read().decode("utf-8"))
    print("====================Response Content=====================")
    print(response2.read().decode("utf-8"))
    print("====================Response Content=====================")
    print(response3.read().decode("utf-8"))
    print("===================Headers======================")
    for i in response.getheaders():
        print(i)
    print("==================Cookie=======================")
    for item in cookie:
        print(item.name + "=" + item.value)


原创 Mar 13, 2018 6:33:06 PM 42 0

Python3读取使用本地Cookie

#!/usr/bin/python3
# coding:utf-8
# Filename:spider_study2.py
# Author:Neal
# Time:2018.03.13 10:33

import urllib.request
import http.cookiejar

"""
    读取利用cookie
"""
cookie = http.cookiejar.MozillaCookieJar()
# Load the cookie file from local disk
cookie.load('cookie.txt', ignore_discard=True, ignore_expires=True)
# Build an opener that sends the loaded cookies with each request
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)
response = opener.open('http://www.baidu.com')
# Print the body only for a 200 response
if response.status == 200:
    print(response.read().decode("utf-8"))


原创 Mar 13, 2018 6:33:58 PM 54 0

Python3简单使用openCV

#!/usr/bin/python3
# coding:utf-8
# Filename:selenium_cv.py
# Author:Neal
# Time:2018.03.12 18:26

import cv2

"""
    简单使用python-openCV
"""
# Read the image in grayscale mode (cv2.IMREAD_GRAYSCALE)
image_path = 'C:\\Programs\\resource\\img\\color.jpg'
img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
# cv2.imread returns None instead of raising when the file is missing or unreadable,
# so fail here with a clear message rather than on the attribute access below
if img is None:
    raise FileNotFoundError('cannot read image: ' + image_path)

# Print some information about the image
print(img.shape)
print(img.size)
print(img.dtype)

# 在图片上输入信息
# cv2.putText('')

# 缩放图片并保存
# cv2.resize()

# 图像平移
"""
    图像平移需要构建一个偏移矩阵M
    rows,cols,channel = img.shape
    M=np.float32([1,0,100],[0,1,50])
    dst = cv2.warpAffine(img,M,(cols,rows))
    cv2.imshow('img',dst)
"""
# 图像旋转
"""
    rows,cols = img.shape
    第一个参数为旋转中心,第二个参数为旋转角度,第三个为旋转后的缩放因子
    M = cv2.getRotationMatrix2D((cols/2,rows/2),45,0.06)
    第三个参数为图像的尺寸中心
    dst = cv2.warpAffine(img,M,(2*cols,2*rows))
"""

# 仿射变换
"""
    rows,cols,ch = img.shape
    pts1 = np.float32([[50,50],[200,50],[50,200]])
    pts2 = np.float32([[10,1000],[200,50],[100,250]])
    M = cv2.getAffineTransform(pts1,pts2)
    dst = cv2.warpAffine(img,M,(cols,rows))
    
"""

# 透视变换

# 图像regions of interest

# 通道的拆分/合并处理

# Create a window; its size can be given explicitly or derived from the image
cv2.namedWindow("image", cv2.WINDOW_AUTOSIZE)
# Show the image; more than one window can be created
cv2.imshow("image", img)
# Keyboard binding: with 0 the call waits for a key press
k = cv2.waitKey(0)

# 's' saves the image before closing; Esc (27) just closes; any other key does nothing
if k == ord('s'):
    cv2.imwrite('gray.jpg', img)
if k in (27, ord('s')):
    cv2.destroyAllWindows()


原创 Mar 13, 2018 6:35:03 PM 54 0

Python3+selenium简单实现控制台汉译英翻译器

#!/usr/bin/python3
# coding:utf-8
# Filename:selenium_phantomjs.py
# Author:Neal
# Time:2018.03.12 16:55
import time
import sys
from selenium import webdriver

"""
    自动调用百度翻译,进行简单的汉译英操作
"""

# Start the browser
driver = webdriver.Chrome()
try:
    # Maximum wait times; an error is raised when there is no response in time
    driver.implicitly_wait(40)
    driver.set_page_load_timeout(40)
    # Script timeout
    driver.set_script_timeout(10)

    driver.get("http://fanyi.baidu.com/?aldtype=16047#auto/zh")
    # Stop loading the page
    driver.execute_script('window.stop()')
    words = str(input('请输入你想要翻译的单词或句子,按回车结束!\n'))
    while True:
        # Locate the input text area
        text_area = driver.find_element_by_xpath("//textarea[@id='baidu_translate_input' and @class='textarea']")
        # Type the text into the text area
        text_area.send_keys(words)
        time.sleep(1)
        # Locate the output area
        get_translate = driver.find_element_by_xpath("//div[@class='output-bd']/p[2]")
        # Print the translation result
        print('==================翻译结果=================')
        print(get_translate.text)
        # NOTE(review): whatever is typed here (other than q) becomes the text
        # translated on the next round.
        words = str(input('\n退出请按q,清空请按任意键!\n'))
        # Compare by value; `is` tests object identity and is unreliable for strings
        if words == 'q':
            # Exit
            sys.exit()

        # Clear the input text area
        clear = driver.find_element_by_xpath("//a[@class='textarea-clear-btn']")
        clear.click()
finally:
    # Close the browser on exit and on errors
    driver.quit()


原创 Mar 13, 2018 6:35:44 PM 65 0

Python3+pillow的简单图像处理

#!/usr/bin/python3
# coding:utf-8
# Filename:image_handle.py
# Author:Neal
# Time:2018.03.13 17:34
"""
    验证码处理
"""
from PIL import Image

image = Image.open("C:\\my\\resource\\img\\1520932974.0316732ldhrmk.jpg")
# Convert to a grayscale copy
image_gray = image.convert('L')

# Print format, size and mode of the original image
print(image.format, image.size, image.mode)
# Cut out and display two regions; each box is (left, upper, right, lower)
for box in ((25, 10, 55, 50), (50, 10, 55, 50)):
    region = image.crop(box)
    region.show()


def get_bin_table(threshold=140):
    """
    Build the 256-entry lookup table for turning gray levels into two values.

    Levels below the threshold map to 0, all others map to 1.
    :param threshold: first gray level that maps to 1
    :return: table
    """
    return [0 if level < threshold else 1 for level in range(256)]

# Lookup table built with the default threshold
table = get_bin_table()
# Map the grayscale image through the table, producing a mode '1' image
out = image_gray.point(table, '1')
out.show()


原创 Mar 13, 2018 6:38:00 PM 64 0

爬取今日头条100条新闻,并统计出现“霍金”的次数

#!/usr/bin/python3
# coding:utf-8
# Filename:main_spider.py
# Author:Neal
# Time:2018.03.14 14:06

import urllib
from urllib.request import urlopen
import os
import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException

"""
    爬取今日头条热点新闻(有些html是转载在今日头条上,
    网站的规则都不一样,所以无法自动提取需要的信息)
    由于该网站使用AJAX,所有每次的加载都是加载新的li标签,
    所以需要从打开完的链接位置开始下一轮的链接打开
"""


def open_url_get_data(links):
    """
    Fetch the page behind the first link and return its decoded HTML.

    NOTE(review): only the first element of ``links`` is ever requested, because the
    function returns as soon as one page has been read; the remaining links are ignored.
    This call blocks while the page is downloaded.
    :param links: iterable of link elements exposing get_attribute('href')
    :return: HTML text of the first link, or None when links is empty
    """
    pending = iter(links)
    try:
        first_link = next(pending)
    except StopIteration:
        return None
    # Wrap the URL in a request object carrying a browser-like User-Agent
    request = urllib.request.Request(
        url=first_link.get_attribute('href'),
        headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/65.0.3325.162 Safari/537.36'},
        method='GET')
    page = urllib.request.urlopen(request)
    return page.read().decode()


def get_once_page_links(driver, list_len):
    """
    Collect the article links of the current page and append them to the links file.

    When ``list_len`` is 1 every matching anchor on the page is collected. Otherwise
    the seven list items at positions ``list_len`` .. ``list_len + 6`` are probed one
    by one, and positions without a matching anchor are skipped.
    :param list_len: 1 for a full scan, otherwise the first list position to probe
    :param driver: webdriver
    :return: list of link elements
    """
    item_xpath = "/html/body/div/div[4]/div[2]/div[2]/div/div/div/ul/li"
    anchor_xpath = "/div/div[1]/div/div[1]/a"
    if list_len == 1:
        links = driver.find_elements_by_xpath(item_xpath + anchor_xpath)
    else:
        links = []
        # Continue from the position recorded after the previous round
        for position in range(list_len, list_len + 7):
            try:
                links.append(driver.find_element_by_xpath(
                    item_xpath + "[" + str(position) + "]" + anchor_xpath))
            except NoSuchElementException:
                continue
    if links is not None:
        save_links(links, 'C:\\my\\resource\\profile\\', 'links')
    return links


def save_links(links, file_path, file_name):
    """
    Append the title and address of every link to a text file (never overwrites).

    Each link's text is also printed. Nothing is created when ``links`` is empty.
    :param links: link elements exposing ``text`` and ``get_attribute('href')``
    :param file_path: target directory; concatenated directly with the file name,
        so it must end with a path separator
    :param file_name: file name without the ``.txt`` extension
    :return: None
    """
    links = list(links)
    if not links:
        return
    # Create the directory and open the file once instead of once per link
    os.makedirs(file_path, exist_ok=True)
    with open(file_path + file_name + '.txt', 'a+') as f:
        for link in links:
            print(link.text)
            f.write('标题:\t' + link.text + '\t链接地址\t' + link.get_attribute('href') + '\r\n')


def parse_data():
    """
    Parser placeholder: meant to parse HTML and extract the required data.

    Not implemented yet; the body is empty, so the call returns None.
    :return: json (intended; currently None)
    """
    pass


def do_script_to_next(driver):
    """
    Scroll to the bottom of the page and report how many article anchors it holds.

    The count is taken right after the scroll call and serves as the record of how
    far the list has already been read.
    :param driver: webdriver
    :return: int
    """
    driver.execute_script('window.scrollTo(0, document.body.scrollHeight)')
    anchor_xpath = "/html/body/div/div[4]/div[2]/div[2]/div/div/div/ul/li/div/div[1]/div/div[1]/a"
    anchors = driver.find_elements_by_xpath(anchor_xpath)
    # This is the total so far; the caller has to account for what it already processed
    return len(anchors)


if __name__ == '__main__':
    browser = webdriver.Chrome()
    try:
        browser.get('https://www.toutiao.com/ch/news_hot/')
        # 1 tells get_once_page_links to scan the whole page on the first round
        list_last_len = 1
        for i in range(100):
            # Collect (and save) the links of the current page
            link_list = get_once_page_links(browser, list_last_len)
            # Scroll to the bottom to trigger loading of the next batch and remember
            # how many anchors exist so far, to avoid reopening links
            # NOTE(review): the next round starts probing at this count itself, which
            # may re-read the last item already saved - confirm the intended offset.
            list_last_len = do_script_to_next(browser)
            # Give the next batch time to load
            time.sleep(5)
            # Open the collected links
            # html_page = open_url_get_data(link_list)
            # print(html_page)
    finally:
        # Close the browser when the crawl ends or fails
        browser.quit()

统计

#!/usr/bin/python3
# coding:utf-8
# Filename:count_keywords.py
# Author:Neal
# Time:2018.03.14 17:08

"""
    统计“霍金”在随机50条头条新闻标题中出现的次数
"""

# Count how many of the first 50 lines of the links file mention the keyword
count = 0
with open('C:\\my\\resource\\profile\\links.txt') as f:
    for index in range(50):
        line = f.readline()
        # An empty string means end of file
        if not line:
            break
        if '霍金' in line:
            count += 1
print(count)


原创 Mar 14, 2018 5:16:19 PM 65 0

我的头像

黑天白夜

你懂的越多,懂你的就越少!

  • 来访数:4,422
  • 总文章:28
  • 原创数:27
  • 点赞数:8