2008-05-22

(转)用httpClient获取hotmail联系人列表

关键字: httpclient, hotmail
http://blog.jiexoo.com/2008/05/21/%e7%94%a8httpclient%e8%8e%b7%e5%8f%96hotmail%e8%81%94%e7%b3%bb%e4%ba%ba%e5%88%97%e8%a1%a8/
第一步,用HttpClient访问http://login.live.com/login.srf?id=2,这个页面会返回一个登录表单

第二步,解析出form中所有的隐含变量和form的action,这些变量必须通过httpClient Post回去,hotmail服务器会验证这些参数。另外,你还必须传递一个PwdPad变量,它的值是字符串IfYouAreReadingThisYouHaveTooMuchFreeTime去掉末尾与登录密码等长的字符后剩下的部分,比如你的密码是123,则PwdPad的值是IfYouAreReadingThisYouHaveTooMuchFreeT

第三、为了知道你接下来导向的地址,你必须解析服务器给你返回的脚本,其中replace("***")中的***即为重定向的地址

第四、得到上一步重定向后的真实的主机地址,联系人列表页面的具体地址就是 "http://" + hostAddress + "/mail/GetContacts.aspx"

第五、用正则表达式解析此页面即可

具体代码如下:
  import org.apache.commons.httpclient.HttpClient;
    import org.apache.commons.httpclient.Cookie;
    import org.apache.commons.httpclient.NameValuePair;
    import org.apache.commons.httpclient.methods.GetMethod;
    import org.apache.commons.httpclient.methods.PostMethod;
    import org.apache.commons.httpclient.cookie.CookiePolicy;
    import org.apache.commons.lang.StringUtils;

    import java.util.regex.Pattern;
    import java.util.regex.Matcher;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.io.InputStream;
    import java.io.IOException;
    import java.io.BufferedReader;
    import java.io.InputStreamReader;

    /**
     * User: cjp
     * Date: 2008-4-30
     * Time: 9:26:58
     */
    public class HotmailImporter {
        public static String[] parseContact(String loginname, String password) throws Exception {
            HttpClient client = new HttpClient();
            client.getParams().setCookiePolicy(
                    CookiePolicy.BROWSER_COMPATIBILITY);

            //获取登录页面html
            String hotmailUrl = “http://login.live.com/login.srf?id=2“;
            GetMethod hotmailGet = new GetMethod(hotmailUrl);
            hotmailGet.setRequestHeader(”Accept”, “image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*”);
            hotmailGet.setRequestHeader(”Accept-Language”, “zh-cn”);
            hotmailGet.setRequestHeader(”User-Agent”, “Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 2.0.50727)”);
            hotmailGet.setRequestHeader(”Host”, “www.hotmail.com“);
            hotmailGet.setRequestHeader(”Connection”, “Keep-Alive”);
            client.executeMethod(hotmailGet);

            String responseStr = hotmailGet.getResponseBodyAsString();

            hotmailGet.releaseConnection();

            //传递所有的cookie
            Cookie[] cookies = client.getState().getCookies();
            String cookieStr = “”;
            for (Cookie cookie : cookies) {
                cookieStr += cookie.getName() + “=” + cookie.getValue() + “;”;
            }

            //分析登录页面的HTML,获取action,ppsx,ppft
            String actionUrl = getFormUrl(responseStr);

            NameValuePair loginPair = new NameValuePair(”login”, loginname);
            NameValuePair loginOptionsPair = new NameValuePair(”LoginOptions”, “2″);
            NameValuePair passwdPair = new NameValuePair(”passwd”, password);
            NameValuePair ppsxPair = new NameValuePair(”PPSX”, getInputValue(”ppsx”, responseStr));
            NameValuePair ppftPair = new NameValuePair(”PPFT”, getInputValue(”ppft”, responseStr));

            //算出pwdpad
            String pwdpad = “IfYouAreReadingThisYouHaveTooMuchFreeTime”;
            pwdpad = StringUtils.substring(pwdpad, 0, pwdpad.length() - password.length());
            NameValuePair pwdpadPair = new NameValuePair(”PwdPad”, pwdpad);
            PostMethod loginPost = new PostMethod(actionUrl);
            loginPost.setRequestBody(new NameValuePair[]{loginPair, passwdPair, ppsxPair, ppftPair, loginOptionsPair, pwdpadPair});

            loginPost.setRequestHeader(”Accept”, “image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*”);
            loginPost.setRequestHeader(”Referer”, hotmailGet.getURI().toString());
            loginPost.setRequestHeader(”Accept-Language”, “zh-cn”);
            loginPost.setRequestHeader(”User-Agent”, “Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 2.0.50727)”);
            loginPost.setRequestHeader(”Host”, “login.live.com”);
            loginPost.setRequestHeader(”Connection”, “Keep-Alive”);
            loginPost.setRequestHeader(”Cache-Control”, “no-cache”);

            loginPost.setRequestHeader(”Cookie”, cookieStr);

            client.executeMethod(loginPost);

            String str = loginPost.getResponseBodyAsString();
            loginPost.releaseConnection();

            String toUrl = StringUtils.substringBetween(str, “replace(\”", “\”);}function OnBack()”);

            //获取登录后的跳转页面
            GetMethod getMethod = new GetMethod(toUrl);
            client.executeMethod(getMethod);

            //获取联系人列表
            GetMethod contactMethod = new GetMethod(”http://” + getMethod.getURI().getHost() + “/mail/GetContacts.aspx”);
            getMethod.releaseConnection();

            client.executeMethod(contactMethod);
            List<String> contacts = parseContacts(contactMethod.getResponseBodyAsStream());
            contactMethod.releaseConnection();
            //noinspection ToArrayCallWithZeroLengthArrayArgument
            return contacts.toArray(new String[]{});
        }
        private static List<String> parseContacts(InputStream contactsContent) throws IOException {
            List<String> contacts = new ArrayList<String>();
            BufferedReader in = new BufferedReader(new InputStreamReader(contactsContent));
            String line;
            while ((line = in.readLine()) != null) {
                System.out.println(line);
                String[] values = line.split(”,”);
                if (values.length < 47) continue;
                String email = parseValue(values[46]);
                if (email.length() == 0) continue;
                email = email.toLowerCase();

                if (isEmailAddress(email)) {
                    contacts.add(email);
                }
            }
            return contacts;
        }

        private static String parseValue(String value) {
            if (value.length() > 0 && value.charAt(0) == ‘”‘) {
                value = value.substring(1, value.length() - 1);
            }
            return value;
        }

        private static String getFormUrl(String content) throws Exception {
            content = content.substring(content.indexOf(”<form”) + 5);
            String actionAttribute = content.split(”\\s+”)[5];
            Pattern p = Pattern.compile(”\”(.*?)\”");
            Matcher matcher = p.matcher(actionAttribute);
            if (!matcher.find()) {
                throw new Exception(”hotmail登录界面已改变,无法正常解析”);
            }
            return matcher.group(1);
        }

        private static String getInputValue(String name, String content) throws Exception {
            Pattern p = Pattern.compile(”^.+value=\”([^\\s\"]+)\”");
            int index = content.indexOf(name.toUpperCase()) + name.length() + 2;
            content = content.substring(index, index + 200 > content.length() ? content.length() : index + 200);

            Matcher matcher = p.matcher(content);
            if (!matcher.find()) {
                throw new Exception(”hotmail登录界面已改变,无法正常解析”);
            }
            return matcher.group(1);
        }

        public static boolean isEmailAddress(String email) {
            Pattern emailPattern = Pattern.compile(
                    “^[0-9a-z]([-_.~]?[0-9a-z])*@[0-9a-z]([-.]?[0-9a-z])*\\.[a-z]{2,4}$”
            );
            return emailPattern.matcher(email).matches();
        }

        public static void main(String[] args) {
            try {
                String[] contacts = parseContact(”test@live.cn“, “test”);
                System.out.println(Arrays.toString(contacts));
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
评论
lordhong 2008-05-22
冷。。。brutal force。。。
发表评论

您还没有登录,请登录后发表评论

johnnyhg
搜索本博客
我的相册
07184f93-fa2b-330e-9922-fc26260b2319-thumb
独醉
共 1 张
存档
最新评论