本文介绍如何用 ASP 配合正则表达式抓取网页上的手机号码和 Email；只要换一个正则表达式，还可以抓取很多其他内容。这段程序由飞鸟制作，我把代码贴出来，完整代码如下：
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312">
<title></title>
<style type="text/css">
<!--
.STYLE2 {color: #666666}
-->
</style>
</head>
<body>
<%
' Page entry point.
' With mode=fenxi the submitted form is processed by getTEL(), which ends the
' response itself. Any other mode falls through to the HTML form below.
dim mode
mode = request("mode")
select case mode
    case "fenxi"
        call getTEL()
end select

'==================================================
' Procedure: getTEL
' Reads str_url, startid, endid and str_RegExp from the request, fetches
' str_url once for every id in [startid, endid] (with "{id}" replaced by the
' id) and prints every regular-expression match found in each page.
' Always finishes with a "re-extract" button and ends the response.
'
' SECURITY NOTE(review): this fetches a caller-supplied URL from the server
' and runs a caller-supplied pattern. Do not expose this page to untrusted
' users without restricting the allowed hosts.
'==================================================
sub getTEL()
    ' Script timeout, in seconds.
    Server.ScriptTimeout = 36000

    dim str_url, startid, endid, str_RegExp
    dim firstId, lastId, i
    str_url = trim(request("str_url"))
    startid = trim(request("startid"))
    endid = trim(request("endid"))
    str_RegExp = trim(request("str_RegExp"))

    if str_url <> "" and str_RegExp <> "" and isIdNumber(startid) and isIdNumber(endid) then
        ' CLng instead of CInt: CInt overflows for ids above 32767.
        firstId = CLng(startid)
        lastId = CLng(endid)
        for i = firstId to lastId
            call fenxi(getData(replace(str_url, "{id}", CStr(i))), str_RegExp)
        next
    else
        ' The client-side onChange check can be bypassed, so validate here
        ' rather than failing with a type-mismatch runtime error.
        response.Write("<div align=center>参数无效:请填写网址、整数的起止ID并选择提取类型。</div>")
    end if

    response.Write("<br><div align=center><input onClick=""javascript:window.open('?mode=textbox','_self');"" type=""button"" name=""Submit"" value=""重新提取""> <br/></div>")
    response.End()
end sub

' Returns True when text consists of 1 to 9 decimal digits, i.e. a
' non-negative whole number that CLng can convert without overflow.
function isIdNumber(byval text)
    dim digits
    Set digits = New RegExp
    digits.Pattern = "^\d{1,9}$"
    isIdNumber = digits.Test(text)
    Set digits = Nothing
end function

'==================================================
' Function: getData
' Downloads RemoteFileUrl with a synchronous GET and returns the body decoded
' as text (see StreamToText). Returns "" when the request fails, does not
' complete, or answers with a status other than 200, so that one bad page
' does not abort the whole scan.
'==================================================
public Function getData(byval RemoteFileUrl)
    dim Retrieval, body
    getData = ""
    body = Null

    ' Network errors are expected here; trap them locally.
    On Error Resume Next
    ' NOTE(review): Microsoft recommends MSXML2.ServerXMLHTTP for server-side
    ' code; the original ProgID is kept to avoid changing deployment needs.
    Set Retrieval = Server.CreateObject("MS"&"XM"&"L2.XM"&"LHT"&"TP")
    if Err.Number = 0 then
        With Retrieval
            .Open "Get", RemoteFileUrl, False, "", ""
            if Err.Number = 0 then .Send
            if Err.Number = 0 then
                if .readyState = 4 then
                    if .status = 200 then body = .ResponseBody
                end if
            end if
        End With
    end if
    ' Under Resume Next a failing condition still falls into its branch, so
    ' discard anything collected if an error was raised along the way.
    if Err.Number <> 0 then
        body = Null
        Err.Clear
    end if
    Set Retrieval = Nothing
    On Error GoTo 0

    getData = StreamToText(body)
end function

'==================================================
' Function: StreamToText
' Decodes a byte array (such as XMLHTTP.ResponseBody) as gb2312 text.
' Returns "" for Null, Empty, a zero-length body or any non-byte-array value,
' because ADODB.Stream.Write only accepts a byte array in binary mode.
'==================================================
Public Function StreamToText(byval stream)
    dim sm
    StreamToText = ""
    If VarType(stream) <> (vbArray + vbByte) Then Exit Function
    If LenB(stream) = 0 Then Exit Function

    Set sm = server.CreateObject("ADO" & "DB.Stre" & "am")
    sm.Open
    sm.Type = 1             ' adTypeBinary: accept the raw bytes
    sm.Write stream
    sm.Position = 0
    sm.Type = 2             ' adTypeText: reinterpret the bytes as text
    sm.charset = "gb2312"
    sm.Position = 0
    StreamToText = sm.ReadText()
    sm.Close
    Set sm = Nothing
End Function

'==================================================
' Procedure: fenxi
' Runs the pattern str_RegExp (case-insensitive, global) over content and
' writes each distinct match to the response as "<match>,<br/>", then flushes.
' content    - page text to scan; literal "<html>" / "</html>" are stripped.
' str_RegExp - VBScript regular-expression pattern.
'==================================================
sub fenxi(byval content, byval str_RegExp)
    dim str, objRegExp, Matches, Match, seen, RetStr

    str = trim(replace(replace(content, "<html>", ""), "</html>", ""))

    Set objRegExp = New RegExp
    objRegExp.IgnoreCase = True     ' ignore case
    objRegExp.Global = True         ' find every match, not just the first
    objRegExp.Pattern = str_RegExp
    Set Matches = objRegExp.Execute(str)

    ' Exact-value de-duplication. A substring test against the accumulated
    ' output would wrongly drop "a@b.com" after "aa@b.com" had been emitted.
    Set seen = Server.CreateObject("Scripting.Dictionary")
    RetStr = ""
    For Each Match in Matches
        if not seen.Exists(Match.Value) then
            seen.Add Match.Value, True
            ' Matches come from a remote page: encode before echoing them.
            RetStr = RetStr & Server.HTMLEncode(Match.Value) & ",<br/>" & vbcrlf
        end if
    Next

    Set seen = Nothing
    Set Matches = Nothing
    Set objRegExp = Nothing

    response.Write RetStr
    response.Flush()
end sub
%>
<form name="form1" method="post" action="?">
<input type="hidden" name="mode" value="fenxi">
<table border="0" align="center" cellpadding="0" cellspacing="5">
<tr>
<td align="center">
<input name="str_url" type="text" id="str_url" value="http://www.domain.com/show.asp?id={id}" size="50">
<font color="#FF0000">*<span class="STYLE2">{id}为序号</span></font>
</td>
</tr>
<tr>
<td align="center">起
<input name="startid" type="text" id="startid" value="1" size="10" onChange="if(/\D/.test(this.value)){alert('“起始ID”只能输入整数!');this.value='';}"/>
止
<input name="endid" type="text" id="endid" value="2" size="10" onChange="if(/\D/.test(this.value)){alert('“结束ID”只能输入整数!');this.value='';}"/>
<font color="#FF0000">*</font></td>
</tr>
<tr>
<td align="center"><label>
<input name="str_RegExp" type="radio" value="\w+([-+.]\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*">
EMAIL
<input name="str_RegExp" type="radio" value="1[35]\d{9}" checked>
手机
</label></td>
</tr>
<tr>
<td align="center"><input type="submit" name="Submit" value="提交">
<label></label></td>
</tr>
</table>
</form>
</body>
</html>