%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /usr/local/lib/python3.8/urllib/__pycache__/
Upload File :
Create Path :
Current File : //usr/local/lib/python3.8/urllib/__pycache__/robotparser.cpython-38.pyc

U

p©ßaÐ$ã@s\dZddlZddlZddlZdgZe dd¡ZGdd„dƒZGdd„dƒZ	Gd	d
„d
ƒZ
dS)a% robotparser.py

    Copyright (C) 2000  Bastian Kleineidam

    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2

    The robots.txt Exclusion Protocol is implemented as specified in
    http://www.robotstxt.org/norobots-rfc.txt
éNÚRobotFileParserÚRequestRatezrequests secondsc@sreZdZdZddd„Zdd„Zdd„Zd	d
„Zdd„Zd
d„Z	dd„Z
dd„Zdd„Zdd„Z
dd„Zdd„ZdS)rzs This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    ÚcCs2g|_g|_d|_d|_d|_| |¡d|_dS)NFr)ÚentriesÚsitemapsÚ
default_entryÚdisallow_allÚ	allow_allÚset_urlÚlast_checked©ÚselfÚurl©rú./usr/local/lib/python3.8/urllib/robotparser.pyÚ__init__s
zRobotFileParser.__init__cCs|jS)z·Returns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        )r©r
rrrÚmtime%szRobotFileParser.mtimecCsddl}| ¡|_dS)zYSets the time the robots.txt file was last fetched to the
        current time.

        rN)Útimer)r
rrrrÚmodified.szRobotFileParser.modifiedcCs&||_tj |¡dd…\|_|_dS)z,Sets the URL referring to a robots.txt file.ééN)rÚurllibÚparseÚurlparseÚhostÚpathrrrrr
6szRobotFileParser.set_urlc
Cs†ztj |j¡}WnRtjjk
rd}z0|jdkr:d|_n|jdkrT|jdkrTd|_W5d}~XYnX| 	¡}| 
| d¡ ¡¡dS)z4Reads the robots.txt URL and feeds it to the parser.)i‘i“TiiôNzutf-8)
rZrequestÚurlopenrÚerrorÚ	HTTPErrorÚcoderr	ÚreadrÚdecodeÚ
splitlines)r
ÚfÚerrÚrawrrrr!;s
zRobotFileParser.readcCs,d|jkr|jdkr(||_n|j |¡dS©NÚ*)Ú
useragentsrrÚappend)r
ÚentryrrrÚ
_add_entryHs

zRobotFileParser._add_entrycCsPd}tƒ}| ¡|D]}|sP|dkr4tƒ}d}n|dkrP| |¡tƒ}d}| d¡}|dkrn|d|…}| ¡}|s|q| dd¡}t|ƒdkr|d ¡ ¡|d<tj	 
|d ¡¡|d<|ddkrú|dkrä| |¡tƒ}|j |d¡d}q|ddkr.|dkr6|j
 t|dd	ƒ¡d}q|dd
krb|dkr6|j
 t|ddƒ¡d}q|ddkr |dkr6|d ¡ ¡ršt|dƒ|_d}q|dd
kr|dkr6|d d¡}t|ƒdkr|d ¡ ¡r|d ¡ ¡rtt|dƒt|dƒƒ|_d}q|ddkr|j |d¡q|dkrL| |¡dS)z”Parse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        rréú#Nú:z
user-agentZdisallowFZallowTzcrawl-delayzrequest-rateú/Zsitemap)ÚEntryrr,ÚfindÚstripÚsplitÚlenÚlowerrrÚunquoter)r*Ú	rulelinesÚRuleLineÚisdigitÚintÚdelayrÚreq_rater)r
ÚlinesÚstater+ÚlineÚiZnumbersrrrrQsj








 ÿ
zRobotFileParser.parsecCs |jr
dS|jrdS|jsdStj tj |¡¡}tj dd|j|j	|j
|jf¡}tj |¡}|sfd}|j
D]}| |¡rl| |¡Sql|jrœ|j |¡SdS)z=using the parsed robots.txt decide if useragent can fetch urlFTrr0)rr	rrrrr7Ú
urlunparserÚparamsZqueryZfragmentÚquoterÚ
applies_toÚ	allowancer)r
Ú	useragentrZ
parsed_urlr+rrrÚ	can_fetchšs*ÿ

zRobotFileParser.can_fetchcCs>| ¡sdS|jD]}| |¡r|jSq|jr:|jjSdS©N)rrrEr<r©r
rGr+rrrÚcrawl_delay·s

zRobotFileParser.crawl_delaycCs>| ¡sdS|jD]}| |¡r|jSq|jr:|jjSdSrI)rrrEr=rrJrrrÚrequest_rateÁs

zRobotFileParser.request_ratecCs|js
dS|jSrI)rrrrrÚ	site_mapsËszRobotFileParser.site_mapscCs,|j}|jdk	r||jg}d tt|ƒ¡S)Nz

)rrÚjoinÚmapÚstr)r
rrrrÚ__str__Ðs
zRobotFileParser.__str__N)r)Ú__name__Ú
__module__Ú__qualname__Ú__doc__rrrr
r!r,rrHrKrLrMrQrrrrrs
		
	I

c@s(eZdZdZdd„Zdd„Zdd„ZdS)	r9zoA rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path.cCs<|dkr|sd}tj tj |¡¡}tj |¡|_||_dS)NrT)rrrBrrDrrF)r
rrFrrrrÚs
zRuleLine.__init__cCs|jdkp| |j¡Sr')rÚ
startswith)r
ÚfilenamerrrrEâszRuleLine.applies_tocCs|jr
dndd|jS)NZAllowZDisallowz: )rFrrrrrrQåszRuleLine.__str__N)rRrSrTrUrrErQrrrrr9×sr9c@s0eZdZdZdd„Zdd„Zdd„Zdd	„Zd
S)r1z?An entry has one or more user-agents and zero or more rulelinescCsg|_g|_d|_d|_dSrI)r)r8r<r=rrrrrëszEntry.__init__cCs‚g}|jD]}| d|›¡q
|jdk	r<| d|j›¡|jdk	rf|j}| d|j›d|j›¡| tt|j	ƒ¡d 
|¡S)NzUser-agent: z
Crawl-delay: zRequest-rate: r0Ú
)r)r*r<r=ZrequestsZsecondsÚextendrOrPr8rN)r
ÚretÚagentZraterrrrQñs


z
Entry.__str__cCsF| d¡d ¡}|jD](}|dkr*dS| ¡}||krdSqdS)z2check if this entry applies to the specified agentr0rr(TF)r4r6r))r
rGr[rrrrEýs
zEntry.applies_tocCs$|jD]}| |¡r|jSqdS)zZPreconditions:
        - our agent applies to this entry
        - filename is URL decodedT)r8rErF)r
rWr@rrrrF
s

zEntry.allowanceN)rRrSrTrUrrQrErFrrrrr1és

r1)rUÚcollectionsZurllib.parserZurllib.requestÚ__all__Ú
namedtuplerrr9r1rrrrÚ<module>sB

Zerion Mini Shell 1.0