o
    JAfÐ$  ã                   @   sZ   	 d dl Z d dlZd dlZdgZe  dd¡ZG dd„ dƒZG dd„ dƒZG dd	„ d	ƒZ	dS )
é    NÚRobotFileParserÚRequestRatezrequests secondsc                   @   sp   e Zd Z	 ddd„Zdd„ Zdd„ Zdd	„ Zd
d„ Zdd„ Zdd„ Z	dd„ Z
dd„ Zdd„ Zdd„ Zdd„ ZdS )r   Ú c                 C   s2   g | _ g | _d | _d| _d| _|  |¡ d| _d S )NFr   )ÚentriesÚsitemapsÚdefault_entryÚdisallow_allÚ	allow_allÚset_urlÚlast_checked©ÚselfÚurl© r   ú)/usr/lib/python3.10/urllib/robotparser.pyÚ__init__   s   

zRobotFileParser.__init__c                 C   s   	 | j S ©N)r   ©r   r   r   r   Úmtime%   s   zRobotFileParser.mtimec                 C   s   	 dd l }|  ¡ | _d S )Nr   )Útimer   )r   r   r   r   r   Úmodified.   s   zRobotFileParser.modifiedc                 C   s(   	 || _ tj |¡dd… \| _| _d S )Né   é   )r   ÚurllibÚparseÚurlparseÚhostÚpathr   r   r   r   r
   6   s    zRobotFileParser.set_urlc              
   C   s¶   	 z	t j | j¡}W n@ t jjyJ } z2|jdv rd| _n|jdkr1|jdk r?d| _W Y d }~d S W Y d }~d S W Y d }~d S W Y d }~d S d }~ww | 	¡ }|  
| d¡ ¡ ¡ d S )N)i‘  i“  Ti  iô  zutf-8)r   ZrequestÚurlopenr   ÚerrorÚ	HTTPErrorÚcoder   r	   Úreadr   ÚdecodeÚ
splitlines)r   ÚfÚerrÚrawr   r   r   r"   ;   s   
ÿÿ€ýzRobotFileParser.readc                 C   s2   d|j v r| jd u r|| _d S d S | j |¡ d S ©NÚ*)Ú
useragentsr   r   Úappend)r   Úentryr   r   r   Ú
_add_entryH   s
   


þzRobotFileParser._add_entryc                 C   sL  	 d}t ƒ }|  ¡  |D ]
}|s)|dkrt ƒ }d}n|dkr)|  |¡ t ƒ }d}| d¡}|dkr8|d |… }| ¡ }|s?q| dd¡}t|ƒdkr|d  ¡  ¡ |d< tj	 
|d  ¡ ¡|d< |d dkr|dkrt|  |¡ t ƒ }|j |d ¡ d}q|d dkr—|dkr–|j t|d dƒ¡ d}q|d d	kr¯|dkr®|j t|d d
ƒ¡ d}q|d dkrË|dkrÊ|d  ¡  ¡ rÈt|d ƒ|_d}q|d dkr|dkr|d  d¡}t|ƒdkr|d  ¡  ¡ r|d  ¡  ¡ rtt|d ƒt|d ƒƒ|_d}q|d dkr| j |d ¡ q|dkr$|  |¡ d S d S )Nr   r   é   ú#ú:z
user-agentZdisallowFZallowTzcrawl-delayzrequest-rateú/Zsitemap)ÚEntryr   r-   ÚfindÚstripÚsplitÚlenÚlowerr   r   Úunquoter*   r+   Ú	rulelinesÚRuleLineÚisdigitÚintÚdelayr   Úreq_rater   )r   ÚlinesÚstater,   ÚlineÚiZnumbersr   r   r   r   Q   sx   	



€€€
 ÿ€€
ÿzRobotFileParser.parsec                 C   s¢   	 | j rdS | jrdS | jsdS tj tj |¡¡}tj dd|j|j	|j
|jf¡}tj |¡}|s4d}| jD ]}| |¡rE| |¡  S q7| jrO| j |¡S dS )NFTr   r1   )r   r	   r   r   r   r   r8   Ú
urlunparser   ÚparamsÚqueryÚfragmentÚquoter   Ú
applies_toÚ	allowancer   )r   Ú	useragentr   Z
parsed_urlr,   r   r   r   Ú	can_fetchš   s*   ÿ

ÿzRobotFileParser.can_fetchc                 C   ó>   |   ¡ sd S | jD ]}| |¡r|j  S q	| jr| jjS d S r   )r   r   rH   r=   r   ©r   rJ   r,   r   r   r   Úcrawl_delay·   ó   


ÿzRobotFileParser.crawl_delayc                 C   rL   r   )r   r   rH   r>   r   rM   r   r   r   Úrequest_rateÁ   rO   zRobotFileParser.request_ratec                 C   s   | j sd S | j S r   )r   r   r   r   r   Ú	site_mapsË   s   zRobotFileParser.site_mapsc                 C   s,   | j }| jd ur|| jg }d tt|ƒ¡S )Nz

)r   r   ÚjoinÚmapÚstr)r   r   r   r   r   Ú__str__Ð   s   
zRobotFileParser.__str__N)r   )Ú__name__Ú
__module__Ú__qualname__r   r   r   r
   r"   r-   r   rK   rN   rP   rQ   rU   r   r   r   r   r      s    
			I

c                   @   s&   e Zd Z	 dd„ Zdd„ Zdd„ ZdS )r:   c                 C   s<   |dkr|sd}t j t j |¡¡}t j |¡| _|| _d S )Nr   T)r   r   rC   r   rG   r   rI   )r   r   rI   r   r   r   r   Ú   s
   
zRuleLine.__init__c                 C   s   | j dkp
| | j ¡S r(   )r   Ú
startswith)r   Úfilenamer   r   r   rH   â   s   zRuleLine.applies_toc                 C   s   | j rdndd | j S )NZAllowZDisallowz: )rI   r   r   r   r   r   rU   å   s   zRuleLine.__str__N)rV   rW   rX   r   rH   rU   r   r   r   r   r:   ×   s
    r:   c                   @   s.   e Zd Z	 dd„ Zdd„ Zdd„ Zdd„ Zd	S )
r2   c                 C   s   g | _ g | _d | _d | _d S r   )r*   r9   r=   r>   r   r   r   r   r   ë   s   
zEntry.__init__c                 C   s‚   g }| j D ]
}| d|› ¡ q| jd ur| d| j› ¡ | jd ur3| j}| d|j› d|j› ¡ | tt| j	ƒ¡ d 
|¡S )NzUser-agent: zCrawl-delay: zRequest-rate: r1   Ú
)r*   r+   r=   r>   ZrequestsZsecondsÚextendrS   rT   r9   rR   )r   ÚretÚagentZrater   r   r   rU   ñ   s   



zEntry.__str__c                 C   sH   	 |  d¡d  ¡ }| jD ]}|dkr dS | ¡ }||v r! dS qdS )Nr1   r   r)   TF)r5   r7   r*   )r   rJ   r^   r   r   r   rH   ý   s   
ÿzEntry.applies_toc                 C   s&   	 | j D ]}| |¡r|j  S qdS )NT)r9   rH   rI   )r   rZ   rA   r   r   r   rI   
  s   


ÿzEntry.allowanceN)rV   rW   rX   r   rU   rH   rI   r   r   r   r   r2   é   s    r2   )
ÚcollectionsÚurllib.parser   Zurllib.requestÚ__all__Ú
namedtupler   r   r:   r2   r   r   r   r   Ú<module>   s     B