[ipxe-devel] [INCOMPLETE PATCH] Need assistance - SRP_I_LOGOUT is never run, needs to be

james harvey jamespharvey20 at gmail.com
Sat Aug 20 10:44:07 UTC 2016


Got a little farther.  Does look like we can have int 13 handle AH
0x46, which is the extension for eject media.  Then, I'm hoping the
bootloader can call that after it's loaded the kernel and initrd,
without any ill effects.  If I understand how all this works, I don't
think the kernel ever uses int13, so I think it's fine for it to stop
working at that point.

So below and attached is an additional in process patch.  (BTW you'll
see this patch also fixes AH 0x44 being listed twice in the comments,
extended seek is AH 0x47.)

The first one, fix_srp_close.patch, and the second one,
add_eject_media.patch, are reaching out toward each other, but I'm
completely missing the link between them.

These two patches I'm just giving to show you where I'm at - not
expecting those as they are to be accepted.  The fix_sbft_endian.patch
should be ready to go pending your approval.

Trying to figure it out, I take a look at int13_read_sectors which
certainly is supported by srp block devices.  It references block_read
which is the function in src/core/blockdev.c, but there too, I don't
see how that function reaches out to the srp block device.  I feel
like there has to be a structure linking the SRP block device's
block_read functionality to here.  But "grep block_read -r *" I'm not
seeing where that would be, except if it's in drivers/block/scsi.c...
And if it's there, it then references scsidev_read which I don't see
how that gets to a SRP device.


Patch is below and attached.

Submitted by: James P. Harvey <jamespharvey20 at gmail.com>

---
 src/arch/x86/include/int13.h          |  4 ++++
 src/arch/x86/interface/pcbios/int13.c | 22 +++++++++++++++++++++-
 src/core/blockdev.c                   | 23 +++++++++++++++++++++++
 src/include/ipxe/blockdev.h           |  3 +++
 4 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/src/arch/x86/include/int13.h b/src/arch/x86/include/int13.h
index f82a583..47e1db2 100644
--- a/src/arch/x86/include/int13.h
+++ b/src/arch/x86/include/int13.h
@@ -39,6 +39,8 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
 #define INT13_EXTENDED_WRITE    0x43
 /** Verify sectors */
 #define INT13_EXTENDED_VERIFY      0x44
+/** Eject media */
+#define INT13_EJECT_MEDIA       0x46
 /** Extended seek */
 #define INT13_EXTENDED_SEEK     0x47
 /** Get extended drive parameters */
@@ -63,6 +65,8 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
 #define INT13_STATUS_READ_ERROR    0x04
 /** Reset failed */
 #define INT13_STATUS_RESET_FAILED  0x05
+/** Valid eject request failed */
+#define INT13_STATUS_EJECT_FAILED  0xB5
 /** Write error */
 #define INT13_STATUS_WRITE_ERROR   0xcc

diff --git a/src/arch/x86/interface/pcbios/int13.c
b/src/arch/x86/interface/pcbios/int13.c
index 6f16904..1210bda 100644
--- a/src/arch/x86/interface/pcbios/int13.c
+++ b/src/arch/x86/interface/pcbios/int13.c
@@ -1132,7 +1132,24 @@ static int int13_extended_verify ( struct
int13_drive *int13,
 }

 /**
- * INT 13, 44 - Extended seek
+ * INT 13, 46 - Eject media
+ *
+ * @v int13      Emulated drive
+ * @v al         0 (reserved)
+ * @ret status      Status code
+ */
+static int int13_eject_media ( struct int13_drive *int13,
+            struct i386_all_regs *ix86 ) {
+  DBGC2 ( int13, "Eject media\n" );
+  if ( block_eject_media (&int13->block )) {
+     return INT13_STATUS_SUCCESS;
+  } else {
+     return INT13_STATUS_EJECT_FAILED;
+  }
+}
+
+/**
+ * INT 13, 47 - Extended seek
  *
  * @v int13      Emulated drive
  * @v ds:si      Disk address packet
@@ -1425,6 +1442,9 @@ static __asmcall void int13 ( struct
i386_all_regs *ix86 ) {
      case INT13_EXTENDED_VERIFY:
         status = int13_extended_verify ( int13, ix86 );
         break;
+     case INT13_EJECT_MEDIA:
+        status = int13_eject_media ( int13, ix86 );
+        break;
      case INT13_EXTENDED_SEEK:
         status = int13_extended_seek ( int13, ix86 );
         break;
diff --git a/src/core/blockdev.c b/src/core/blockdev.c
index c219d96..3310c17 100644
--- a/src/core/blockdev.c
+++ b/src/core/blockdev.c
@@ -141,3 +141,26 @@ void block_capacity ( struct interface *intf,

   intf_put ( dest );
 }
+
+/**
+ * Eject media device capacity
+ *
+ * @control      Control interface
+ * @ret rc    Return status code
+ */
+int block_eject_media ( struct interface *control ) {
+  struct interface *dest;
+  block_eject_media_TYPE ( void * ) *op =
+     intf_get_dest_op ( control, block_eject_media, &dest );
+  void *object = intf_object ( dest );
+  int rc;
+
+  if ( op ) {
+     rc = op ( object );
+  } else {
+     rc = -EOPNOTSUPP;
+  }
+
+  intf_put ( dest );
+  return rc;
+}
diff --git a/src/include/ipxe/blockdev.h b/src/include/ipxe/blockdev.h
index 418c430..cebf8af 100644
--- a/src/include/ipxe/blockdev.h
+++ b/src/include/ipxe/blockdev.h
@@ -51,5 +51,8 @@ extern void block_capacity ( struct interface *intf,
   typeof ( void ( object_type,              \
         struct block_device_capacity *capacity ) )

+extern void block_eject_media ( struct interface *control );
+#define block_eject_media_TYPE( object_type )           \
+  typeof ( int ( object_type, struct interface *data ) )

 #endif /* _IPXE_BLOCKDEV_H */
-- 
2.9.0

On Sat, Aug 20, 2016 at 1:33 AM, james harvey <jamespharvey20 at gmail.com> wrote:
> It's been a long time since I've worked with interrupt hooks.  If I'm
> right that iPXE stays around forever, not sure if it might be possible
> to add functionality to the interrupt hook to signify that it's OK to
> logout from the SRP connection.
>
> How's it work with iscsi booting?  Do the TCP connections just not
> care about a second blasting the first away, unlike on InfiniBand?
>
> On Fri, Aug 19, 2016 at 11:56 PM, james harvey <jamespharvey20 at gmail.com> wrote:
>> This patch is incomplete.  Although it shouldn't hurt to apply, it
>> prepares for a functional change but does not quite get there, and
>> would need a bit of polishing.
>>
>> Problem - Running "dhcp; sanhook <args>; sanboot" propely initiates an
>> SRP connection, and starts the boot loader.  iPXE nevers performs an
>> SRP_I_LOGOUT, even though it's already defined in include/ipxe/srp.h.
>> After the kernel has booted and the sBFT parsed, attempting to start
>> an SRP connection confuses the target machine's kernel, causing SRP
>> commands to abort for about 10-30 seconds.  The only workaround I can
>> find once booted is to continually repeat attempting the SRP
>> connection, waiting a second, and retrying.  The error I get for 10-30
>> seconds is:
>>
>> =====
>> scsi host7: ib_srp: Got failed path rec status -22
>> scsi host7: ib_srp: Path record query failed
>> scsi host7: ib_srp: Connection 0/4 failed
>> scsi host7: ib_srp: Sending CM DREQ failed
>> =====
>>
>> After 10-30 seconds, the target kernel shows:
>>
>> =====
>> [  115.941418] ib_srpt Relogin - closed existing channel
>> 0x1d534b0003c902000002c903004b5275
>> [  115.941470] ib_srpt Received CM DREP message for ch
>> 0x1d534b0003c902000002c903004b5275-522.
>> =====
>>
>> And the new SRP connection is made.  I'm successfully able to make the
>> SRP connection, and successfully boot with the new root.
>>
>> With interesting timing, kernel 4.7 just got released in Arch linux 13
>> days ago.  And, in 4.7, ib_srpt was patched to convert to the generic
>> "RDMA READ/WRITE API" which never closes the existing channel and
>> allows a new connection, but just blocks a new connection forever.
>>
>> So the issue started with < kernel 4.7 as just a 10-30 second delay
>> during bootup, but now with >= 4.7 completely blocks booting.  (I'm
>> going to follow up with a kernel regression, but the best way to
>> handle this is to close the initial iPXE connection.)
>>
>> This patch implements the missing srp_initiator_logout(), and modifies
>> srpdev_close() to call it if srpdev->logged_in is set.
>>
>> However, in a successful "dhcp; sanhook <args>; sanboot" sequence,
>> srpdev_close() is never called.  (The (3) dbg_printf's I added in
>> srpdev_close, and (1) in ib_srp.c, never execute - only temporarily
>> using dbg_printf to avoid building with DEBUG to only see these debug
>> errors.)  The target kernel never gets an SRP_I_LOGOUT command.
>>
>> srpdev_close is only referenced in src/drivers/block/srp.c in these locations:
>> (1) In srpdev_deliver, within the label err.
>> (2) In srp_open(), within the labels err_scsi_open and err_login.
>> (3) In static struct interface_operation srpdev_socket_op[] as
>> INTF_OP ( intf_close, struct srp_device *, srpdev_close )
>> (4) In static struct interface_operation srpdev_scsi_op[] as
>> INTF_OP ( intf_close, struct srp_device *, srpdev_close )
>>
>> srpdev_socket_op is only referenced in srp.c within
>> static struct interface_descriptor srpdev_socket_desc as
>> INTF_DESC_PASSTHRU ( struct srp_device, socket, srpdev_socket_op, scsi )
>>
>> srpdev_scsi_op is only referenced in srp.c within
>> static struct interface_descriptor srpdev_scsi_desc as
>> INTF_DESC_PASSTHRU ( struct srp_device, scsi, srpdev_scsi_op, socket )
>>
>> srpdev_scsi_desc and srpdev_socket_desc are only referenced in
>> srp.c::srp_open, given to intf_init
>>
>> Looking in net/infiniband/ib_srp.c::ib_srp_close(), it does call
>> intf_shutdown( &ib_srp->cmrc, rc ) and intf_shutdown ( &ib_srp->srp,
>> rc ).
>>
>> At about that point, I started thinking that iPXE probably never knows
>> that it should fully exit, and that it's probably left in lowmem
>> forever.  Am I right here?  I'm not sure there's a point where iPXE
>> currently knows it can safely fully exit and disable the interrupt
>> hook.  So, I'm looking for guidance.
>>
>> Do we need to add to the sBFT specification, to include a memory
>> address of new function which would call srp_initiator_logout with its
>> struct srp_device *srpdev, if I'm correct that iPXE is basically
>> staying in low mem forever and never really exiting?  Not sure if I'm
>> understanding that correctly.
>>
>> Patch is below and attached.
>>
>> Submitted by: James P. Harvey <jamespharvey20 at gmail.com>
>>
>> ---
>>  src/drivers/block/srp.c     | 59 +++++++++++++++++++++++++++++++++++++++++++++
>>  src/net/infiniband/ib_srp.c |  1 +
>>  2 files changed, 60 insertions(+)
>>
>> diff --git a/src/drivers/block/srp.c b/src/drivers/block/srp.c
>> index 85c7aa8..8ab4a97 100644
>> --- a/src/drivers/block/srp.c
>> +++ b/src/drivers/block/srp.c
>> @@ -229,15 +229,33 @@ static void srpcmd_close ( struct srp_command
>> *srpcmd, int rc ) {
>>   * @v srpdev     SRP device
>>   * @v rc      Reason for close
>>   */
>> +static int srp_new_tag ( struct srp_device *srpdev );
>> +static int srp_initiator_logout ( struct srp_device *srpdev, uint32_t tag );
>>  static void srpdev_close ( struct srp_device *srpdev, int rc ) {
>>    struct srp_command *srpcmd;
>>    struct srp_command *tmp;
>>
>> +  dbg_printf ( "!srpdev_close()\n" );
>>    if ( rc != 0 ) {
>>       DBGC ( srpdev, "SRP %p closed: %s\n",
>>              srpdev, strerror ( rc ) );
>>    }
>>
>> +  /* If logged in, attempt logging out */
>> +  if ( srpdev->logged_in ) {
>> +     dbg_printf ( "!srpdev->logged_in true\n" );
>> +     int logout_rc;
>> +     int tag;
>> +
>> +     tag = srp_new_tag ( srpdev );
>> +     if ( tag < 0 )
>> +        DBGC ( srpdev, "SRP %p tag %08x invalid tag\n", srpdev, tag );
>> +     else if ( ( logout_rc = srp_initiator_logout ( srpdev, tag ) ) != 0 )
>> +        DBGC ( srpdev, "SRP %p tag %08x cannot logout\n", srpdev, tag );
>> +  } else {
>> +     dbg_printf ( "!srpdev->logged_in false\n" );
>> +  }
>> +
>>    /* Shut down interfaces */
>>    intf_shutdown ( &srpdev->socket, rc );
>>    intf_shutdown ( &srpdev->scsi, rc );
>> @@ -393,6 +411,47 @@ static int srp_login_rej ( struct srp_device *srpdev,
>>  }
>>
>>  /**
>> + * Transmit SRP initiator logout
>> + *
>> + * @v srpdev     SRP device
>> + * @v tag     Command tag
>> + * @ret rc    Return status code
>> + */
>> +static int srp_initiator_logout ( struct srp_device *srpdev, uint32_t tag ) {
>> +  struct io_buffer *iobuf;
>> +  struct srp_i_logout *i_logout;
>> +  int rc;
>> +
>> +  /* Allocate I/O buffer */
>> +  iobuf = xfer_alloc_iob ( &srpdev->socket, sizeof ( *i_logout ) );
>> +  if ( ! iobuf )
>> +     return -ENOMEM;
>> +
>> +  /* Construct initiator logout IU */
>> +  i_logout = iob_put ( iobuf, sizeof ( *i_logout ) );
>> +  memset ( i_logout, 0, sizeof ( *i_logout ) );
>> +  i_logout->type = SRP_I_LOGOUT;
>> +  i_logout->tag.dwords[0] = htonl ( SRP_TAG_MAGIC );
>> +  i_logout->tag.dwords[1] = htonl ( tag );
>> +
>> +  DBGC ( srpdev, "SRP %p tag %08x I_LOGOUT:\n", srpdev, tag );
>> +  DBGC_HDA ( srpdev, 0, iobuf->data, iob_len ( iobuf ) );
>> +
>> +  /* Send initiator logout IU */
>> +  if ( ( rc = xfer_deliver_iob ( &srpdev->socket, iobuf ) ) != 0 ) {
>> +     DBGC ( srpdev, "SRP %p tag %08x could not send I_LOGOUT: "
>> +            "%s\n", srpdev, tag, strerror ( rc ) );
>> +     return rc;
>> +  }
>> +
>> +  /* Mark as logged out */
>> +  srpdev->logged_in = 0;
>> +  DBGC ( srpdev, "SRP %p logged out\n", srpdev );
>> +
>> +  return 0;
>> +}
>> +
>> +/**
>>   * Transmit SRP SCSI command
>>   *
>>   * @v srpdev     SRP device
>> diff --git a/src/net/infiniband/ib_srp.c b/src/net/infiniband/ib_srp.c
>> index 904a99b..2c718ef 100644
>> --- a/src/net/infiniband/ib_srp.c
>> +++ b/src/net/infiniband/ib_srp.c
>> @@ -106,6 +106,7 @@ static void ib_srp_free ( struct refcnt *refcnt ) {
>>   * @v rc      Reason for close
>>   */
>>  static void ib_srp_close ( struct ib_srp_device *ib_srp, int rc ) {
>> +  dbg_printf ("ib_srp_close\n" );
>>    /* Shut down interfaces */
>>    intf_shutdown ( &ib_srp->cmrc, rc );
>>    intf_shutdown ( &ib_srp->srp, rc );
>> --
>> 2.9.0
-------------- next part --------------
A non-text attachment was scrubbed...
Name: add_eject_media.patch
Type: text/x-patch
Size: 3680 bytes
Desc: not available
URL: <http://lists.ipxe.org/pipermail/ipxe-devel/attachments/20160820/e7a0e894/attachment.bin>


More information about the ipxe-devel mailing list